author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-03 15:57:06 +0000
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-03 15:57:06 +0000
commit | 7543ae2869ff426fa1c32757df16abcc614d8801 (patch)
tree | cad1d5eb5616bad794a737454fd994769d92b04b
parent | 3c567665990e48d8945dfb6ca02584152aaf5e50 (diff)
parent | 38a36e289b9ebc40e3a2e1f07827612fedbba2e3 (diff)
download | scudo-android14-mainline-adbd-release.tar.gz
Snap for 10428683 from 38a36e289b9ebc40e3a2e1f07827612fedbba2e3 to mainline-adbd-release
Tags: aml_adb_341520010, aml_adb_341517070, aml_adb_340912530, aml_adb_340912350, aml_adb_340912200, aml_adb_340912000, android14-mainline-adbd-release
Change-Id: I8c80170dcd07c98233f6cef22b2567d960b7de1a
66 files changed, 4370 insertions, 797 deletions
diff --git a/Android.bp b/Android.bp index ab3fb0d0a34..43955e9aaac 100644 --- a/Android.bp +++ b/Android.bp @@ -90,6 +90,7 @@ cc_defaults { "-Werror=pointer-to-int-cast", "-Werror=int-to-pointer-cast", + "-Werror=thread-safety", "-Werror=type-limits", "-Werror", @@ -117,9 +118,12 @@ cc_defaults { "standalone/flags.cpp", "standalone/flags_parser.cpp", "standalone/linux.cpp", + "standalone/mem_map.cpp", "standalone/release.cpp", "standalone/report.cpp", + "standalone/rss_limit_checker.cpp", "standalone/string_utils.cpp", + "standalone/timing.cpp", "standalone/wrappers_c_bionic.cpp" ], arch: { @@ -131,6 +135,11 @@ cc_defaults { cflags: ["-mcrc"], srcs: ["standalone/crc32_hw.cpp"], }, + riscv64: { + // This is a temporary fix, and should be reverted after + // yieldProcessor supports riscv. + cflags: ["-Wno-unused-parameter"], + }, x86_64: { cflags: ["-msse4.2"], srcs: ["standalone/crc32_hw.cpp"], @@ -172,10 +181,40 @@ cc_library_static { cc_library_static { name: "libscudo_for_testing", defaults: ["libscudo_defaults"], + cflags: [ + "-DSCUDO_DEBUG", + ], +} + +cc_defaults { + name: "scudo_unit_tests_default", + static_libs: ["libscudo_for_testing"], + include_dirs: [ + "external/scudo/standalone", + "external/scudo/standalone/include", + ], + cflags: [ + "-fno-emulated-tls", + // In memtag_test.cpp, some tests are disabled by GTEST_SKIP() so that + // they won't be run. However, for those disabled tests, it may contain + // unreachable code paths which will mislead some compiler checks. Given + // this flag won't be impacted too much, disable it only in the test. + "-Wno-unreachable-code-loop-increment", + "-Wno-unused-parameter", + "-DSCUDO_DEBUG", + ], + target: { + bionic: { + header_libs: ["bionic_libc_platform_headers"], + }, + }, + test_suites: ["general-tests"], + bootstrap: true, } cc_test { name: "scudo_unit_tests", + defaults: ["scudo_unit_tests_default"], // Temporarily disabled on host due to a 15-20s per-test timeout, // which is currently exceeded by ScudoCombinedTest.BasicCombined. 
host_supported: false, @@ -188,6 +227,7 @@ cc_test { "standalone/tests/flags_test.cpp", "standalone/tests/list_test.cpp", "standalone/tests/map_test.cpp", + "standalone/tests/memtag_test.cpp", "standalone/tests/mutex_test.cpp", "standalone/tests/primary_test.cpp", "standalone/tests/quarantine_test.cpp", @@ -198,25 +238,20 @@ cc_test { "standalone/tests/size_class_map_test.cpp", "standalone/tests/stats_test.cpp", "standalone/tests/strings_test.cpp", + "standalone/tests/timing_test.cpp", "standalone/tests/tsd_test.cpp", "standalone/tests/vector_test.cpp", ], - static_libs: ["libscudo_for_testing"], - include_dirs: [ - "external/scudo/standalone", - "external/scudo/standalone/include", - ], - cflags: [ - "-Wno-unused-parameter", - "-fno-emulated-tls", +} + +cc_test { + name: "scudo_hooks_unit_tests", + defaults: ["scudo_unit_tests_default"], + host_supported: true, + srcs: [ + "standalone/tests/scudo_hooks_test.cpp", + "standalone/tests/scudo_unit_test_main.cpp", ], - target: { - bionic: { - header_libs: ["bionic_libc_platform_headers"], - }, - }, - test_suites: ["general-tests"], - bootstrap: true, } cc_fuzz { @@ -1,3 +1,3 @@ cferris@google.com enh@google.com -kostyak@google.com +chiahungduan@google.com diff --git a/TEST_MAPPING b/TEST_MAPPING index a8f41fbb9ed..32f13f0954f 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -4,6 +4,9 @@ "name": "scudo_unit_tests" }, { + "name": "scudo_hooks_unit_tests" + }, + { "name": "memunreachable_unit_test" }, { diff --git a/standalone/allocator_config.h b/standalone/allocator_config.h index e6f46b511db..d06f6dfe4e0 100644 --- a/standalone/allocator_config.h +++ b/standalone/allocator_config.h @@ -26,7 +26,7 @@ namespace scudo { // allocator. // // struct ExampleConfig { -// // SizeClasMmap to use with the Primary. +// // SizeClassMap to use with the Primary. // using SizeClassMap = DefaultSizeClassMap; // // Indicates possible support for Memory Tagging. // static const bool MaySupportMemoryTagging = false; @@ -34,6 +34,14 @@ namespace scudo { // typedef SizeClassAllocator64<ExampleConfig> Primary; // // Log2 of the size of a size class region, as used by the Primary. // static const uptr PrimaryRegionSizeLog = 30U; +// // Log2 of the size of block group, as used by the Primary. Each group +// // contains a range of memory addresses, blocks in the range will belong to +// // the same group. In general, single region may have 1 or 2MB group size. +// // Multiple regions will have the group size equal to the region size +// // because the region size is usually smaller than 1 MB. +// // Smaller value gives fine-grained control of memory usage but the trade +// // off is that it may take longer time of deallocation. +// static const uptr PrimaryGroupSizeLog = 20U; // // Defines the type and scale of a compact pointer. A compact pointer can // // be understood as the offset of a pointer within the region it belongs // // to, in increments of a power-of-2 scale. 
@@ -65,6 +73,7 @@ struct DefaultConfig { #if SCUDO_CAN_USE_PRIMARY64 typedef SizeClassAllocator64<DefaultConfig> Primary; static const uptr PrimaryRegionSizeLog = 32U; + static const uptr PrimaryGroupSizeLog = 21U; typedef uptr PrimaryCompactPtrT; static const uptr PrimaryCompactPtrScale = 0; static const bool PrimaryEnableRandomOffset = true; @@ -72,6 +81,7 @@ struct DefaultConfig { #else typedef SizeClassAllocator32<DefaultConfig> Primary; static const uptr PrimaryRegionSizeLog = 19U; + static const uptr PrimaryGroupSizeLog = 19U; typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; @@ -96,11 +106,13 @@ struct AndroidConfig { static const uptr PrimaryRegionSizeLog = 28U; typedef u32 PrimaryCompactPtrT; static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr PrimaryGroupSizeLog = 20U; static const bool PrimaryEnableRandomOffset = true; static const uptr PrimaryMapSizeIncrement = 1UL << 18; #else typedef SizeClassAllocator32<AndroidConfig> Primary; static const uptr PrimaryRegionSizeLog = 18U; + static const uptr PrimaryGroupSizeLog = 18U; typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; @@ -127,11 +139,13 @@ struct AndroidSvelteConfig { static const uptr PrimaryRegionSizeLog = 27U; typedef u32 PrimaryCompactPtrT; static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr PrimaryGroupSizeLog = 18U; static const bool PrimaryEnableRandomOffset = true; static const uptr PrimaryMapSizeIncrement = 1UL << 18; #else typedef SizeClassAllocator32<AndroidSvelteConfig> Primary; static const uptr PrimaryRegionSizeLog = 16U; + static const uptr PrimaryGroupSizeLog = 16U; typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; @@ -155,7 +169,14 @@ struct FuchsiaConfig { static const bool MaySupportMemoryTagging = false; typedef SizeClassAllocator64<FuchsiaConfig> Primary; +// Support 39-bit VMA for riscv-64 +#if SCUDO_RISCV64 + static const uptr PrimaryRegionSizeLog = 28U; + static const uptr PrimaryGroupSizeLog = 19U; +#else static const uptr PrimaryRegionSizeLog = 30U; + static const uptr PrimaryGroupSizeLog = 21U; +#endif typedef u32 PrimaryCompactPtrT; static const bool PrimaryEnableRandomOffset = true; static const uptr PrimaryMapSizeIncrement = 1UL << 18; @@ -175,6 +196,7 @@ struct TrustyConfig { typedef SizeClassAllocator64<TrustyConfig> Primary; // Some apps have 1 page of heap total so small regions are necessary. static const uptr PrimaryRegionSizeLog = 10U; + static const uptr PrimaryGroupSizeLog = 10U; typedef u32 PrimaryCompactPtrT; static const bool PrimaryEnableRandomOffset = false; // Trusty is extremely memory-constrained so minimally round up map calls. 
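The PrimaryGroupSizeLog field introduced above is the log2 of the address window covered by one block group, so each config trades finer-grained release accounting against longer deallocation passes. A minimal sketch of how the new field composes with the existing region-size field; ExampleGroupConfig and the derived constants below are hypothetical and for illustration only, not part of this change:

#include <cstdint>

// Hypothetical config for illustration; real configs live in
// allocator_config.h and are consumed by SizeClassAllocator64/32.
struct ExampleGroupConfig {
  static const uintptr_t PrimaryRegionSizeLog = 30U; // 1 GiB region per size class
  static const uintptr_t PrimaryGroupSizeLog = 20U;  // 1 MiB block groups
};

// A region is carved into (RegionSize / GroupSize) groups; conceptually, a
// free block at address Ptr belongs to the group selected by
// (Ptr >> PrimaryGroupSizeLog).
constexpr uintptr_t RegionSize =
    uintptr_t{1} << ExampleGroupConfig::PrimaryRegionSizeLog;
constexpr uintptr_t GroupSize =
    uintptr_t{1} << ExampleGroupConfig::PrimaryGroupSizeLog;
static_assert(GroupSize <= RegionSize,
              "a group never spans more than the region that contains it");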
diff --git a/standalone/checksum.h b/standalone/checksum.h index 0f787ce2b5c..f8eda81fd91 100644 --- a/standalone/checksum.h +++ b/standalone/checksum.h @@ -20,7 +20,8 @@ #if defined(__CRC32__) // NB: clang has <crc32intrin.h> but GCC does not #include <smmintrin.h> -#define CRC32_INTRINSIC FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di) +#define CRC32_INTRINSIC \ + FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di) #elif defined(__SSE4_2__) #include <smmintrin.h> #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) diff --git a/standalone/chunk.h b/standalone/chunk.h index 0581420dfc9..32874a8df64 100644 --- a/standalone/chunk.h +++ b/standalone/chunk.h @@ -42,7 +42,8 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { Checksum = computeBSDChecksum(Checksum, Array[I]); return Checksum; } -#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || + // defined(__ARM_FEATURE_CRC32) } namespace Chunk { @@ -84,7 +85,7 @@ constexpr uptr OffsetMask = (1UL << 16) - 1; constexpr uptr ChecksumMask = (1UL << 16) - 1; constexpr uptr getHeaderSize() { - return roundUpTo(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); + return roundUp(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); } inline AtomicPackedHeader *getAtomicHeader(void *Ptr) { diff --git a/standalone/combined.h b/standalone/combined.h index 365720d4a5f..006605659bf 100644 --- a/standalone/combined.h +++ b/standalone/combined.h @@ -18,6 +18,7 @@ #include "options.h" #include "quarantine.h" #include "report.h" +#include "rss_limit_checker.h" #include "secondary.h" #include "stack_depot.h" #include "string_utils.h" @@ -147,6 +148,9 @@ public: initFlags(); reportUnrecognizedFlags(); + RssChecker.init(scudo::getFlags()->soft_rss_limit_mb, + scudo::getFlags()->hard_rss_limit_mb); + // Store some flags locally. if (getFlags()->may_return_null) Primary.Options.set(OptionBit::MayReturnNull); @@ -173,6 +177,8 @@ public: Quarantine.init( static_cast<uptr>(getFlags()->quarantine_size_kb << 10), static_cast<uptr>(getFlags()->thread_local_quarantine_size_kb << 10)); + + initRingBuffer(); } // Initialize the embedded GWP-ASan instance. Requires the main allocator to @@ -185,6 +191,7 @@ public: getFlags()->GWP_ASAN_MaxSimultaneousAllocations; Opt.SampleRate = getFlags()->GWP_ASAN_SampleRate; Opt.InstallSignalHandlers = getFlags()->GWP_ASAN_InstallSignalHandlers; + Opt.Recoverable = getFlags()->GWP_ASAN_Recoverable; // Embedded GWP-ASan is locked through the Scudo atfork handler (via // Allocator::disable calling GWPASan.disable). Disable GWP-ASan's atfork // handler. @@ -196,7 +203,8 @@ public: gwp_asan::segv_handler::installSignalHandlers( &GuardedAlloc, Printf, gwp_asan::backtrace::getPrintBacktraceFunction(), - gwp_asan::backtrace::getSegvBacktraceFunction()); + gwp_asan::backtrace::getSegvBacktraceFunction(), + Opt.Recoverable); GuardedAllocSlotSize = GuardedAlloc.getAllocatorState()->maximumAllocationSize(); @@ -231,6 +239,7 @@ public: } TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } + QuarantineT *getQuarantine() { return &Quarantine; } // The Cache must be provided zero-initialized. void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); } @@ -241,11 +250,18 @@ public: // - unlinking the local stats from the global ones (destroying the cache does // the last two items). 
void commitBack(TSD<ThisT> *TSD) { - Quarantine.drain(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache)); - TSD->Cache.destroy(&Stats); + Quarantine.drain(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().destroy(&Stats); } + void drainCache(TSD<ThisT> *TSD) { + Quarantine.drainAndRecycle(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().drain(); + } + void drainCaches() { TSDRegistry.drainCaches(this); } + ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { if (!allocatorSupportsMemoryTagging<Params>()) return Ptr; @@ -297,7 +313,7 @@ public: NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, - bool ZeroContents = false) { + bool ZeroContents = false) NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); const Options Options = Primary.Options.load(); @@ -334,7 +350,7 @@ public: // to be sure that there will be an address in the block that will satisfy // the alignment. const uptr NeededSize = - roundUpTo(Size, MinAlignment) + + roundUp(Size, MinAlignment) + ((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize()); // Takes care of extravagantly large sizes as well as integer overflows. @@ -346,6 +362,19 @@ public: } DCHECK_LE(Size, NeededSize); + switch (RssChecker.getRssLimitExceeded()) { + case RssLimitChecker::Neither: + break; + case RssLimitChecker::Soft: + if (Options.get(OptionBit::MayReturnNull)) + return nullptr; + reportSoftRSSLimit(RssChecker.getSoftRssLimit()); + break; + case RssLimitChecker::Hard: + reportHardRSSLimit(RssChecker.getHardRssLimit()); + break; + } + void *Block = nullptr; uptr ClassId = 0; uptr SecondaryBlockEnd = 0; @@ -354,23 +383,24 @@ public: DCHECK_NE(ClassId, 0U); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Block = TSD->Cache.allocate(ClassId); + Block = TSD->getCache().allocate(ClassId); // If the allocation failed, the most likely reason with a 32-bit primary // is the region being full. In that event, retry in each successively // larger class until it fits. If it fails to fit in the largest class, // fallback to the Secondary. if (UNLIKELY(!Block)) { while (ClassId < SizeClassMap::LargestClassId && !Block) - Block = TSD->Cache.allocate(++ClassId); + Block = TSD->getCache().allocate(++ClassId); if (!Block) ClassId = 0; } if (UnlockRequired) TSD->unlock(); } - if (UNLIKELY(ClassId == 0)) + if (UNLIKELY(ClassId == 0)) { Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, FillContents); + } if (UNLIKELY(!Block)) { if (Options.get(OptionBit::MayReturnNull)) @@ -380,7 +410,7 @@ public: const uptr BlockUptr = reinterpret_cast<uptr>(Block); const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize(); - const uptr UserPtr = roundUpTo(UnalignedUserPtr, Alignment); + const uptr UserPtr = roundUp(UnalignedUserPtr, Alignment); void *Ptr = reinterpret_cast<void *>(UserPtr); void *TaggedPtr = Ptr; @@ -439,7 +469,7 @@ public: PrevUserPtr == UserPtr && (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; - const uptr NextPage = roundUpTo(TaggedUserPtr, getPageSizeCached()); + const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached()); if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) PrevEnd = NextPage; TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr); @@ -452,8 +482,8 @@ public: // was freed, it would not have been retagged and thus zeroed, and // therefore it needs to be zeroed now. 
memset(TaggedPtr, 0, - Min(Size, roundUpTo(PrevEnd - TaggedUserPtr, - archMemoryTagGranuleSize()))); + Min(Size, roundUp(PrevEnd - TaggedUserPtr, + archMemoryTagGranuleSize()))); } else if (Size) { // Clear any stack metadata that may have previously been stored in // the chunk data. @@ -666,6 +696,8 @@ public: void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (LIKELY(NewPtr)) { memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); + if (UNLIKELY(&__scudo_deallocate_hook)) + __scudo_deallocate_hook(OldTaggedPtr); quarantineOrDeallocateChunk(Options, OldTaggedPtr, &OldHeader, OldSize); } return NewPtr; @@ -674,7 +706,7 @@ public: // TODO(kostyak): disable() is currently best-effort. There are some small // windows of time when an allocation could still succeed after // this function finishes. We will revisit that later. - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); #ifdef GWP_ASAN_HOOKS GuardedAlloc.disable(); @@ -686,7 +718,7 @@ public: Secondary.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); Secondary.enable(); Primary.enable(); @@ -705,9 +737,7 @@ public: // sizing purposes. uptr getStats(char *Buffer, uptr Size) { ScopedString Str; - disable(); const uptr Length = getStats(&Str) + 1; - enable(); if (Length < Size) Size = Length; if (Buffer && Size) { @@ -719,15 +749,15 @@ public: void printStats() { ScopedString Str; - disable(); getStats(&Str); - enable(); Str.output(); } - void releaseToOS() { + void releaseToOS(ReleaseToOS ReleaseType) { initThreadMaybe(); - Primary.releaseToOS(); + if (ReleaseType == ReleaseToOS::ForceAll) + drainCaches(); + Primary.releaseToOS(ReleaseType); Secondary.releaseToOS(); } @@ -856,6 +886,13 @@ public: Header.State == Chunk::State::Allocated; } + void setRssLimitsTestOnly(int SoftRssLimitMb, int HardRssLimitMb, + bool MayReturnNull) { + RssChecker.init(SoftRssLimitMb, HardRssLimitMb); + if (MayReturnNull) + Primary.Options.set(OptionBit::MayReturnNull); + } + bool useMemoryTaggingTestOnly() const { return useMemoryTagging<Params>(Primary.Options.load()); } @@ -875,6 +912,10 @@ public: void setTrackAllocationStacks(bool Track) { initThreadMaybe(); + if (getFlags()->allocation_ring_buffer_size == 0) { + DCHECK(!Primary.Options.load().get(OptionBit::TrackAllocationStacks)); + return; + } if (Track) Primary.Options.set(OptionBit::TrackAllocationStacks); else @@ -906,11 +947,29 @@ public: return PrimaryT::getRegionInfoArraySize(); } - const char *getRingBufferAddress() const { - return reinterpret_cast<const char *>(&RingBuffer); + const char *getRingBufferAddress() { + initThreadMaybe(); + return RawRingBuffer; + } + + uptr getRingBufferSize() { + initThreadMaybe(); + auto *RingBuffer = getRingBuffer(); + return RingBuffer ? ringBufferSizeInBytes(RingBuffer->Size) : 0; } - static uptr getRingBufferSize() { return sizeof(RingBuffer); } + static bool setRingBufferSizeForBuffer(char *Buffer, size_t Size) { + // Need at least one entry. 
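// NOTE (illustrative comment, not part of this change): the raw buffer layout
// is an AllocationRingBuffer header immediately followed by RingBuffer->Size
// Entry records, so a buffer of `Size` bytes holds
// (Size - sizeof(AllocationRingBuffer)) / sizeof(Entry) entries; that is
// exactly what the assignment below computes, and what ringBufferSizeInBytes()
// later in this file inverts.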
+ if (Size < sizeof(AllocationRingBuffer) + + sizeof(typename AllocationRingBuffer::Entry)) { + return false; + } + AllocationRingBuffer *RingBuffer = + reinterpret_cast<AllocationRingBuffer *>(Buffer); + RingBuffer->Size = (Size - sizeof(AllocationRingBuffer)) / + sizeof(typename AllocationRingBuffer::Entry); + return true; + } static const uptr MaxTraceSize = 64; @@ -994,6 +1053,7 @@ private: QuarantineT Quarantine; TSDRegistryT TSDRegistry; pthread_once_t PostInitNonce = PTHREAD_ONCE_INIT; + RssLimitChecker RssChecker; #ifdef GWP_ASAN_HOOKS gwp_asan::GuardedPoolAllocator GuardedAlloc; @@ -1013,14 +1073,13 @@ private: }; atomic_uptr Pos; -#ifdef SCUDO_FUZZ - static const uptr NumEntries = 2; -#else - static const uptr NumEntries = 32768; -#endif - Entry Entries[NumEntries]; + u32 Size; + // An array of Size (at least one) elements of type Entry is immediately + // following to this struct. }; - AllocationRingBuffer RingBuffer = {}; + // Pointer to memory mapped area starting with AllocationRingBuffer struct, + // and immediately followed by Size elements of type Entry. + char *RawRingBuffer = {}; // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { @@ -1076,7 +1135,8 @@ private: } void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, - Chunk::UnpackedHeader *Header, uptr Size) { + Chunk::UnpackedHeader *Header, + uptr Size) NO_THREAD_SAFETY_ANALYSIS { void *Ptr = getHeaderTaggedPointer(TaggedPtr); Chunk::UnpackedHeader NewHeader = *Header; // If the quarantine is disabled, the actual size of a chunk is 0 or larger @@ -1118,7 +1178,7 @@ private: if (LIKELY(ClassId)) { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - TSD->Cache.deallocate(ClassId, BlockBegin); + TSD->getCache().deallocate(ClassId, BlockBegin); if (UnlockRequired) TSD->unlock(); } else { @@ -1130,8 +1190,8 @@ private: } else { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Quarantine.put(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache), Ptr, Size); + Quarantine.put(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache()), Ptr, Size); if (UnlockRequired) TSD->unlock(); } @@ -1191,15 +1251,15 @@ private: void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr NewSize, uptr BlockEnd) { - uptr RoundOldPtr = roundUpTo(OldPtr, archMemoryTagGranuleSize()); + uptr RoundOldPtr = roundUp(OldPtr, archMemoryTagGranuleSize()); uptr RoundNewPtr; if (RoundOldPtr >= NewPtr) { // If the allocation is shrinking we just need to set the tag past the end // of the allocation to 0. See explanation in storeEndMarker() above. - RoundNewPtr = roundUpTo(NewPtr, archMemoryTagGranuleSize()); + RoundNewPtr = roundUp(NewPtr, archMemoryTagGranuleSize()); } else { // Set the memory tag of the region - // [RoundOldPtr, roundUpTo(NewPtr, archMemoryTagGranuleSize())) + // [RoundOldPtr, roundUp(NewPtr, archMemoryTagGranuleSize())) // to the pointer tag stored in OldPtr. 
RoundNewPtr = storeTags(RoundOldPtr, NewPtr); } @@ -1217,9 +1277,9 @@ private: void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid, uptr AllocationSize, u32 DeallocationTrace, u32 DeallocationTid) { - uptr Pos = atomic_fetch_add(&RingBuffer.Pos, 1, memory_order_relaxed); + uptr Pos = atomic_fetch_add(&getRingBuffer()->Pos, 1, memory_order_relaxed); typename AllocationRingBuffer::Entry *Entry = - &RingBuffer.Entries[Pos % AllocationRingBuffer::NumEntries]; + getRingBufferEntry(RawRingBuffer, Pos % getRingBuffer()->Size); // First invalidate our entry so that we don't attempt to interpret a // partially written state in getSecondaryErrorInfo(). The fences below @@ -1363,12 +1423,14 @@ private: const char *RingBufferPtr) { auto *RingBuffer = reinterpret_cast<const AllocationRingBuffer *>(RingBufferPtr); + if (!RingBuffer || RingBuffer->Size == 0) + return; uptr Pos = atomic_load_relaxed(&RingBuffer->Pos); - for (uptr I = Pos - 1; I != Pos - 1 - AllocationRingBuffer::NumEntries && - NextErrorReport != NumErrorReports; + for (uptr I = Pos - 1; + I != Pos - 1 - RingBuffer->Size && NextErrorReport != NumErrorReports; --I) { - auto *Entry = &RingBuffer->Entries[I % AllocationRingBuffer::NumEntries]; + auto *Entry = getRingBufferEntry(RingBufferPtr, I % RingBuffer->Size); uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr); if (!EntryPtr) continue; @@ -1431,8 +1493,49 @@ private: Primary.getStats(Str); Secondary.getStats(Str); Quarantine.getStats(Str); + TSDRegistry.getStats(Str); return Str->length(); } + + static typename AllocationRingBuffer::Entry * + getRingBufferEntry(char *RawRingBuffer, uptr N) { + return &reinterpret_cast<typename AllocationRingBuffer::Entry *>( + &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + } + static const typename AllocationRingBuffer::Entry * + getRingBufferEntry(const char *RawRingBuffer, uptr N) { + return &reinterpret_cast<const typename AllocationRingBuffer::Entry *>( + &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + } + + void initRingBuffer() { + u32 AllocationRingBufferSize = + static_cast<u32>(getFlags()->allocation_ring_buffer_size); + if (AllocationRingBufferSize < 1) + return; + MapPlatformData Data = {}; + RawRingBuffer = static_cast<char *>( + map(/*Addr=*/nullptr, + roundUp(ringBufferSizeInBytes(AllocationRingBufferSize), + getPageSizeCached()), + "AllocatorRingBuffer", /*Flags=*/0, &Data)); + auto *RingBuffer = reinterpret_cast<AllocationRingBuffer *>(RawRingBuffer); + RingBuffer->Size = AllocationRingBufferSize; + static_assert(sizeof(AllocationRingBuffer) % + alignof(typename AllocationRingBuffer::Entry) == + 0, + "invalid alignment"); + } + + static constexpr size_t ringBufferSizeInBytes(u32 AllocationRingBufferSize) { + return sizeof(AllocationRingBuffer) + + AllocationRingBufferSize * + sizeof(typename AllocationRingBuffer::Entry); + } + + inline AllocationRingBuffer *getRingBuffer() { + return reinterpret_cast<AllocationRingBuffer *>(RawRingBuffer); + } }; } // namespace scudo diff --git a/standalone/common.cpp b/standalone/common.cpp index 666f95400c7..9f14faeef28 100644 --- a/standalone/common.cpp +++ b/standalone/common.cpp @@ -35,4 +35,8 @@ void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { die(); } +#if !SCUDO_LINUX +uptr GetRSS() { return 0; } +#endif + } // namespace scudo diff --git a/standalone/common.h b/standalone/common.h index bc3dfec6dbb..82e6cf4aee6 100644 --- a/standalone/common.h +++ b/standalone/common.h @@ -27,17 +27,31 @@ template <class Dest, class Source> inline Dest bit_cast(const Source &S) { 
return D; } -inline constexpr uptr roundUpTo(uptr X, uptr Boundary) { +inline constexpr bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } + +inline constexpr uptr roundUp(uptr X, uptr Boundary) { + DCHECK(isPowerOfTwo(Boundary)); return (X + Boundary - 1) & ~(Boundary - 1); } +inline constexpr uptr roundUpSlow(uptr X, uptr Boundary) { + return ((X + Boundary - 1) / Boundary) * Boundary; +} -inline constexpr uptr roundDownTo(uptr X, uptr Boundary) { +inline constexpr uptr roundDown(uptr X, uptr Boundary) { + DCHECK(isPowerOfTwo(Boundary)); return X & ~(Boundary - 1); } +inline constexpr uptr roundDownSlow(uptr X, uptr Boundary) { + return (X / Boundary) * Boundary; +} inline constexpr bool isAligned(uptr X, uptr Alignment) { + DCHECK(isPowerOfTwo(Alignment)); return (X & (Alignment - 1)) == 0; } +inline constexpr bool isAlignedSlow(uptr X, uptr Alignment) { + return X % Alignment == 0; +} template <class T> constexpr T Min(T A, T B) { return A < B ? A : B; } @@ -49,14 +63,12 @@ template <class T> void Swap(T &A, T &B) { B = Tmp; } -inline bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } - inline uptr getMostSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return SCUDO_WORDSIZE - 1U - static_cast<uptr>(__builtin_clzl(X)); } -inline uptr roundUpToPowerOfTwo(uptr Size) { +inline uptr roundUpPowerOfTwo(uptr Size) { DCHECK(Size); if (isPowerOfTwo(Size)) return Size; @@ -101,7 +113,7 @@ template <typename T> inline void shuffle(T *A, u32 N, u32 *RandState) { // Hardware specific inlinable functions. -inline void yieldProcessor(u8 Count) { +inline void yieldProcessor(UNUSED u8 Count) { #if defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("" ::: "memory"); for (u8 I = 0; I < Count; I++) @@ -132,7 +144,12 @@ u32 getNumberOfCPUs(); const char *getEnv(const char *Name); +uptr GetRSS(); + u64 getMonotonicTime(); +// Gets the time faster but with less accuracy. Can call getMonotonicTime +// if no fast version is available. +u64 getMonotonicTimeFast(); u32 getThreadID(); @@ -147,6 +164,7 @@ bool getRandom(void *Buffer, uptr Length, bool Blocking = false); #define MAP_NOACCESS (1U << 1) #define MAP_RESIZABLE (1U << 2) #define MAP_MEMTAG (1U << 3) +#define MAP_PRECOMMIT (1U << 4) // Our platform memory mapping use is restricted to 3 scenarios: // - reserve memory at a random address (MAP_NOACCESS); @@ -197,6 +215,13 @@ enum class Option : u8 { MaxTSDsCount, // Number of usable TSDs for the shared registry. }; +enum class ReleaseToOS : u8 { + Normal, // Follow the normal rules for releasing pages to the OS + Force, // Force release pages to the OS, but avoid cases that take too long. + ForceAll, // Force release every page possible regardless of how long it will + // take. 
+}; + constexpr unsigned char PatternFillByte = 0xAB; enum FillContentsMode { diff --git a/standalone/crc32_hw.cpp b/standalone/crc32_hw.cpp index d13c615498f..73f2ae000c6 100644 --- a/standalone/crc32_hw.cpp +++ b/standalone/crc32_hw.cpp @@ -14,6 +14,7 @@ namespace scudo { u32 computeHardwareCRC32(u32 Crc, uptr Data) { return static_cast<u32>(CRC32_INTRINSIC(Crc, Data)); } -#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || + // defined(__ARM_FEATURE_CRC32) } // namespace scudo diff --git a/standalone/flags.inc b/standalone/flags.inc index 690d889b8ce..c1f153bafdd 100644 --- a/standalone/flags.inc +++ b/standalone/flags.inc @@ -45,3 +45,15 @@ SCUDO_FLAG(bool, may_return_null, true, SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? INT32_MIN : 5000, "Interval (in milliseconds) at which to attempt release of unused " "memory to the OS. Negative values disable the feature.") + +SCUDO_FLAG(int, hard_rss_limit_mb, 0, + "Hard RSS Limit in Mb. If non-zero, once the limit is achieved, " + "abort the process") + +SCUDO_FLAG(int, soft_rss_limit_mb, 0, + "Soft RSS Limit in Mb. If non-zero, once the limit is reached, all " + "subsequent calls will fail or return NULL until the RSS goes below " + "the soft limit") + +SCUDO_FLAG(int, allocation_ring_buffer_size, 32768, + "Entries to keep in the allocation ring buffer for scudo.") diff --git a/standalone/fuchsia.cpp b/standalone/fuchsia.cpp index 3b473bc9e22..0788c4198e5 100644 --- a/standalone/fuchsia.cpp +++ b/standalone/fuchsia.cpp @@ -17,7 +17,9 @@ #include <lib/sync/mutex.h> // for sync_mutex_t #include <stdlib.h> // for getenv() #include <zircon/compiler.h> +#include <zircon/process.h> #include <zircon/sanitizer.h> +#include <zircon/status.h> #include <zircon/syscalls.h> namespace scudo { @@ -30,6 +32,16 @@ void NORETURN die() { __builtin_trap(); } // with ZX_HANDLE_INVALID. static_assert(ZX_HANDLE_INVALID == 0, ""); +static void NORETURN dieOnError(zx_status_t Status, const char *FnName, + uptr Size) { + char Error[128]; + formatString(Error, sizeof(Error), + "SCUDO ERROR: %s failed with size %zuKB (%s)", FnName, + Size >> 10, zx_status_get_string(Status)); + outputRaw(Error); + die(); +} + static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { // Only scenario so far. DCHECK(Data); @@ -41,7 +53,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { Size, &Data->Vmar, &Data->VmarBase); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_allocate", Size); return nullptr; } return reinterpret_cast<void *>(Data->VmarBase); @@ -56,8 +68,9 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, if (Flags & MAP_NOACCESS) return allocateVmar(Size, Data, AllowNoMem); - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); - CHECK_NE(Vmar, ZX_HANDLE_INVALID); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? Data->Vmar + : _zx_vmar_root_self(); zx_status_t Status; zx_handle_t Vmo; @@ -71,7 +84,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_set_size(Vmo, VmoSize + Size); if (Status != ZX_OK) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? 
Size : 0); + dieOnError(Status, "zx_vmo_set_size", VmoSize + Size); return nullptr; } } else { @@ -79,7 +92,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_create(Size, ZX_VMO_RESIZABLE, &Vmo); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmo_create", Size); return nullptr; } _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, strlen(Name)); @@ -88,11 +101,24 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, uintptr_t P; zx_vm_option_t MapFlags = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_ALLOW_FAULTS; + if (Addr) + DCHECK(Data); const uint64_t Offset = Addr ? reinterpret_cast<uintptr_t>(Addr) - Data->VmarBase : 0; if (Offset) MapFlags |= ZX_VM_SPECIFIC; Status = _zx_vmar_map(Vmar, MapFlags, Offset, Vmo, VmoSize, Size, &P); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmar_map", Size); + return nullptr; + } + + if (Flags & MAP_PRECOMMIT) { + Status = _zx_vmar_op_range(Vmar, ZX_VMAR_OP_COMMIT, P, Size, + /*buffer=*/nullptr, /*buffer_size=*/0); + } + // No need to track the Vmo if we don't intend on resizing it. Close it. if (Flags & MAP_RESIZABLE) { DCHECK(Data); @@ -105,9 +131,10 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, } if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_op_range", Size); return nullptr; } + if (Data) Data->VmoSize += Size; @@ -123,11 +150,13 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { CHECK_EQ(_zx_vmar_destroy(Vmar), ZX_OK); CHECK_EQ(_zx_handle_close(Vmar), ZX_OK); } else { - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? Data->Vmar + : _zx_vmar_root_self(); const zx_status_t Status = _zx_vmar_unmap(Vmar, reinterpret_cast<uintptr_t>(Addr), Size); if (UNLIKELY(Status != ZX_OK)) - dieOnMapUnmapError(); + dieOnError(Status, "zx_vmar_unmap", Size); } if (Data) { if (Data->Vmo != ZX_HANDLE_INVALID) @@ -142,12 +171,15 @@ void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, (Flags & MAP_NOACCESS) ? 0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE); DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); - if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK) - dieOnMapUnmapError(); + const zx_status_t Status = _zx_vmar_protect(Data->Vmar, Prot, Addr, Size); + if (Status != ZX_OK) + dieOnError(Status, "zx_vmar_protect", Size); } void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data) { + // TODO: DCHECK the BaseAddress is consistent with the data in + // MapPlatformData. 
DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); DCHECK_NE(Data->Vmo, ZX_HANDLE_INVALID); @@ -177,7 +209,10 @@ void HybridMutex::unlock() __TA_NO_THREAD_SAFETY_ANALYSIS { sync_mutex_unlock(&M); } +void HybridMutex::assertHeldImpl() __TA_NO_THREAD_SAFETY_ANALYSIS {} + u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } +u64 getMonotonicTimeFast() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } diff --git a/standalone/fuchsia.h b/standalone/fuchsia.h index d6993f89214..c1dfd7638ec 100644 --- a/standalone/fuchsia.h +++ b/standalone/fuchsia.h @@ -13,7 +13,8 @@ #if SCUDO_FUCHSIA -#include <zircon/process.h> +#include <stdint.h> +#include <zircon/types.h> namespace scudo { diff --git a/standalone/fuzz/get_error_info_fuzzer.cpp b/standalone/fuzz/get_error_info_fuzzer.cpp index 078e44b0dfc..74456450a47 100644 --- a/standalone/fuzz/get_error_info_fuzzer.cpp +++ b/standalone/fuzz/get_error_info_fuzzer.cpp @@ -46,15 +46,14 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { } std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString(); - std::vector<char> RingBuffer(AllocatorT::getRingBufferSize(), 0); - for (size_t i = 0; i < RingBufferBytes.length() && i < RingBuffer.size(); - ++i) { - RingBuffer[i] = RingBufferBytes[i]; - } + // RingBuffer is too short. + if (!AllocatorT::setRingBufferSizeForBuffer(RingBufferBytes.data(), + RingBufferBytes.size())) + return 0; scudo_error_info ErrorInfo; AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), - RegionInfo.data(), RingBuffer.data(), Memory, + RegionInfo.data(), RingBufferBytes.data(), Memory, MemoryTags, MemoryAddr, MemorySize); return 0; } diff --git a/standalone/include/scudo/interface.h b/standalone/include/scudo/interface.h index 9b9a84623c5..3c083ed7f9d 100644 --- a/standalone/include/scudo/interface.h +++ b/standalone/include/scudo/interface.h @@ -14,7 +14,7 @@ extern "C" { -__attribute__((weak)) const char *__scudo_default_options(); +__attribute__((weak)) const char *__scudo_default_options(void); // Post-allocation & pre-deallocation hooks. // They must be thread-safe and not use heap related functions. @@ -101,14 +101,14 @@ struct scudo_error_info { struct scudo_error_report reports[3]; }; -const char *__scudo_get_stack_depot_addr(); -size_t __scudo_get_stack_depot_size(); +const char *__scudo_get_stack_depot_addr(void); +size_t __scudo_get_stack_depot_size(void); -const char *__scudo_get_region_info_addr(); -size_t __scudo_get_region_info_size(); +const char *__scudo_get_region_info_addr(void); +size_t __scudo_get_region_info_size(void); -const char *__scudo_get_ring_buffer_addr(); -size_t __scudo_get_ring_buffer_size(); +const char *__scudo_get_ring_buffer_addr(void); +size_t __scudo_get_ring_buffer_size(void); #ifndef M_DECAY_TIME #define M_DECAY_TIME -100 @@ -118,6 +118,10 @@ size_t __scudo_get_ring_buffer_size(); #define M_PURGE -101 #endif +#ifndef M_PURGE_ALL +#define M_PURGE_ALL -104 +#endif + // Tune the allocator's choice of memory tags to make it more likely that // a certain class of memory errors will be detected. The value argument should // be one of the M_MEMTAG_TUNING_* constants below. 
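Alongside the (void) prototype cleanups above, interface.h now defines M_PURGE_ALL next to the existing M_DECAY_TIME and M_PURGE mallopt codes. A minimal usage sketch, assuming a Bionic-style mallopt() that routes these codes to the allocator (that routing is not part of this header); trimHeapAggressively() is a hypothetical helper name:

#include <malloc.h>

#ifndef M_PURGE_ALL
#define M_PURGE_ALL (-104)
#endif

// Ask the allocator to return as much memory as possible to the OS. Compared
// with M_PURGE, the ForceAll release path that M_PURGE_ALL is meant to trigger
// also drains per-thread caches first, so the call can take noticeably longer;
// treat it as a rare, explicit trim.
void trimHeapAggressively() {
  mallopt(M_PURGE_ALL, /*value=*/0);
}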
diff --git a/standalone/internal_defs.h b/standalone/internal_defs.h index 621fc9c45e9..27c6b451ffe 100644 --- a/standalone/internal_defs.h +++ b/standalone/internal_defs.h @@ -133,25 +133,25 @@ void NORETURN reportCheckFailed(const char *File, int Line, #else #define DCHECK(A) \ do { \ - } while (false) + } while (false && (A)) #define DCHECK_EQ(A, B) \ do { \ - } while (false) + } while (false && (A) == (B)) #define DCHECK_NE(A, B) \ do { \ - } while (false) + } while (false && (A) != (B)) #define DCHECK_LT(A, B) \ do { \ - } while (false) + } while (false && (A) < (B)) #define DCHECK_LE(A, B) \ do { \ - } while (false) + } while (false && (A) <= (B)) #define DCHECK_GT(A, B) \ do { \ - } while (false) + } while (false && (A) > (B)) #define DCHECK_GE(A, B) \ do { \ - } while (false) + } while (false && (A) >= (B)) #endif // The superfluous die() call effectively makes this macro NORETURN. diff --git a/standalone/linux.cpp b/standalone/linux.cpp index c77c1bb600d..e285d8a3d2d 100644 --- a/standalone/linux.cpp +++ b/standalone/linux.cpp @@ -11,6 +11,7 @@ #if SCUDO_LINUX #include "common.h" +#include "internal_defs.h" #include "linux.h" #include "mutex.h" #include "string_utils.h" @@ -19,6 +20,7 @@ #include <fcntl.h> #include <linux/futex.h> #include <sched.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> @@ -127,6 +129,10 @@ void HybridMutex::unlock() { } } +void HybridMutex::assertHeldImpl() { + CHECK(atomic_load(&M, memory_order_acquire) != Unlocked); +} + u64 getMonotonicTime() { timespec TS; clock_gettime(CLOCK_MONOTONIC, &TS); @@ -134,6 +140,17 @@ u64 getMonotonicTime() { static_cast<u64>(TS.tv_nsec); } +u64 getMonotonicTimeFast() { +#if defined(CLOCK_MONOTONIC_COARSE) + timespec TS; + clock_gettime(CLOCK_MONOTONIC_COARSE, &TS); + return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) + + static_cast<u64>(TS.tv_nsec); +#else + return getMonotonicTime(); +#endif +} + u32 getNumberOfCPUs() { cpu_set_t CPUs; // sched_getaffinity can fail for a variety of legitimate reasons (lack of @@ -180,6 +197,39 @@ bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { extern "C" WEAK int async_safe_write_log(int pri, const char *tag, const char *msg); +static uptr GetRSSFromBuffer(const char *Buf) { + // The format of the file is: + // 1084 89 69 11 0 79 0 + // We need the second number which is RSS in pages. + const char *Pos = Buf; + // Skip the first number. + while (*Pos >= '0' && *Pos <= '9') + Pos++; + // Skip whitespaces. + while (!(*Pos >= '0' && *Pos <= '9') && *Pos != 0) + Pos++; + // Read the number. + u64 Rss = 0; + for (; *Pos >= '0' && *Pos <= '9'; Pos++) + Rss = Rss * 10 + static_cast<u64>(*Pos) - '0'; + return static_cast<uptr>(Rss * getPageSizeCached()); +} + +uptr GetRSS() { + // TODO: We currently use sanitizer_common's GetRSS which reads the + // RSS from /proc/self/statm by default. We might want to + // call getrusage directly, even if it's less accurate. 
+ auto Fd = open("/proc/self/statm", O_RDONLY); + char Buf[64]; + s64 Len = read(Fd, Buf, sizeof(Buf) - 1); + close(Fd); + if (Len <= 0) + return 0; + Buf[Len] = 0; + + return GetRSSFromBuffer(Buf); +} + void outputRaw(const char *Buffer) { if (&async_safe_write_log) { constexpr s32 AndroidLogInfo = 4; diff --git a/standalone/list.h b/standalone/list.h index 1ac93c2f65d..0137667d1dc 100644 --- a/standalone/list.h +++ b/standalone/list.h @@ -110,6 +110,18 @@ template <class T> struct SinglyLinkedList : public IntrusiveList<T> { Size--; } + // Insert X next to Prev + void insert(T *Prev, T *X) { + DCHECK(!empty()); + DCHECK_NE(Prev, nullptr); + DCHECK_NE(X, nullptr); + X->Next = Prev->Next; + Prev->Next = X; + if (Last == Prev) + Last = X; + ++Size; + } + void extract(T *Prev, T *X) { DCHECK(!empty()); DCHECK_NE(Prev, nullptr); diff --git a/standalone/local_cache.h b/standalone/local_cache.h index f46645f9bad..c97095d6be9 100644 --- a/standalone/local_cache.h +++ b/standalone/local_cache.h @@ -10,8 +10,11 @@ #define SCUDO_LOCAL_CACHE_H_ #include "internal_defs.h" +#include "list.h" +#include "platform.h" #include "report.h" #include "stats.h" +#include "string_utils.h" namespace scudo { @@ -20,12 +23,18 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; struct TransferBatch { - static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint; - void setFromArray(CompactPtrT *Array, u32 N) { + static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint; + void setFromArray(CompactPtrT *Array, u16 N) { DCHECK_LE(N, MaxNumCached); Count = N; memcpy(Batch, Array, sizeof(Batch[0]) * Count); } + void appendFromArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, MaxNumCached - Count); + memcpy(Batch + Count, Array, sizeof(Batch[0]) * N); + // u16 will be promoted to int by arithmetic type conversion. + Count = static_cast<u16>(Count + N); + } void clear() { Count = 0; } void add(CompactPtrT P) { DCHECK_LT(Count, MaxNumCached); @@ -34,21 +43,43 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { void copyToArray(CompactPtrT *Array) const { memcpy(Array, Batch, sizeof(Batch[0]) * Count); } - u32 getCount() const { return Count; } - CompactPtrT get(u32 I) const { + u16 getCount() const { return Count; } + CompactPtrT get(u16 I) const { DCHECK_LE(I, Count); return Batch[I]; } - static u32 getMaxCached(uptr Size) { + static u16 getMaxCached(uptr Size) { return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size)); } TransferBatch *Next; private: - u32 Count; CompactPtrT Batch[MaxNumCached]; + u16 Count; + }; + + // A BatchGroup is used to collect blocks. Each group has a group id to + // identify the group kind of contained blocks. + struct BatchGroup { + // `Next` is used by IntrusiveList. + BatchGroup *Next; + // The compact base address of each group + uptr CompactPtrGroupBase; + // Cache value of TransferBatch::getMaxCached() + u16 MaxCachedPerBatch; + // Number of blocks pushed into this group. This is an increment-only + // counter. + uptr PushedBlocks; + // This is used to track how many bytes are not in-use since last time we + // tried to release pages. + uptr BytesInBGAtLastCheckpoint; + // Blocks are managed by TransferBatch in a list. 
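// NOTE (illustrative comment, not part of this change): nesting recap. A
// BatchGroup covers one 2^PrimaryGroupSizeLog-byte window of addresses (see
// the config comment in allocator_config.h), its Batches list below chains
// TransferBatch nodes, and each TransferBatch holds up to MaxCachedPerBatch
// compact pointers to free blocks from that window.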
+ SinglyLinkedList<TransferBatch> Batches; }; + static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch), + "BatchGroup uses the same class size as TransferBatch"); + void init(GlobalStats *S, SizeClassAllocator *A) { DCHECK(isEmpty()); Stats.init(); @@ -120,17 +151,49 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { TransferBatch *createBatch(uptr ClassId, void *B) { if (ClassId != BatchClassId) B = allocate(BatchClassId); + if (UNLIKELY(!B)) + reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); return reinterpret_cast<TransferBatch *>(B); } + BatchGroup *createGroup() { + void *Ptr = allocate(BatchClassId); + if (UNLIKELY(!Ptr)) + reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); + return reinterpret_cast<BatchGroup *>(Ptr); + } + LocalStats &getStats() { return Stats; } + void getStats(ScopedString *Str) { + bool EmptyCache = true; + for (uptr I = 0; I < NumClasses; ++I) { + if (PerClassArray[I].Count == 0) + continue; + + EmptyCache = false; + // The size of BatchClass is set to 0 intentionally. See the comment in + // initCache() for more details. + const uptr ClassSize = I == BatchClassId + ? SizeClassAllocator::getSizeByClassId(I) + : PerClassArray[I].ClassSize; + // Note that the string utils don't support printing u16 thus we cast it + // to a common use type uptr. + Str->append(" %02zu (%6zu): cached: %4zu max: %4zu\n", I, ClassSize, + static_cast<uptr>(PerClassArray[I].Count), + static_cast<uptr>(PerClassArray[I].MaxCount)); + } + + if (EmptyCache) + Str->append(" No block is cached.\n"); + } + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr BatchClassId = SizeClassMap::BatchClassId; - struct PerClass { - u32 Count; - u32 MaxCount; + struct alignas(SCUDO_CACHE_LINE_SIZE) PerClass { + u16 Count; + u16 MaxCount; // Note: ClassSize is zero for the transfer batch. uptr ClassSize; CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; @@ -150,7 +213,7 @@ private: for (uptr I = 0; I < NumClasses; I++) { PerClass *P = &PerClassArray[I]; const uptr Size = SizeClassAllocator::getSizeByClassId(I); - P->MaxCount = 2 * TransferBatch::getMaxCached(Size); + P->MaxCount = static_cast<u16>(2 * TransferBatch::getMaxCached(Size)); if (I != BatchClassId) { P->ClassSize = Size; } else { @@ -180,16 +243,12 @@ private: } NOINLINE void drain(PerClass *C, uptr ClassId) { - const u32 Count = Min(C->MaxCount / 2, C->Count); - TransferBatch *B = - createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0])); - if (UNLIKELY(!B)) - reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); - B->setFromArray(&C->Chunks[0], Count); - C->Count -= Count; - for (uptr I = 0; I < C->Count; I++) + const u16 Count = Min(static_cast<u16>(C->MaxCount / 2), C->Count); + Allocator->pushBlocks(this, ClassId, &C->Chunks[0], Count); + // u16 will be promoted to int by arithmetic type conversion. + C->Count = static_cast<u16>(C->Count - Count); + for (u16 I = 0; I < C->Count; I++) C->Chunks[I] = C->Chunks[I + Count]; - Allocator->pushBatch(ClassId, B); } }; diff --git a/standalone/mem_map.cpp b/standalone/mem_map.cpp new file mode 100644 index 00000000000..115cc34e706 --- /dev/null +++ b/standalone/mem_map.cpp @@ -0,0 +1,84 @@ +//===-- mem_map.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mem_map.h" + +#include "common.h" + +namespace scudo { + +bool MemMapDefault::mapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *MappedAddr = + ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, Flags, &Data); + if (MappedAddr == nullptr) + return false; + Base = reinterpret_cast<uptr>(MappedAddr); + MappedBase = Base; + Capacity = Size; + return true; +} + +void MemMapDefault::unmapImpl(uptr Addr, uptr Size) { + if (Size == Capacity) { + Base = MappedBase = Capacity = 0; + } else { + if (Base == Addr) { + Base = Addr + Size; + MappedBase = MappedBase == 0 ? Base : Max(MappedBase, Base); + } + Capacity -= Size; + } + + ::scudo::unmap(reinterpret_cast<void *>(Addr), Size, UNMAP_ALL, &Data); +} + +bool MemMapDefault::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *RemappedPtr = + ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, Flags, &Data); + const uptr RemappedAddr = reinterpret_cast<uptr>(RemappedPtr); + MappedBase = MappedBase == 0 ? RemappedAddr : Min(MappedBase, RemappedAddr); + return RemappedAddr == Addr; +} + +void MemMapDefault::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + DCHECK_NE(MappedBase, 0U); + DCHECK_GE(From, MappedBase); + return ::scudo::releasePagesToOS(MappedBase, From - MappedBase, Size, &Data); +} + +void MemMapDefault::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + return ::scudo::setMemoryPermission(Addr, Size, Flags); +} + +void ReservedMemoryDefault::releaseImpl() { + ::scudo::unmap(reinterpret_cast<void *>(Base), Capacity, UNMAP_ALL, &Data); +} + +bool ReservedMemoryDefault::createImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *Reserved = ::scudo::map(reinterpret_cast<void *>(Addr), Size, Name, + Flags | MAP_NOACCESS, &Data); + if (Reserved == nullptr) + return false; + + Base = reinterpret_cast<uptr>(Reserved); + Capacity = Size; + + return true; +} + +ReservedMemoryDefault::MemMapT ReservedMemoryDefault::dispatchImpl(uptr Addr, + uptr Size) { + ReservedMemoryDefault::MemMapT NewMap(Addr, Size); + NewMap.setMapPlatformData(Data); + return NewMap; +} + +} // namespace scudo diff --git a/standalone/mem_map.h b/standalone/mem_map.h new file mode 100644 index 00000000000..0b27fa86c0d --- /dev/null +++ b/standalone/mem_map.h @@ -0,0 +1,89 @@ +//===-- mem_map.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_H_ +#define SCUDO_MEM_MAP_H_ + +#include "mem_map_base.h" + +#include "common.h" +#include "internal_defs.h" + +// TODO: This is only used for `MapPlatformData`. Remove these includes when we +// have all three platform specific `MemMap` and `ReservedMemory` +// implementations. +#include "fuchsia.h" +#include "linux.h" +#include "trusty.h" + +namespace scudo { + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. 
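// NOTE (illustrative comment, not part of this change): as mem_map.cpp above
// shows, MemMapDefault introduces no new system calls; it forwards to the
// existing scudo::map()/unmap()/releasePagesToOS() free functions and carries
// the legacy MapPlatformData along, which is what lets each allocator migrate
// to the MemMap interface incrementally.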
+class MemMapDefault final : public MemMapBase<MemMapDefault> { +public: + constexpr MemMapDefault() = default; + MemMapDefault(uptr Base, uptr Capacity) : Base(Base), Capacity(Capacity) {} + + // Impls for base functions. + bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + + void setMapPlatformData(MapPlatformData &NewData) { Data = NewData; } + +private: + uptr Base = 0; + uptr Capacity = 0; + uptr MappedBase = 0; + MapPlatformData Data = {}; +}; + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class ReservedMemoryDefault final + : public ReservedMemory<ReservedMemoryDefault, MemMapDefault> { +public: + constexpr ReservedMemoryDefault() = default; + + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + +private: + uptr Base = 0; + uptr Capacity = 0; + MapPlatformData Data = {}; +}; + +#if SCUDO_LINUX +using ReservedMemoryT = ReservedMemoryDefault; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_FUCHSIA +using ReservedMemoryT = ReservedMemoryDefault; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_TRUSTY +using ReservedMemoryT = ReservedMemoryDefault; +using MemMapT = ReservedMemoryT::MemMapT; +#else +#error \ + "Unsupported platform, please implement the ReservedMemory for your platform!" +#endif + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_H_ diff --git a/standalone/mem_map_base.h b/standalone/mem_map_base.h new file mode 100644 index 00000000000..0560f4102d8 --- /dev/null +++ b/standalone/mem_map_base.h @@ -0,0 +1,130 @@ +//===-- mem_map_base.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_BASE_H_ +#define SCUDO_MEM_MAP_BASE_H_ + +#include "common.h" + +namespace scudo { + +// In Scudo, every memory operation will be fulfilled through a +// platform-specific `MemMap` instance. The essential APIs are listed in the +// `MemMapBase` below. This is implemented in CRTP, so for each implementation, +// it has to implement all of the 'Impl' named functions. +template <class Derived> class MemMapBase { +public: + constexpr MemMapBase() = default; + + // This is used to map a new set of contiguous pages. Note that the `Addr` is + // only a suggestion to the system. + bool map(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isAllocated()); + return invokeImpl(&Derived::mapImpl, Addr, Size, Name, Flags); + } + + // This is used to unmap partial/full pages from the beginning or the end. + // I.e., the result pages are expected to be still contiguous. 
+ void unmap(uptr Addr, uptr Size) { + DCHECK(isAllocated()); + DCHECK((Addr == getBase()) || (Addr + Size == getBase() + getCapacity())); + invokeImpl(&Derived::unmapImpl, Addr, Size); + } + + // This is used to remap a mapped range (either from map() or dispatched from + // ReservedMemory). For example, we have reserved several pages and then we + // want to remap them with different accessibility. + bool remap(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) || (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::remapImpl, Addr, Size, Name, Flags); + } + + // This is used to update the pages' access permission. For example, mark + // pages as no read/write permission. + void setMemoryPermission(uptr Addr, uptr Size, uptr Flags) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) || (Addr + Size <= getBase() + getCapacity())); + return static_cast<Derived *>(this)->setMemoryPermissionImpl(Addr, Size, + Flags); + } + + // Suggest releasing a set of contiguous physical pages back to the OS. Note + // that only physical pages are supposed to be released. Any release of + // virtual pages may lead to undefined behavior. + void releasePagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) || (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releasePagesToOSImpl, From, Size); + } + // This is similar to the above one except that any subsequent access to the + // released pages will return with zero-filled pages. + void releaseAndZeroPagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) || (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releaseAndZeroPagesToOSImpl, From, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isAllocated() { return getBase() != 0U; } + +protected: + template <typename R, typename... Args> + R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) { + return (static_cast<Derived *>(this)->*MemFn)(args...); + } +}; + +// `ReservedMemory` is a special memory handle which can be viewed as a page +// allocator. `ReservedMemory` will reserve a contiguous pages and the later +// page request can be fulfilled at the designated address. This is used when +// we want to ensure the virtual address of the MemMap will be in a known range. +// This is implemented in CRTP, so for each +// implementation, it has to implement all of the 'Impl' named functions. +template <class Derived, typename MemMapTy> class ReservedMemory { +public: + using MemMapT = MemMapTy; + constexpr ReservedMemory() = default; + + // Reserve a chunk of memory at a suggested address. + bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isCreated()); + return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags); + } + + // Release the entire reserved memory. + void release() { + DCHECK(isCreated()); + invokeImpl(&Derived::releaseImpl); + } + + // Dispatch a sub-range of reserved memory. Note that any fragmentation of + // the reserved pages is managed by each implementation. 
+ MemMapT dispatch(uptr Addr, uptr Size) { + DCHECK(isCreated()); + DCHECK((Addr >= getBase()) || (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::dispatchImpl, Addr, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isCreated() { return getBase() != 0U; } + +protected: + template <typename R, typename... Args> + R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) { + return (static_cast<Derived *>(this)->*MemFn)(args...); + } +}; + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_BASE_H_ diff --git a/standalone/memtag.h b/standalone/memtag.h index 7578aff17be..7f14a30fee1 100644 --- a/standalone/memtag.h +++ b/standalone/memtag.h @@ -18,7 +18,8 @@ namespace scudo { -#if (__clang_major__ >= 12 && defined(__aarch64__)) || defined(SCUDO_FUZZ) +#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__)) || \ + defined(SCUDO_FUZZ) // We assume that Top-Byte Ignore is enabled if the architecture supports memory // tagging. Not all operating systems enable TBI, so we only claim architectural @@ -57,7 +58,7 @@ inline NORETURN uint8_t extractTag(uptr Ptr) { #endif -#if __clang_major__ >= 12 && defined(__aarch64__) +#if __clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) #if SCUDO_LINUX diff --git a/standalone/mutex.h b/standalone/mutex.h index c8504c04091..05340de3e12 100644 --- a/standalone/mutex.h +++ b/standalone/mutex.h @@ -11,6 +11,7 @@ #include "atomic_helpers.h" #include "common.h" +#include "thread_annotations.h" #include <string.h> @@ -20,10 +21,10 @@ namespace scudo { -class HybridMutex { +class CAPABILITY("mutex") HybridMutex { public: - bool tryLock(); - NOINLINE void lock() { + bool tryLock() TRY_ACQUIRE(true); + NOINLINE void lock() ACQUIRE() { if (LIKELY(tryLock())) return; // The compiler may try to fully unroll the loop, ending up in a @@ -40,9 +41,20 @@ public: } lockSlow(); } - void unlock(); + void unlock() RELEASE(); + + // TODO(chiahungduan): In general, we may want to assert the owner of lock as + // well. Given the current uses of HybridMutex, it's acceptable without + // asserting the owner. Re-evaluate this when we have certain scenarios which + // requires a more fine-grained lock granularity. 
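The `CAPABILITY`, `TRY_ACQUIRE`, `ACQUIRE`, and `RELEASE` macros added to `HybridMutex` above map onto Clang's thread-safety attributes, which is what allows `-Werror=thread-safety` to reject unguarded accesses at compile time. The following is a rough sketch of the same machinery written directly against the raw attributes, with made-up `Mutex`/`Counter` names; scudo's real wrappers live in thread_annotations.h.

#include <mutex>

// Minimal wrappers over Clang's thread-safety attributes (expand to nothing
// on other compilers).
#if defined(__clang__)
#define CAPABILITY(x) __attribute__((capability(x)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))
#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))
#else
#define CAPABILITY(x)
#define ACQUIRE(...)
#define RELEASE(...)
#define REQUIRES(...)
#define GUARDED_BY(x)
#endif

// A mutex marked as a capability so the analysis can track its lock state.
class CAPABILITY("mutex") Mutex {
public:
  void lock() ACQUIRE() { M.lock(); }
  void unlock() RELEASE() { M.unlock(); }

private:
  std::mutex M;
};

class Counter {
public:
  void increment() REQUIRES(Mu) { Value++; }

  void incrementLocked() {
    Mu.lock();
    increment(); // OK: the analysis knows Mu is held here.
    Mu.unlock();
  }

  // With -Wthread-safety enabled, calling increment() without holding Mu,
  // or touching Value directly, is reported at compile time.

private:
  Mutex Mu;
  int Value GUARDED_BY(Mu) = 0;
};

int main() { return 0; }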
+ ALWAYS_INLINE void assertHeld() ASSERT_CAPABILITY(this) { + if (SCUDO_DEBUG) + assertHeldImpl(); + } private: + void assertHeldImpl(); + static constexpr u8 NumberOfTries = 8U; static constexpr u8 NumberOfYields = 8U; @@ -52,13 +64,13 @@ private: sync_mutex_t M = {}; #endif - void lockSlow(); + void lockSlow() ACQUIRE(); }; -class ScopedLock { +class SCOPED_CAPABILITY ScopedLock { public: - explicit ScopedLock(HybridMutex &M) : Mutex(M) { Mutex.lock(); } - ~ScopedLock() { Mutex.unlock(); } + explicit ScopedLock(HybridMutex &M) ACQUIRE(M) : Mutex(M) { Mutex.lock(); } + ~ScopedLock() RELEASE() { Mutex.unlock(); } private: HybridMutex &Mutex; diff --git a/standalone/platform.h b/standalone/platform.h index db4217ddab9..aae3b9ab876 100644 --- a/standalone/platform.h +++ b/standalone/platform.h @@ -37,6 +37,12 @@ #define SCUDO_TRUSTY 0 #endif +#if defined(__riscv) && (__riscv_xlen == 64) +#define SCUDO_RISCV64 1 +#else +#define SCUDO_RISCV64 0 +#endif + #if defined(__LP64__) #define SCUDO_WORDSIZE 64U #else diff --git a/standalone/primary32.h b/standalone/primary32.h index 326c10a32a8..b3d6e53dfca 100644 --- a/standalone/primary32.h +++ b/standalone/primary32.h @@ -18,6 +18,7 @@ #include "report.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -43,6 +44,7 @@ template <typename Config> class SizeClassAllocator32 { public: typedef typename Config::PrimaryCompactPtrT CompactPtrT; typedef typename Config::SizeClassMap SizeClassMap; + static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog; // The bytemap can only track UINT8_MAX - 1 classes. static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. @@ -51,6 +53,7 @@ public: typedef SizeClassAllocator32<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; + typedef typename CacheT::BatchGroup BatchGroup; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -60,7 +63,7 @@ public: static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { if (SCUDO_FUCHSIA) reportError("SizeClassAllocator32 is not supported on Fuchsia"); @@ -70,7 +73,7 @@ public: DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); PossibleRegions.init(); u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>( Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); @@ -85,18 +88,26 @@ public: } void unmapTestOnly() { - while (NumberOfStashedRegions > 0) - unmap(reinterpret_cast<void *>(RegionsStash[--NumberOfStashedRegions]), - RegionSize); + { + ScopedLock L(RegionsStashMutex); + while (NumberOfStashedRegions > 0) { + unmap(reinterpret_cast<void *>(RegionsStash[--NumberOfStashedRegions]), + RegionSize); + } + } + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); if (Sci->MinRegionIndex < MinRegionIndex) MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) MaxRegionIndex = Sci->MaxRegionIndex; *Sci = {}; } + + ScopedLock L(ByteMapMutex); for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++) if (PossibleRegions[I]) unmap(reinterpret_cast<void *>(I * 
RegionSize), RegionSize); @@ -111,35 +122,80 @@ public: return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr)); } + uptr compactPtrGroupBase(CompactPtrT CompactPtr) { + const uptr Mask = (static_cast<uptr>(1) << GroupSizeLog) - 1; + return CompactPtr & ~Mask; + } + + uptr decompactGroupBase(uptr CompactPtrGroupBase) { + return CompactPtrGroupBase; + } + TransferBatch *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); ScopedLock L(Sci->Mutex); - TransferBatch *B = Sci->FreeList.front(); - if (B) { - Sci->FreeList.pop_front(); - } else { - B = populateFreeList(C, ClassId, Sci); - if (UNLIKELY(!B)) + TransferBatch *B = popBatchImpl(C, ClassId, Sci); + if (UNLIKELY(!B)) { + if (UNLIKELY(!populateFreeList(C, ClassId, Sci))) return nullptr; + B = popBatchImpl(C, ClassId, Sci); + // if `populateFreeList` succeeded, we are supposed to get free blocks. + DCHECK_NE(B, nullptr); } - DCHECK_GT(B->getCount(), 0); Sci->Stats.PoppedBlocks += B->getCount(); return B; } - void pushBatch(uptr ClassId, TransferBatch *B) { + // Push the array of free blocks to the designated batch group. + void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) { DCHECK_LT(ClassId, NumClasses); - DCHECK_GT(B->getCount(), 0); + DCHECK_GT(Size, 0); + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + ScopedLock L(Sci->Mutex); + // Constructing a batch group in the free list will use two blocks in + // BatchClassId. If we are pushing BatchClassId blocks, we will use the + // blocks in the array directly (can't delegate local cache which will + // cause a recursive allocation). However, The number of free blocks may + // be less than two. Therefore, populate the free list before inserting + // the blocks. + if (Size == 1 && !populateFreeList(C, ClassId, Sci)) + return; + pushBlocksImpl(C, ClassId, Sci, Array, Size); + Sci->Stats.PushedBlocks += Size; + return; + } + + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. + + // Sort the blocks so that blocks belonging to the same group can be pushed + // together. + bool SameGroup = true; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && + compactPtrGroupBase(Cur) < compactPtrGroupBase(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; + } + ScopedLock L(Sci->Mutex); - Sci->FreeList.push_front(B); - Sci->Stats.PushedBlocks += B->getCount(); + pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup); + + Sci->Stats.PushedBlocks += Size; if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Sci, ClassId); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. 
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) { if (static_cast<uptr>(I) == SizeClassMap::BatchClassId) @@ -148,11 +204,11 @@ public: } getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock(); RegionsStashMutex.lock(); - PossibleRegions.disable(); + ByteMapMutex.lock(); } - void enable() { - PossibleRegions.enable(); + void enable() NO_THREAD_SAFETY_ANALYSIS { + ByteMapMutex.unlock(); RegionsStashMutex.unlock(); getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); for (uptr I = 0; I < NumClasses; I++) { @@ -166,12 +222,20 @@ public: uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator32. We may consider locking each region on demand + // only. + Sci->Mutex.assertHeld(); if (Sci->MinRegionIndex < MinRegionIndex) MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) MaxRegionIndex = Sci->MaxRegionIndex; } - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) + + // SizeClassAllocator32 is disabled, i.e., ByteMapMutex is held. + ByteMapMutex.assertHeld(); + + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { if (PossibleRegions[I] && (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { const uptr BlockSize = getSizeByClassId(PossibleRegions[I] - 1U); @@ -180,6 +244,7 @@ public: for (uptr Block = From; Block < To; Block += BlockSize) Callback(Block); } + } } void getStats(ScopedString *Str) { @@ -189,6 +254,7 @@ public: uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); TotalMapped += Sci->AllocatedUser; PoppedBlocks += Sci->Stats.PoppedBlocks; PushedBlocks += Sci->Stats.PushedBlocks; @@ -196,8 +262,11 @@ public: Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " "remains %zu\n", TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getStats(Str, I, Sci, 0); + } } bool setOption(Option O, sptr Value) { @@ -212,14 +281,14 @@ public: return true; } - uptr releaseToOS() { + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Sci, I, ReleaseType); } return TotalReleasedBytes; } @@ -248,7 +317,7 @@ private: }; struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; @@ -256,17 +325,17 @@ private: struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; - SinglyLinkedList<TransferBatch> FreeList; - uptr CurrentRegion; - uptr CurrentRegionAllocated; - SizeClassStats Stats; + SinglyLinkedList<BatchGroup> FreeList GUARDED_BY(Mutex); + uptr CurrentRegion GUARDED_BY(Mutex); + uptr CurrentRegionAllocated GUARDED_BY(Mutex); + SizeClassStats Stats GUARDED_BY(Mutex); u32 RandState; - uptr AllocatedUser; + uptr AllocatedUser GUARDED_BY(Mutex); // Lowest & highest region index allocated for this size class, to avoid // looping through the whole NumRegions. 
- uptr MinRegionIndex; - uptr MaxRegionIndex; - ReleaseToOsInfo ReleaseInfo; + uptr MinRegionIndex GUARDED_BY(Mutex); + uptr MaxRegionIndex GUARDED_BY(Mutex); + ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex); }; static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); @@ -291,17 +360,22 @@ private: else MapSize = RegionSize; } else { - Region = roundUpTo(MapBase, RegionSize); + Region = roundUp(MapBase, RegionSize); unmap(reinterpret_cast<void *>(MapBase), Region - MapBase); MapSize = RegionSize; } const uptr End = Region + MapSize; if (End != MapEnd) unmap(reinterpret_cast<void *>(End), MapEnd - End); + + DCHECK_EQ(Region % RegionSize, 0U); + static_assert(Config::PrimaryRegionSizeLog == GroupSizeLog, + "Memory group should be the same size as Region"); + return Region; } - uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) { + uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) REQUIRES(Sci->Mutex) { DCHECK_LT(ClassId, NumClasses); uptr Region = 0; { @@ -318,6 +392,7 @@ private: Sci->MinRegionIndex = RegionIndex; if (RegionIndex > Sci->MaxRegionIndex) Sci->MaxRegionIndex = RegionIndex; + ScopedLock L(ByteMapMutex); PossibleRegions.set(RegionIndex, static_cast<u8>(ClassId + 1U)); } return Region; @@ -328,8 +403,231 @@ private: return &SizeClassInfoArray[ClassId]; } - NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, - SizeClassInfo *Sci) { + // Push the blocks to their batch group. The layout will be like, + // + // FreeList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB + // + // Each BlockGroup(BG) will associate with unique group id and the free blocks + // are managed by a list of TransferBatch(TB). To reduce the time of inserting + // blocks, BGs are sorted and the input `Array` are supposed to be sorted so + // that we can get better performance of maintaining sorted property. + // Use `SameGroup=true` to indicate that all blocks in the array are from the + // same group then we will skip checking the group id of each block. + // + // The region mutex needs to be held while calling this method. + void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Sci->Mutex) { + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroup *BG = nullptr; + TransferBatch *TB = nullptr; + if (ClassId == SizeClassMap::BatchClassId) { + DCHECK_GE(Size, 2U); + + // Free blocks are recorded by TransferBatch in freelist, blocks of + // BatchClassId are included. In order not to use additional memory to + // record blocks of BatchClassId, they are self-contained. I.e., A + // TransferBatch may record the block address of itself. See the figure + // below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // The safeness of manipulating TransferBatch is kept by the invariant, + // + // The unit of each pop-block request is a TransferBatch. Return + // part of the blocks in a TransferBatch is not allowed. + // + // This ensures that TransferBatch won't leak the address itself while + // it's still holding other valid data. + // + // Besides, BatchGroup uses the same size-class as TransferBatch does + // and its address is recorded in the TransferBatch too. 
To maintain the + // safeness, the invariant to keep is, + // + // The address of itself is always recorded in the last TransferBatch + // of the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the BatchGroup becomes invalid. + // + // As a result, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + BG = reinterpret_cast<BatchGroup *>( + decompactPtr(ClassId, Array[Size - 1])); + BG->Batches.clear(); + + TB = reinterpret_cast<TransferBatch *>( + decompactPtr(ClassId, Array[Size - 2])); + TB->clear(); + + // Append the blocks used by BatchGroup and TransferBatch immediately so + // that we ensure that they are in the last TransBatch. + TB->appendFromArray(Array + Size - 2, 2); + Size -= 2; + } else { + BG = C->createGroup(); + BG->Batches.clear(); + + TB = C->createBatch(ClassId, nullptr); + TB->clear(); + } + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches`. + BG->Batches.push_front(TB); + BG->PushedBlocks = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + TransferBatch::getMaxCached(getSizeByClassId(ClassId)); + + return BG; + }; + + auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList<TransferBatch> &Batches = BG->Batches; + TransferBatch *CurBatch = Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); + u16 UnusedSlots = + static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = C->createBatch( + ClassId, + reinterpret_cast<void *>(decompactPtr(ClassId, Array[I]))); + CurBatch->clear(); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + }; + + BatchGroup *Cur = Sci->FreeList.front(); + + if (ClassId == SizeClassMap::BatchClassId) { + if (Cur == nullptr) { + // Don't need to classify BatchClassId. + Cur = CreateGroup(/*CompactPtrGroupBase=*/0); + Sci->FreeList.push_front(Cur); + } + InsertBlocks(Cur, Array, Size); + return; + } + + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. + BatchGroup *Prev = nullptr; + + while (Cur != nullptr && + compactPtrGroupBase(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroupBase(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[0])); + if (Prev == nullptr) + Sci->FreeList.push_front(Cur); + else + Sci->FreeList.insert(Prev, Cur); + } + + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroupBase(Array[I]), Cur->CompactPtrGroupBase); + + InsertBlocks(Cur, Array, Size); + return; + } + + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. 
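As the comment above notes, at this point the array is sorted by group id, and the loop that follows walks it, flushing each maximal run of blocks with the same group before advancing `Cur`. Stripped of the allocator details, that is a plain run-partitioning pass; here is a standalone sketch with made-up `key`/`flushRun` names standing in for `compactPtrGroupBase()` and the per-group insertion.

#include <cstdio>
#include <vector>

// Key used for grouping; stands in for compactPtrGroupBase().
static unsigned key(unsigned Value) { return Value / 100; }

// Stands in for InsertBlocks(): handle one run of same-key elements.
static void flushRun(const unsigned *Run, unsigned Count) {
  std::printf("run of %u element(s), key %u\n", Count, key(Run[0]));
}

// Walk a key-sorted array and flush each maximal run of equal keys,
// mirroring the "determine the segment and push it together" loop.
static void forEachRun(const std::vector<unsigned> &Sorted) {
  if (Sorted.empty())
    return;
  unsigned Count = 1;
  for (size_t I = 1; I < Sorted.size(); ++I) {
    if (key(Sorted[I - 1]) != key(Sorted[I])) {
      flushRun(&Sorted[I - Count], Count);
      Count = 1;
    } else {
      ++Count;
    }
  }
  flushRun(&Sorted[Sorted.size() - Count], Count);
}

int main() {
  forEachRun({101, 105, 250, 251, 252, 310});
  // -> run of 2 (key 1), run of 3 (key 2), run of 1 (key 3)
  return 0;
}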
+ u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) { + DCHECK_EQ(compactPtrGroupBase(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroupBase(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroupBase(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[I])); + DCHECK_NE(Prev, nullptr); + Sci->FreeList.insert(Prev, Cur); + } + + Count = 1; + } else { + ++Count; + } + } + + InsertBlocks(Cur, Array + Size - Count, Count); + } + + // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest + // group id will be considered first. + // + // The region mutex needs to be held while calling this method. + TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { + if (Sci->FreeList.empty()) + return nullptr; + + SinglyLinkedList<TransferBatch> &Batches = Sci->FreeList.front()->Batches; + DCHECK(!Batches.empty()); + + TransferBatch *B = Batches.front(); + Batches.pop_front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + if (Batches.empty()) { + BatchGroup *BG = Sci->FreeList.front(); + Sci->FreeList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking while + // allocating. Note that block used by constructing BatchGroup is recorded + // as free blocks in the last element of BatchGroup::Batches. Which means, + // once we pop the last TransferBatch, the block is implicitly + // deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } + + return B; + } + + NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { uptr Region; uptr Offset; // If the size-class currently has a region associated to it, use it. The @@ -344,14 +642,14 @@ private: DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); Region = allocateRegion(Sci, ClassId); if (UNLIKELY(!Region)) - return nullptr; + return false; C->getStats().add(StatMapped, RegionSize); Sci->CurrentRegion = Region; Offset = 0; } const uptr Size = getSizeByClassId(ClassId); - const u32 MaxCount = TransferBatch::getMaxCached(Size); + const u16 MaxCount = TransferBatch::getMaxCached(Size); DCHECK_GT(MaxCount, 0U); // The maximum number of blocks we should carve in the region is dictated // by the maximum number of batches we want to fill, and the amount of @@ -374,23 +672,29 @@ private: uptr P = Region + Offset; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) ShuffleArray[I] = reinterpret_cast<CompactPtrT>(P); - // No need to shuffle the batches size class. 
- if (ClassId != SizeClassMap::BatchClassId) - shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState); - for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = - C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I])); - if (UNLIKELY(!B)) - return nullptr; - const u32 N = Min(MaxCount, NumberOfBlocks - I); - B->setFromArray(&ShuffleArray[I], N); - Sci->FreeList.push_back(B); - I += N; + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroupBase(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Sci->RandState); + pushBlocksImpl(C, ClassId, Sci, ShuffleArray + I - N, N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroupBase(ShuffleArray[I]); + } else { + ++N; + } + } + + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState); + pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBlocksImpl(C, ClassId, Sci, ShuffleArray, NumberOfBlocks, + /*SameGroup=*/true); } - TransferBatch *B = Sci->FreeList.front(); - Sci->FreeList.pop_front(); - DCHECK(B); - DCHECK_GT(B->getCount(), 0); const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); @@ -406,11 +710,11 @@ private: } Sci->AllocatedUser += AllocatedUser; - return B; + return true; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - SizeClassInfo *Sci = getSizeClassInfo(ClassId); + void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci, uptr Rss) + REQUIRES(Sci->Mutex) { if (Sci->AllocatedUser == 0) return; const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; @@ -423,7 +727,8 @@ private: } NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, - bool Force = false) { + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Sci->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); @@ -431,33 +736,60 @@ private: const uptr BytesInFreeList = Sci->AllocatedUser - (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = - (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. + if (UNLIKELY(BytesInFreeList == 0)) + return 0; + + bool MaySkip = false; + + if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + MaySkip = true; + } + + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). 
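Boiled down, the checkpoint rule described in the comment above is: whenever the free list shrinks below the last checkpoint, pull the checkpoint down to the current level; otherwise measure progress as the bytes freed since the checkpoint and skip the release attempt while that delta is under a page. A minimal sketch of that bookkeeping, with hypothetical names and a hard-coded 4 KiB page size:

#include <cstdio>

namespace {

constexpr unsigned long PageSize = 4096;

struct ReleaseCheckpoint {
  unsigned long BytesInFreeListAtLastCheckpoint = 0;

  // Returns true when enough new bytes were pushed since the last
  // checkpoint to make a release attempt worthwhile.
  bool shouldAttemptRelease(unsigned long BytesInFreeList) {
    if (BytesInFreeList == 0)
      return false;
    // The free list shrank below the checkpoint (blocks were reallocated):
    // move the checkpoint down so later growth is not underestimated.
    if (BytesInFreeList <= BytesInFreeListAtLastCheckpoint) {
      BytesInFreeListAtLastCheckpoint = BytesInFreeList;
      return false;
    }
    const unsigned long PushedBytesDelta =
        BytesInFreeList - BytesInFreeListAtLastCheckpoint;
    return PushedBytesDelta >= PageSize;
  }

  // After a successful release, restart measuring from the current level.
  void onReleased(unsigned long BytesInFreeList) {
    BytesInFreeListAtLastCheckpoint = BytesInFreeList;
  }
};

} // namespace

int main() {
  ReleaseCheckpoint C;
  std::printf("%d\n", C.shouldAttemptRelease(2048)); // 0: under one page
  std::printf("%d\n", C.shouldAttemptRelease(8192)); // 1: 8 KiB since checkpoint
  C.onReleased(8192);
  std::printf("%d\n", C.shouldAttemptRelease(9000)); // 0: only ~800 bytes new
  return 0;
}

The real code additionally folds in the block-density check and the release interval before deciding; the sketch captures only the checkpoint arithmetic.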
+ const uptr PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + MaySkip = true; + + const bool CheckDensity = + BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll; // Releasing smaller blocks is expensive, so we want to make sure that a // significant amount of bytes are free, and that there has been a good // amount of batches pushed to the freelist before attempting to release. - if (BlockSize < PageSize / 16U) { - if (!Force && BytesPushed < Sci->AllocatedUser / 16U) - return 0; - // We want 8x% to 9x% free bytes (the larger the block, the lower the %). - if ((BytesInFreeList * 100U) / Sci->AllocatedUser < - (100U - 1U - BlockSize / 16U)) - return 0; + if (CheckDensity) { + if (ReleaseType == ReleaseToOS::Normal && + PushedBytesDelta < Sci->AllocatedUser / 16U) { + MaySkip = true; + } } - if (!Force) { + if (MaySkip && ReleaseType != ReleaseToOS::ForceAll) + return 0; + + if (ReleaseType == ReleaseToOS::Normal) { const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) return 0; if (Sci->ReleaseInfo.LastReleaseAtNs + static_cast<u64>(IntervalMs) * 1000000 > - getMonotonicTime()) { + getMonotonicTimeFast()) { return 0; // Memory was returned recently. } } @@ -469,37 +801,115 @@ private: uptr TotalReleasedBytes = 0; const uptr Base = First * RegionSize; const uptr NumberOfRegions = Last - First + 1U; + const uptr GroupSize = (1U << GroupSizeLog); + const uptr CurGroupBase = + compactPtrGroupBase(compactPtr(ClassId, Sci->CurrentRegion)); + ReleaseRecorder Recorder(Base); - auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { - return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; - }; + PageReleaseContext Context(BlockSize, NumberOfRegions, + /*ReleaseSize=*/RegionSize); + auto DecompactPtr = [](CompactPtrT CompactPtr) { return reinterpret_cast<uptr>(CompactPtr); }; - releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize, - &Recorder, DecompactPtr, SkipRegion); + for (BatchGroup &BG : Sci->FreeList) { + const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase); + // The `GroupSize` may not be divided by `BlockSize`, which means there is + // an unused space at the end of Region. Exclude that space to avoid + // unused page map entry. + uptr AllocatedGroupSize = GroupBase == CurGroupBase + ? Sci->CurrentRegionAllocated + : roundDownSlow(GroupSize, BlockSize); + if (AllocatedGroupSize == 0) + continue; + + // TransferBatches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (ReleaseType != ReleaseToOS::ForceAll && + BytesInBG <= BG.BytesInBGAtLastCheckpoint) { + BG.BytesInBGAtLastCheckpoint = BytesInBG; + continue; + } + const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + continue; + + // Given the randomness property, we try to release the pages only if the + // bytes used by free blocks exceed certain proportion of allocated + // spaces. + if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize < + (100U - 1U - BlockSize / 16U)) { + continue; + } + + // TODO: Consider updating this after page release if `ReleaseRecorder` + // can tell the releasd bytes in each group. 
+ BG.BytesInBGAtLastCheckpoint = BytesInBG; + + const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; + const uptr RegionIndex = (GroupBase - Base) / RegionSize; + + if (NumBlocks == MaxContainedBlocks) { + for (const auto &It : BG.Batches) + for (u16 I = 0; I < It.getCount(); ++I) + DCHECK_EQ(compactPtrGroupBase(It.get(I)), BG.CompactPtrGroupBase); + + const uptr To = GroupBase + AllocatedGroupSize; + Context.markRangeAsAllCounted(GroupBase, To, GroupBase, RegionIndex, + AllocatedGroupSize); + } else { + DCHECK_LT(NumBlocks, MaxContainedBlocks); + + // Note that we don't always visit blocks in each BatchGroup so that we + // may miss the chance of releasing certain pages that cross + // BatchGroups. + Context.markFreeBlocksInRegion(BG.Batches, DecompactPtr, GroupBase, + RegionIndex, AllocatedGroupSize, + /*MayContainLastBlockInRegion=*/true); + } + + // We may not be able to do the page release In a rare case that we may + // fail on PageMap allocation. + if (UNLIKELY(!Context.hasBlockMarked())) + return 0; + } + + if (!Context.hasBlockMarked()) + return 0; + + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedRangesCount() > 0) { - Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; } - Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); return TotalReleasedBytes; } SizeClassInfo SizeClassInfoArray[NumClasses] = {}; + HybridMutex ByteMapMutex; // Track the regions in use, 0 is unused, otherwise store ClassId + 1. - ByteMap PossibleRegions = {}; + ByteMap PossibleRegions GUARDED_BY(ByteMapMutex) = {}; atomic_s32 ReleaseToOsIntervalMs = {}; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. 
static constexpr uptr MaxStashedRegions = 4; HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions = 0; - uptr RegionsStash[MaxStashedRegions] = {}; + uptr NumberOfStashedRegions GUARDED_BY(RegionsStashMutex) = 0; + uptr RegionsStash[MaxStashedRegions] GUARDED_BY(RegionsStashMutex) = {}; }; } // namespace scudo diff --git a/standalone/primary64.h b/standalone/primary64.h index 14784ee8f37..d3a1aea7400 100644 --- a/standalone/primary64.h +++ b/standalone/primary64.h @@ -13,11 +13,13 @@ #include "common.h" #include "list.h" #include "local_cache.h" +#include "mem_map.h" #include "memtag.h" #include "options.h" #include "release.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -45,84 +47,219 @@ template <typename Config> class SizeClassAllocator64 { public: typedef typename Config::PrimaryCompactPtrT CompactPtrT; static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale; + static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog; + static const uptr GroupScale = GroupSizeLog - CompactPtrScale; typedef typename Config::SizeClassMap SizeClassMap; typedef SizeClassAllocator64<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; + typedef typename CacheT::BatchGroup BatchGroup; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? roundUpTo(sizeof(TransferBatch), 1U << CompactPtrScale) + ? roundUp(sizeof(TransferBatch), 1U << CompactPtrScale) : SizeClassMap::getSizeByClassId(ClassId); } static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); - DCHECK_EQ(PrimaryBase, 0U); + + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1U << GroupSizeLog); + const uptr PagesInGroup = GroupSize / PageSize; + const uptr MinSizeClass = getSizeByClassId(1); + // When trying to release pages back to memory, visiting smaller size + // classes is expensive. Therefore, we only try to release smaller size + // classes when the amount of free blocks goes over a certain threshold (See + // the comment in releaseToOSMaybe() for more details). For example, for + // size class 32, we only do the release when the size of free blocks is + // greater than 97% of pages in a group. However, this may introduce another + // issue that if the number of free blocks is bouncing between 97% ~ 100%. + // Which means we may try many page releases but only release very few of + // them (less than 3% in a group). Even though we have + // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these + // calls but it will be better to have another guard to mitigate this issue. + // + // Here we add another constraint on the minimum size requirement. The + // constraint is determined by the size of in-use blocks in the minimal size + // class. Take size class 32 as an example, + // + // +- one memory group -+ + // +----------------------+------+ + // | 97% of free blocks | | + // +----------------------+------+ + // \ / + // 3% in-use blocks + // + // * The release size threshold is 97%. + // + // The 3% size in a group is about 7 pages. For two consecutive + // releaseToOSMaybe(), we require the difference between `PushedBlocks` + // should be greater than 7 pages. 
This mitigates the page releasing + // thrashing which is caused by memory usage bouncing around the threshold. + // The smallest size class takes longest time to do the page release so we + // use its size of in-use blocks as a heuristic. + SmallerBlockReleasePageDelta = + PagesInGroup * (1 + MinSizeClass / 16U) / 100; + // Reserve the space required for the Primary. - PrimaryBase = reinterpret_cast<uptr>( - map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data)); + CHECK(ReservedMemory.create(/*Addr=*/0U, PrimarySize, + "scudo:primary_reserve")); + PrimaryBase = ReservedMemory.getBase(); + DCHECK_NE(PrimaryBase, 0U); u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); - const uptr PageSize = getPageSizeCached(); + for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); // The actual start of a region is offset by a random number of pages // when PrimaryEnableRandomOffset is set. - Region->RegionBeg = getRegionBaseByClassId(I) + + Region->RegionBeg = (PrimaryBase + (I << Config::PrimaryRegionSizeLog)) + (Config::PrimaryEnableRandomOffset ? ((getRandomModN(&Seed, 16) + 1) * PageSize) : 0); Region->RandState = getRandomU32(&Seed); + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. + if (isSmallBlock(getSizeByClassId(I))) + Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; + else + Region->TryReleaseThreshold = PageSize; Region->ReleaseInfo.LastReleaseAtNs = Time; } + shuffle(RegionInfoArray, NumClasses, &Seed); + setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void unmapTestOnly() { + void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); *Region = {}; } if (PrimaryBase) - unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, - &Data); + ReservedMemory.release(); PrimaryBase = 0U; } TransferBatch *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); RegionInfo *Region = getRegionInfo(ClassId); - ScopedLock L(Region->Mutex); - TransferBatch *B = Region->FreeList.front(); - if (B) { - Region->FreeList.pop_front(); - } else { - B = populateFreeList(C, ClassId, Region); - if (UNLIKELY(!B)) - return nullptr; + bool PrintStats = false; + { + ScopedLock L(Region->Mutex); + TransferBatch *B = popBatchImpl(C, ClassId, Region); + if (LIKELY(B)) { + Region->Stats.PoppedBlocks += B->getCount(); + return B; + } + + const bool RegionIsExhausted = Region->Exhausted; + if (UNLIKELY(RegionIsExhausted || + !populateFreeList(C, ClassId, Region))) { + PrintStats = !RegionIsExhausted && Region->Exhausted; + } else { + B = popBatchImpl(C, ClassId, Region); + // if `populateFreeList` succeeded, we are supposed to get free blocks. + DCHECK_NE(B, nullptr); + Region->Stats.PoppedBlocks += B->getCount(); + return B; + } } - DCHECK_GT(B->getCount(), 0); - Region->Stats.PoppedBlocks += B->getCount(); - return B; + + // Note that `getStats()` requires locking each region so we can't call it + // while locking the Region->Mutex in the above. 
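For a concrete feel of the `SmallerBlockReleasePageDelta` threshold computed in init() above: with an assumed 4 KiB page size, a 1 MiB memory group, and 32 as the smallest size class (illustrative values, not taken from this change), the formula lands on the "about 7 pages" mentioned in the comment.

#include <cstdio>

int main() {
  const unsigned long PageSize = 4096;                      // assumed page size
  const unsigned long GroupSize = 1UL << 20;                // assumed 1 MiB group
  const unsigned long PagesInGroup = GroupSize / PageSize;  // 256
  const unsigned long MinSizeClass = 32;                    // smallest size class

  // Same shape as: PagesInGroup * (1 + MinSizeClass / 16U) / 100
  const unsigned long SmallerBlockReleasePageDelta =
      PagesInGroup * (1 + MinSizeClass / 16) / 100;

  std::printf("%lu\n", SmallerBlockReleasePageDelta); // prints 7
  return 0;
}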
+ if (UNLIKELY(PrintStats)) { + ScopedString Str; + getStats(&Str); + Str.append( + "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", + RegionSize >> 20, getSizeByClassId(ClassId)); + Str.output(); + } + return nullptr; } - void pushBatch(uptr ClassId, TransferBatch *B) { - DCHECK_GT(B->getCount(), 0); + // Push the array of free blocks to the designated batch group. + void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) { + DCHECK_LT(ClassId, NumClasses); + DCHECK_GT(Size, 0); + RegionInfo *Region = getRegionInfo(ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + bool PrintStats = false; + { + ScopedLock L(Region->Mutex); + // Constructing a batch group in the free list will use two blocks in + // BatchClassId. If we are pushing BatchClassId blocks, we will use the + // blocks in the array directly (can't delegate local cache which will + // cause a recursive allocation). However, The number of free blocks may + // be less than two. Therefore, populate the free list before inserting + // the blocks. + const bool NeedToRefill = Size == 1U && Region->FreeList.empty(); + // If BatchClass has been exhausted, the program should have been + // aborted. + DCHECK(!Region->Exhausted); + + if (UNLIKELY( + NeedToRefill && + !populateFreeList(C, SizeClassMap::BatchClassId, Region))) { + PrintStats = true; + } else { + pushBlocksImpl(C, SizeClassMap::BatchClassId, Region, Array, Size); + Region->Stats.PushedBlocks += Size; + } + } + + // Note that `getStats()` requires the lock of each region so we can't + // call it while locking the Region->Mutex in the above. + if (UNLIKELY(PrintStats)) { + ScopedString Str; + getStats(&Str); + Str.append( + "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", + RegionSize >> 20, getSizeByClassId(ClassId)); + Str.output(); + // Theoretically, BatchClass shouldn't be used up. Abort immediately + // when it happens. + reportOutOfBatchClass(); + } + + return; + } + + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. + + // Sort the blocks so that blocks belonging to the same group can be pushed + // together. + bool SameGroup = true; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; + } + ScopedLock L(Region->Mutex); - Region->FreeList.push_front(B); - Region->Stats.PushedBlocks += B->getCount(); + pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); + + Region->Stats.PushedBlocks += Size; if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Region, ClassId); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. 
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) { if (static_cast<uptr>(I) == SizeClassMap::BatchClassId) @@ -132,7 +269,7 @@ public: getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) @@ -145,7 +282,11 @@ public: for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; - const RegionInfo *Region = getRegionInfo(I); + RegionInfo *Region = getRegionInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator64. We may consider locking each region on demand + // only. + Region->Mutex.assertHeld(); const uptr BlockSize = getSizeByClassId(I); const uptr From = Region->RegionBeg; const uptr To = From + Region->AllocatedUser; @@ -161,6 +302,7 @@ public: uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->Mutex); if (Region->MappedUser) TotalMapped += Region->MappedUser; PoppedBlocks += Region->Stats.PoppedBlocks; @@ -171,8 +313,11 @@ public: TotalMapped >> 20, 0U, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->Mutex); + getStats(Str, I, Region, 0); + } } bool setOption(Option O, sptr Value) { @@ -187,14 +332,14 @@ public: return true; } - uptr releaseToOS() { + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; RegionInfo *Region = getRegionInfo(I); ScopedLock L(Region->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); } return TotalReleasedBytes; } @@ -206,9 +351,6 @@ public: static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } uptr getCompactPtrBaseByClassId(uptr ClassId) { - // If we are not compacting pointers, base everything off of 0. - if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0) - return 0; return getRegionInfo(ClassId)->RegionBeg; } @@ -223,15 +365,23 @@ public: decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); } - static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + static BlockInfo findNearestBlock(const char *RegionInfoData, + uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { const RegionInfo *RegionInfoArray = reinterpret_cast<const RegionInfo *>(RegionInfoData); + uptr ClassId; uptr MinDistance = -1UL; for (uptr I = 0; I != NumClasses; ++I) { if (I == SizeClassMap::BatchClassId) continue; uptr Begin = RegionInfoArray[I].RegionBeg; + // TODO(chiahungduan): In fact, We need to lock the RegionInfo::Mutex. + // However, the RegionInfoData is passed with const qualifier and lock the + // mutex requires modifying RegionInfoData, which means we need to remove + // the const qualifier. This may lead to another undefined behavior (The + // first one is accessing `AllocatedUser` without locking. It's better to + // pass `RegionInfoData` as `void *` then we can lock the mutex properly. 
uptr End = Begin + RegionInfoArray[I].AllocatedUser; if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) continue; @@ -284,7 +434,7 @@ private: }; struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; @@ -292,15 +442,20 @@ private: struct UnpaddedRegionInfo { HybridMutex Mutex; - SinglyLinkedList<TransferBatch> FreeList; + SinglyLinkedList<BatchGroup> FreeList GUARDED_BY(Mutex); + // This is initialized before thread creation. uptr RegionBeg = 0; - RegionStats Stats = {}; - u32 RandState = 0; - uptr MappedUser = 0; // Bytes mapped for user memory. - uptr AllocatedUser = 0; // Bytes allocated for user memory. - MapPlatformData Data = {}; - ReleaseToOsInfo ReleaseInfo = {}; - bool Exhausted = false; + RegionStats Stats GUARDED_BY(Mutex) = {}; + u32 RandState GUARDED_BY(Mutex) = 0; + // Bytes mapped for user memory. + uptr MappedUser GUARDED_BY(Mutex) = 0; + // Bytes allocated for user memory. + uptr AllocatedUser GUARDED_BY(Mutex) = 0; + // The minimum size of pushed blocks to trigger page release. + uptr TryReleaseThreshold GUARDED_BY(Mutex) = 0; + MemMapT MemMap = {}; + ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {}; + bool Exhausted GUARDED_BY(Mutex) = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - @@ -308,8 +463,13 @@ private: }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); + // TODO: `PrimaryBase` can be obtained from ReservedMemory. This needs to be + // deprecated. uptr PrimaryBase = 0; - MapPlatformData Data = {}; + ReservedMemoryT ReservedMemory = {}; + // The minimum size of pushed blocks that we will try to release the pages in + // that size class. + uptr SmallerBlockReleasePageDelta = 0; atomic_s32 ReleaseToOsIntervalMs = {}; alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; @@ -318,8 +478,10 @@ private: return &RegionInfoArray[ClassId]; } - uptr getRegionBaseByClassId(uptr ClassId) const { - return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); + uptr getRegionBaseByClassId(uptr ClassId) { + return roundDown(getRegionInfo(ClassId)->RegionBeg - PrimaryBase, + RegionSize) + + PrimaryBase; } static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { @@ -330,10 +492,248 @@ private: return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale); } - NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, - RegionInfo *Region) { + static uptr compactPtrGroup(CompactPtrT CompactPtr) { + const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1; + return static_cast<uptr>(CompactPtr) & ~Mask; + } + static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) { + DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U); + return Base + (CompactPtrGroupBase << CompactPtrScale); + } + + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; + } + + // Push the blocks to their batch group. The layout will be like, + // + // FreeList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB + // + // Each BlockGroup(BG) will associate with unique group id and the free blocks + // are managed by a list of TransferBatch(TB). To reduce the time of inserting + // blocks, BGs are sorted and the input `Array` are supposed to be sorted so + // that we can get better performance of maintaining sorted property. 
+ // Use `SameGroup=true` to indicate that all blocks in the array are from the + // same group then we will skip checking the group id of each block. + // + // The region mutex needs to be held while calling this method. + void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Region->Mutex) { + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroup *BG = nullptr; + TransferBatch *TB = nullptr; + if (ClassId == SizeClassMap::BatchClassId) { + DCHECK_GE(Size, 2U); + + // Free blocks are recorded by TransferBatch in freelist, blocks of + // BatchClassId are included. In order not to use additional memory to + // record blocks of BatchClassId, they are self-contained. I.e., A + // TransferBatch may record the block address of itself. See the figure + // below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // The safeness of manipulating TransferBatch is kept by the invariant, + // + // The unit of each pop-block request is a TransferBatch. Return + // part of the blocks in a TransferBatch is not allowed. + // + // This ensures that TransferBatch won't leak the address itself while + // it's still holding other valid data. + // + // Besides, BatchGroup uses the same size-class as TransferBatch does + // and its address is recorded in the TransferBatch too. To maintain the + // safeness, the invariant to keep is, + // + // The address of itself is always recorded in the last TransferBatch + // of the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the BatchGroup becomes invalid. + // + // As a result, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + BG = reinterpret_cast<BatchGroup *>( + decompactPtr(ClassId, Array[Size - 1])); + BG->Batches.clear(); + + TB = reinterpret_cast<TransferBatch *>( + decompactPtr(ClassId, Array[Size - 2])); + TB->clear(); + + // Append the blocks used by BatchGroup and TransferBatch immediately so + // that we ensure that they are in the last TransBatch. + TB->appendFromArray(Array + Size - 2, 2); + Size -= 2; + } else { + BG = C->createGroup(); + BG->Batches.clear(); + + TB = C->createBatch(ClassId, nullptr); + TB->clear(); + } + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches`. + BG->Batches.push_front(TB); + BG->PushedBlocks = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + TransferBatch::getMaxCached(getSizeByClassId(ClassId)); + + return BG; + }; + + auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList<TransferBatch> &Batches = BG->Batches; + TransferBatch *CurBatch = Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); + u16 UnusedSlots = + static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = C->createBatch( + ClassId, + reinterpret_cast<void *>(decompactPtr(ClassId, Array[I]))); + CurBatch->clear(); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; + } + // `UnusedSlots` is u16 so the result will be also fit in u16. 
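The `InsertBlocks` lambda here is a fixed-capacity chunking loop: keep appending to the current batch and open a fresh one whenever it has no unused slots left. The same pattern in isolation might look like the sketch below (hypothetical `Batch`/`insertBlocks` names; the real batches are allocated from the size-class freelist and pushed at the front of the group's list rather than kept in a std::vector).

#include <cstdio>
#include <vector>

namespace {

constexpr unsigned MaxCachedPerBatch = 4;

struct Batch {
  unsigned Count = 0;
  int Items[MaxCachedPerBatch];

  void appendFromArray(const int *Array, unsigned N) {
    for (unsigned I = 0; I < N; ++I)
      Items[Count + I] = Array[I];
    Count += N;
  }
};

// Append Size items, filling the current batch first and creating a new
// batch only when the current one runs out of unused slots.
void insertBlocks(std::vector<Batch> &Batches, const int *Array, unsigned Size) {
  if (Batches.empty())
    Batches.emplace_back();
  for (unsigned I = 0; I < Size;) {
    unsigned UnusedSlots = MaxCachedPerBatch - Batches.back().Count;
    if (UnusedSlots == 0) {
      Batches.emplace_back();
      UnusedSlots = MaxCachedPerBatch;
    }
    const unsigned AppendSize = UnusedSlots < Size - I ? UnusedSlots : Size - I;
    Batches.back().appendFromArray(Array + I, AppendSize);
    I += AppendSize;
  }
}

} // namespace

int main() {
  std::vector<Batch> Batches;
  const int Blocks[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  insertBlocks(Batches, Blocks, 10);
  std::printf("%zu batches, last holds %u\n", Batches.size(),
              Batches.back().Count); // 3 batches, last holds 2
  return 0;
}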
+ u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + }; + + BatchGroup *Cur = Region->FreeList.front(); + + if (ClassId == SizeClassMap::BatchClassId) { + if (Cur == nullptr) { + // Don't need to classify BatchClassId. + Cur = CreateGroup(/*CompactPtrGroupBase=*/0); + Region->FreeList.push_front(Cur); + } + InsertBlocks(Cur, Array, Size); + return; + } + + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. + BatchGroup *Prev = nullptr; + + while (Cur != nullptr && + compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[0])); + if (Prev == nullptr) + Region->FreeList.push_front(Cur); + else + Region->FreeList.insert(Prev, Cur); + } + + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); + + InsertBlocks(Cur, Array, Size); + return; + } + + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. + u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { + DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[I])); + DCHECK_NE(Prev, nullptr); + Region->FreeList.insert(Prev, Cur); + } + + Count = 1; + } else { + ++Count; + } + } + + InsertBlocks(Cur, Array + Size - Count, Count); + } + + // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest + // group id will be considered first. + // + // The region mutex needs to be held while calling this method. + TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->Mutex) { + if (Region->FreeList.empty()) + return nullptr; + + SinglyLinkedList<TransferBatch> &Batches = + Region->FreeList.front()->Batches; + DCHECK(!Batches.empty()); + + TransferBatch *B = Batches.front(); + Batches.pop_front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + if (Batches.empty()) { + BatchGroup *BG = Region->FreeList.front(); + Region->FreeList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking while + // allocating. Note that block used by constructing BatchGroup is recorded + // as free blocks in the last element of BatchGroup::Batches. Which means, + // once we pop the last TransferBatch, the block is implicitly + // deallocated. 
+ if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } + + return B; + } + + NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->Mutex) { const uptr Size = getSizeByClassId(ClassId); - const u32 MaxCount = TransferBatch::getMaxCached(Size); + const u16 MaxCount = TransferBatch::getMaxCached(Size); const uptr RegionBeg = Region->RegionBeg; const uptr MappedUser = Region->MappedUser; @@ -342,29 +742,32 @@ private: if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. const uptr MapSize = - roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement); + roundUp(TotalUserBytes - MappedUser, MapSizeIncrement); const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { - if (!Region->Exhausted) { - Region->Exhausted = true; - ScopedString Str; - getStats(&Str); - Str.append( - "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", - RegionSize >> 20, Size); - Str.output(); - } - return nullptr; + Region->Exhausted = true; + return false; } - if (MappedUser == 0) - Region->Data = Data; - if (UNLIKELY(!map( - reinterpret_cast<void *>(RegionBeg + MappedUser), MapSize, - "scudo:primary", + // TODO: Consider allocating MemMap in init(). + if (!Region->MemMap.isAllocated()) { + // TODO: Ideally, a region should reserve RegionSize because the memory + // between `RegionBeg` and region base is still belong to a region and + // it's just not used. In order to make it work on every platform (some + // of them don't support `remap()` across the unused range), dispatch + // from `RegionBeg` for now. + const uptr ReserveSize = + RegionSize - (RegionBeg - getRegionBaseByClassId(ClassId)); + Region->MemMap = ReservedMemory.dispatch(RegionBeg, ReserveSize); + } + DCHECK(Region->MemMap.isAllocated()); + + if (UNLIKELY(!Region->MemMap.remap( + RegionBeg + MappedUser, MapSize, "scudo:primary", MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0), - &Region->Data))) - return nullptr; + (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG + : 0)))) { + return false; + } Region->MappedUser += MapSize; C->getStats().add(StatMapped, MapSize); } @@ -383,34 +786,39 @@ private: uptr P = RegionBeg + Region->AllocatedUser; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); - // No need to shuffle the batches size class. 
- if (ClassId != SizeClassMap::BatchClassId) - shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState); - for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = - C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal( - CompactPtrBase, ShuffleArray[I]))); - if (UNLIKELY(!B)) - return nullptr; - const u32 N = Min(MaxCount, NumberOfBlocks - I); - B->setFromArray(&ShuffleArray[I], N); - Region->FreeList.push_back(B); - I += N; - } - TransferBatch *B = Region->FreeList.front(); - Region->FreeList.pop_front(); - DCHECK(B); - DCHECK_GT(B->getCount(), 0); + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroup(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Region->RandState); + pushBlocksImpl(C, ClassId, Region, ShuffleArray + I - N, N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroup(ShuffleArray[I]); + } else { + ++N; + } + } + + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState); + pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBlocksImpl(C, ClassId, Region, ShuffleArray, NumberOfBlocks, + /*SameGroup=*/true); + } const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); Region->AllocatedUser += AllocatedUser; - return B; + return true; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - RegionInfo *Region = getRegionInfo(ClassId); + void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region, uptr Rss) + REQUIRES(Region->Mutex) { if (Region->MappedUser == 0) return; const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; @@ -427,7 +835,8 @@ private: } NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, - bool Force = false) { + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Region->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); @@ -435,53 +844,373 @@ private: const uptr BytesInFreeList = Region->AllocatedUser - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = (Region->Stats.PushedBlocks - - Region->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. + if (UNLIKELY(BytesInFreeList == 0)) + return 0; + + bool MaySkip = false; + + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). 
+ if (BytesInFreeList <= + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + MaySkip = true; + } + + const uptr RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (RegionPushedBytesDelta < PageSize) + MaySkip = true; + + const bool CheckDensity = isSmallBlock(BlockSize); // Releasing smaller blocks is expensive, so we want to make sure that a // significant amount of bytes are free, and that there has been a good // amount of batches pushed to the freelist before attempting to release. - if (BlockSize < PageSize / 16U) { - if (!Force && BytesPushed < Region->AllocatedUser / 16U) - return 0; - // We want 8x% to 9x% free bytes (the larger the block, the lower the %). - if ((BytesInFreeList * 100U) / Region->AllocatedUser < - (100U - 1U - BlockSize / 16U)) - return 0; + if (CheckDensity) { + if (ReleaseType == ReleaseToOS::Normal && + RegionPushedBytesDelta < Region->TryReleaseThreshold) { + MaySkip = true; + } } - if (!Force) { + if (MaySkip && ReleaseType != ReleaseToOS::ForceAll) + return 0; + + if (ReleaseType == ReleaseToOS::Normal) { const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) return 0; if (Region->ReleaseInfo.LastReleaseAtNs + static_cast<u64>(IntervalMs) * 1000000 > - getMonotonicTime()) { + getMonotonicTimeFast()) { return 0; // Memory was returned recently. } } - ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); + const uptr GroupSize = (1U << GroupSizeLog); + const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg; const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { return decompactPtrInternal(CompactPtrBase, CompactPtr); }; + + // Instead of always preparing PageMap for the entire region, we only do it + // for the range of releasing groups. To do that, the free-block marking + // process includes visiting BlockGroups twice. + + // The first visit is to determine the range of BatchGroups we are going to + // release. And we will extract those BatchGroups out and push into + // `GroupToRelease`. + SinglyLinkedList<BatchGroup> GroupToRelease; + GroupToRelease.clear(); + + // This is only used for debugging to ensure the consistency of the number + // of groups. + uptr NumberOfBatchGroups = Region->FreeList.size(); + + // We are examining each group and will take the minimum distance to the + // release threshold as the next Region::TryReleaseThreshold(). Note that if + // the size of free blocks has reached the release threshold, the distance + // to the next release will be PageSize * SmallerBlockReleasePageDelta. See + // the comment on `SmallerBlockReleasePageDelta` for more details. + uptr MinDistToThreshold = GroupSize; + + for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr; + BG != nullptr;) { + // Group boundary is always GroupSize-aligned from CompactPtr base. The + // layout of memory groups is like, + // + // (CompactPtrBase) + // #1 CompactPtrGroupBase #2 CompactPtrGroupBase ... + // | | | + // v v v + // +-----------------------+-----------------------+ + // \ / \ / + // --- GroupSize --- --- GroupSize --- + // + // After decompacting the CompactPtrGroupBase, we expect the alignment + // property is held as well. 
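// Editorial sketch (not part of the patch): the gating logic above decides
// whether a normal (non-forced) release attempt is worth doing by tracking how
// many bytes have been pushed to the freelist since the last checkpoint. A
// compact standalone restatement of that decision, names invented for
// illustration:
#include <cstdint>

namespace sketch_release_gate {
using uptr = uintptr_t;

struct ReleaseState {
  uptr BytesInFreeListAtLastCheckpoint = 0;
  uptr TryReleaseThreshold = 0; // Only consulted for small blocks.
};

bool worthReleasing(ReleaseState &S, uptr BytesInFreeList, uptr PageSize,
                    bool SmallBlock) {
  if (BytesInFreeList == 0)
    return false;
  // Keep the checkpoint at the smallest observed value so the next delta is
  // not underestimated.
  if (BytesInFreeList <= S.BytesInFreeListAtLastCheckpoint) {
    S.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
    return false;
  }
  const uptr PushedBytesDelta =
      BytesInFreeList - S.BytesInFreeListAtLastCheckpoint;
  if (PushedBytesDelta < PageSize)
    return false; // Less than a page freed since the last release.
  // Small blocks are expensive to release, so additionally require the
  // adaptive per-region threshold to be met.
  if (SmallBlock && PushedBytesDelta < S.TryReleaseThreshold)
    return false;
  return true;
}
} // namespace sketch_release_gate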
+ const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase); + DCHECK_LE(Region->RegionBeg, BatchGroupBase); + DCHECK_GE(AllocatedUserEnd, BatchGroupBase); + DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U); + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? GroupSize + : AllocatedUserEnd - BatchGroupBase; + if (AllocatedGroupSize == 0) { + Prev = BG; + BG = BG->Next; + continue; + } + + // TransferBatches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch + + BG->Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (ReleaseType != ReleaseToOS::ForceAll && + BytesInBG <= BG->BytesInBGAtLastCheckpoint) { + BG->BytesInBGAtLastCheckpoint = BytesInBG; + Prev = BG; + BG = BG->Next; + continue; + } + + const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG; + + // Given the randomness property, we try to release the pages only if the + // bytes used by free blocks exceed certain proportion of group size. Note + // that this heuristic only applies when all the spaces in a BatchGroup + // are allocated. + if (CheckDensity) { + const uptr ReleaseThreshold = + (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; + const bool HighDensity = BytesInBG >= ReleaseThreshold; + const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); + // If all blocks in the group are released, we will do range marking + // which is fast. Otherwise, we will wait until we have accumulated + // a certain amount of free memory. + const bool ReachReleaseDelta = + MayHaveReleasedAll + ? true + : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; + + if (!HighDensity) { + DCHECK_LE(BytesInBG, ReleaseThreshold); + // The following is the usage of a memroy group, + // + // BytesInBG ReleaseThreshold + // / \ v + // +---+---------------------------+-----+ + // | | | | | + // +---+---------------------------+-----+ + // \ / ^ + // PushedBytesDelta GroupEnd + MinDistToThreshold = + Min(MinDistToThreshold, + ReleaseThreshold - BytesInBG + PushedBytesDelta); + } else { + // If it reaches high density at this round, the next time we will try + // to release is based on SmallerBlockReleasePageDelta + MinDistToThreshold = + Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); + } + + if (!HighDensity || !ReachReleaseDelta) { + Prev = BG; + BG = BG->Next; + continue; + } + } + + // If `BG` is the first BatchGroup in the list, we only need to advance + // `BG` and call FreeList::pop_front(). No update is needed for `Prev`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // nil +--+ +--+ + // |X | -> | | -> ... + // +--+ +--+ + // + // Otherwise, `Prev` will be used to extract the `Cur` from the + // `FreeList`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | | -> |X | -> | | -> ... + // +--+ +--+ +--+ + // + // After FreeList::extract(), + // + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | |-+ |X | +->| | -> ... + // +--+ | +--+ | +--+ + // +--------+ + // + // Note that we need to advance before pushing this BatchGroup to + // GroupToRelease because it's a destructive operation. + + BatchGroup *Cur = BG; + BG = BG->Next; + + // Ideally, we may want to update this only after successful release. 
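// Editorial sketch (not part of the patch): for small blocks, a group is only
// released when the bytes sitting in its freelist reach a density threshold
// that scales with the block size (larger blocks tolerate a slightly lower
// free percentage). A standalone restatement of that predicate, names invented
// for illustration:
#include <cstdint>

namespace sketch_group_density {
using uptr = uintptr_t;

bool groupReadyForRelease(uptr BytesInGroupFreeList, uptr AllocatedGroupSize,
                          uptr PushedBytesDelta, uptr NumFreeBlocks,
                          uptr GroupSize, uptr BlockSize, uptr PageSize,
                          uptr SmallerBlockReleasePageDelta) {
  // Mirror of (100 - 1 - BlockSize / 16) percent of the allocated group size.
  const uptr ReleaseThreshold =
      (AllocatedGroupSize * (100 - 1 - BlockSize / 16)) / 100;
  const bool HighDensity = BytesInGroupFreeList >= ReleaseThreshold;
  // If every block of the group may be free, range marking is cheap, so do
  // not wait for more pushed bytes.
  const bool MayHaveReleasedAll = NumFreeBlocks >= GroupSize / BlockSize;
  const bool ReachReleaseDelta =
      MayHaveReleasedAll ||
      PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;
  return HighDensity && ReachReleaseDelta;
}
} // namespace sketch_group_density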
+ // However, for smaller blocks, each block marking is a costly operation. + // Therefore, we update it earlier. + // TODO: Consider updating this after page release if `ReleaseRecorder` + // can tell the releasd bytes in each group. + Cur->BytesInBGAtLastCheckpoint = BytesInBG; + + if (Prev != nullptr) + Region->FreeList.extract(Prev, Cur); + else + Region->FreeList.pop_front(); + GroupToRelease.push_back(Cur); + } + + // Only small blocks have the adaptive `TryReleaseThreshold`. + if (isSmallBlock(BlockSize)) { + // If the MinDistToThreshold is not updated, that means each memory group + // may have only pushed less than a page size. In that case, just set it + // back to normal. + if (MinDistToThreshold == GroupSize) + MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; + Region->TryReleaseThreshold = MinDistToThreshold; + } + + if (GroupToRelease.empty()) + return 0; + + const uptr ReleaseBase = decompactGroupBase( + CompactPtrBase, GroupToRelease.front()->CompactPtrGroupBase); + const uptr LastGroupEnd = + Min(decompactGroupBase(CompactPtrBase, + GroupToRelease.back()->CompactPtrGroupBase) + + GroupSize, + AllocatedUserEnd); + // The last block may straddle the group boundary. Rounding up to BlockSize + // to get the exact range. + const uptr ReleaseEnd = + roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) + + Region->RegionBeg; + const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase; + const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg; + + RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMap, Region->RegionBeg, + ReleaseOffset); + PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + ReleaseRangeSize, ReleaseOffset); + // We may not be able to do the page release in a rare case that we may + // fail on PageMap allocation. + if (UNLIKELY(!Context.ensurePageMapAllocated())) + return 0; + + for (BatchGroup &BG : GroupToRelease) { + const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase); + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? GroupSize + : AllocatedUserEnd - BatchGroupBase; + const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize; + const bool MayContainLastBlockInRegion = + BatchGroupUsedEnd == AllocatedUserEnd; + const bool BlockAlignedWithUsedEnd = + (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0; + + uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; + if (!BlockAlignedWithUsedEnd) + ++MaxContainedBlocks; + + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + + if (NumBlocks == MaxContainedBlocks) { + for (const auto &It : BG.Batches) + for (u16 I = 0; I < It.getCount(); ++I) + DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase); + + Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd, + Region->RegionBeg, /*RegionIndex=*/0, + Region->AllocatedUser); + } else { + DCHECK_LT(NumBlocks, MaxContainedBlocks); + // Note that we don't always visit blocks in each BatchGroup so that we + // may miss the chance of releasing certain pages that cross + // BatchGroups. 
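// Editorial sketch (not part of the patch): once the groups to release have
// been extracted, each one is marked either with the cheap range marking (when
// every block the group can contain is known to be free) or with per-block
// marking. The capacity check has to account for a last block that straddles
// the group's used end. A standalone restatement with invented names:
#include <cstdint>

namespace sketch_mark_choice {
using uptr = uintptr_t;

// Returns true if range marking can be used for the whole group.
bool canMarkWholeGroup(uptr AllocatedGroupSize, uptr GroupUsedEndOffset,
                       uptr BlockSize, uptr NumFreeBlocksInGroup) {
  uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
  // If the used end is not block-aligned, one more block overlaps this group.
  if (GroupUsedEndOffset % BlockSize != 0)
    ++MaxContainedBlocks;
  return NumFreeBlocksInGroup == MaxContainedBlocks;
}
} // namespace sketch_mark_choice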
+ Context.markFreeBlocksInRegion( + BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0, + Region->AllocatedUser, MayContainLastBlockInRegion); + } + } + + DCHECK(Context.hasBlockMarked()); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; - releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U, - BlockSize, &Recorder, DecompactPtr, SkipRegion); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); if (Recorder.getReleasedRangesCount() > 0) { - Region->ReleaseInfo.PushedBlocksAtLastRelease = - Region->Stats.PushedBlocks; + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); } - Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + + // Merge GroupToRelease back to the Region::FreeList. Note that both + // `Region->FreeList` and `GroupToRelease` are sorted. + for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr;;) { + if (BG == nullptr || GroupToRelease.empty()) { + if (!GroupToRelease.empty()) + Region->FreeList.append_back(&GroupToRelease); + break; + } + + DCHECK_NE(BG->CompactPtrGroupBase, + GroupToRelease.front()->CompactPtrGroupBase); + + if (BG->CompactPtrGroupBase < + GroupToRelease.front()->CompactPtrGroupBase) { + Prev = BG; + BG = BG->Next; + continue; + } + + // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase + // larger than the first element in `GroupToRelease`. We need to insert + // `GroupToRelease::front()` (which is `Cur` below) before `BG`. + // + // 1. If `Prev` is nullptr, we simply push `Cur` to the front of + // FreeList. + // 2. Otherwise, use `insert()` which inserts an element next to `Prev`. + // + // Afterwards, we don't need to advance `BG` because the order between + // `BG` and the new `GroupToRelease::front()` hasn't been checked. + BatchGroup *Cur = GroupToRelease.front(); + GroupToRelease.pop_front(); + if (Prev == nullptr) + Region->FreeList.push_front(Cur); + else + Region->FreeList.insert(Prev, Cur); + DCHECK_EQ(Cur->Next, BG); + Prev = Cur; + } + + DCHECK_EQ(Region->FreeList.size(), NumberOfBatchGroups); + (void)NumberOfBatchGroups; + + if (SCUDO_DEBUG) { + BatchGroup *Prev = Region->FreeList.front(); + for (BatchGroup *Cur = Prev->Next; Cur != nullptr; + Prev = Cur, Cur = Cur->Next) { + CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase); + } + } + return Recorder.getReleasedBytes(); } }; diff --git a/standalone/quarantine.h b/standalone/quarantine.h index 2d231c3a28d..b5f8db0e87c 100644 --- a/standalone/quarantine.h +++ b/standalone/quarantine.h @@ -12,6 +12,7 @@ #include "list.h" #include "mutex.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -172,7 +173,7 @@ public: typedef QuarantineCache<Callback> CacheT; using ThisT = GlobalQuarantine<Callback, Node>; - void init(uptr Size, uptr CacheSize) { + void init(uptr Size, uptr CacheSize) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); DCHECK_EQ(atomic_load_relaxed(&MaxSize), 0U); DCHECK_EQ(atomic_load_relaxed(&MinSize), 0U); @@ -191,22 +192,31 @@ public: uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); } uptr getCacheSize() const { return atomic_load_relaxed(&MaxCacheSize); } + // This is supposed to be used in test only. 
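// Editorial sketch (not part of the patch): the merge loop above splices the
// released groups back into the region freelist while keeping both lists
// ordered by group base. The same idea expressed on std::list purely for
// illustration (scudo uses its own intrusive SinglyLinkedList with
// insert()/extract() primitives instead):
#include <cstdint>
#include <list>

namespace sketch_merge_groups {
// Merge Src into Dst; both are sorted ascending and share no keys.
void mergeSorted(std::list<uint64_t> &Dst, std::list<uint64_t> &Src) {
  auto It = Dst.begin();
  while (!Src.empty()) {
    // Advance to the first element of Dst larger than Src.front().
    while (It != Dst.end() && *It < Src.front())
      ++It;
    // Insert before It. Do not advance past It afterwards: the order between
    // It and the next element of Src has not been checked yet.
    Dst.insert(It, Src.front());
    Src.pop_front();
  }
}
} // namespace sketch_merge_groups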
+ bool isEmpty() { + ScopedLock L(CacheMutex); + return Cache.getSize() == 0U; + } + void put(CacheT *C, Callback Cb, Node *Ptr, uptr Size) { C->enqueue(Cb, Ptr, Size); if (C->getSize() > getCacheSize()) drain(C, Cb); } - void NOINLINE drain(CacheT *C, Callback Cb) { + void NOINLINE drain(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { + bool needRecycle = false; { ScopedLock L(CacheMutex); Cache.transfer(C); + needRecycle = Cache.getSize() > getMaxSize(); } - if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock()) + + if (needRecycle && RecycleMutex.tryLock()) recycle(atomic_load_relaxed(&MinSize), Cb); } - void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) { + void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { { ScopedLock L(CacheMutex); Cache.transfer(C); @@ -215,20 +225,21 @@ public: recycle(0, Cb); } - void getStats(ScopedString *Str) const { + void getStats(ScopedString *Str) EXCLUDES(CacheMutex) { + ScopedLock L(CacheMutex); // It assumes that the world is stopped, just as the allocator's printStats. Cache.getStats(Str); Str->append("Quarantine limits: global: %zuK; thread local: %zuK\n", getMaxSize() >> 10, getCacheSize() >> 10); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // RecycleMutex must be locked 1st since we grab CacheMutex within recycle. RecycleMutex.lock(); CacheMutex.lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { CacheMutex.unlock(); RecycleMutex.unlock(); } @@ -236,13 +247,14 @@ public: private: // Read-only data. alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; - CacheT Cache; + CacheT Cache GUARDED_BY(CacheMutex); alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; atomic_uptr MinSize = {}; atomic_uptr MaxSize = {}; alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; - void NOINLINE recycle(uptr MinSize, Callback Cb) { + void NOINLINE recycle(uptr MinSize, Callback Cb) RELEASE(RecycleMutex) + EXCLUDES(CacheMutex) { CacheT Tmp; Tmp.init(); { diff --git a/standalone/release.cpp b/standalone/release.cpp index 5d7c6c5fc11..938bb41faf6 100644 --- a/standalone/release.cpp +++ b/standalone/release.cpp @@ -10,7 +10,7 @@ namespace scudo { -HybridMutex PackedCounterArray::Mutex = {}; -uptr PackedCounterArray::StaticBuffer[PackedCounterArray::StaticBufferCount]; +BufferPool<RegionPageMap::StaticBufferCount, RegionPageMap::StaticBufferSize> + RegionPageMap::Buffers; } // namespace scudo diff --git a/standalone/release.h b/standalone/release.h index 293a8bc27ba..9ffc88df4f3 100644 --- a/standalone/release.h +++ b/standalone/release.h @@ -11,14 +11,46 @@ #include "common.h" #include "list.h" +#include "mem_map.h" #include "mutex.h" +#include "thread_annotations.h" namespace scudo { +template <typename MemMapT> class RegionReleaseRecorder { +public: + RegionReleaseRecorder(MemMapT *RegionMemMap, uptr Base, uptr Offset = 0) + : RegionMemMap(RegionMemMap), Base(Base), Offset(Offset) {} + + uptr getReleasedRangesCount() const { return ReleasedRangesCount; } + + uptr getReleasedBytes() const { return ReleasedBytes; } + + uptr getBase() const { return Base; } + + // Releases [From, To) range of pages back to OS. Note that `From` and `To` + // are offseted from `Base` + Offset. 
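// Editorial sketch (not part of the patch): drain() above now reads the
// "do we need to recycle" condition while CacheMutex is still held, and only
// performs the recycling after the lock has been dropped, which keeps all
// accesses to the guarded cache inside the critical section. The shape of that
// pattern with standard primitives, names invented for illustration:
#include <cstddef>
#include <mutex>

namespace sketch_drain {
struct Quarantine {
  std::mutex CacheMutex;
  size_t CacheSize = 0;  // Stands in for the guarded Cache.getSize().
  size_t MaxSize = 1024;

  void recycle() { /* would drain the oldest entries under its own mutex */ }

  void transferIn(size_t N) {
    bool NeedRecycle = false;
    {
      std::lock_guard<std::mutex> L(CacheMutex);
      CacheSize += N;                    // Guarded state touched under the lock.
      NeedRecycle = CacheSize > MaxSize; // Decision made under the lock...
    }
    if (NeedRecycle)                     // ...acted on outside of it.
      recycle();
  }
};
} // namespace sketch_drain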
+ void releasePageRangeToOS(uptr From, uptr To) { + const uptr Size = To - From; + RegionMemMap->releasePagesToOS(getBase() + Offset + From, Size); + ReleasedRangesCount++; + ReleasedBytes += Size; + } + +private: + uptr ReleasedRangesCount = 0; + uptr ReleasedBytes = 0; + MemMapT *RegionMemMap = nullptr; + uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. + uptr Offset = 0; +}; + class ReleaseRecorder { public: - ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr) - : Base(Base), Data(Data) {} + ReleaseRecorder(uptr Base, uptr Offset = 0, MapPlatformData *Data = nullptr) + : Base(Base), Offset(Offset), Data(Data) {} uptr getReleasedRangesCount() const { return ReleasedRangesCount; } @@ -29,7 +61,7 @@ public: // Releases [From, To) range of pages back to OS. void releasePageRangeToOS(uptr From, uptr To) { const uptr Size = To - From; - releasePagesToOS(Base, From, Size, Data); + releasePagesToOS(Base, From + Offset, Size, Data); ReleasedRangesCount++; ReleasedBytes += Size; } @@ -37,31 +69,158 @@ public: private: uptr ReleasedRangesCount = 0; uptr ReleasedBytes = 0; + // The starting address to release. Note that we may want to combine (Base + + // Offset) as a new Base. However, the Base is retrieved from + // `MapPlatformData` on Fuchsia, which means the offset won't be aware. + // Therefore, store them separately to make it work on all the platforms. uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. + uptr Offset = 0; MapPlatformData *Data = nullptr; }; -// A packed array of Counters. Each counter occupies 2^N bits, enough to store -// counter's MaxValue. Ctor will try to use a static buffer first, and if that -// fails (the buffer is too small or already locked), will allocate the +// A buffer pool which holds a fixed number of static buffers for fast buffer +// allocation. If the request size is greater than `StaticBufferSize`, it'll +// delegate the allocation to map(). +template <uptr StaticBufferCount, uptr StaticBufferSize> class BufferPool { +public: + // Preserve 1 bit in the `Mask` so that we don't need to do zero-check while + // extracting the least significant bit from the `Mask`. + static_assert(StaticBufferCount < SCUDO_WORDSIZE, ""); + static_assert(isAligned(StaticBufferSize, SCUDO_CACHE_LINE_SIZE), ""); + + // Return a buffer which is at least `BufferSize`. + uptr *getBuffer(const uptr BufferSize) { + if (UNLIKELY(BufferSize > StaticBufferSize)) + return getDynamicBuffer(BufferSize); + + uptr index; + { + // TODO: In general, we expect this operation should be fast so the + // waiting thread won't be put into sleep. The HybridMutex does implement + // the busy-waiting but we may want to review the performance and see if + // we need an explict spin lock here. 
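// Editorial sketch (not part of the patch): BufferPool tracks which of its
// static buffers are free with a single word-sized bitmask; a set bit means
// "slot free". Allocation takes the lowest set bit, release sets it back. A
// standalone model of the bit bookkeeping (without the locking and without the
// dynamic-buffer fallback), names invented for illustration:
#include <cstdint>

namespace sketch_buffer_mask {
using uptr = uintptr_t;
constexpr uptr SlotCount = 8; // Must stay below the bit width of uptr.

struct SlotMask {
  uptr Mask = ~static_cast<uptr>(0); // All slots start out free.

  // Returns a slot index in [0, SlotCount), or SlotCount if none is free.
  uptr acquire() {
    for (uptr I = 0; I < SlotCount; ++I) {
      const uptr Bit = static_cast<uptr>(1) << I;
      if (Mask & Bit) {
        Mask ^= Bit; // Mark the slot as in use.
        return I;
      }
    }
    return SlotCount;
  }

  void release(uptr I) { Mask |= static_cast<uptr>(1) << I; }
};
} // namespace sketch_buffer_mask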
+ ScopedLock L(Mutex); + index = getLeastSignificantSetBitIndex(Mask); + if (index < StaticBufferCount) + Mask ^= static_cast<uptr>(1) << index; + } + + if (index >= StaticBufferCount) + return getDynamicBuffer(BufferSize); + + const uptr Offset = index * StaticBufferSize; + memset(&RawBuffer[Offset], 0, StaticBufferSize); + return &RawBuffer[Offset]; + } + + void releaseBuffer(uptr *Buffer, const uptr BufferSize) { + const uptr index = getStaticBufferIndex(Buffer, BufferSize); + if (index < StaticBufferCount) { + ScopedLock L(Mutex); + DCHECK_EQ((Mask & (static_cast<uptr>(1) << index)), 0U); + Mask |= static_cast<uptr>(1) << index; + } else { + unmap(reinterpret_cast<void *>(Buffer), + roundUp(BufferSize, getPageSizeCached())); + } + } + + bool isStaticBufferTestOnly(uptr *Buffer, uptr BufferSize) { + return getStaticBufferIndex(Buffer, BufferSize) < StaticBufferCount; + } + +private: + uptr getStaticBufferIndex(uptr *Buffer, uptr BufferSize) { + if (UNLIKELY(BufferSize > StaticBufferSize)) + return StaticBufferCount; + + const uptr BufferBase = reinterpret_cast<uptr>(Buffer); + const uptr RawBufferBase = reinterpret_cast<uptr>(RawBuffer); + + if (BufferBase < RawBufferBase || + BufferBase >= RawBufferBase + sizeof(RawBuffer)) { + return StaticBufferCount; + } + + DCHECK_LE(BufferSize, StaticBufferSize); + DCHECK_LE(BufferBase + BufferSize, RawBufferBase + sizeof(RawBuffer)); + DCHECK_EQ((BufferBase - RawBufferBase) % StaticBufferSize, 0U); + + const uptr index = + (BufferBase - RawBufferBase) / (StaticBufferSize * sizeof(uptr)); + DCHECK_LT(index, StaticBufferCount); + return index; + } + + uptr *getDynamicBuffer(const uptr BufferSize) { + // When using a heap-based buffer, precommit the pages backing the + // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization + // where page fault exceptions are skipped as the allocated memory + // is accessed. So far, this is only enabled on Fuchsia. It hasn't proven a + // performance benefit on other platforms. + const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0); + return reinterpret_cast<uptr *>( + map(nullptr, roundUp(BufferSize, getPageSizeCached()), "scudo:counters", + MmapFlags, &MapData)); + } + + HybridMutex Mutex; + // '1' means that buffer index is not used. '0' means the buffer is in use. + uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0); + uptr RawBuffer[StaticBufferCount * StaticBufferSize] GUARDED_BY(Mutex); + [[no_unique_address]] MapPlatformData MapData = {}; +}; + +// A Region page map is used to record the usage of pages in the regions. It +// implements a packed array of Counters. Each counter occupies 2^N bits, enough +// to store counter's MaxValue. Ctor will try to use a static buffer first, and +// if that fails (the buffer is too small or already locked), will allocate the // required Buffer via map(). The caller is expected to check whether the // initialization was successful by checking isAllocated() result. For // performance sake, none of the accessors check the validity of the arguments, // It is assumed that Index is always in [0, N) range and the value is not // incremented past MaxValue. 
-class PackedCounterArray { +class RegionPageMap { public: - PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion, - uptr MaxValue) - : Regions(NumberOfRegions), NumCounters(CountersPerRegion) { - DCHECK_GT(Regions, 0); - DCHECK_GT(NumCounters, 0); + RegionPageMap() + : Regions(0), + NumCounters(0), + CounterSizeBitsLog(0), + CounterMask(0), + PackingRatioLog(0), + BitOffsetMask(0), + SizePerRegion(0), + BufferSize(0), + Buffer(nullptr) {} + RegionPageMap(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) { + reset(NumberOfRegions, CountersPerRegion, MaxValue); + } + ~RegionPageMap() { + if (!isAllocated()) + return; + Buffers.releaseBuffer(Buffer, BufferSize); + Buffer = nullptr; + } + + // Lock of `StaticBuffer` is acquired conditionally and there's no easy way to + // specify the thread-safety attribute properly in current code structure. + // Besides, it's the only place we may want to check thread safety. Therefore, + // it's fine to bypass the thread-safety analysis now. + void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) { + DCHECK_GT(NumberOfRegion, 0); + DCHECK_GT(CountersPerRegion, 0); DCHECK_GT(MaxValue, 0); + + Regions = NumberOfRegion; + NumCounters = CountersPerRegion; + constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; // Rounding counter storage size up to the power of two allows for using // bit shifts calculating particular counter's Index and offset. const uptr CounterSizeBits = - roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); + roundUpPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); DCHECK_LE(CounterSizeBits, MaxCounterBits); CounterSizeBitsLog = getLog2(CounterSizeBits); CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits); @@ -72,27 +231,10 @@ public: BitOffsetMask = PackingRatio - 1; SizePerRegion = - roundUpTo(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >> + roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >> PackingRatioLog; BufferSize = SizePerRegion * sizeof(*Buffer) * Regions; - if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) && - Mutex.tryLock()) { - Buffer = &StaticBuffer[0]; - memset(Buffer, 0, BufferSize); - } else { - Buffer = reinterpret_cast<uptr *>( - map(nullptr, roundUpTo(BufferSize, getPageSizeCached()), - "scudo:counters", MAP_ALLOWNOMEM)); - } - } - ~PackedCounterArray() { - if (!isAllocated()) - return; - if (Buffer == &StaticBuffer[0]) - Mutex.unlock(); - else - unmap(reinterpret_cast<void *>(Buffer), - roundUpTo(BufferSize, getPageSizeCached())); + Buffer = Buffers.getBuffer(BufferSize); } bool isAllocated() const { return !!Buffer; } @@ -112,10 +254,22 @@ public: const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); + DCHECK_EQ(isAllCounted(Region, I), false); Buffer[Region * SizePerRegion + Index] += static_cast<uptr>(1U) << BitOffset; } + void incN(uptr Region, uptr I, uptr N) const { + DCHECK_GT(N, 0U); + DCHECK_LE(N, CounterMask); + DCHECK_LE(get(Region, I), CounterMask - N); + const uptr Index = I >> PackingRatioLog; + const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; + DCHECK_LT(BitOffset, SCUDO_WORDSIZE); + DCHECK_EQ(isAllCounted(Region, I), false); + Buffer[Region * SizePerRegion + Index] += N << BitOffset; + } + void incRange(uptr Region, uptr From, uptr To) const { DCHECK_LE(From, To); const uptr Top = Min(To + 1, NumCounters); @@ -123,13 +277,43 @@ public: inc(Region, I); } - uptr getBufferSize() 
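// Editorial sketch (not part of the patch): RegionPageMap packs several small
// per-page counters into each word of its buffer. The counter width is the
// number of bits needed for the maximum value, rounded up to a power of two,
// so index and bit offset can be computed with shifts and masks. A standalone
// model of the get/inc arithmetic (using the clang/gcc __builtin_ctzll to get
// the log2 of a power of two), names invented for illustration:
#include <cstdint>
#include <vector>

namespace sketch_packed_counters {
using uptr = uintptr_t;

struct PackedCounters {
  uptr CounterBitsLog;  // log2 of the counter width in bits.
  uptr CounterMask;     // Largest value a single counter can hold.
  uptr PackingRatioLog; // log2 of the number of counters per word.
  uptr BitOffsetMask;   // Counter position within a word.
  std::vector<uptr> Buffer;

  // CounterBits is assumed to already be a power of two here.
  PackedCounters(uptr NumCounters, uptr CounterBits) {
    CounterBitsLog = static_cast<uptr>(__builtin_ctzll(CounterBits));
    CounterMask = (CounterBits == sizeof(uptr) * 8)
                      ? ~static_cast<uptr>(0)
                      : (static_cast<uptr>(1) << CounterBits) - 1;
    PackingRatioLog =
        static_cast<uptr>(__builtin_ctzll(sizeof(uptr) * 8 / CounterBits));
    BitOffsetMask = (static_cast<uptr>(1) << PackingRatioLog) - 1;
    Buffer.resize((NumCounters >> PackingRatioLog) + 1, 0);
  }

  uptr get(uptr I) const {
    const uptr Index = I >> PackingRatioLog;
    const uptr BitOffset = (I & BitOffsetMask) << CounterBitsLog;
    return (Buffer[Index] >> BitOffset) & CounterMask;
  }

  void inc(uptr I) {
    const uptr Index = I >> PackingRatioLog;
    const uptr BitOffset = (I & BitOffsetMask) << CounterBitsLog;
    Buffer[Index] += static_cast<uptr>(1) << BitOffset; // No overflow checks.
  }
};
} // namespace sketch_packed_counters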
const { return BufferSize; } + // Set the counter to the max value. Note that the max number of blocks in a + // page may vary. To provide an easier way to tell if all the blocks are + // counted for different pages, set to the same max value to denote the + // all-counted status. + void setAsAllCounted(uptr Region, uptr I) const { + DCHECK_LE(get(Region, I), CounterMask); + const uptr Index = I >> PackingRatioLog; + const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; + DCHECK_LT(BitOffset, SCUDO_WORDSIZE); + Buffer[Region * SizePerRegion + Index] |= CounterMask << BitOffset; + } + void setAsAllCountedRange(uptr Region, uptr From, uptr To) const { + DCHECK_LE(From, To); + const uptr Top = Min(To + 1, NumCounters); + for (uptr I = From; I < Top; I++) + setAsAllCounted(Region, I); + } - static const uptr StaticBufferCount = 2048U; + bool updateAsAllCountedIf(uptr Region, uptr I, uptr MaxCount) { + const uptr Count = get(Region, I); + if (Count == CounterMask) + return true; + if (Count == MaxCount) { + setAsAllCounted(Region, I); + return true; + } + return false; + } + bool isAllCounted(uptr Region, uptr I) const { + return get(Region, I) == CounterMask; + } + + uptr getBufferSize() const { return BufferSize; } private: - const uptr Regions; - const uptr NumCounters; + uptr Regions; + uptr NumCounters; uptr CounterSizeBitsLog; uptr CounterMask; uptr PackingRatioLog; @@ -139,17 +323,20 @@ private: uptr BufferSize; uptr *Buffer; - static HybridMutex Mutex; - static uptr StaticBuffer[StaticBufferCount]; + // We may consider making this configurable if there are cases which may + // benefit from this. + static const uptr StaticBufferCount = 2U; + static const uptr StaticBufferSize = 512U; + static BufferPool<StaticBufferCount, StaticBufferSize> Buffers; }; template <class ReleaseRecorderT> class FreePagesRangeTracker { public: - explicit FreePagesRangeTracker(ReleaseRecorderT *Recorder) + explicit FreePagesRangeTracker(ReleaseRecorderT &Recorder) : Recorder(Recorder), PageSizeLog(getLog2(getPageSizeCached())) {} - void processNextPage(bool Freed) { - if (Freed) { + void processNextPage(bool Released) { + if (Released) { if (!InRange) { CurrentRangeStatePage = CurrentPage; InRange = true; @@ -170,113 +357,270 @@ public: private: void closeOpenedRange() { if (InRange) { - Recorder->releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog), - (CurrentPage << PageSizeLog)); + Recorder.releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog), + (CurrentPage << PageSizeLog)); InRange = false; } } - ReleaseRecorderT *const Recorder; + ReleaseRecorderT &Recorder; const uptr PageSizeLog; bool InRange = false; uptr CurrentPage = 0; uptr CurrentRangeStatePage = 0; }; -template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT, - typename SkipRegionT> -NOINLINE void -releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, - uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, - ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr, - SkipRegionT SkipRegion) { - const uptr PageSize = getPageSizeCached(); - - // Figure out the number of chunks per page and whether we can take a fast - // path (the number of chunks per page is the same for all pages). - uptr FullPagesBlockCountMax; - bool SameBlockCountPerPage; - if (BlockSize <= PageSize) { - if (PageSize % BlockSize == 0) { - // Same number of chunks per page, no cross overs. 
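// Editorial sketch (not part of the patch): the PageReleaseContext constructor
// classifies the block-size/page-size relationship once, so the release loop
// knows the maximum number of blocks that can land on a page and whether that
// count is the same for every page. A standalone restatement of that
// classification, names invented for illustration:
#include <cstdint>

namespace sketch_blocks_per_page {
using uptr = uintptr_t;

struct PageBlockInfo {
  uptr FullPagesBlockCountMax;
  bool SameBlockCountPerPage;
};

PageBlockInfo classify(uptr BlockSize, uptr PageSize) {
  if (BlockSize <= PageSize) {
    if (PageSize % BlockSize == 0)
      return {PageSize / BlockSize, true};     // Blocks tile pages exactly.
    if (BlockSize % (PageSize % BlockSize) == 0)
      return {PageSize / BlockSize + 1, true}; // Partial blocks, same count.
    return {PageSize / BlockSize + 2, false};  // Counts vary per page.
  }
  if (BlockSize % PageSize == 0)
    return {1, true};                          // One block spans whole pages.
  return {2, false};                           // One or two blocks per page.
}
} // namespace sketch_blocks_per_page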
- FullPagesBlockCountMax = PageSize / BlockSize; - SameBlockCountPerPage = true; - } else if (BlockSize % (PageSize % BlockSize) == 0) { - // Some chunks are crossing page boundaries, which means that the page - // contains one or two partial chunks, but all pages contain the same - // number of chunks. - FullPagesBlockCountMax = PageSize / BlockSize + 1; - SameBlockCountPerPage = true; +struct PageReleaseContext { + PageReleaseContext(uptr BlockSize, uptr NumberOfRegions, uptr ReleaseSize, + uptr ReleaseOffset = 0) + : BlockSize(BlockSize), NumberOfRegions(NumberOfRegions) { + PageSize = getPageSizeCached(); + if (BlockSize <= PageSize) { + if (PageSize % BlockSize == 0) { + // Same number of chunks per page, no cross overs. + FullPagesBlockCountMax = PageSize / BlockSize; + SameBlockCountPerPage = true; + } else if (BlockSize % (PageSize % BlockSize) == 0) { + // Some chunks are crossing page boundaries, which means that the page + // contains one or two partial chunks, but all pages contain the same + // number of chunks. + FullPagesBlockCountMax = PageSize / BlockSize + 1; + SameBlockCountPerPage = true; + } else { + // Some chunks are crossing page boundaries, which means that the page + // contains one or two partial chunks. + FullPagesBlockCountMax = PageSize / BlockSize + 2; + SameBlockCountPerPage = false; + } } else { - // Some chunks are crossing page boundaries, which means that the page - // contains one or two partial chunks. - FullPagesBlockCountMax = PageSize / BlockSize + 2; - SameBlockCountPerPage = false; + if (BlockSize % PageSize == 0) { + // One chunk covers multiple pages, no cross overs. + FullPagesBlockCountMax = 1; + SameBlockCountPerPage = true; + } else { + // One chunk covers multiple pages, Some chunks are crossing page + // boundaries. Some pages contain one chunk, some contain two. + FullPagesBlockCountMax = 2; + SameBlockCountPerPage = false; + } } - } else { - if (BlockSize % PageSize == 0) { - // One chunk covers multiple pages, no cross overs. - FullPagesBlockCountMax = 1; - SameBlockCountPerPage = true; + + // TODO: For multiple regions, it's more complicated to support partial + // region marking (which includes the complexity of how to handle the last + // block in a region). We may consider this after markFreeBlocks() accepts + // only free blocks from the same region. + if (NumberOfRegions != 1) + DCHECK_EQ(ReleaseOffset, 0U); + + PagesCount = roundUp(ReleaseSize, PageSize) / PageSize; + PageSizeLog = getLog2(PageSize); + ReleasePageOffset = ReleaseOffset >> PageSizeLog; + } + + // PageMap is lazily allocated when markFreeBlocks() is invoked. + bool hasBlockMarked() const { + return PageMap.isAllocated(); + } + + bool ensurePageMapAllocated() { + if (PageMap.isAllocated()) + return true; + PageMap.reset(NumberOfRegions, PagesCount, FullPagesBlockCountMax); + // TODO: Log some message when we fail on PageMap allocation. + return PageMap.isAllocated(); + } + + // Mark all the blocks in the given range [From, to). Instead of visiting all + // the blocks, we will just mark the page as all counted. Note the `From` and + // `To` has to be page aligned but with one exception, if `To` is equal to the + // RegionSize, it's not necessary to be aligned with page size. 
+ bool markRangeAsAllCounted(uptr From, uptr To, uptr Base, + const uptr RegionIndex, const uptr RegionSize) { + DCHECK_LT(From, To); + DCHECK_LE(To, Base + RegionSize); + DCHECK_EQ(From % PageSize, 0U); + DCHECK_LE(To - From, RegionSize); + + if (!ensurePageMapAllocated()) + return false; + + uptr FromInRegion = From - Base; + uptr ToInRegion = To - Base; + uptr FirstBlockInRange = roundUpSlow(FromInRegion, BlockSize); + + // The straddling block sits across entire range. + if (FirstBlockInRange >= ToInRegion) + return true; + + // First block may not sit at the first pape in the range, move + // `FromInRegion` to the first block page. + FromInRegion = roundDown(FirstBlockInRange, PageSize); + + // When The first block is not aligned to the range boundary, which means + // there is a block sitting acorss `From`, that looks like, + // + // From To + // V V + // +-----------------------------------------------+ + // +-----+-----+-----+-----+ + // | | | | | ... + // +-----+-----+-----+-----+ + // |- first page -||- second page -||- ... + // + // Therefore, we can't just mark the first page as all counted. Instead, we + // increment the number of blocks in the first page in the page map and + // then round up the `From` to the next page. + if (FirstBlockInRange != FromInRegion) { + DCHECK_GT(FromInRegion + PageSize, FirstBlockInRange); + uptr NumBlocksInFirstPage = + (FromInRegion + PageSize - FirstBlockInRange + BlockSize - 1) / + BlockSize; + PageMap.incN(RegionIndex, getPageIndex(FromInRegion), + NumBlocksInFirstPage); + FromInRegion = roundUp(FromInRegion + 1, PageSize); + } + + uptr LastBlockInRange = roundDownSlow(ToInRegion - 1, BlockSize); + + // Note that LastBlockInRange may be smaller than `FromInRegion` at this + // point because it may contain only one block in the range. + + // When the last block sits across `To`, we can't just mark the pages + // occupied by the last block as all counted. Instead, we increment the + // counters of those pages by 1. The exception is that if it's the last + // block in the region, it's fine to mark those pages as all counted. + if (LastBlockInRange + BlockSize != RegionSize) { + DCHECK_EQ(ToInRegion % PageSize, 0U); + // The case below is like, + // + // From To + // V V + // +----------------------------------------+ + // +-----+-----+-----+-----+ + // | | | | | ... + // +-----+-----+-----+-----+ + // ... -||- last page -||- next page -| + // + // The last block is not aligned to `To`, we need to increment the + // counter of `next page` by 1. + if (LastBlockInRange + BlockSize != ToInRegion) { + PageMap.incRange(RegionIndex, getPageIndex(ToInRegion), + getPageIndex(LastBlockInRange + BlockSize - 1)); + } } else { - // One chunk covers multiple pages, Some chunks are crossing page - // boundaries. Some pages contain one chunk, some contain two. - FullPagesBlockCountMax = 2; - SameBlockCountPerPage = false; + ToInRegion = RegionSize; + } + + // After handling the first page and the last block, it's safe to mark any + // page in between the range [From, To). 
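// Editorial sketch (not part of the patch): when the start of a released range
// is not block-aligned, the page containing the straddling block cannot simply
// be marked "all counted"; instead the number of blocks that begin on that
// page is added to its counter. That count is a ceiling division over the part
// of the page past the first fully-contained block. Standalone restatement,
// names invented for illustration:
#include <cstdint>

namespace sketch_first_page {
using uptr = uintptr_t;

// All offsets are relative to the region base; PageStart is page-aligned and
// FirstBlockInRange is the offset of the first block fully inside the range.
uptr blocksOnFirstPage(uptr PageStart, uptr FirstBlockInRange, uptr PageSize,
                       uptr BlockSize) {
  const uptr BytesOnPage = PageStart + PageSize - FirstBlockInRange;
  return (BytesOnPage + BlockSize - 1) / BlockSize; // Ceiling division.
}
} // namespace sketch_first_page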
+ if (FromInRegion < ToInRegion) { + PageMap.setAsAllCountedRange(RegionIndex, getPageIndex(FromInRegion), + getPageIndex(ToInRegion - 1)); } + + return true; } - const uptr PagesCount = roundUpTo(RegionSize, PageSize) / PageSize; - PackedCounterArray Counters(NumberOfRegions, PagesCount, - FullPagesBlockCountMax); - if (!Counters.isAllocated()) - return; - - const uptr PageSizeLog = getLog2(PageSize); - const uptr RoundedRegionSize = PagesCount << PageSizeLog; - const uptr RoundedSize = NumberOfRegions * RoundedRegionSize; - - // Iterate over free chunks and count how many free chunks affect each - // allocated page. - if (BlockSize <= PageSize && PageSize % BlockSize == 0) { - // Each chunk affects one page only. - for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; - Counters.inc(RegionIndex, PInRegion >> PageSizeLog); + template <class TransferBatchT, typename DecompactPtrT> + bool markFreeBlocksInRegion(const IntrusiveList<TransferBatchT> &FreeList, + DecompactPtrT DecompactPtr, const uptr Base, + const uptr RegionIndex, const uptr RegionSize, + bool MayContainLastBlockInRegion) { + if (!ensurePageMapAllocated()) + return false; + + if (MayContainLastBlockInRegion) { + const uptr LastBlockInRegion = + ((RegionSize / BlockSize) - 1U) * BlockSize; + // The last block in a region may not use the entire page, we mark the + // following "pretend" memory block(s) as free in advance. + // + // Region Boundary + // v + // -----+-----------------------+ + // | Last Page | <- Rounded Region Boundary + // -----+-----------------------+ + // |-----||- trailing blocks -| + // ^ + // last block + const uptr RoundedRegionSize = roundUp(RegionSize, PageSize); + const uptr TrailingBlockBase = LastBlockInRegion + BlockSize; + // If the difference between `RoundedRegionSize` and + // `TrailingBlockBase` is larger than a page, that implies the reported + // `RegionSize` may not be accurate. + DCHECK_LT(RoundedRegionSize - TrailingBlockBase, PageSize); + + // Only the last page touched by the last block needs to mark the trailing + // blocks. Note that if the last "pretend" block straddles the boundary, + // we still have to count it in so that the logic of counting the number + // of blocks on a page is consistent. + uptr NumTrailingBlocks = + (roundUpSlow(RoundedRegionSize - TrailingBlockBase, BlockSize) + + BlockSize - 1) / + BlockSize; + if (NumTrailingBlocks > 0) { + PageMap.incN(RegionIndex, getPageIndex(TrailingBlockBase), + NumTrailingBlocks); } } - } else { - // In all other cases chunks might affect more than one page. - DCHECK_GE(RegionSize, BlockSize); - const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; - for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - uptr PInRegion = P - RegionIndex * RegionSize; - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - // The last block in a region might straddle a page, so if it's - // free, we mark the following "pretend" memory block(s) as free. 
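// Editorial sketch (not part of the patch): the last real block of a region
// usually does not end exactly at the rounded region boundary, so the space
// after it is treated as a run of "pretend" free blocks and counted onto the
// trailing page up front. A standalone restatement of that trailing-block
// count, names invented for illustration:
#include <cstdint>

namespace sketch_trailing_blocks {
using uptr = uintptr_t;

uptr roundUpTo(uptr X, uptr Boundary) { // Power-of-two boundary assumed.
  return (X + Boundary - 1) & ~(Boundary - 1);
}

// Returns how many pretend blocks need to be counted after the last block.
uptr trailingPretendBlocks(uptr RegionSize, uptr BlockSize, uptr PageSize) {
  const uptr LastBlockBase = ((RegionSize / BlockSize) - 1) * BlockSize;
  const uptr TrailingBase = LastBlockBase + BlockSize;
  const uptr RoundedRegionSize = roundUpTo(RegionSize, PageSize);
  if (TrailingBase >= RoundedRegionSize)
    return 0; // The last block already ends on the rounded boundary.
  // Blocks needed to cover [TrailingBase, RoundedRegionSize), counting a
  // block that straddles the rounded boundary as well.
  return (RoundedRegionSize - TrailingBase + BlockSize - 1) / BlockSize;
}
} // namespace sketch_trailing_blocks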
- if (PInRegion == LastBlockInRegion) { - PInRegion += BlockSize; - while (PInRegion < RoundedRegionSize) { - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - PInRegion += BlockSize; - } + + // Iterate over free chunks and count how many free chunks affect each + // allocated page. + if (BlockSize <= PageSize && PageSize % BlockSize == 0) { + // Each chunk affects one page only. + for (const auto &It : FreeList) { + for (u16 I = 0; I < It.getCount(); I++) { + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; + DCHECK_LT(PInRegion, RegionSize); + PageMap.inc(RegionIndex, getPageIndex(PInRegion)); + } + } + } else { + // In all other cases chunks might affect more than one page. + DCHECK_GE(RegionSize, BlockSize); + for (const auto &It : FreeList) { + for (u16 I = 0; I < It.getCount(); I++) { + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; + PageMap.incRange(RegionIndex, getPageIndex(PInRegion), + getPageIndex(PInRegion + BlockSize - 1)); } } } + + return true; } + uptr getPageIndex(uptr P) { return (P >> PageSizeLog) - ReleasePageOffset; } + + uptr BlockSize; + uptr NumberOfRegions; + // For partial region marking, some pages in front are not needed to be + // counted. + uptr ReleasePageOffset; + uptr PageSize; + uptr PagesCount; + uptr PageSizeLog; + uptr FullPagesBlockCountMax; + bool SameBlockCountPerPage; + RegionPageMap PageMap; +}; + +// Try to release the page which doesn't have any in-used block, i.e., they are +// all free blocks. The `PageMap` will record the number of free blocks in each +// page. +template <class ReleaseRecorderT, typename SkipRegionT> +NOINLINE void +releaseFreeMemoryToOS(PageReleaseContext &Context, + ReleaseRecorderT &Recorder, SkipRegionT SkipRegion) { + const uptr PageSize = Context.PageSize; + const uptr BlockSize = Context.BlockSize; + const uptr PagesCount = Context.PagesCount; + const uptr NumberOfRegions = Context.NumberOfRegions; + const uptr ReleasePageOffset = Context.ReleasePageOffset; + const uptr FullPagesBlockCountMax = Context.FullPagesBlockCountMax; + const bool SameBlockCountPerPage = Context.SameBlockCountPerPage; + RegionPageMap &PageMap = Context.PageMap; + // Iterate over pages detecting ranges of pages with chunk Counters equal // to the expected number of chunks for the particular page. 
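// Editorial sketch (not part of the patch): releaseFreeMemoryToOS() walks the
// pages in order and feeds a stream of "releasable / not releasable" booleans
// to FreePagesRangeTracker, which coalesces consecutive releasable pages into
// single ranges before invoking the recorder. The coalescing itself is a small
// state machine; a standalone version with invented names:
#include <cstdint>
#include <functional>

namespace sketch_range_tracker {
using uptr = uintptr_t;

struct RangeTracker {
  std::function<void(uptr FromPage, uptr ToPage)> ReleaseRange;
  bool InRange = false;
  uptr CurrentPage = 0;
  uptr RangeStartPage = 0;

  void processNextPage(bool Releasable) {
    if (Releasable) {
      if (!InRange) { // Open a new range at this page.
        RangeStartPage = CurrentPage;
        InRange = true;
      }
    } else if (InRange) { // Close the open range [RangeStartPage, CurrentPage).
      ReleaseRange(RangeStartPage, CurrentPage);
      InRange = false;
    }
    ++CurrentPage;
  }

  void finish() { // Flush a range that runs up to the last page.
    if (InRange)
      ReleaseRange(RangeStartPage, CurrentPage);
    InRange = false;
  }
};
} // namespace sketch_range_tracker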
FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder); @@ -287,9 +631,11 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, RangeTracker.skipPages(PagesCount); continue; } - for (uptr J = 0; J < PagesCount; J++) - RangeTracker.processNextPage(Counters.get(I, J) == - FullPagesBlockCountMax); + for (uptr J = 0; J < PagesCount; J++) { + const bool CanRelease = + PageMap.updateAsAllCountedIf(I, J, FullPagesBlockCountMax); + RangeTracker.processNextPage(CanRelease); + } } } else { // Slow path, go through the pages keeping count how many chunks affect @@ -308,6 +654,10 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, } uptr PrevPageBoundary = 0; uptr CurrentBoundary = 0; + if (ReleasePageOffset > 0) { + PrevPageBoundary = ReleasePageOffset * PageSize; + CurrentBoundary = roundUpSlow(PrevPageBoundary, BlockSize); + } for (uptr J = 0; J < PagesCount; J++) { const uptr PageBoundary = PrevPageBoundary + PageSize; uptr BlocksPerPage = Pn; @@ -321,7 +671,9 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, } } PrevPageBoundary = PageBoundary; - RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage); + const bool CanRelease = + PageMap.updateAsAllCountedIf(I, J, BlocksPerPage); + RangeTracker.processNextPage(CanRelease); } } } diff --git a/standalone/report.cpp b/standalone/report.cpp index 561c7c51f4e..16eae8c3136 100644 --- a/standalone/report.cpp +++ b/standalone/report.cpp @@ -36,6 +36,18 @@ private: inline void NORETURN trap() { __builtin_trap(); } +void NORETURN reportSoftRSSLimit(uptr RssLimitMb) { + ScopedErrorReport Report; + Report.append("Soft RSS limit of %zu MB exhausted, current RSS is %zu MB\n", + RssLimitMb, GetRSS() >> 20); +} + +void NORETURN reportHardRSSLimit(uptr RssLimitMb) { + ScopedErrorReport Report; + Report.append("Hard RSS limit of %zu MB exhausted, current RSS is %zu MB\n", + RssLimitMb, GetRSS() >> 20); +} + // This could potentially be called recursively if a CHECK fails in the reports. 
void NORETURN reportCheckFailed(const char *File, int Line, const char *Condition, u64 Value1, u64 Value2) { @@ -100,6 +112,11 @@ void NORETURN reportAllocationSizeTooBig(uptr UserSize, uptr TotalSize, UserSize, TotalSize, MaxSize); } +void NORETURN reportOutOfBatchClass() { + ScopedErrorReport Report; + Report.append("BatchClass region is used up, can't hold any free block\n"); +} + void NORETURN reportOutOfMemory(uptr RequestedSize) { ScopedErrorReport Report; Report.append("out of memory trying to allocate %zu bytes\n", RequestedSize); diff --git a/standalone/report.h b/standalone/report.h index 14e4e799b73..3a78ab64b13 100644 --- a/standalone/report.h +++ b/standalone/report.h @@ -32,7 +32,10 @@ void NORETURN reportSanityCheckError(const char *Field); void NORETURN reportAlignmentTooBig(uptr Alignment, uptr MaxAlignment); void NORETURN reportAllocationSizeTooBig(uptr UserSize, uptr TotalSize, uptr MaxSize); +void NORETURN reportOutOfBatchClass(); void NORETURN reportOutOfMemory(uptr RequestedSize); +void NORETURN reportSoftRSSLimit(uptr RssLimitMb); +void NORETURN reportHardRSSLimit(uptr RssLimitMb); enum class AllocatorAction : u8 { Recycling, Deallocating, diff --git a/standalone/rss_limit_checker.cpp b/standalone/rss_limit_checker.cpp new file mode 100644 index 00000000000..f428386b755 --- /dev/null +++ b/standalone/rss_limit_checker.cpp @@ -0,0 +1,37 @@ +//===-- common.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "rss_limit_checker.h" +#include "atomic_helpers.h" +#include "string_utils.h" + +namespace scudo { + +void RssLimitChecker::check(u64 NextCheck) { + // The interval for the checks is 250ms. + static constexpr u64 CheckInterval = 250 * 1000000; + + // Early return in case another thread already did the calculation. + if (!atomic_compare_exchange_strong(&RssNextCheckAtNS, &NextCheck, + getMonotonicTime() + CheckInterval, + memory_order_relaxed)) { + return; + } + + const uptr CurrentRssMb = GetRSS() >> 20; + + RssLimitExceeded Result = RssLimitExceeded::Neither; + if (UNLIKELY(HardRssLimitMb && HardRssLimitMb < CurrentRssMb)) + Result = RssLimitExceeded::Hard; + else if (UNLIKELY(SoftRssLimitMb && SoftRssLimitMb < CurrentRssMb)) + Result = RssLimitExceeded::Soft; + + atomic_store_relaxed(&RssLimitStatus, static_cast<u8>(Result)); +} + +} // namespace scudo diff --git a/standalone/rss_limit_checker.h b/standalone/rss_limit_checker.h new file mode 100644 index 00000000000..29dc063f3fc --- /dev/null +++ b/standalone/rss_limit_checker.h @@ -0,0 +1,63 @@ +//===-- common.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
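// Editorial sketch (not part of the patch): RssLimitChecker::check() above
// rate-limits the RSS measurement by letting exactly one thread per interval
// win a compare-exchange on the "next check" timestamp; all other threads keep
// using the cached status. The same pattern with std::atomic, names invented
// for illustration:
#include <atomic>
#include <cstdint>

namespace sketch_rate_limited_check {
std::atomic<uint64_t> NextCheckAtNs{0};

// Returns true if the caller won the right to perform the expensive check.
bool tryClaimCheck(uint64_t NowNs, uint64_t IntervalNs) {
  uint64_t Expected = NextCheckAtNs.load(std::memory_order_relaxed);
  if (NowNs < Expected)
    return false; // Too early; use the cached result.
  // Only the thread that swaps in the new deadline performs the check.
  return NextCheckAtNs.compare_exchange_strong(Expected, NowNs + IntervalNs,
                                               std::memory_order_relaxed);
}
} // namespace sketch_rate_limited_check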
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_RSS_LIMIT_CHECKER_H_ +#define SCUDO_RSS_LIMIT_CHECKER_H_ + +#include "atomic_helpers.h" +#include "common.h" +#include "internal_defs.h" + +namespace scudo { + +class RssLimitChecker { +public: + enum RssLimitExceeded { + Neither, + Soft, + Hard, + }; + + void init(int SoftRssLimitMb, int HardRssLimitMb) { + CHECK_GE(SoftRssLimitMb, 0); + CHECK_GE(HardRssLimitMb, 0); + this->SoftRssLimitMb = static_cast<uptr>(SoftRssLimitMb); + this->HardRssLimitMb = static_cast<uptr>(HardRssLimitMb); + } + + // Opportunistic RSS limit check. This will update the RSS limit status, if + // it can, every 250ms, otherwise it will just return the current one. + RssLimitExceeded getRssLimitExceeded() { + if (!HardRssLimitMb && !SoftRssLimitMb) + return RssLimitExceeded::Neither; + + u64 NextCheck = atomic_load_relaxed(&RssNextCheckAtNS); + u64 Now = getMonotonicTime(); + + if (UNLIKELY(Now >= NextCheck)) + check(NextCheck); + + return static_cast<RssLimitExceeded>(atomic_load_relaxed(&RssLimitStatus)); + } + + uptr getSoftRssLimit() const { return SoftRssLimitMb; } + uptr getHardRssLimit() const { return HardRssLimitMb; } + +private: + void check(u64 NextCheck); + + uptr SoftRssLimitMb = 0; + uptr HardRssLimitMb = 0; + + atomic_u64 RssNextCheckAtNS = {}; + atomic_u8 RssLimitStatus = {}; +}; + +} // namespace scudo + +#endif // SCUDO_RSS_LIMIT_CHECKER_H_ diff --git a/standalone/secondary.h b/standalone/secondary.h index 2d177576258..94009f5fa9c 100644 --- a/standalone/secondary.h +++ b/standalone/secondary.h @@ -12,11 +12,13 @@ #include "chunk.h" #include "common.h" #include "list.h" +#include "mem_map.h" #include "memtag.h" #include "mutex.h" #include "options.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -36,9 +38,7 @@ struct alignas(Max<uptr>(archSupportsMemoryTagging() LargeBlock::Header *Next; uptr CommitBase; uptr CommitSize; - uptr MapBase; - uptr MapSize; - [[no_unique_address]] MapPlatformData Data; + MemMapT MemMap; }; static_assert(sizeof(Header) % (1U << SCUDO_MIN_ALIGNMENT_LOG) == 0, ""); @@ -65,8 +65,11 @@ template <typename Config> static Header *getHeader(const void *Ptr) { } // namespace LargeBlock static void unmap(LargeBlock::Header *H) { - MapPlatformData Data = H->Data; - unmap(reinterpret_cast<void *>(H->MapBase), H->MapSize, UNMAP_ALL, &Data); + // Note that the `H->MapMap` is stored on the pages managed by itself. Take + // over the ownership before unmap() so that any operation along with unmap() + // won't touch inaccessible pages. 
+ MemMapT MemMap = H->MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } class MapAllocatorNoCache { @@ -96,20 +99,19 @@ static const uptr MaxUnusedCachePages = 4U; template <typename Config> void mapSecondary(Options Options, uptr CommitBase, uptr CommitSize, - uptr AllocPos, uptr Flags, MapPlatformData *Data) { + uptr AllocPos, uptr Flags, MemMapT &MemMap) { const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached(); if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) { const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); - map(reinterpret_cast<void *>(CommitBase), UntaggedPos - CommitBase, - "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG | Flags, Data); - map(reinterpret_cast<void *>(UntaggedPos), - CommitBase + CommitSize - UntaggedPos, "scudo:secondary", - MAP_RESIZABLE | Flags, Data); + MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", + MAP_RESIZABLE | MAP_MEMTAG | Flags); + MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, + "scudo:secondary", MAP_RESIZABLE | Flags); } else { - map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary", + const uptr RemapFlags = MAP_RESIZABLE | (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0) | - Flags, - Data); + Flags; + MemMap.remap(CommitBase, CommitSize, "scudo:secondary", RemapFlags); } } @@ -133,7 +135,7 @@ public: Config::SecondaryCacheEntriesArraySize, ""); - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(EntriesCount, 0U); setOption(Option::MaxCacheEntriesCount, static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntriesCount)); @@ -142,7 +144,7 @@ public: setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void store(Options Options, LargeBlock::Header *H) { + void store(Options Options, LargeBlock::Header *H) EXCLUDES(Mutex) { if (!canCache(H->CommitSize)) return unmap(H); @@ -154,10 +156,8 @@ public: CachedBlock Entry; Entry.CommitBase = H->CommitBase; Entry.CommitSize = H->CommitSize; - Entry.MapBase = H->MapBase; - Entry.MapSize = H->MapSize; Entry.BlockBegin = reinterpret_cast<uptr>(H + 1); - Entry.Data = H->Data; + Entry.MemMap = H->MemMap; Entry.Time = Time; if (useMemoryTagging<Config>(Options)) { if (Interval == 0 && !SCUDO_FUCHSIA) { @@ -167,13 +167,13 @@ public: // on top so we just do the two syscalls there. 
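// Editorial sketch (not part of the patch): the unmap(LargeBlock::Header *)
// change above copies the header's MemMap by value before unmapping, because
// the header itself lives inside the very pages being unmapped; reading it
// after the unmap would touch inaccessible memory. The idiom in miniature,
// with a toy mapping type standing in for scudo's MemMapT:
#include <cstdint>

namespace sketch_memmap_ownership {
struct ToyMemMap {
  uintptr_t Base = 0;
  uintptr_t Capacity = 0;
  void unmap(uintptr_t B, uintptr_t Size) { /* would call munmap(B, Size) */ }
};

struct Header {
  ToyMemMap MemMap; // Stored on the mapped pages themselves.
};

void destroy(Header *H) {
  ToyMemMap MemMap = H->MemMap; // Copy out first: H becomes invalid below.
  MemMap.unmap(MemMap.Base, MemMap.Capacity);
  // Do not dereference H past this point.
}
} // namespace sketch_memmap_ownership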
Entry.Time = 0; mapSecondary<Config>(Options, Entry.CommitBase, Entry.CommitSize, - Entry.CommitBase, MAP_NOACCESS, &Entry.Data); + Entry.CommitBase, MAP_NOACCESS, Entry.MemMap); } else { - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS, - &Entry.Data); + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, + MAP_NOACCESS); } } else if (Interval == 0) { - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releasePagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } do { @@ -222,12 +222,11 @@ public: else if (Interval >= 0) releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); if (!EntryCached) - unmap(reinterpret_cast<void *>(Entry.MapBase), Entry.MapSize, UNMAP_ALL, - &Entry.Data); + Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity()); } bool retrieve(Options Options, uptr Size, uptr Alignment, - LargeBlock::Header **H, bool *Zeroed) { + LargeBlock::Header **H, bool *Zeroed) EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); bool Found = false; @@ -243,45 +242,46 @@ public: continue; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = - roundDownTo(CommitBase + CommitSize - Size, Alignment); + roundDown(CommitBase + CommitSize - Size, Alignment); HeaderPos = AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); if (HeaderPos > CommitBase + CommitSize) continue; if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { continue; + } Found = true; Entry = Entries[I]; Entries[I].CommitBase = 0; + EntriesCount--; break; } } - if (Found) { - *H = reinterpret_cast<LargeBlock::Header *>( - LargeBlock::addHeaderTag<Config>(HeaderPos)); - *Zeroed = Entry.Time == 0; - if (useMemoryTagging<Config>(Options)) - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data); - uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); - if (useMemoryTagging<Config>(Options)) { - if (*Zeroed) - storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), - NewBlockBegin); - else if (Entry.BlockBegin < NewBlockBegin) - storeTags(Entry.BlockBegin, NewBlockBegin); - else - storeTags(untagPointer(NewBlockBegin), - untagPointer(Entry.BlockBegin)); + if (!Found) + return false; + + *H = reinterpret_cast<LargeBlock::Header *>( + LargeBlock::addHeaderTag<Config>(HeaderPos)); + *Zeroed = Entry.Time == 0; + if (useMemoryTagging<Config>(Options)) + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0); + uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); + if (useMemoryTagging<Config>(Options)) { + if (*Zeroed) { + storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), + NewBlockBegin); + } else if (Entry.BlockBegin < NewBlockBegin) { + storeTags(Entry.BlockBegin, NewBlockBegin); + } else { + storeTags(untagPointer(NewBlockBegin), + untagPointer(Entry.BlockBegin)); } - (*H)->CommitBase = Entry.CommitBase; - (*H)->CommitSize = Entry.CommitSize; - (*H)->MapBase = Entry.MapBase; - (*H)->MapSize = Entry.MapSize; - (*H)->Data = Entry.Data; - EntriesCount--; } - return Found; + (*H)->CommitBase = Entry.CommitBase; + (*H)->CommitSize = Entry.CommitSize; + (*H)->MemMap = Entry.MemMap; + return true; } bool canCache(uptr Size) { @@ -315,67 +315,62 @@ public: void releaseToOS() { releaseOlderThan(UINT64_MAX); } - void disableMemoryTagging() { + void disableMemoryTagging() EXCLUDES(Mutex) { ScopedLock 
L(Mutex); for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { if (Quarantine[I].CommitBase) { - unmap(reinterpret_cast<void *>(Quarantine[I].MapBase), - Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data); + MemMapT &MemMap = Quarantine[I].MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); Quarantine[I].CommitBase = 0; } } const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); - for (u32 I = 0; I < MaxCount; I++) - if (Entries[I].CommitBase) - setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0, - &Entries[I].Data); + for (u32 I = 0; I < MaxCount; I++) { + if (Entries[I].CommitBase) { + Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, + Entries[I].CommitSize, 0); + } + } QuarantinePos = -1U; } - void disable() { Mutex.lock(); } + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); } - void enable() { Mutex.unlock(); } + void enable() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } void unmapTestOnly() { empty(); } private: void empty() { - struct { - void *MapBase; - uptr MapSize; - MapPlatformData Data; - } MapInfo[Config::SecondaryCacheEntriesArraySize]; + MemMapT MapInfo[Config::SecondaryCacheEntriesArraySize]; uptr N = 0; { ScopedLock L(Mutex); for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { if (!Entries[I].CommitBase) continue; - MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase); - MapInfo[N].MapSize = Entries[I].MapSize; - MapInfo[N].Data = Entries[I].Data; + MapInfo[N] = Entries[I].MemMap; Entries[I].CommitBase = 0; N++; } EntriesCount = 0; IsFullEvents = 0; } - for (uptr I = 0; I < N; I++) - unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL, - &MapInfo[I].Data); + for (uptr I = 0; I < N; I++) { + MemMapT &MemMap = MapInfo[I]; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } } struct CachedBlock { - uptr CommitBase; - uptr CommitSize; - uptr MapBase; - uptr MapSize; - uptr BlockBegin; - [[no_unique_address]] MapPlatformData Data; - u64 Time; + uptr CommitBase = 0; + uptr CommitSize = 0; + uptr BlockBegin = 0; + MemMapT MemMap = {}; + u64 Time = 0; }; - void releaseIfOlderThan(CachedBlock &Entry, u64 Time) { + void releaseIfOlderThan(CachedBlock &Entry, u64 Time) REQUIRES(Mutex) { if (!Entry.CommitBase || !Entry.Time) return; if (Entry.Time > Time) { @@ -383,11 +378,11 @@ private: OldestTime = Entry.Time; return; } - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releasePagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } - void releaseOlderThan(u64 Time) { + void releaseOlderThan(u64 Time) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (!EntriesCount || OldestTime == 0 || OldestTime > Time) return; @@ -399,22 +394,24 @@ private: } HybridMutex Mutex; - u32 EntriesCount = 0; - u32 QuarantinePos = 0; + u32 EntriesCount GUARDED_BY(Mutex) = 0; + u32 QuarantinePos GUARDED_BY(Mutex) = 0; atomic_u32 MaxEntriesCount = {}; atomic_uptr MaxEntrySize = {}; - u64 OldestTime = 0; - u32 IsFullEvents = 0; + u64 OldestTime GUARDED_BY(Mutex) = 0; + u32 IsFullEvents GUARDED_BY(Mutex) = 0; atomic_s32 ReleaseToOsIntervalMs = {}; - CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; + CachedBlock + Entries[Config::SecondaryCacheEntriesArraySize] GUARDED_BY(Mutex) = {}; NonZeroLengthArray<CachedBlock, Config::SecondaryCacheQuarantineSize> - Quarantine = {}; + Quarantine GUARDED_BY(Mutex) = {}; }; template <typename Config> class MapAllocator { public: - void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { + void 
init(GlobalStats *S, + s32 ReleaseToOsInterval = -1) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(AllocatedBytes, 0U); DCHECK_EQ(FreedBytes, 0U); Cache.init(ReleaseToOsInterval); @@ -438,19 +435,21 @@ public: return getBlockEnd(Ptr) - reinterpret_cast<uptr>(Ptr); } - void getStats(ScopedString *Str) const; + void getStats(ScopedString *Str); - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); Cache.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { Cache.enable(); Mutex.unlock(); } template <typename F> void iterateOverBlocks(F Callback) const { + Mutex.assertHeld(); + for (const auto &H : InUseBlocks) { uptr Ptr = reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize(); if (allocatorSupportsMemoryTagging<Config>()) @@ -472,14 +471,14 @@ public: private: typename Config::SecondaryCache Cache; - HybridMutex Mutex; - DoublyLinkedList<LargeBlock::Header> InUseBlocks; - uptr AllocatedBytes = 0; - uptr FreedBytes = 0; - uptr LargestSize = 0; - u32 NumberOfAllocs = 0; - u32 NumberOfFrees = 0; - LocalStats Stats; + mutable HybridMutex Mutex; + DoublyLinkedList<LargeBlock::Header> InUseBlocks GUARDED_BY(Mutex); + uptr AllocatedBytes GUARDED_BY(Mutex) = 0; + uptr FreedBytes GUARDED_BY(Mutex) = 0; + uptr LargestSize GUARDED_BY(Mutex) = 0; + u32 NumberOfAllocs GUARDED_BY(Mutex) = 0; + u32 NumberOfFrees GUARDED_BY(Mutex) = 0; + LocalStats Stats GUARDED_BY(Mutex); }; // As with the Primary, the size passed to this function includes any desired @@ -502,9 +501,9 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, Alignment = Max(Alignment, uptr(1U) << SCUDO_MIN_ALIGNMENT_LOG); const uptr PageSize = getPageSizeCached(); uptr RoundedSize = - roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() + - Chunk::getHeaderSize(), - PageSize); + roundUp(roundUp(Size, Alignment) + LargeBlock::getHeaderSize() + + Chunk::getHeaderSize(), + PageSize); if (Alignment > PageSize) RoundedSize += Alignment - PageSize; @@ -523,23 +522,26 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, if (FillContents && !Zeroed) memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte, BlockEnd - PtrInt); - const uptr BlockSize = BlockEnd - HInt; { ScopedLock L(Mutex); InUseBlocks.push_back(H); - AllocatedBytes += BlockSize; + AllocatedBytes += H->CommitSize; NumberOfAllocs++; - Stats.add(StatAllocated, BlockSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatAllocated, H->CommitSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return Ptr; } } - MapPlatformData Data = {}; + ReservedMemoryT ReservedMemory; const uptr MapSize = RoundedSize + 2 * PageSize; - uptr MapBase = reinterpret_cast<uptr>( - map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data)); + ReservedMemory.create(/*Addr=*/0U, MapSize, nullptr, MAP_ALLOWNOMEM); + + // Take the entire ownership of reserved region. + MemMapT MemMap = ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()); + uptr MapBase = MemMap.getBase(); if (UNLIKELY(!MapBase)) return nullptr; uptr CommitBase = MapBase + PageSize; @@ -551,27 +553,27 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, // For alignments greater than or equal to a page, the user pointer (eg: the // pointer that is returned by the C or C++ allocation APIs) ends up on a // page boundary , and our headers will live in the preceding page. 
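The assignments in the hunk below express that layout with the renamed roundUp/roundDown helpers: the commit region starts one page below an Alignment-aligned address, the user data is placed at the highest aligned position that still fits the request, and the chunk and LargeBlock headers sit immediately beneath it. A minimal sketch of the same arithmetic, using made-up constants and simplified stand-ins for the rounding helpers (illustrative only, not scudo's code):

// Illustrative sketch: PageSize, Alignment, Size and MapBase are invented, and
// roundUp/roundDown are simplified power-of-two helpers, not the scudo ones.
#include <cassert>
#include <cstdint>

static uint64_t roundUp(uint64_t X, uint64_t B) { return (X + B - 1) & ~(B - 1); }
static uint64_t roundDown(uint64_t X, uint64_t B) { return X & ~(B - 1); }

int main() {
  const uint64_t PageSize = 4096, Alignment = 1 << 16, Size = 100 * 1024;
  const uint64_t MapBase = 0x7f0000001000; // hypothetical reservation base
  // The commit region starts one page before an aligned address, so the
  // headers of an aligned allocation always have a page to live in.
  const uint64_t CommitBase = roundUp(MapBase + PageSize + 1, Alignment) - PageSize;
  const uint64_t MapEnd = CommitBase + PageSize + roundUp(Size, PageSize) + PageSize;
  const uint64_t CommitSize = MapEnd - PageSize - CommitBase;
  const uint64_t AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment);
  assert(AllocPos % Alignment == 0);         // user data lands on the alignment
  assert(AllocPos - CommitBase == PageSize); // with these numbers, exactly one
                                             // page is left for the headers
  return 0;
}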
- CommitBase = roundUpTo(MapBase + PageSize + 1, Alignment) - PageSize; + CommitBase = roundUp(MapBase + PageSize + 1, Alignment) - PageSize; const uptr NewMapBase = CommitBase - PageSize; DCHECK_GE(NewMapBase, MapBase); // We only trim the extra memory on 32-bit platforms: 64-bit platforms // are less constrained memory wise, and that saves us two syscalls. if (SCUDO_WORDSIZE == 32U && NewMapBase != MapBase) { - unmap(reinterpret_cast<void *>(MapBase), NewMapBase - MapBase, 0, &Data); + MemMap.unmap(MapBase, NewMapBase - MapBase); MapBase = NewMapBase; } const uptr NewMapEnd = - CommitBase + PageSize + roundUpTo(Size, PageSize) + PageSize; + CommitBase + PageSize + roundUp(Size, PageSize) + PageSize; DCHECK_LE(NewMapEnd, MapEnd); if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) { - unmap(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd, 0, &Data); + MemMap.unmap(NewMapEnd, MapEnd - NewMapEnd); MapEnd = NewMapEnd; } } const uptr CommitSize = MapEnd - PageSize - CommitBase; - const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment); - mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, &Data); + const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); + mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, MemMap); const uptr HeaderPos = AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>( @@ -579,11 +581,9 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, if (useMemoryTagging<Config>(Options)) storeTags(LargeBlock::addHeaderTag<Config>(CommitBase), reinterpret_cast<uptr>(H + 1)); - H->MapBase = MapBase; - H->MapSize = MapEnd - MapBase; H->CommitBase = CommitBase; H->CommitSize = CommitSize; - H->Data = Data; + H->MemMap = MemMap; if (BlockEndPtr) *BlockEndPtr = CommitBase + CommitSize; { @@ -594,13 +594,14 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, LargestSize = CommitSize; NumberOfAllocs++; Stats.add(StatAllocated, CommitSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return reinterpret_cast<void *>(HeaderPos + LargeBlock::getHeaderSize()); } template <typename Config> -void MapAllocator<Config>::deallocate(Options Options, void *Ptr) { +void MapAllocator<Config>::deallocate(Options Options, void *Ptr) + EXCLUDES(Mutex) { LargeBlock::Header *H = LargeBlock::getHeader<Config>(Ptr); const uptr CommitSize = H->CommitSize; { @@ -609,13 +610,14 @@ void MapAllocator<Config>::deallocate(Options Options, void *Ptr) { FreedBytes += CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); - Stats.sub(StatMapped, H->MapSize); + Stats.sub(StatMapped, H->MemMap.getCapacity()); } Cache.store(Options, H); } template <typename Config> -void MapAllocator<Config>::getStats(ScopedString *Str) const { +void MapAllocator<Config>::getStats(ScopedString *Str) EXCLUDES(Mutex) { + ScopedLock L(Mutex); Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times " "(%zuK), remains %u (%zuK) max %zuM\n", NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, diff --git a/standalone/size_class_map.h b/standalone/size_class_map.h index 6b060950abe..766562495ec 100644 --- a/standalone/size_class_map.h +++ b/standalone/size_class_map.h @@ -23,7 +23,7 @@ inline uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) { } template <typename Config> struct SizeClassMapBase { - static u32 getMaxCachedHint(uptr Size) { + static u16 
getMaxCachedHint(uptr Size) { DCHECK_NE(Size, 0); u32 N; // Force a 32-bit division if the template parameters allow for it. @@ -31,7 +31,10 @@ template <typename Config> struct SizeClassMapBase { N = static_cast<u32>((1UL << Config::MaxBytesCachedLog) / Size); else N = (1U << Config::MaxBytesCachedLog) / static_cast<u32>(Size); - return Max(1U, Min(Config::MaxNumCachedHint, N)); + + // Note that Config::MaxNumCachedHint is u16 so the result is guaranteed to + // fit in u16. + return static_cast<u16>(Max(1U, Min<u32>(Config::MaxNumCachedHint, N))); } }; @@ -65,7 +68,7 @@ class FixedSizeClassMap : public SizeClassMapBase<Config> { static const uptr M = (1UL << S) - 1; public: - static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; + static const u16 MaxNumCachedHint = Config::MaxNumCachedHint; static const uptr MaxSize = (1UL << Config::MaxSizeLog) + Config::SizeDelta; static const uptr NumClasses = @@ -99,7 +102,7 @@ public: return MidClass + 1 + scaledLog2(Size - 1, Config::MidSizeLog, S); } - static u32 getMaxCachedHint(uptr Size) { + static u16 getMaxCachedHint(uptr Size) { DCHECK_LE(Size, MaxSize); return Base::getMaxCachedHint(Size); } @@ -178,7 +181,7 @@ class TableSizeClassMap : public SizeClassMapBase<Config> { static constexpr LSBTable LTable = {}; public: - static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; + static const u16 MaxNumCachedHint = Config::MaxNumCachedHint; static const uptr NumClasses = ClassesSize + 1; static_assert(NumClasses < 256, ""); @@ -212,7 +215,7 @@ public: return SzTable.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; } - static u32 getMaxCachedHint(uptr Size) { + static u16 getMaxCachedHint(uptr Size) { DCHECK_LE(Size, MaxSize); return Base::getMaxCachedHint(Size); } @@ -223,7 +226,7 @@ struct DefaultSizeClassConfig { static const uptr MinSizeLog = 5; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = 0; }; @@ -235,7 +238,7 @@ struct FuchsiaSizeClassConfig { static const uptr MinSizeLog = 5; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 10; + static const u16 MaxNumCachedHint = 12; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = Chunk::getHeaderSize(); }; @@ -248,7 +251,7 @@ struct AndroidSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 6; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 13; + static const u16 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 13; static constexpr u32 Classes[] = { @@ -263,7 +266,7 @@ struct AndroidSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 13; static constexpr u32 Classes[] = { @@ -292,7 +295,7 @@ struct SvelteSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 13; + static const u16 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = Chunk::getHeaderSize(); #else @@ -300,7 +303,7 @@ struct SvelteSizeClassConfig { static const uptr MinSizeLog = 3; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 14; - static const u32 
MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = Chunk::getHeaderSize(); #endif @@ -315,7 +318,7 @@ struct TrustySizeClassConfig { static const uptr MinSizeLog = 7; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 7; - static const u32 MaxNumCachedHint = 8; + static const u16 MaxNumCachedHint = 12; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = 0; }; diff --git a/standalone/stats.h b/standalone/stats.h index be5bf2d3720..658b75863ad 100644 --- a/standalone/stats.h +++ b/standalone/stats.h @@ -12,6 +12,7 @@ #include "atomic_helpers.h" #include "list.h" #include "mutex.h" +#include "thread_annotations.h" #include <string.h> @@ -60,19 +61,19 @@ class GlobalStats : public LocalStats { public: void init() { LocalStats::init(); } - void link(LocalStats *S) { + void link(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.push_back(S); } - void unlink(LocalStats *S) { + void unlink(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.remove(S); for (uptr I = 0; I < StatCount; I++) add(static_cast<StatType>(I), S->get(static_cast<StatType>(I))); } - void get(uptr *S) const { + void get(uptr *S) const EXCLUDES(Mutex) { ScopedLock L(Mutex); for (uptr I = 0; I < StatCount; I++) S[I] = LocalStats::get(static_cast<StatType>(I)); @@ -85,15 +86,15 @@ public: S[I] = static_cast<sptr>(S[I]) >= 0 ? S[I] : 0; } - void lock() { Mutex.lock(); } - void unlock() { Mutex.unlock(); } + void lock() ACQUIRE(Mutex) { Mutex.lock(); } + void unlock() RELEASE(Mutex) { Mutex.unlock(); } - void disable() { lock(); } - void enable() { unlock(); } + void disable() ACQUIRE(Mutex) { lock(); } + void enable() RELEASE(Mutex) { unlock(); } private: mutable HybridMutex Mutex; - DoublyLinkedList<LocalStats> StatsList; + DoublyLinkedList<LocalStats> StatsList GUARDED_BY(Mutex); }; } // namespace scudo diff --git a/standalone/string_utils.cpp b/standalone/string_utils.cpp index 13fdb9c6ca6..7e516f957ab 100644 --- a/standalone/string_utils.cpp +++ b/standalone/string_utils.cpp @@ -195,6 +195,28 @@ static int formatString(char *Buffer, uptr BufferLength, const char *Format, appendChar(&Buffer, BufferEnd, static_cast<char>(va_arg(Args, int))); break; } + // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively. + // However, `-Wformat` doesn't know we have a different parser for those + // placeholders and it keeps complaining the type mismatch on 64-bit + // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to + // silence the warning, we turn to use `PRId64`/`PRIu64` for printing + // `s64`/`u64` and handle the `ld`/`lu` here. + case 'l': { + ++Cur; + RAW_CHECK(*Cur == 'd' || *Cur == 'u'); + + if (*Cur == 'd') { + DVal = va_arg(Args, s64); + Res += + appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); + } else { + UVal = va_arg(Args, u64); + Res += appendUnsigned(&Buffer, BufferEnd, UVal, 10, Width, PadWithZero, + false); + } + + break; + } case '%': { RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); Res += appendChar(&Buffer, BufferEnd, '%'); diff --git a/standalone/string_utils.h b/standalone/string_utils.h index dd6ff7893b8..41901194dfd 100644 --- a/standalone/string_utils.h +++ b/standalone/string_utils.h @@ -28,6 +28,7 @@ public: void append(const char *Format, va_list Args); void append(const char *Format, ...) 
FORMAT(2, 3); void output() const { outputRaw(String.data()); } + void reserve(size_t Size) { String.reserve(Size + 1); } private: Vector<char> String; diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp index 94d97df8167..44ba639f7aa 100644 --- a/standalone/tests/combined_test.cpp +++ b/standalone/tests/combined_test.cpp @@ -10,7 +10,9 @@ #include "tests/scudo_unit_test.h" #include "allocator_config.h" +#include "chunk.h" #include "combined.h" +#include "mem_map.h" #include <condition_variable> #include <memory> @@ -38,7 +40,7 @@ bool isPrimaryAllocation(scudo::uptr Size, scudo::uptr Alignment) { if (Alignment < MinAlignment) Alignment = MinAlignment; const scudo::uptr NeededSize = - scudo::roundUpTo(Size, MinAlignment) + + scudo::roundUp(Size, MinAlignment) + ((Alignment > MinAlignment) ? Alignment : scudo::Chunk::getHeaderSize()); return AllocatorT::PrimaryT::canAllocate(NeededSize); } @@ -47,7 +49,7 @@ template <class AllocatorT> void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, scudo::uptr Alignment) { const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG; - Size = scudo::roundUpTo(Size, MinAlignment); + Size = scudo::roundUp(Size, MinAlignment); if (Allocator->useMemoryTaggingTestOnly()) EXPECT_DEATH( { @@ -91,7 +93,7 @@ template <class TypeParam> struct ScudoCombinedTest : public Test { Allocator = std::make_unique<AllocatorT>(); } ~ScudoCombinedTest() { - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); UseQuarantine = true; } @@ -152,7 +154,7 @@ void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLog) { for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) { const scudo::uptr Align = 1U << AlignLog; for (scudo::sptr Delta = -32; Delta <= 32; Delta++) { - if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0) + if (static_cast<scudo::sptr>(1U << SizeLog) + Delta < 0) continue; const scudo::uptr Size = (1U << SizeLog) + Delta; void *P = Allocator->allocate(Size, Origin, Align); @@ -165,6 +167,8 @@ void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLog) { Allocator->deallocate(P, Origin, Size); } } + + Allocator->printStats(); } #define SCUDO_MAKE_BASIC_TEST(SizeLog) \ @@ -411,7 +415,7 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, DisableMemoryTagging) { reinterpret_cast<char *>(P)[2048] = 0xaa; Allocator->deallocate(P, Origin); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } } @@ -434,7 +438,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) { EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos); } -SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) { +SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS { auto *Allocator = this->Allocator.get(); std::vector<void *> V; @@ -446,9 +450,31 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) { bool UnlockRequired; auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); - EXPECT_TRUE(!TSD->Cache.isEmpty()); - TSD->Cache.drain(); - EXPECT_TRUE(TSD->Cache.isEmpty()); + EXPECT_TRUE(!TSD->getCache().isEmpty()); + TSD->getCache().drain(); + EXPECT_TRUE(TSD->getCache().isEmpty()); + if (UnlockRequired) + TSD->unlock(); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS { + auto *Allocator = this->Allocator.get(); + + std::vector<void *> V; + for (scudo::uptr I = 0; I < 64U; I++) + V.push_back(Allocator->allocate( + rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + for (auto P : V) + 
Allocator->deallocate(P, Origin); + + // `ForceAll` will also drain the caches. + Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll); + + bool UnlockRequired; + auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + EXPECT_TRUE(TSD->getCache().isEmpty()); + EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U); + EXPECT_TRUE(Allocator->getQuarantine()->isEmpty()); if (UnlockRequired) TSD->unlock(); } @@ -487,18 +513,19 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { } for (auto &T : Threads) T.join(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } // Test that multiple instantiations of the allocator have not messed up the // process's signal handlers (GWP-ASan used to do this). TEST(ScudoCombinedDeathTest, SKIP_ON_FUCHSIA(testSEGV)) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); + scudo::ReservedMemoryT ReservedMemory; + ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, "testSEGV")); + void *P = reinterpret_cast<void *>(ReservedMemory.getBase()); + ASSERT_NE(P, nullptr); EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + ReservedMemory.release(); } struct DeathSizeClassConfig { @@ -506,12 +533,12 @@ struct DeathSizeClassConfig { static const scudo::uptr MinSizeLog = 10; static const scudo::uptr MidSizeLog = 10; static const scudo::uptr MaxSizeLog = 13; - static const scudo::u32 MaxNumCachedHint = 4; + static const scudo::u16 MaxNumCachedHint = 8; static const scudo::uptr MaxBytesCachedLog = 12; static const scudo::uptr SizeDelta = 0; }; -static const scudo::uptr DeathRegionSizeLog = 20U; +static const scudo::uptr DeathRegionSizeLog = 21U; struct DeathConfig { static const bool MaySupportMemoryTagging = false; @@ -525,6 +552,7 @@ struct DeathConfig { static const scudo::uptr PrimaryCompactPtrScale = 0; static const bool PrimaryEnableRandomOffset = true; static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + static const scudo::uptr PrimaryGroupSizeLog = 18; typedef scudo::MapAllocatorNoCache SecondaryCache; template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>; @@ -599,7 +627,7 @@ TEST(ScudoCombinedTest, FullRegion) { // operation without issue. SCUDO_TYPED_TEST(ScudoCombinedTest, ReleaseToOS) { auto *Allocator = this->Allocator.get(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } SCUDO_TYPED_TEST(ScudoCombinedTest, OddEven) { @@ -699,3 +727,85 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateInPlaceStress) { Allocator->deallocate(Ptrs[i], Origin); } } + +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferSize) { + auto *Allocator = this->Allocator.get(); + auto Size = Allocator->getRingBufferSize(); + if (Size > 0) + EXPECT_EQ(Allocator->getRingBufferAddress()[Size - 1], '\0'); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferAddress) { + auto *Allocator = this->Allocator.get(); + auto *Addr = Allocator->getRingBufferAddress(); + EXPECT_NE(Addr, nullptr); + EXPECT_EQ(Addr, Allocator->getRingBufferAddress()); +} + +#if SCUDO_CAN_USE_PRIMARY64 +#if SCUDO_TRUSTY + +// TrustyConfig is designed for a domain-specific allocator. Add a basic test +// which covers only simple operations and ensure the configuration is able to +// compile. 
+TEST(ScudoCombinedTest, BasicTrustyConfig) { + using AllocatorT = scudo::Allocator<scudo::TrustyConfig>; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); + + for (scudo::uptr ClassId = 1U; + ClassId <= scudo::TrustyConfig::SizeClassMap::LargestClassId; + ClassId++) { + const scudo::uptr Size = + scudo::TrustyConfig::SizeClassMap::getSizeByClassId(ClassId); + void *p = Allocator->allocate(Size - scudo::Chunk::getHeaderSize(), Origin); + ASSERT_NE(p, nullptr); + free(p); + } + + bool UnlockRequired; + auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + TSD->getCache().drain(); + + Allocator->releaseToOS(scudo::ReleaseToOS::Force); +} + +#endif +#endif + +#if SCUDO_LINUX + +SCUDO_TYPED_TEST(ScudoCombinedTest, SoftRssLimit) { + auto *Allocator = this->Allocator.get(); + Allocator->setRssLimitsTestOnly(1, 0, true); + + size_t Megabyte = 1024 * 1024; + size_t ChunkSize = 16; + size_t Error = 256; + + std::vector<void *> Ptrs; + for (size_t index = 0; index < Megabyte + Error; index += ChunkSize) { + void *Ptr = Allocator->allocate(ChunkSize, Origin); + Ptrs.push_back(Ptr); + } + + EXPECT_EQ(nullptr, Allocator->allocate(ChunkSize, Origin)); + + for (void *Ptr : Ptrs) + Allocator->deallocate(Ptr, Origin); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, HardRssLimit) { + auto *Allocator = this->Allocator.get(); + Allocator->setRssLimitsTestOnly(0, 1, false); + + size_t Megabyte = 1024 * 1024; + + EXPECT_DEATH( + { + disableDebuggerdMaybe(); + Allocator->allocate(Megabyte, Origin); + }, + ""); +} + +#endif diff --git a/standalone/tests/common_test.cpp b/standalone/tests/common_test.cpp index 711e3b28e31..b1e55e80d09 100644 --- a/standalone/tests/common_test.cpp +++ b/standalone/tests/common_test.cpp @@ -10,6 +10,7 @@ #include "tests/scudo_unit_test.h" #include "common.h" +#include "mem_map.h" #include <algorithm> #include <fstream> @@ -34,39 +35,64 @@ TEST(ScudoCommonTest, SKIP_ON_FUCHSIA(ResidentMemorySize)) { const uptr Size = 1ull << 30; const uptr Threshold = Size >> 3; - MapPlatformData Data = {}; - void *P = map(nullptr, Size, "ResidentMemorySize", 0, &Data); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "ResidentMemorySize")); + ASSERT_NE(MemMap.getBase(), 0U); + void *P = reinterpret_cast<void *>(MemMap.getBase()); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } TEST(ScudoCommonTest, Zeros) { const uptr Size = 1ull << 20; - MapPlatformData Data = {}; - uptr *P = reinterpret_cast<uptr *>(map(nullptr, Size, "Zeros", 0, &Data)); - const ptrdiff_t N = Size / sizeof(*P); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "Zeros")); + ASSERT_NE(MemMap.getBase(), 0U); + uptr *P = reinterpret_cast<uptr *>(MemMap.getBase()); + const ptrdiff_t N = Size / sizeof(uptr); EXPECT_EQ(std::count(P, P + N, 0), N); memset(P, 1, Size); EXPECT_EQ(std::count(P, P + N, 0), 0); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_EQ(std::count(P, P + N, 0), N); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } +#if 0 +// This test is temorarily 
disabled because it may not work as expected. E.g., +// it doesn't dirty the pages so the pages may not be commited and it may only +// work on the single thread environment. As a result, this test is flaky and is +// impacting many test scenarios. +TEST(ScudoCommonTest, GetRssFromBuffer) { + constexpr int64_t AllocSize = 10000000; + constexpr int64_t Error = 3000000; + constexpr size_t Runs = 10; + + int64_t Rss = scudo::GetRSS(); + EXPECT_GT(Rss, 0); + + std::vector<std::unique_ptr<char[]>> Allocs(Runs); + for (auto &Alloc : Allocs) { + Alloc.reset(new char[AllocSize]()); + int64_t Prev = Rss; + Rss = scudo::GetRSS(); + EXPECT_LE(std::abs(Rss - AllocSize - Prev), Error); + } +} +#endif + } // namespace scudo diff --git a/standalone/tests/list_test.cpp b/standalone/tests/list_test.cpp index 8e139916d05..140ca027ae9 100644 --- a/standalone/tests/list_test.cpp +++ b/standalone/tests/list_test.cpp @@ -161,6 +161,10 @@ TEST(ScudoListTest, SinglyLinkedList) { setList(&L1, X); checkList(&L1, X); + setList(&L1, X, Y); + L1.insert(X, Z); + checkList(&L1, X, Z, Y); + setList(&L1, X, Y, Z); setList(&L2, A, B, C); L1.append_back(&L2); diff --git a/standalone/tests/map_test.cpp b/standalone/tests/map_test.cpp index ff05258db58..06a56f84803 100644 --- a/standalone/tests/map_test.cpp +++ b/standalone/tests/map_test.cpp @@ -9,6 +9,7 @@ #include "tests/scudo_unit_test.h" #include "common.h" +#include "mem_map.h" #include <string.h> #include <unistd.h> @@ -22,11 +23,15 @@ TEST(ScudoMapTest, PageSize) { TEST(ScudoMapDeathTest, MapNoAccessUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); - EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + + ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, MappingName)); + EXPECT_NE(ReservedMemory.getBase(), 0U); + EXPECT_DEATH( + memset(reinterpret_cast<void *>(ReservedMemory.getBase()), 0xaa, Size), + ""); + + ReservedMemory.release(); } TEST(ScudoMapDeathTest, MapUnmap) { @@ -36,11 +41,13 @@ TEST(ScudoMapDeathTest, MapUnmap) { // Repeat few time to avoid missing crash if it's mmaped by unrelated // code. 
for (int i = 0; i < 10; ++i) { - void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr); - if (!P) + scudo::MemMapT MemMap; + MemMap.map(/*Addr=*/0U, Size, MappingName); + scudo::uptr P = MemMap.getBase(); + if (P == 0U) continue; - scudo::unmap(P, Size, 0, nullptr); - memset(P, 0xbb, Size); + MemMap.unmap(MemMap.getBase(), Size); + memset(reinterpret_cast<void *>(P), 0xbb, Size); } }, ""); @@ -49,30 +56,36 @@ TEST(ScudoMapDeathTest, MapUnmap) { TEST(ScudoMapDeathTest, MapWithGuardUnmap) { const scudo::uptr PageSize = scudo::getPageSizeCached(); const scudo::uptr Size = 4 * PageSize; - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size + 2 * PageSize, MappingName, MAP_NOACCESS, - &Data); - EXPECT_NE(P, nullptr); - void *Q = - reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize); - EXPECT_EQ(scudo::map(Q, Size, MappingName, 0, &Data), Q); - memset(Q, 0xaa, Size); - EXPECT_DEATH(memset(Q, 0xaa, Size + 1), ""); - scudo::unmap(P, Size + 2 * PageSize, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + ASSERT_TRUE( + ReservedMemory.create(/*Addr=*/0U, Size + 2 * PageSize, MappingName)); + ASSERT_NE(ReservedMemory.getBase(), 0U); + + scudo::MemMapT MemMap = + ReservedMemory.dispatch(ReservedMemory.getBase(), Size + 2 * PageSize); + ASSERT_TRUE(MemMap.isAllocated()); + scudo::uptr Q = MemMap.getBase() + PageSize; + ASSERT_TRUE(MemMap.remap(Q, Size, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xaa, Size); + EXPECT_DEATH(memset(reinterpret_cast<void *>(Q), 0xaa, Size + 1), ""); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } TEST(ScudoMapTest, MapGrowUnmap) { const scudo::uptr PageSize = scudo::getPageSizeCached(); const scudo::uptr Size = 4 * PageSize; - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); - void *Q = - reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize); - EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q); - memset(Q, 0xaa, PageSize); - Q = reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(Q) + PageSize); - EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q); - memset(Q, 0xbb, PageSize); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + scudo::ReservedMemoryT ReservedMemory; + ReservedMemory.create(/*Addr=*/0U, Size, MappingName); + ASSERT_TRUE(ReservedMemory.isCreated()); + + scudo::MemMapT MemMap = + ReservedMemory.dispatch(ReservedMemory.getBase(), Size); + ASSERT_TRUE(MemMap.isAllocated()); + scudo::uptr Q = MemMap.getBase() + PageSize; + ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xaa, PageSize); + Q += PageSize; + ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName)); + memset(reinterpret_cast<void *>(Q), 0xbb, PageSize); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } diff --git a/standalone/tests/memtag_test.cpp b/standalone/tests/memtag_test.cpp index 283edaa2a2c..d4c39aabe91 100644 --- a/standalone/tests/memtag_test.cpp +++ b/standalone/tests/memtag_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "common.h" +#include "mem_map.h" #include "memtag.h" #include "platform.h" #include "tests/scudo_unit_test.h" @@ -45,20 +46,24 @@ protected: GTEST_SKIP() << "Memory tagging is not supported"; BufferSize = getPageSizeCached(); - Buffer = reinterpret_cast<u8 *>( - map(nullptr, BufferSize, "MemtagTest", MAP_MEMTAG, &Data)); - Addr = reinterpret_cast<uptr>(Buffer); + 
ASSERT_FALSE(MemMap.isAllocated()); + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, BufferSize, "MemtagTest", MAP_MEMTAG)); + ASSERT_NE(MemMap.getBase(), 0U); + Addr = MemMap.getBase(); + Buffer = reinterpret_cast<u8 *>(Addr); EXPECT_TRUE(isAligned(Addr, archMemoryTagGranuleSize())); EXPECT_EQ(Addr, untagPointer(Addr)); } void TearDown() override { - if (Buffer) - unmap(Buffer, BufferSize, 0, &Data); + if (Buffer) { + ASSERT_TRUE(MemMap.isAllocated()); + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } } uptr BufferSize = 0; - MapPlatformData Data = {}; + scudo::MemMapT MemMap = {}; u8 *Buffer = nullptr; uptr Addr = 0; }; @@ -163,7 +168,7 @@ TEST_F(MemtagTest, StoreTags) { uptr TaggedBegin = addFixedTag(NoTagBegin, Tag); uptr TaggedEnd = addFixedTag(NoTagEnd, Tag); - EXPECT_EQ(roundUpTo(TaggedEnd, archMemoryTagGranuleSize()), + EXPECT_EQ(roundUp(TaggedEnd, archMemoryTagGranuleSize()), storeTags(TaggedBegin, TaggedEnd)); uptr LoadPtr = Addr; @@ -179,7 +184,7 @@ TEST_F(MemtagTest, StoreTags) { EXPECT_EQ(LoadPtr, loadTag(LoadPtr)); // Reset tags without using StoreTags. - releasePagesToOS(Addr, 0, BufferSize, &Data); + MemMap.releasePagesToOS(Addr, BufferSize); } } diff --git a/standalone/tests/mutex_test.cpp b/standalone/tests/mutex_test.cpp index d3242a3f57d..c3efeab8272 100644 --- a/standalone/tests/mutex_test.cpp +++ b/standalone/tests/mutex_test.cpp @@ -99,3 +99,10 @@ TEST(ScudoMutexTest, MutexTry) { for (scudo::u32 I = 0; I < NumberOfThreads; I++) pthread_join(Threads[I], 0); } + +TEST(ScudoMutexTest, MutexAssertHeld) { + scudo::HybridMutex M; + M.lock(); + M.assertHeld(); + M.unlock(); +} diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp index 283e2973c1e..51a7038ac78 100644 --- a/standalone/tests/primary_test.cpp +++ b/standalone/tests/primary_test.cpp @@ -12,8 +12,11 @@ #include "primary64.h" #include "size_class_map.h" +#include <algorithm> +#include <chrono> #include <condition_variable> #include <mutex> +#include <random> #include <stdlib.h> #include <thread> #include <vector> @@ -24,6 +27,7 @@ struct TestConfig1 { static const scudo::uptr PrimaryRegionSizeLog = 18U; + static const scudo::uptr PrimaryGroupSizeLog = 18U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; static const bool MaySupportMemoryTagging = false; @@ -40,6 +44,7 @@ struct TestConfig2 { #else static const scudo::uptr PrimaryRegionSizeLog = 24U; #endif + static const scudo::uptr PrimaryGroupSizeLog = 20U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; static const bool MaySupportMemoryTagging = false; @@ -56,6 +61,7 @@ struct TestConfig3 { #else static const scudo::uptr PrimaryRegionSizeLog = 24U; #endif + static const scudo::uptr PrimaryGroupSizeLog = 20U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; static const bool MaySupportMemoryTagging = true; @@ -65,6 +71,23 @@ struct TestConfig3 { static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; }; +struct TestConfig4 { +#if defined(__mips__) + // Unable to allocate greater size on QEMU-user. 
+ static const scudo::uptr PrimaryRegionSizeLog = 23U; +#else + static const scudo::uptr PrimaryRegionSizeLog = 24U; +#endif + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = true; + static const scudo::uptr PrimaryCompactPtrScale = 3U; + static const scudo::uptr PrimaryGroupSizeLog = 20U; + typedef scudo::u32 PrimaryCompactPtrT; + static const bool PrimaryEnableRandomOffset = true; + static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; +}; + template <typename BaseConfig, typename SizeClassMapT> struct Config : public BaseConfig { using SizeClassMap = SizeClassMapT; @@ -100,7 +123,8 @@ template <class BaseConfig> struct ScudoPrimaryTest : public Test {}; #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4) #endif #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ @@ -137,7 +161,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { Cache.deallocate(ClassId, Pointers[J]); } Cache.destroy(nullptr); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); Str.output(); @@ -145,7 +169,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { struct SmallRegionsConfig { using SizeClassMap = scudo::DefaultSizeClassMap; - static const scudo::uptr PrimaryRegionSizeLog = 20U; + static const scudo::uptr PrimaryRegionSizeLog = 21U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; static const bool MaySupportMemoryTagging = false; @@ -153,6 +177,7 @@ struct SmallRegionsConfig { static const scudo::uptr PrimaryCompactPtrScale = 0; static const bool PrimaryEnableRandomOffset = true; static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; + static const scudo::uptr PrimaryGroupSizeLog = 20U; }; // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. 
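Both new test configs exercise the two knobs this change threads through the primary: PrimaryGroupSizeLog, which buckets blocks into power-of-two groups for release accounting, and a MaxNumCachedHint that is now a u16. A hypothetical out-of-tree config would declare the same fields; the sketch below simply mirrors the test configs above (the values are examples only, it assumes the scudo standalone headers are on the include path, and a real config may need additional fields):

// Hypothetical config for illustration; field names follow the test configs
// above, the values are examples only.
struct ExampleSizeClassConfig {
  static const scudo::uptr NumBits = 3;
  static const scudo::uptr MinSizeLog = 5;
  static const scudo::uptr MidSizeLog = 8;
  static const scudo::uptr MaxSizeLog = 17;
  static const scudo::u16 MaxNumCachedHint = 14; // now u16 rather than u32
  static const scudo::uptr MaxBytesCachedLog = 10;
  static const scudo::uptr SizeDelta = 0;
};

struct ExamplePrimaryConfig {
  using SizeClassMap = scudo::FixedSizeClassMap<ExampleSizeClassConfig>;
  static const scudo::uptr PrimaryRegionSizeLog = 24U;
  static const scudo::uptr PrimaryGroupSizeLog = 20U; // 1 MiB release groups
  static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
  static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
  static const bool MaySupportMemoryTagging = false;
  typedef scudo::uptr PrimaryCompactPtrT;
  static const scudo::uptr PrimaryCompactPtrScale = 0;
  static const bool PrimaryEnableRandomOffset = true;
  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
};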
@@ -170,22 +195,27 @@ TEST(ScudoPrimaryTest, Primary64OOM) { std::vector<TransferBatch *> Batches; const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId; const scudo::uptr Size = Primary::getSizeByClassId(ClassId); + typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached]; + for (scudo::uptr I = 0; I < 10000U; I++) { TransferBatch *B = Allocator.popBatch(&Cache, ClassId); if (!B) { AllocationFailed = true; break; } - for (scudo::u32 J = 0; J < B->getCount(); J++) + for (scudo::u16 J = 0; J < B->getCount(); J++) memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size); Batches.push_back(B); } while (!Batches.empty()) { - Allocator.pushBatch(ClassId, Batches.back()); + TransferBatch *B = Batches.back(); Batches.pop_back(); + B->copyToArray(Blocks); + Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount()); + Cache.deallocate(Primary::SizeClassMap::BatchClassId, B); } Cache.destroy(nullptr); - Allocator.releaseToOS(); + Allocator.releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator.getStats(&Str); Str.output(); @@ -223,7 +253,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { V.pop_back(); } Cache.destroy(nullptr); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); Str.output(); @@ -270,7 +300,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { } for (auto &T : Threads) T.join(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; Allocator->getStats(&Str); Str.output(); @@ -292,5 +322,49 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { EXPECT_NE(P, nullptr); Cache.deallocate(ClassId, P); Cache.destroy(nullptr); - EXPECT_GT(Allocator->releaseToOS(), 0U); + EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::Force), 0U); +} + +SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); + Allocator->init(/*ReleaseToOsInterval=*/-1); + typename Primary::CacheT Cache; + Cache.init(nullptr, Allocator.get()); + const scudo::uptr Size = 32U; + const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); + + // We will allocate 4 times the group size memory and release all of them. We + // expect the free blocks will be classified with groups. Then we will + // allocate the same amount of memory as group size and expect the blocks will + // have the max address difference smaller or equal to 2 times the group size. + // Note that it isn't necessary to be in the range of single group size + // because the way we get the group id is doing compact pointer shifting. + // According to configuration, the compact pointer may not align to group + // size. As a result, the blocks can cross two groups at most. + const scudo::uptr GroupSizeMem = (1ULL << Primary::GroupSizeLog); + const scudo::uptr PeakAllocationMem = 4 * GroupSizeMem; + const scudo::uptr PeakNumberOfAllocations = PeakAllocationMem / Size; + const scudo::uptr FinalNumberOfAllocations = GroupSizeMem / Size; + std::vector<scudo::uptr> Blocks; + std::mt19937 R; + + for (scudo::uptr I = 0; I < PeakNumberOfAllocations; ++I) + Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId))); + + std::shuffle(Blocks.begin(), Blocks.end(), R); + + // Release all the allocated blocks, including those held by local cache. 
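To make the numbers concrete: with PrimaryGroupSizeLog = 20, GroupSizeMem is 1 MiB, so the peak phase allocates 4 MiB / 32 B = 131072 blocks and the final phase re-allocates 1 MiB / 32 B = 32768 blocks; the expectation at the end only requires those final blocks to span at most 2 MiB because, as the comment above notes, compact-pointer shifting lets a run of blocks straddle one group boundary. The loop below returns every outstanding block, and the subsequent drain flushes the per-thread cache, before the final round of allocations.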
+ while (!Blocks.empty()) { + Cache.deallocate(ClassId, reinterpret_cast<void *>(Blocks.back())); + Blocks.pop_back(); + } + Cache.drain(); + + for (scudo::uptr I = 0; I < FinalNumberOfAllocations; ++I) + Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId))); + + EXPECT_LE(*std::max_element(Blocks.begin(), Blocks.end()) - + *std::min_element(Blocks.begin(), Blocks.end()), + GroupSizeMem * 2); } diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp index 04c02891e91..41f0b161a74 100644 --- a/standalone/tests/release_test.cpp +++ b/standalone/tests/release_test.cpp @@ -18,19 +18,22 @@ #include <random> #include <set> -TEST(ScudoReleaseTest, PackedCounterArray) { +TEST(ScudoReleaseTest, RegionPageMap) { for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) { // Various valid counter's max values packed into one word. - scudo::PackedCounterArray Counters2N(1U, 1U, 1UL << I); - EXPECT_EQ(sizeof(scudo::uptr), Counters2N.getBufferSize()); + scudo::RegionPageMap PageMap2N(1U, 1U, 1UL << I); + ASSERT_TRUE(PageMap2N.isAllocated()); + EXPECT_EQ(sizeof(scudo::uptr), PageMap2N.getBufferSize()); // Check the "all bit set" values too. - scudo::PackedCounterArray Counters2N1_1(1U, 1U, ~0UL >> I); - EXPECT_EQ(sizeof(scudo::uptr), Counters2N1_1.getBufferSize()); + scudo::RegionPageMap PageMap2N1_1(1U, 1U, ~0UL >> I); + ASSERT_TRUE(PageMap2N1_1.isAllocated()); + EXPECT_EQ(sizeof(scudo::uptr), PageMap2N1_1.getBufferSize()); // Verify the packing ratio, the counter is Expected to be packed into the // closest power of 2 bits. - scudo::PackedCounterArray Counters(1U, SCUDO_WORDSIZE, 1UL << I); - EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpToPowerOfTwo(I + 1), - Counters.getBufferSize()); + scudo::RegionPageMap PageMap(1U, SCUDO_WORDSIZE, 1UL << I); + ASSERT_TRUE(PageMap.isAllocated()); + EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpPowerOfTwo(I + 1), + PageMap.getBufferSize()); } // Go through 1, 2, 4, 8, .. {32,64} bits per counter. @@ -38,22 +41,44 @@ TEST(ScudoReleaseTest, PackedCounterArray) { // Make sure counters request one memory page for the buffer. const scudo::uptr NumCounters = (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I); - scudo::PackedCounterArray Counters(1U, NumCounters, + scudo::RegionPageMap PageMap(1U, NumCounters, 1UL << ((1UL << I) - 1)); - Counters.inc(0U, 0U); + ASSERT_TRUE(PageMap.isAllocated()); + PageMap.inc(0U, 0U); for (scudo::uptr C = 1; C < NumCounters - 1; C++) { - EXPECT_EQ(0UL, Counters.get(0U, C)); - Counters.inc(0U, C); - EXPECT_EQ(1UL, Counters.get(0U, C - 1)); + EXPECT_EQ(0UL, PageMap.get(0U, C)); + PageMap.inc(0U, C); + EXPECT_EQ(1UL, PageMap.get(0U, C - 1)); } - EXPECT_EQ(0UL, Counters.get(0U, NumCounters - 1)); - Counters.inc(0U, NumCounters - 1); + EXPECT_EQ(0UL, PageMap.get(0U, NumCounters - 1)); + PageMap.inc(0U, NumCounters - 1); if (I > 0) { - Counters.incRange(0u, 0U, NumCounters - 1); + PageMap.incRange(0u, 0U, NumCounters - 1); for (scudo::uptr C = 0; C < NumCounters; C++) - EXPECT_EQ(2UL, Counters.get(0U, C)); + EXPECT_EQ(2UL, PageMap.get(0U, C)); } } + + // Similar to the above except that we are using incN(). + for (scudo::uptr I = 0; (SCUDO_WORDSIZE >> I) != 0; I++) { + // Make sure counters request one memory page for the buffer. 
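For example, on a 64-bit platform with 4 KiB pages and I = 0, each counter's maximum value is 1 and therefore occupies a single bit, so NumCounters = (4096 / 8) * 64 = 32768 counters pack into 32768 bits, exactly one page. Each increment of I doubles the bits per counter and halves the counter count, so the buffer size stays at one page throughout the loop.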
+ const scudo::uptr NumCounters = + (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I); + scudo::uptr MaxValue = 1UL << ((1UL << I) - 1); + if (MaxValue <= 1U) + continue; + + scudo::RegionPageMap PageMap(1U, NumCounters, MaxValue); + + scudo::uptr N = MaxValue / 2; + PageMap.incN(0U, 0, N); + for (scudo::uptr C = 1; C < NumCounters; C++) { + EXPECT_EQ(0UL, PageMap.get(0U, C)); + PageMap.incN(0U, C, N); + EXPECT_EQ(N, PageMap.get(0U, C - 1)); + } + EXPECT_EQ(N, PageMap.get(0U, NumCounters - 1)); + } } class StringRangeRecorder { @@ -102,7 +127,7 @@ TEST(ScudoReleaseTest, FreePagesRangeTracker) { for (auto TestCase : TestCases) { StringRangeRecorder Recorder; - RangeTracker Tracker(&Recorder); + RangeTracker Tracker(Recorder); for (scudo::uptr I = 0; TestCase[I] != 0; I++) Tracker.processNextPage(TestCase[I] == 'x'); Tracker.finish(); @@ -117,41 +142,45 @@ TEST(ScudoReleaseTest, FreePagesRangeTracker) { class ReleasedPagesRecorder { public: + ReleasedPagesRecorder() = default; + explicit ReleasedPagesRecorder(scudo::uptr Base) : Base(Base) {} std::set<scudo::uptr> ReportedPages; void releasePageRangeToOS(scudo::uptr From, scudo::uptr To) { const scudo::uptr PageSize = scudo::getPageSizeCached(); for (scudo::uptr I = From; I < To; I += PageSize) - ReportedPages.insert(I); + ReportedPages.insert(I + getBase()); } - scudo::uptr getBase() const { return 0; } + scudo::uptr getBase() const { return Base; } + scudo::uptr Base = 0; }; // Simplified version of a TransferBatch. template <class SizeClassMap> struct FreeBatch { - static const scudo::u32 MaxCount = SizeClassMap::MaxNumCachedHint; + static const scudo::u16 MaxCount = SizeClassMap::MaxNumCachedHint; void clear() { Count = 0; } void add(scudo::uptr P) { DCHECK_LT(Count, MaxCount); Batch[Count++] = P; } - scudo::u32 getCount() const { return Count; } - scudo::uptr get(scudo::u32 I) const { + scudo::u16 getCount() const { return Count; } + scudo::uptr get(scudo::u16 I) const { DCHECK_LE(I, Count); return Batch[I]; } FreeBatch *Next; private: - scudo::u32 Count; scudo::uptr Batch[MaxCount]; + scudo::u16 Count; }; template <class SizeClassMap> void testReleaseFreeMemoryToOS() { typedef FreeBatch<SizeClassMap> Batch; const scudo::uptr PagesCount = 1024; const scudo::uptr PageSize = scudo::getPageSizeCached(); + const scudo::uptr PageSizeLog = scudo::getLog2(PageSize); std::mt19937 R; scudo::u32 RandState = 42; @@ -195,8 +224,15 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; }; auto DecompactPtr = [](scudo::uptr P) { return P; }; ReleasedPagesRecorder Recorder; - releaseFreeMemoryToOS(FreeList, MaxBlocks * BlockSize, 1U, BlockSize, - &Recorder, DecompactPtr, SkipRegion); + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/MaxBlocks * BlockSize); + ASSERT_FALSE(Context.hasBlockMarked()); + Context.markFreeBlocksInRegion(FreeList, DecompactPtr, Recorder.getBase(), + /*RegionIndex=*/0, MaxBlocks * BlockSize, + /*MayContainLastBlockInRegion=*/true); + ASSERT_TRUE(Context.hasBlockMarked()); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + scudo::RegionPageMap &PageMap = Context.PageMap; // Verify that there are no released pages touched by used chunks and all // ranges of free chunks big enough to contain the entire memory pages had @@ -223,17 +259,20 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { const bool PageReleased = Recorder.ReportedPages.find(J * PageSize) != 
Recorder.ReportedPages.end(); EXPECT_EQ(false, PageReleased); + EXPECT_EQ(false, + PageMap.isAllCounted(0, (J * PageSize) >> PageSizeLog)); } if (InFreeRange) { InFreeRange = false; // Verify that all entire memory pages covered by this range of free // chunks were released. - scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize); + scudo::uptr P = scudo::roundUp(CurrentFreeRangeStart, PageSize); while (P + PageSize <= CurrentBlock) { const bool PageReleased = Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end(); EXPECT_EQ(true, PageReleased); + EXPECT_EQ(true, PageMap.isAllCounted(0, P >> PageSizeLog)); VerifiedReleasedPages++; P += PageSize; } @@ -244,13 +283,14 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { } if (InFreeRange) { - scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize); + scudo::uptr P = scudo::roundUp(CurrentFreeRangeStart, PageSize); const scudo::uptr EndPage = - scudo::roundUpTo(MaxBlocks * BlockSize, PageSize); + scudo::roundUp(MaxBlocks * BlockSize, PageSize); while (P + PageSize <= EndPage) { const bool PageReleased = Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end(); EXPECT_EQ(true, PageReleased); + EXPECT_EQ(true, PageMap.isAllCounted(0, P >> PageSizeLog)); VerifiedReleasedPages++; P += PageSize; } @@ -266,6 +306,243 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { } } +template <class SizeClassMap> void testPageMapMarkRange() { + const scudo::uptr PageSize = scudo::getPageSizeCached(); + + for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { + const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); + + const scudo::uptr GroupNum = 2; + const scudo::uptr GroupSize = scudo::roundUp(BlockSize, PageSize) * 2; + const scudo::uptr RegionSize = + scudo::roundUpSlow(GroupSize * GroupNum, BlockSize); + const scudo::uptr RoundedRegionSize = scudo::roundUp(RegionSize, PageSize); + + std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); + for (scudo::uptr Block = 0; Block < RoundedRegionSize; Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize && + Page < RoundedRegionSize / PageSize; + ++Page) { + ASSERT_LT(Page, Pages.size()); + ++Pages[Page]; + } + } + + for (scudo::uptr GroupId = 0; GroupId < GroupNum; ++GroupId) { + const scudo::uptr GroupBeg = GroupId * GroupSize; + const scudo::uptr GroupEnd = GroupBeg + GroupSize; + + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize); + Context.markRangeAsAllCounted(GroupBeg, GroupEnd, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize); + + scudo::uptr FirstBlock = + ((GroupBeg + BlockSize - 1) / BlockSize) * BlockSize; + + // All the pages before first block page are not supposed to be marked. + if (FirstBlock / PageSize > 0) { + for (scudo::uptr Page = 0; Page <= FirstBlock / PageSize - 1; ++Page) + EXPECT_EQ(Context.PageMap.get(/*Region=*/0, Page), 0U); + } + + // Verify the pages used by the blocks in the group except that if the + // end of the last block is not aligned with `GroupEnd`, it'll be verified + // later. + scudo::uptr Block; + for (Block = FirstBlock; Block + BlockSize <= GroupEnd; + Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize; ++Page) { + // First used page in the group has two cases, which are w/ and w/o + // block sitting across the boundary. 
+ if (Page == FirstBlock / PageSize) { + if (FirstBlock % PageSize == 0) { + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0U, Page)); + } else { + // There's a block straddling `GroupBeg`, it's supposed to only + // increment the counter and we expect it should be 1 less + // (exclude the straddling one) than the total blocks on the page. + EXPECT_EQ(Context.PageMap.get(/*Region=*/0U, Page), + Pages[Page] - 1); + } + } else { + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, Page)); + } + } + } + + if (Block == GroupEnd) + continue; + + // Examine the last block which sits across the group boundary. + if (Block + BlockSize == RegionSize) { + // This is the last block in the region, it's supposed to mark all the + // pages as all counted. + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize; ++Page) { + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, Page)); + } + } else { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize; ++Page) { + if (Page <= (GroupEnd - 1) / PageSize) + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, Page)); + else + EXPECT_EQ(Context.PageMap.get(/*Region=*/0U, Page), 1U); + } + } + + const scudo::uptr FirstUncountedPage = + scudo::roundUp(Block + BlockSize, PageSize); + for (scudo::uptr Page = FirstUncountedPage; + Page <= RoundedRegionSize / PageSize; ++Page) { + EXPECT_EQ(Context.PageMap.get(/*Region=*/0U, Page), 0U); + } + } // Iterate each Group + + // Release the entire region. This is to ensure the last page is counted. + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize); + Context.markRangeAsAllCounted(/*From=*/0U, /*To=*/RegionSize, /*Base=*/0, + /*RegionIndex=*/0, RegionSize); + for (scudo::uptr Page = 0; Page < RoundedRegionSize / PageSize; ++Page) + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, Page)); + } // Iterate each size class +} + +template <class SizeClassMap> void testReleasePartialRegion() { + typedef FreeBatch<SizeClassMap> Batch; + const scudo::uptr PageSize = scudo::getPageSizeCached(); + + for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { + // In the following, we want to ensure the region includes at least 2 pages + // and we will release all the pages except the first one. The handling of + // the last block is tricky, so we always test the case that includes the + // last block. + const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); + const scudo::uptr ReleaseBase = scudo::roundUp(BlockSize, PageSize); + const scudo::uptr BasePageOffset = ReleaseBase / PageSize; + const scudo::uptr RegionSize = + scudo::roundUpSlow(scudo::roundUp(BlockSize, PageSize) + ReleaseBase, + BlockSize) + + BlockSize; + const scudo::uptr RoundedRegionSize = scudo::roundUp(RegionSize, PageSize); + + scudo::SinglyLinkedList<Batch> FreeList; + FreeList.clear(); + + // Skip the blocks in the first page and add the remaining. + std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); + for (scudo::uptr Block = scudo::roundUpSlow(ReleaseBase, BlockSize); + Block + BlockSize <= RoundedRegionSize; Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize; ++Page) { + ASSERT_LT(Page, Pages.size()); + ++Pages[Page]; + } + } + + // This follows the logic how we count the last page. It should be + // consistent with how markFreeBlocksInRegion() handles the last block. 
+ if (RoundedRegionSize % BlockSize != 0) + ++Pages.back(); + + Batch *CurrentBatch = nullptr; + for (scudo::uptr Block = scudo::roundUpSlow(ReleaseBase, BlockSize); + Block < RegionSize; Block += BlockSize) { + if (CurrentBatch == nullptr || + CurrentBatch->getCount() == Batch::MaxCount) { + CurrentBatch = new Batch; + CurrentBatch->clear(); + FreeList.push_back(CurrentBatch); + } + CurrentBatch->add(Block); + } + + auto VerifyReleaseToOs = [&](scudo::PageReleaseContext &Context) { + auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; }; + ReleasedPagesRecorder Recorder(ReleaseBase); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + const scudo::uptr FirstBlock = scudo::roundUpSlow(ReleaseBase, BlockSize); + + for (scudo::uptr P = 0; P < RoundedRegionSize; P += PageSize) { + if (P < FirstBlock) { + // If FirstBlock is not aligned with page boundary, the first touched + // page will not be released either. + EXPECT_TRUE(Recorder.ReportedPages.find(P) == + Recorder.ReportedPages.end()); + } else { + EXPECT_TRUE(Recorder.ReportedPages.find(P) != + Recorder.ReportedPages.end()); + } + } + }; + + // Test marking by visiting each block. + { + auto DecompactPtr = [](scudo::uptr P) { return P; }; + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); + Context.markFreeBlocksInRegion(FreeList, DecompactPtr, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize, + /*MayContainLastBlockInRegion=*/true); + for (const Batch &It : FreeList) { + for (scudo::u16 I = 0; I < It.getCount(); I++) { + scudo::uptr Block = It.get(I); + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize; ++Page) { + EXPECT_EQ(Pages[Page], Context.PageMap.get(/*Region=*/0U, + Page - BasePageOffset)); + } + } + } + + VerifyReleaseToOs(Context); + } + + // Test range marking. + { + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); + Context.markRangeAsAllCounted(ReleaseBase, RegionSize, /*Base=*/0U, + /*RegionIndex=*/0, RegionSize); + for (scudo::uptr Page = ReleaseBase / PageSize; + Page < RoundedRegionSize / PageSize; ++Page) { + if (Context.PageMap.get(/*Region=*/0, Page - BasePageOffset) != + Pages[Page]) { + EXPECT_TRUE(Context.PageMap.isAllCounted(/*Region=*/0, + Page - BasePageOffset)); + } + } + + VerifyReleaseToOs(Context); + } + + // Check the buffer size of PageMap. 
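The block below builds a full-region context and a partial one whose ReleaseSize is one page smaller and which starts at ReleaseBase; since the partial context tracks no more pages than the full one, its PageMap buffer should never be larger, and the check is a greater-or-equal comparison presumably because the word-granularity counter packing can make the two buffers come out the same size.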
+ { + scudo::PageReleaseContext Full(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize); + Full.ensurePageMapAllocated(); + scudo::PageReleaseContext Partial(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/RegionSize - PageSize, + ReleaseBase); + Partial.ensurePageMapAllocated(); + + EXPECT_GE(Full.PageMap.getBufferSize(), Partial.PageMap.getBufferSize()); + } + + while (!FreeList.empty()) { + CurrentBatch = FreeList.front(); + FreeList.pop_front(); + delete CurrentBatch; + } + } // Iterate each size class +} + TEST(ScudoReleaseTest, ReleaseFreeMemoryToOSDefault) { testReleaseFreeMemoryToOS<scudo::DefaultSizeClassMap>(); } @@ -277,3 +554,106 @@ TEST(ScudoReleaseTest, ReleaseFreeMemoryToOSAndroid) { TEST(ScudoReleaseTest, ReleaseFreeMemoryToOSSvelte) { testReleaseFreeMemoryToOS<scudo::SvelteSizeClassMap>(); } + +TEST(ScudoReleaseTest, PageMapMarkRange) { + testPageMapMarkRange<scudo::DefaultSizeClassMap>(); + testPageMapMarkRange<scudo::AndroidSizeClassMap>(); + testPageMapMarkRange<scudo::FuchsiaSizeClassMap>(); + testPageMapMarkRange<scudo::SvelteSizeClassMap>(); +} + +TEST(ScudoReleaseTest, ReleasePartialRegion) { + testReleasePartialRegion<scudo::DefaultSizeClassMap>(); + testReleasePartialRegion<scudo::AndroidSizeClassMap>(); + testReleasePartialRegion<scudo::FuchsiaSizeClassMap>(); + testReleasePartialRegion<scudo::SvelteSizeClassMap>(); +} + +template <class SizeClassMap> void testReleaseRangeWithSingleBlock() { + const scudo::uptr PageSize = scudo::getPageSizeCached(); + + // We want to test if a memory group only contains single block that will be + // handled properly. The case is like: + // + // From To + // +----------------------+ + // +------------+------------+ + // | | | + // +------------+------------+ + // ^ + // RegionSize + // + // Note that `From` will be page aligned. + // + // If the second from the last block is aligned at `From`, then we expect all + // the pages after `From` will be marked as can-be-released. Otherwise, the + // pages only touched by the last blocks will be marked as can-be-released. + for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { + const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); + const scudo::uptr From = scudo::roundUp(BlockSize, PageSize); + const scudo::uptr To = + From % BlockSize == 0 + ? 
From + BlockSize + : scudo::roundDownSlow(From + BlockSize, BlockSize) + BlockSize; + const scudo::uptr RoundedRegionSize = scudo::roundUp(To, PageSize); + + std::vector<scudo::uptr> Pages(RoundedRegionSize / PageSize, 0); + for (scudo::uptr Block = (To - BlockSize); Block < RoundedRegionSize; + Block += BlockSize) { + for (scudo::uptr Page = Block / PageSize; + Page <= (Block + BlockSize - 1) / PageSize && + Page < RoundedRegionSize / PageSize; + ++Page) { + ASSERT_LT(Page, Pages.size()); + ++Pages[Page]; + } + } + + scudo::PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + /*ReleaseSize=*/To, + /*ReleaseBase=*/0U); + Context.markRangeAsAllCounted(From, To, /*Base=*/0U, /*RegionIndex=*/0, + /*RegionSize=*/To); + + for (scudo::uptr Page = 0; Page < RoundedRegionSize; Page += PageSize) { + if (Context.PageMap.get(/*Region=*/0U, Page / PageSize) != + Pages[Page / PageSize]) { + EXPECT_TRUE( + Context.PageMap.isAllCounted(/*Region=*/0U, Page / PageSize)); + } + } + } // for each size class +} + +TEST(ScudoReleaseTest, RangeReleaseRegionWithSingleBlock) { + testReleaseRangeWithSingleBlock<scudo::DefaultSizeClassMap>(); + testReleaseRangeWithSingleBlock<scudo::AndroidSizeClassMap>(); + testReleaseRangeWithSingleBlock<scudo::FuchsiaSizeClassMap>(); + testReleaseRangeWithSingleBlock<scudo::SvelteSizeClassMap>(); +} + +TEST(ScudoReleaseTest, BufferPool) { + constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1; + constexpr scudo::uptr StaticBufferSize = 512U; + + // Allocate the buffer pool on the heap because it is quite large (slightly + // more than StaticBufferCount * StaticBufferSize * sizeof(uptr)) and it may + // not fit in the stack on some platforms. + using BufferPool = scudo::BufferPool<StaticBufferCount, StaticBufferSize>; + std::unique_ptr<BufferPool> Pool(new BufferPool()); + + std::vector<std::pair<scudo::uptr *, scudo::uptr>> Buffers; + for (scudo::uptr I = 0; I < StaticBufferCount; ++I) { + scudo::uptr *P = Pool->getBuffer(StaticBufferSize); + EXPECT_TRUE(Pool->isStaticBufferTestOnly(P, StaticBufferSize)); + Buffers.emplace_back(P, StaticBufferSize); + } + + // The static buffer is supposed to be used up. + scudo::uptr *P = Pool->getBuffer(StaticBufferSize); + EXPECT_FALSE(Pool->isStaticBufferTestOnly(P, StaticBufferSize)); + + Pool->releaseBuffer(P, StaticBufferSize); + for (auto &Buffer : Buffers) + Pool->releaseBuffer(Buffer.first, Buffer.second); +} diff --git a/standalone/tests/scudo_hooks_test.cpp b/standalone/tests/scudo_hooks_test.cpp new file mode 100644 index 00000000000..7184ec12a8b --- /dev/null +++ b/standalone/tests/scudo_hooks_test.cpp @@ -0,0 +1,114 @@ +//===-- scudo_hooks_test.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "allocator_config.h" +#include "combined.h" + +namespace { +void *LastAllocatedPtr = nullptr; +size_t LastRequestSize = 0; +void *LastDeallocatedPtr = nullptr; +} // namespace + +// Scudo defines weak symbols that can be defined by a client binary +// to register callbacks at key points in the allocation timeline. 
In +// order to enforce those invariants, we provide definitions that +// update some global state every time they are called, so that tests +// can inspect their effects. An unfortunate side effect of this +// setup is that because those symbols are part of the binary, they +// can't be selectively enabled; that means that they will get called +// on unrelated tests in the same compilation unit. To mitigate this +// issue, we insulate those tests in a separate compilation unit. +extern "C" { +__attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr, + size_t Size) { + LastAllocatedPtr = Ptr; + LastRequestSize = Size; +} +__attribute__((visibility("default"))) void __scudo_deallocate_hook(void *Ptr) { + LastDeallocatedPtr = Ptr; +} +} + +// Simple check that allocation callbacks, when registered, are called: +// 1) __scudo_allocate_hook is called when allocating. +// 2) __scudo_deallocate_hook is called when deallocating. +// 3) Both hooks are called when reallocating. +// 4) Neither are called for a no-op reallocation. +TEST(ScudoHooksTest, AllocateHooks) { + scudo::Allocator<scudo::DefaultConfig> Allocator; + constexpr scudo::uptr DefaultSize = 16U; + constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; + + // Simple allocation and deallocation. + { + LastAllocatedPtr = nullptr; + LastRequestSize = 0; + + void *Ptr = Allocator.allocate(DefaultSize, Origin); + + EXPECT_EQ(Ptr, LastAllocatedPtr); + EXPECT_EQ(DefaultSize, LastRequestSize); + + LastDeallocatedPtr = nullptr; + + Allocator.deallocate(Ptr, Origin); + + EXPECT_EQ(Ptr, LastDeallocatedPtr); + } + + // Simple no-op, same size reallocation. + { + void *Ptr = Allocator.allocate(DefaultSize, Origin); + + LastAllocatedPtr = nullptr; + LastRequestSize = 0; + LastDeallocatedPtr = nullptr; + + void *NewPtr = Allocator.reallocate(Ptr, DefaultSize); + + EXPECT_EQ(Ptr, NewPtr); + EXPECT_EQ(nullptr, LastAllocatedPtr); + EXPECT_EQ(0U, LastRequestSize); + EXPECT_EQ(nullptr, LastDeallocatedPtr); + } + + // Reallocation in increasing size classes. This ensures that at + // least one of the reallocations will be meaningful. + { + void *Ptr = Allocator.allocate(0, Origin); + + for (scudo::uptr ClassId = 1U; + ClassId <= scudo::DefaultConfig::Primary::SizeClassMap::LargestClassId; + ++ClassId) { + const scudo::uptr Size = + scudo::DefaultConfig::Primary::SizeClassMap::getSizeByClassId( + ClassId); + + LastAllocatedPtr = nullptr; + LastRequestSize = 0; + LastDeallocatedPtr = nullptr; + + void *NewPtr = Allocator.reallocate(Ptr, Size); + + if (NewPtr != Ptr) { + EXPECT_EQ(NewPtr, LastAllocatedPtr); + EXPECT_EQ(Size, LastRequestSize); + EXPECT_EQ(Ptr, LastDeallocatedPtr); + } else { + EXPECT_EQ(nullptr, LastAllocatedPtr); + EXPECT_EQ(0U, LastRequestSize); + EXPECT_EQ(nullptr, LastDeallocatedPtr); + } + + Ptr = NewPtr; + } + } +} diff --git a/standalone/tests/secondary_test.cpp b/standalone/tests/secondary_test.cpp index e656466d68f..b0319011771 100644 --- a/standalone/tests/secondary_test.cpp +++ b/standalone/tests/secondary_test.cpp @@ -64,7 +64,7 @@ template <typename Config> static void testSecondaryBasic(void) { P = L->allocate(Options, Size + Align, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast<void *>( - scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align)); + scudo::roundUp(reinterpret_cast<scudo::uptr>(P), Align)); memset(AlignedP, 'A', Size); L->deallocate(Options, P); @@ -122,7 +122,7 @@ struct MapAllocatorTest : public Test { // combined allocator. 
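For reference, a client binary hooks allocations the same way the test above does: by providing strong definitions for the weak __scudo_allocate_hook and __scudo_deallocate_hook symbols. A minimal sketch, taking only the hook names and signatures from scudo_hooks_test.cpp; the live-allocation counter is illustrative:

#include <atomic>
#include <cstddef>

// Illustrative client-side hooks; only the names and signatures match the
// weak symbols shown above, the bookkeeping is hypothetical.
static std::atomic<size_t> LiveAllocations{0};

extern "C" void __scudo_allocate_hook(void *Ptr, size_t Size) {
  (void)Size;
  if (Ptr)
    LiveAllocations.fetch_add(1, std::memory_order_relaxed);
}

extern "C" void __scudo_deallocate_hook(void *Ptr) {
  if (Ptr)
    LiveAllocations.fetch_sub(1, std::memory_order_relaxed);
}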
TEST_F(MapAllocatorTest, SecondaryCombinations) { constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16); - constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign); + constexpr scudo::uptr HeaderSize = scudo::roundUp(8, MinAlign); for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) { for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16; AlignLog++) { @@ -131,13 +131,13 @@ TEST_F(MapAllocatorTest, SecondaryCombinations) { if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0) continue; const scudo::uptr UserSize = - scudo::roundUpTo((1U << SizeLog) + Delta, MinAlign); + scudo::roundUp((1U << SizeLog) + Delta, MinAlign); const scudo::uptr Size = HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0); void *P = Allocator->allocate(Options, Size, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast<void *>( - scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align)); + scudo::roundUp(reinterpret_cast<scudo::uptr>(P), Align)); memset(AlignedP, 0xff, UserSize); Allocator->deallocate(Options, P); } diff --git a/standalone/tests/size_class_map_test.cpp b/standalone/tests/size_class_map_test.cpp index 076f36f86be..b11db1e9f64 100644 --- a/standalone/tests/size_class_map_test.cpp +++ b/standalone/tests/size_class_map_test.cpp @@ -33,7 +33,7 @@ struct OneClassSizeClassConfig { static const scudo::uptr MinSizeLog = 5; static const scudo::uptr MidSizeLog = 5; static const scudo::uptr MaxSizeLog = 5; - static const scudo::u32 MaxNumCachedHint = 0; + static const scudo::u16 MaxNumCachedHint = 0; static const scudo::uptr MaxBytesCachedLog = 0; static const scudo::uptr SizeDelta = 0; }; @@ -48,7 +48,7 @@ struct LargeMaxSizeClassConfig { static const scudo::uptr MinSizeLog = 4; static const scudo::uptr MidSizeLog = 8; static const scudo::uptr MaxSizeLog = 63; - static const scudo::u32 MaxNumCachedHint = 128; + static const scudo::u16 MaxNumCachedHint = 128; static const scudo::uptr MaxBytesCachedLog = 16; static const scudo::uptr SizeDelta = 0; }; diff --git a/standalone/tests/strings_test.cpp b/standalone/tests/strings_test.cpp index 6d7e78a816a..7a69ffd9762 100644 --- a/standalone/tests/strings_test.cpp +++ b/standalone/tests/strings_test.cpp @@ -43,9 +43,11 @@ TEST(ScudoStringsTest, Clear) { } TEST(ScudoStringsTest, ClearLarge) { + constexpr char appendString[] = "123"; scudo::ScopedString Str; + Str.reserve(sizeof(appendString) * 10000); for (int i = 0; i < 10000; ++i) - Str.append("123"); + Str.append(appendString); Str.clear(); EXPECT_EQ(0ul, Str.length()); EXPECT_EQ('\0', *Str.data()); @@ -76,6 +78,7 @@ TEST(ScudoStringTest, PotentialOverflows) { // of it with variations of append. The expectation is for nothing to crash. const scudo::uptr PageSize = scudo::getPageSizeCached(); scudo::ScopedString Str; + Str.reserve(2 * PageSize); Str.clear(); fillString(Str, 2 * PageSize); Str.clear(); diff --git a/standalone/tests/timing_test.cpp b/standalone/tests/timing_test.cpp new file mode 100644 index 00000000000..09a6c312246 --- /dev/null +++ b/standalone/tests/timing_test.cpp @@ -0,0 +1,86 @@ +//===-- timing_test.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "timing.h" + +#include <string> + +class ScudoTimingTest : public Test { +public: + void testFunc1() { scudo::ScopedTimer ST(Manager, __func__); } + + void testFunc2() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc1(); + } + + void testChainedCalls() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc2(); + } + + void testIgnoredTimer() { + scudo::ScopedTimer ST(Manager, __func__); + ST.ignore(); + } + + void printAllTimersStats() { Manager.printAll(); } + + scudo::TimingManager &getTimingManager() { return Manager; } + +private: + scudo::TimingManager Manager; +}; + +// Given that the output of statistics of timers are dumped through +// `scudo::Printf` which is platform dependent, so we don't have a reliable way +// to catch the output and verify the details. Now we only verify the number of +// invocations on linux. +TEST_F(ScudoTimingTest, SimpleTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + testIgnoredTimer(); + testChainedCalls(); + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("testIgnoredTimer (1)") == std::string::npos); + EXPECT_TRUE(output.find("testChainedCalls (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc2 (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc1 (1)") != std::string::npos); +#endif +} + +TEST_F(ScudoTimingTest, NestedTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + { + scudo::ScopedTimer Outer(getTimingManager(), "Outer"); + { + scudo::ScopedTimer Inner1(getTimingManager(), Outer, "Inner1"); + { scudo::ScopedTimer Inner2(getTimingManager(), Inner1, "Inner2"); } + } + } + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("Outer (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner1 (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner2 (1)") != std::string::npos); +#endif +} diff --git a/standalone/tests/tsd_test.cpp b/standalone/tests/tsd_test.cpp index 17387ee7c57..a092fdde904 100644 --- a/standalone/tests/tsd_test.cpp +++ b/standalone/tests/tsd_test.cpp @@ -25,7 +25,9 @@ template <class Config> class MockAllocator { public: using ThisT = MockAllocator<Config>; using TSDRegistryT = typename Config::template TSDRegistryT<ThisT>; - using CacheT = struct MockCache { volatile scudo::uptr Canary; }; + using CacheT = struct MockCache { + volatile scudo::uptr Canary; + }; using QuarantineCacheT = struct MockQuarantine {}; void init() { @@ -80,7 +82,7 @@ TEST(ScudoTSDTest, TSDRegistryInit) { EXPECT_FALSE(Allocator->isInitialized()); auto Registry = Allocator->getTSDRegistry(); - Registry->init(Allocator.get()); + Registry->initOnceMaybe(Allocator.get()); EXPECT_TRUE(Allocator->isInitialized()); } @@ -100,15 +102,15 @@ template <class AllocatorT> static void testRegistry() { bool UnlockRequired; auto TSD = Registry->getTSDAndLock(&UnlockRequired); EXPECT_NE(TSD, nullptr); - EXPECT_EQ(TSD->Cache.Canary, 0U); + EXPECT_EQ(TSD->getCache().Canary, 0U); if (UnlockRequired) TSD->unlock(); Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false); TSD = Registry->getTSDAndLock(&UnlockRequired); EXPECT_NE(TSD, nullptr); - 
EXPECT_EQ(TSD->Cache.Canary, 0U); - memset(&TSD->Cache, 0x42, sizeof(TSD->Cache)); + EXPECT_EQ(TSD->getCache().Canary, 0U); + memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache())); if (UnlockRequired) TSD->unlock(); } @@ -139,14 +141,14 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { // For an exclusive TSD, the cache should be empty. We cannot guarantee the // same for a shared TSD. if (!UnlockRequired) - EXPECT_EQ(TSD->Cache.Canary, 0U); + EXPECT_EQ(TSD->getCache().Canary, 0U); // Transform the thread id to a uptr to use it as canary. const scudo::uptr Canary = static_cast<scudo::uptr>( std::hash<std::thread::id>{}(std::this_thread::get_id())); - TSD->Cache.Canary = Canary; + TSD->getCache().Canary = Canary; // Loop a few times to make sure that a concurrent thread isn't modifying it. for (scudo::uptr I = 0; I < 4096U; I++) - EXPECT_EQ(TSD->Cache.Canary, Canary); + EXPECT_EQ(TSD->getCache().Canary, Canary); if (UnlockRequired) TSD->unlock(); } diff --git a/standalone/thread_annotations.h b/standalone/thread_annotations.h new file mode 100644 index 00000000000..68a1087c203 --- /dev/null +++ b/standalone/thread_annotations.h @@ -0,0 +1,70 @@ +//===-- thread_annotations.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_THREAD_ANNOTATIONS_ +#define SCUDO_THREAD_ANNOTATIONS_ + +// Enable thread safety attributes only with clang. +// The attributes can be safely ignored when compiling with other compilers. +#if defined(__clang__) +#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op +#endif + +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) + +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) + +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) + +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) + +#define ACQUIRED_BEFORE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) + +#define ACQUIRED_AFTER(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) + +#define REQUIRES(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) + +#define REQUIRES_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) + +#define ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) + +#define ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) + +#define RELEASE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) + +#define RELEASE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) + +#define EXCLUDES(...) 
THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) + +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) + +#define ASSERT_SHARED_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) + +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) + +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) + +#endif // SCUDO_THREAD_ANNOTATIONS_ diff --git a/standalone/timing.cpp b/standalone/timing.cpp new file mode 100644 index 00000000000..59ae21d10f0 --- /dev/null +++ b/standalone/timing.cpp @@ -0,0 +1,29 @@ +//===-- timing.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "timing.h" + +namespace scudo { + +Timer::~Timer() { + if (Manager) + Manager->report(*this); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const char *Name) + : Timer(Manager.getOrCreateTimer(Name)) { + start(); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const Timer &Nest, + const char *Name) + : Timer(Manager.nest(Nest, Name)) { + start(); +} + +} // namespace scudo diff --git a/standalone/timing.h b/standalone/timing.h new file mode 100644 index 00000000000..84caa79e5c3 --- /dev/null +++ b/standalone/timing.h @@ -0,0 +1,221 @@ +//===-- timing.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_TIMING_H_ +#define SCUDO_TIMING_H_ + +#include "common.h" +#include "mutex.h" +#include "string_utils.h" +#include "thread_annotations.h" + +#include <inttypes.h> +#include <string.h> + +namespace scudo { + +class TimingManager; + +// A simple timer for evaluating execution time of code snippets. It can be used +// along with TimingManager or standalone. +class Timer { +public: + // The use of Timer without binding to a TimingManager is supposed to do the + // timer logging manually. Otherwise, TimingManager will do the logging stuff + // for you. + Timer() = default; + Timer(Timer &&Other) + : StartTime(0), AccTime(Other.AccTime), Manager(Other.Manager), + HandleId(Other.HandleId) { + Other.Manager = nullptr; + } + + Timer(const Timer &) = delete; + + ~Timer(); + + void start() { + CHECK_EQ(StartTime, 0U); + StartTime = getMonotonicTime(); + } + void stop() { + AccTime += getMonotonicTime() - StartTime; + StartTime = 0; + } + u64 getAccumulatedTime() const { return AccTime; } + + // Unset the bound TimingManager so that we don't report the data back. This + // is useful if we only want to track subset of certain scope events. + void ignore() { + StartTime = 0; + AccTime = 0; + Manager = nullptr; + } + +protected: + friend class TimingManager; + Timer(TimingManager &Manager, u32 HandleId) + : Manager(&Manager), HandleId(HandleId) {} + + u64 StartTime = 0; + u64 AccTime = 0; + TimingManager *Manager = nullptr; + u32 HandleId; +}; + +// A RAII-style wrapper for easy scope execution measurement. 
Note that in order +// not to take additional space for the message like `Name`. It only works with +// TimingManager. +class ScopedTimer : public Timer { +public: + ScopedTimer(TimingManager &Manager, const char *Name); + ScopedTimer(TimingManager &Manager, const Timer &Nest, const char *Name); + ~ScopedTimer() { stop(); } +}; + +// In Scudo, the execution time of single run of code snippets may not be +// useful, we are more interested in the average time from several runs. +// TimingManager lets the registered timer report their data and reports the +// average execution time for each timer periodically. +class TimingManager { +public: + TimingManager(u32 PrintingInterval = DefaultPrintingInterval) + : PrintingInterval(PrintingInterval) {} + ~TimingManager() { + if (NumAllocatedTimers != 0) + printAll(); + } + + Timer getOrCreateTimer(const char *Name) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + + CHECK_LT(strlen(Name), MaxLenOfTimerName); + for (u32 I = 0; I < NumAllocatedTimers; ++I) { + if (strncmp(Name, Timers[I].Name, MaxLenOfTimerName) == 0) + return Timer(*this, I); + } + + CHECK_LT(NumAllocatedTimers, MaxNumberOfTimers); + strncpy(Timers[NumAllocatedTimers].Name, Name, MaxLenOfTimerName); + TimerRecords[NumAllocatedTimers].AccumulatedTime = 0; + TimerRecords[NumAllocatedTimers].Occurrence = 0; + return Timer(*this, NumAllocatedTimers++); + } + + // Add a sub-Timer associated with another Timer. This is used when we want to + // detail the execution time in the scope of a Timer. + // For example, + // void Foo() { + // // T1 records the time spent in both first and second tasks. + // ScopedTimer T1(getTimingManager(), "Task1"); + // { + // // T2 records the time spent in first task + // ScopedTimer T2(getTimingManager, T1, "Task2"); + // // Do first task. + // } + // // Do second task. + // } + // + // The report will show proper indents to indicate the nested relation like, + // -- Average Operation Time -- -- Name (# of Calls) -- + // 10.0(ns) Task1 (1) + // 5.0(ns) Task2 (1) + Timer nest(const Timer &T, const char *Name) EXCLUDES(Mutex) { + CHECK_EQ(T.Manager, this); + Timer Nesting = getOrCreateTimer(Name); + + ScopedLock L(Mutex); + CHECK_NE(Nesting.HandleId, T.HandleId); + Timers[Nesting.HandleId].Nesting = T.HandleId; + return Nesting; + } + + void report(const Timer &T) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + + const u32 HandleId = T.HandleId; + CHECK_LT(HandleId, MaxNumberOfTimers); + TimerRecords[HandleId].AccumulatedTime += T.getAccumulatedTime(); + ++TimerRecords[HandleId].Occurrence; + ++NumEventsReported; + if (NumEventsReported % PrintingInterval == 0) + printAllImpl(); + } + + void printAll() EXCLUDES(Mutex) { + ScopedLock L(Mutex); + printAllImpl(); + } + +private: + void printAllImpl() REQUIRES(Mutex) { + static char NameHeader[] = "-- Name (# of Calls) --"; + static char AvgHeader[] = "-- Average Operation Time --"; + ScopedString Str; + Str.append("%-15s %-15s\n", AvgHeader, NameHeader); + + for (u32 I = 0; I < NumAllocatedTimers; ++I) { + if (Timers[I].Nesting != MaxNumberOfTimers) + continue; + printImpl(Str, I); + } + + Str.output(); + } + + void printImpl(ScopedString &Str, const u32 HandleId, + const u32 ExtraIndent = 0) REQUIRES(Mutex) { + const u64 AccumulatedTime = TimerRecords[HandleId].AccumulatedTime; + const u64 Occurrence = TimerRecords[HandleId].Occurrence; + const u64 Integral = Occurrence == 0 ? 0 : AccumulatedTime / Occurrence; + // Only keep single digit of fraction is enough and it enables easier layout + // maintenance. 
+ const u64 Fraction = + Occurrence == 0 ? 0 + : ((AccumulatedTime % Occurrence) * 10) / Occurrence; + + Str.append("%14" PRId64 ".%" PRId64 "(ns) %-11s", Integral, Fraction, " "); + + for (u32 I = 0; I < ExtraIndent; ++I) + Str.append("%s", " "); + Str.append("%s (%" PRId64 ")\n", Timers[HandleId].Name, Occurrence); + + for (u32 I = 0; I < NumAllocatedTimers; ++I) + if (Timers[I].Nesting == HandleId) + printImpl(Str, I, ExtraIndent + 1); + } + + // Instead of maintaining pages for timer registration, a static buffer is + // sufficient for most use cases in Scudo. + static constexpr u32 MaxNumberOfTimers = 50; + static constexpr u32 MaxLenOfTimerName = 50; + static constexpr u32 DefaultPrintingInterval = 100; + + struct Record { + u64 AccumulatedTime = 0; + u64 Occurrence = 0; + }; + + struct TimerInfo { + char Name[MaxLenOfTimerName + 1]; + u32 Nesting = MaxNumberOfTimers; + }; + + HybridMutex Mutex; + // The frequency of proactively dumping the timer statistics. For example, the + // default setting is to dump the statistics every 100 reported events. + u32 PrintingInterval GUARDED_BY(Mutex); + u64 NumEventsReported GUARDED_BY(Mutex) = 0; + u32 NumAllocatedTimers GUARDED_BY(Mutex) = 0; + TimerInfo Timers[MaxNumberOfTimers] GUARDED_BY(Mutex); + Record TimerRecords[MaxNumberOfTimers] GUARDED_BY(Mutex); +}; + +} // namespace scudo + +#endif // SCUDO_TIMING_H_ diff --git a/standalone/tools/compute_size_class_config.cpp b/standalone/tools/compute_size_class_config.cpp index 8b17be0e965..bcaa5834932 100644 --- a/standalone/tools/compute_size_class_config.cpp +++ b/standalone/tools/compute_size_class_config.cpp @@ -140,7 +140,7 @@ struct MySizeClassConfig { static const uptr MinSizeLog = %zu; static const uptr MidSizeLog = %zu; static const uptr MaxSizeLog = %zu; - static const u32 MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 14; static constexpr u32 Classes[] = {)", diff --git a/standalone/trusty.cpp b/standalone/trusty.cpp index 81d6bc585f0..c08a4e6f433 100644 --- a/standalone/trusty.cpp +++ b/standalone/trusty.cpp @@ -37,7 +37,7 @@ void *map(UNUSED void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, uptr Start; uptr End; - Start = roundUpTo(ProgramBreak, SBRK_ALIGN); + Start = roundUp(ProgramBreak, SBRK_ALIGN); // Don't actually extend the heap if MAP_NOACCESS flag is set since this is // the case where Scudo tries to reserve a memory region without mapping // physical pages. @@ -45,7 +45,7 @@ void *map(UNUSED void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, return reinterpret_cast<void *>(Start); // Attempt to extend the heap by Size bytes using _trusty_brk. 
- End = roundUpTo(Start + Size, SBRK_ALIGN); + End = roundUp(Start + Size, SBRK_ALIGN); ProgramBreak = reinterpret_cast<uptr>(_trusty_brk(reinterpret_cast<void *>(End))); if (ProgramBreak < End) { @@ -76,6 +76,8 @@ void HybridMutex::lockSlow() {} void HybridMutex::unlock() {} +void HybridMutex::assertHeldImpl() {} + u64 getMonotonicTime() { timespec TS; clock_gettime(CLOCK_MONOTONIC, &TS); @@ -83,6 +85,17 @@ u64 getMonotonicTime() { static_cast<u64>(TS.tv_nsec); } +u64 getMonotonicTimeFast() { +#if defined(CLOCK_MONOTONIC_COARSE) + timespec TS; + clock_gettime(CLOCK_MONOTONIC_COARSE, &TS); + return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) + + static_cast<u64>(TS.tv_nsec); +#else + return getMonotonicTime(); +#endif +} + u32 getNumberOfCPUs() { return 0; } u32 getThreadID() { return 0; } diff --git a/standalone/tsd.h b/standalone/tsd.h index b400a3b56da..c5ed6ddfa12 100644 --- a/standalone/tsd.h +++ b/standalone/tsd.h @@ -12,6 +12,7 @@ #include "atomic_helpers.h" #include "common.h" #include "mutex.h" +#include "thread_annotations.h" #include <limits.h> // for PTHREAD_DESTRUCTOR_ITERATIONS #include <pthread.h> @@ -24,21 +25,17 @@ namespace scudo { template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { - typename Allocator::CacheT Cache; - typename Allocator::QuarantineCacheT QuarantineCache; using ThisT = TSD<Allocator>; u8 DestructorIterations = 0; - void init(Allocator *Instance) { + void init(Allocator *Instance) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(DestructorIterations, 0U); DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); Instance->initCache(&Cache); DestructorIterations = PTHREAD_DESTRUCTOR_ITERATIONS; } - void commitBack(Allocator *Instance) { Instance->commitBack(this); } - - inline bool tryLock() { + inline bool tryLock() NO_THREAD_SAFETY_ANALYSIS { if (Mutex.tryLock()) { atomic_store_relaxed(&Precedence, 0); return true; @@ -49,16 +46,40 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { static_cast<uptr>(getMonotonicTime() >> FIRST_32_SECOND_64(16, 0))); return false; } - inline void lock() { + inline void lock() NO_THREAD_SAFETY_ANALYSIS { atomic_store_relaxed(&Precedence, 0); Mutex.lock(); } - inline void unlock() { Mutex.unlock(); } + inline void unlock() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } + void commitBack(Allocator *Instance) ASSERT_CAPABILITY(Mutex) { + Instance->commitBack(this); + } + + // Ideally, we may want to assert that all the operations on + // Cache/QuarantineCache always have the `Mutex` acquired. However, the + // current architecture of accessing TSD is not easy to cooperate with the + // thread-safety analysis because of pointer aliasing. So now we just add the + // assertion on the getters of Cache/QuarantineCache. + // + // TODO(chiahungduan): Ideally, we want to do `Mutex.assertHeld` but acquiring + // TSD doesn't always require holding the lock. Add this assertion while the + // lock is always acquired. 
+ typename Allocator::CacheT &getCache() ASSERT_CAPABILITY(Mutex) { + return Cache; + } + typename Allocator::QuarantineCacheT &getQuarantineCache() + ASSERT_CAPABILITY(Mutex) { + return QuarantineCache; + } + private: HybridMutex Mutex; atomic_uptr Precedence = {}; + + typename Allocator::CacheT Cache GUARDED_BY(Mutex); + typename Allocator::QuarantineCacheT QuarantineCache GUARDED_BY(Mutex); }; } // namespace scudo diff --git a/standalone/tsd_exclusive.h b/standalone/tsd_exclusive.h index d49427b2005..23836742023 100644 --- a/standalone/tsd_exclusive.h +++ b/standalone/tsd_exclusive.h @@ -11,6 +11,8 @@ #include "tsd.h" +#include "string_utils.h" + namespace scudo { struct ThreadState { @@ -25,7 +27,7 @@ struct ThreadState { template <class Allocator> void teardownThread(void *Ptr); template <class Allocator> struct TSDRegistryExT { - void init(Allocator *Instance) { + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0); @@ -33,14 +35,14 @@ template <class Allocator> struct TSDRegistryExT { Initialized = true; } - void initOnceMaybe(Allocator *Instance) { + void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (LIKELY(Initialized)) return; init(Instance); // Sets Initialized. } - void unmapTestOnly(Allocator *Instance) { + void unmapTestOnly(Allocator *Instance) EXCLUDES(Mutex) { DCHECK(Instance); if (reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey))) { DCHECK_EQ(reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey)), @@ -53,16 +55,32 @@ template <class Allocator> struct TSDRegistryExT { FallbackTSD.commitBack(Instance); FallbackTSD = {}; State = {}; + ScopedLock L(Mutex); Initialized = false; } + void drainCaches(Allocator *Instance) { + // We don't have a way to iterate all thread local `ThreadTSD`s. Simply + // drain the `ThreadTSD` of current thread and `FallbackTSD`. + Instance->drainCache(&ThreadTSD); + FallbackTSD.lock(); + Instance->drainCache(&FallbackTSD); + FallbackTSD.unlock(); + } + ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { if (LIKELY(State.InitState != ThreadState::NotInitialized)) return; initThread(Instance, MinimalInit); } - ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) { + // TODO(chiahungduan): Consider removing the argument `UnlockRequired` by + // embedding the logic into TSD or always locking the TSD. It will enable us + // to properly mark thread annotation here and adding proper runtime + // assertions in the member functions of TSD. For example, assert the lock is + // acquired before calling TSD::commitBack(). + ALWAYS_INLINE TSD<Allocator> * + getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { if (LIKELY(State.InitState == ThreadState::Initialized && !atomic_load(&Disabled, memory_order_acquire))) { *UnlockRequired = false; @@ -75,13 +93,13 @@ template <class Allocator> struct TSDRegistryExT { // To disable the exclusive TSD registry, we effectively lock the fallback TSD // and force all threads to attempt to use it instead of their local one. 
- void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); FallbackTSD.lock(); atomic_store(&Disabled, 1U, memory_order_release); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { atomic_store(&Disabled, 0U, memory_order_release); FallbackTSD.unlock(); Mutex.unlock(); @@ -97,6 +115,13 @@ template <class Allocator> struct TSDRegistryExT { bool getDisableMemInit() { return State.DisableMemInit; } + void getStats(ScopedString *Str) { + // We don't have a way to iterate all thread local `ThreadTSD`s. Instead of + // printing only self `ThreadTSD` which may mislead the usage, we just skip + // it. + Str->append("Exclusive TSD don't support iterating each TSD\n"); + } + private: // Using minimal initialization allows for global initialization while keeping // the thread specific structure untouched. The fallback structure will be @@ -113,7 +138,7 @@ private: } pthread_key_t PThreadKey = {}; - bool Initialized = false; + bool Initialized GUARDED_BY(Mutex) = false; atomic_u8 Disabled = {}; TSD<Allocator> FallbackTSD; HybridMutex Mutex; @@ -128,7 +153,8 @@ thread_local TSD<Allocator> TSDRegistryExT<Allocator>::ThreadTSD; template <class Allocator> thread_local ThreadState TSDRegistryExT<Allocator>::State; -template <class Allocator> void teardownThread(void *Ptr) { +template <class Allocator> +void teardownThread(void *Ptr) NO_THREAD_SAFETY_ANALYSIS { typedef TSDRegistryExT<Allocator> TSDRegistryT; Allocator *Instance = reinterpret_cast<Allocator *>(Ptr); // The glibc POSIX thread-local-storage deallocation routine calls user diff --git a/standalone/tsd_shared.h b/standalone/tsd_shared.h index 1c2a880416b..dcb0948ad78 100644 --- a/standalone/tsd_shared.h +++ b/standalone/tsd_shared.h @@ -11,6 +11,8 @@ #include "tsd.h" +#include "string_utils.h" + #if SCUDO_HAS_PLATFORM_TLS_SLOT // This is a platform-provided header that needs to be on the include path when // Scudo is compiled. It must declare a function with the prototype: @@ -24,7 +26,7 @@ namespace scudo { template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount> struct TSDRegistrySharedT { - void init(Allocator *Instance) { + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); for (u32 I = 0; I < TSDsArraySize; I++) @@ -35,22 +37,32 @@ struct TSDRegistrySharedT { Initialized = true; } - void initOnceMaybe(Allocator *Instance) { + void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (LIKELY(Initialized)) return; init(Instance); // Sets Initialized. } - void unmapTestOnly(Allocator *Instance) { + void unmapTestOnly(Allocator *Instance) EXCLUDES(Mutex) { for (u32 I = 0; I < TSDsArraySize; I++) { TSDs[I].commitBack(Instance); TSDs[I] = {}; } setCurrentTSD(nullptr); + ScopedLock L(Mutex); Initialized = false; } + void drainCaches(Allocator *Instance) { + ScopedLock L(MutexTSDs); + for (uptr I = 0; I < NumberOfTSDs; ++I) { + TSDs[I].lock(); + Instance->drainCache(&TSDs[I]); + TSDs[I].unlock(); + } + } + ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, UNUSED bool MinimalInit) { if (LIKELY(getCurrentTSD())) @@ -58,7 +70,10 @@ struct TSDRegistrySharedT { initThread(Instance); } - ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) { + // TSDs is an array of locks and which is not supported for marking + // thread-safety capability. 
+ ALWAYS_INLINE TSD<Allocator> * + getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { TSD<Allocator> *TSD = getCurrentTSD(); DCHECK(TSD); *UnlockRequired = true; @@ -75,13 +90,13 @@ struct TSDRegistrySharedT { return getTSDAndLockSlow(TSD); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); for (u32 I = 0; I < TSDsArraySize; I++) TSDs[I].lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--) TSDs[I].unlock(); Mutex.unlock(); @@ -98,6 +113,19 @@ struct TSDRegistrySharedT { bool getDisableMemInit() const { return *getTlsPtr() & 1; } + void getStats(ScopedString *Str) EXCLUDES(MutexTSDs) { + ScopedLock L(MutexTSDs); + + Str->append("Stats: SharedTSDs: %u available; total %u\n", NumberOfTSDs, + TSDsArraySize); + for (uptr I = 0; I < NumberOfTSDs; ++I) { + TSDs[I].lock(); + Str->append(" Shared TSD[%zu]:\n", I); + TSDs[I].getCache().getStats(Str); + TSDs[I].unlock(); + } + } + private: ALWAYS_INLINE uptr *getTlsPtr() const { #if SCUDO_HAS_PLATFORM_TLS_SLOT @@ -119,7 +147,7 @@ private: return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL); } - bool setNumberOfTSDs(u32 N) { + bool setNumberOfTSDs(u32 N) EXCLUDES(MutexTSDs) { ScopedLock L(MutexTSDs); if (N < NumberOfTSDs) return false; @@ -150,7 +178,7 @@ private: *getTlsPtr() |= B; } - NOINLINE void initThread(Allocator *Instance) { + NOINLINE void initThread(Allocator *Instance) NO_THREAD_SAFETY_ANALYSIS { initOnceMaybe(Instance); // Initial context assignment is done in a plain round-robin fashion. const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed); @@ -158,7 +186,10 @@ private: Instance->callPostInitCallback(); } - NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) { + // TSDs is an array of locks which is not supported for marking thread-safety + // capability. + NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) + EXCLUDES(MutexTSDs) { // Use the Precedence of the current TSD as our random seed. Since we are // in the slow path, it means that tryLock failed, and as a result it's // very likely that said Precedence is non-zero. 
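To make the new annotations concrete: with Clang's -Wthread-safety analysis, members marked GUARDED_BY may only be accessed while the named capability is held, EXCLUDES/REQUIRES constrain the lock state at call sites, and NO_THREAD_SAFETY_ANALYSIS (used above where the TSD access patterns defeat the analysis) opts a function out entirely. A minimal sketch using the macros from thread_annotations.h; the Mutex and Counter types below are illustrative, not part of this change:

#include <stdint.h>

#include "thread_annotations.h"

// Illustrative types showing how Clang checks the annotation macros.
class CAPABILITY("mutex") Mutex {
public:
  void lock() ACQUIRE() {}
  void unlock() RELEASE() {}
};

class Counter {
public:
  void inc() EXCLUDES(M) {
    M.lock();
    ++Value; // OK: Value is accessed while M is held.
    M.unlock();
  }
  // uint64_t read() { return Value; }  // Would warn: Value requires M.
private:
  Mutex M;
  uint64_t Value GUARDED_BY(M) = 0;
};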
@@ -202,10 +233,10 @@ private: } atomic_u32 CurrentIndex = {}; - u32 NumberOfTSDs = 0; - u32 NumberOfCoPrimes = 0; - u32 CoPrimes[TSDsArraySize] = {}; - bool Initialized = false; + u32 NumberOfTSDs GUARDED_BY(MutexTSDs) = 0; + u32 NumberOfCoPrimes GUARDED_BY(MutexTSDs) = 0; + u32 CoPrimes[TSDsArraySize] GUARDED_BY(MutexTSDs) = {}; + bool Initialized GUARDED_BY(Mutex) = false; HybridMutex Mutex; HybridMutex MutexTSDs; TSD<Allocator> TSDs[TSDsArraySize]; diff --git a/standalone/vector.h b/standalone/vector.h index eae774b56e2..9f2c200958f 100644 --- a/standalone/vector.h +++ b/standalone/vector.h @@ -27,7 +27,7 @@ public: } void destroy() { if (Data != &LocalData[0]) - unmap(Data, CapacityBytes); + unmap(Data, CapacityBytes, 0, &MapData); } T &operator[](uptr I) { DCHECK_LT(I, Size); @@ -40,7 +40,7 @@ public: void push_back(const T &Element) { DCHECK_LE(Size, capacity()); if (Size == capacity()) { - const uptr NewCapacity = roundUpToPowerOfTwo(Size + 1); + const uptr NewCapacity = roundUpPowerOfTwo(Size + 1); reallocate(NewCapacity); } memcpy(&Data[Size++], &Element, sizeof(T)); @@ -82,9 +82,9 @@ private: void reallocate(uptr NewCapacity) { DCHECK_GT(NewCapacity, 0); DCHECK_LE(Size, NewCapacity); - NewCapacity = roundUpTo(NewCapacity * sizeof(T), getPageSizeCached()); - T *NewData = - reinterpret_cast<T *>(map(nullptr, NewCapacity, "scudo:vector")); + NewCapacity = roundUp(NewCapacity * sizeof(T), getPageSizeCached()); + T *NewData = reinterpret_cast<T *>( + map(nullptr, NewCapacity, "scudo:vector", 0, &MapData)); memcpy(NewData, Data, Size * sizeof(T)); destroy(); Data = NewData; @@ -95,6 +95,7 @@ private: T LocalData[256 / sizeof(T)] = {}; uptr CapacityBytes = 0; uptr Size = 0; + [[no_unique_address]] MapPlatformData MapData = {}; }; template <typename T> class Vector : public VectorNoCtor<T> { diff --git a/standalone/wrappers_c.inc b/standalone/wrappers_c.inc index bbe3617dd0d..3e495eaa4a3 100644 --- a/standalone/wrappers_c.inc +++ b/standalone/wrappers_c.inc @@ -54,6 +54,8 @@ INTERFACE WEAK struct SCUDO_MALLINFO SCUDO_PREFIX(mallinfo)(void) { return Info; } +// On Android, mallinfo2 is an alias of mallinfo, so don't define both. +#if !SCUDO_ANDROID INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) { struct __scudo_mallinfo2 Info = {}; scudo::StatCounters Stats; @@ -70,6 +72,7 @@ INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) { Info.fordblks = Info.fsmblks; return Info; } +#endif INTERFACE WEAK void *SCUDO_PREFIX(malloc)(size_t size) { return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( @@ -91,7 +94,7 @@ INTERFACE WEAK void *SCUDO_PREFIX(memalign)(size_t alignment, size_t size) { alignment = 1U; } else { if (UNLIKELY(!scudo::isPowerOfTwo(alignment))) - alignment = scudo::roundUpToPowerOfTwo(alignment); + alignment = scudo::roundUpPowerOfTwo(alignment); } } else { if (UNLIKELY(!scudo::isPowerOfTwo(alignment))) { @@ -131,9 +134,9 @@ INTERFACE WEAK void *SCUDO_PREFIX(pvalloc)(size_t size) { scudo::reportPvallocOverflow(size); } // pvalloc(0) should allocate one page. - return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate( - size ? scudo::roundUpTo(size, PageSize) : PageSize, - scudo::Chunk::Origin::Memalign, PageSize)); + return scudo::setErrnoOnNull( + SCUDO_ALLOCATOR.allocate(size ? 
scudo::roundUp(size, PageSize) : PageSize, + scudo::Chunk::Origin::Memalign, PageSize)); } INTERFACE WEAK void *SCUDO_PREFIX(realloc)(void *ptr, size_t size) { @@ -188,7 +191,10 @@ INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, int value) { static_cast<scudo::sptr>(value)); return 1; } else if (param == M_PURGE) { - SCUDO_ALLOCATOR.releaseToOS(); + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::Force); + return 1; + } else if (param == M_PURGE_ALL) { + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::ForceAll); return 1; } else { scudo::Option option; @@ -238,7 +244,10 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { if (size < max_size) sizes[size]++; }; + + SCUDO_ALLOCATOR.disable(); SCUDO_ALLOCATOR.iterateOverChunks(0, -1ul, callback, sizes); + SCUDO_ALLOCATOR.enable(); fputs("<malloc version=\"scudo-1\">\n", stream); for (scudo::uptr i = 0; i != max_size; ++i) diff --git a/standalone/wrappers_c_bionic.cpp b/standalone/wrappers_c_bionic.cpp index 18c3bf2c0ed..1b9fe67d920 100644 --- a/standalone/wrappers_c_bionic.cpp +++ b/standalone/wrappers_c_bionic.cpp @@ -32,21 +32,6 @@ static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> #undef SCUDO_ALLOCATOR #undef SCUDO_PREFIX -// Svelte MallocDispatch definitions. -#define SCUDO_PREFIX(name) CONCATENATE(scudo_svelte_, name) -#define SCUDO_ALLOCATOR SvelteAllocator - -extern "C" void SCUDO_PREFIX(malloc_postinit)(); -SCUDO_REQUIRE_CONSTANT_INITIALIZATION -static scudo::Allocator<scudo::AndroidSvelteConfig, - SCUDO_PREFIX(malloc_postinit)> - SCUDO_ALLOCATOR; - -#include "wrappers_c.inc" - -#undef SCUDO_ALLOCATOR -#undef SCUDO_PREFIX - // TODO(kostyak): support both allocators. INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); } diff --git a/standalone/wrappers_c_checks.h b/standalone/wrappers_c_checks.h index 815d40023b6..9cd48e82792 100644 --- a/standalone/wrappers_c_checks.h +++ b/standalone/wrappers_c_checks.h @@ -64,7 +64,7 @@ inline bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { // Returns true if the size passed to pvalloc overflows when rounded to the next // multiple of PageSize. 
inline bool checkForPvallocOverflow(uptr Size, uptr PageSize) { - return roundUpTo(Size, PageSize) < Size; + return roundUp(Size, PageSize) < Size; } } // namespace scudo diff --git a/standalone/wrappers_cpp.cpp b/standalone/wrappers_cpp.cpp index 16f495b6a35..374e36d72b3 100644 --- a/standalone/wrappers_cpp.cpp +++ b/standalone/wrappers_cpp.cpp @@ -54,26 +54,28 @@ INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align, static_cast<scudo::uptr>(align)); } -INTERFACE WEAK void operator delete(void *ptr)NOEXCEPT { +INTERFACE WEAK void operator delete(void *ptr) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New); } INTERFACE WEAK void operator delete[](void *ptr) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray); } -INTERFACE WEAK void operator delete(void *ptr, std::nothrow_t const &)NOEXCEPT { +INTERFACE WEAK void operator delete(void *ptr, + std::nothrow_t const &) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New); } INTERFACE WEAK void operator delete[](void *ptr, std::nothrow_t const &) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray); } -INTERFACE WEAK void operator delete(void *ptr, size_t size)NOEXCEPT { +INTERFACE WEAK void operator delete(void *ptr, size_t size) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size); } INTERFACE WEAK void operator delete[](void *ptr, size_t size) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size); } -INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align)NOEXCEPT { +INTERFACE WEAK void operator delete(void *ptr, + std::align_val_t align) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0, static_cast<scudo::uptr>(align)); } @@ -83,7 +85,7 @@ INTERFACE WEAK void operator delete[](void *ptr, static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align, - std::nothrow_t const &)NOEXCEPT { + std::nothrow_t const &) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0, static_cast<scudo::uptr>(align)); } @@ -93,7 +95,7 @@ INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align, static_cast<scudo::uptr>(align)); } INTERFACE WEAK void operator delete(void *ptr, size_t size, - std::align_val_t align)NOEXCEPT { + std::align_val_t align) NOEXCEPT { Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size, static_cast<scudo::uptr>(align)); } |
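As a usage note for the wrappers_c.inc change above, where M_PURGE now maps to releaseToOS(ReleaseToOS::Force) and the new M_PURGE_ALL maps to releaseToOS(ReleaseToOS::ForceAll): callers reach both through the standard mallopt entry point. A minimal sketch, assuming a build whose <malloc.h> defines both parameters; the helper below is hypothetical:

#include <malloc.h>

// Hypothetical caller. The value argument is ignored for these parameters in
// the wrapper shown above, so 0 is passed.
void trimNativeHeap(bool ReleaseEverything) {
  mallopt(ReleaseEverything ? M_PURGE_ALL : M_PURGE, 0);
}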