author | android-build-team Robot <android-build-team-robot@google.com> | 2021-06-21 14:33:12 +0000
committer | android-build-team Robot <android-build-team-robot@google.com> | 2021-06-21 14:33:12 +0000
commit | 1dbe2af4908c123fe26bec15cfaab0f4540d2f1c (patch)
tree | c8d9375af9072ed4fab7a55b8ecc12b078f519ea
parent | 2ef93a00c7e0b761e060df21b56d23b963565a2d (diff)
parent | 7ab61136279ca87c288e58bcd35d63a6a1950d17 (diff)
Snap for 7478028 from 7ab61136279ca87c288e58bcd35d63a6a1950d17 to mainline-documentsui-release
Tags: android-mainline-12.0.0_r26, android-mainline-12.0.0_r2, aml_doc_310851020, android12-mainline-documentsui-release
Change-Id: I9c57d9f4706f591dc518c23e7793910d64caa111
59 files changed, 3640 insertions, 1441 deletions
diff --git a/Android.bp b/Android.bp index b9e94aa2dc7..60cadc91263 100644 --- a/Android.bp +++ b/Android.bp @@ -14,10 +14,43 @@ // limitations under the License. // +package { + default_applicable_licenses: ["external_scudo_license"], +} + +// Added automatically by a large-scale-change that took the approach of +// 'apply every license found to every target'. While this makes sure we respect +// every license restriction, it may not be entirely correct. +// +// e.g. GPL in an MIT project might only apply to the contrib/ directory. +// +// Please consider splitting the single license below into multiple licenses, +// taking care not to lose any license_kind information, and overriding the +// default license using the 'licenses: [...]' property on targets as needed. +// +// For unused files, consider creating a 'filegroup' with "//visibility:private" +// to attach the license to, and including a comment whether the files may be +// used in the current project. +// http://go/android-license-faq +license { + name: "external_scudo_license", + visibility: [":__subpackages__"], + license_kinds: [ + "SPDX-license-identifier-Apache-2.0", + "SPDX-license-identifier-BSD", + "SPDX-license-identifier-MIT", + "SPDX-license-identifier-NCSA", + ], + license_text: [ + "LICENSE.TXT", + ], +} + cc_defaults { name: "libscudo_defaults", native_coverage: false, ramdisk_available: true, + vendor_ramdisk_available: true, recovery_available: true, host_supported: true, native_bridge_supported: true, @@ -46,12 +79,20 @@ cc_defaults { // Android assumes that allocations of multiples of 16 bytes // will be aligned to at least 16 bytes. "-DSCUDO_MIN_ALIGNMENT_LOG=4", + + // Allow scudo to use android_unsafe_frame_pointer_chase(), which is + // normally a private function. + "-DHAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE", ], cppflags: [ "-nostdinc++", "-fno-exceptions", ], + include_dirs: [ + "external/scudo/standalone/include", + ], + system_shared_libs: [], srcs: [ @@ -88,22 +129,32 @@ cc_defaults { linux_glibc: { enabled: true, }, - }, - - header_libs: [ - "bionic_libc_platform_headers", - ], - product_variables: { - experimental_mte: { - cflags: ["-DANDROID_EXPERIMENTAL_MTE"], + android: { + header_libs: ["bionic_libc_platform_headers"], + }, + linux_bionic: { + header_libs: ["bionic_libc_platform_headers"], + }, + native_bridge: { + cflags: ["-DSCUDO_DISABLE_TBI"], }, }, + + header_libs: ["libc_headers"], } cc_library_static { name: "libscudo", defaults: ["libscudo_defaults"], - cflags: ["-D_BIONIC=1"], + cflags: [ + "-D_BIONIC=1", + "-DSCUDO_HAS_PLATFORM_TLS_SLOT", + ], + visibility: [ + "//bionic:__subpackages__", + "//frameworks/libs/native_bridge_support/libc:__subpackages__", + "//system/core/debuggerd:__subpackages__", + ], } cc_library_static { @@ -113,7 +164,9 @@ cc_library_static { cc_test { name: "scudo_unit_tests", - host_supported: true, + // Temporarily disabled on host due to a 15-20s per-test timeout, + // which is currently exceeded by ScudoCombinedTest.BasicCombined. 
+ host_supported: false, srcs: [ "standalone/tests/atomic_test.cpp", "standalone/tests/bytemap_test.cpp", @@ -138,20 +191,39 @@ cc_test { ], static_libs: ["libscudo_for_testing"], include_dirs: [ - "external", "external/scudo/standalone", + "external/scudo/standalone/include", ], cflags: [ "-Wno-unused-parameter", "-fno-emulated-tls", ], - header_libs: [ - "bionic_libc_platform_headers", - ], - product_variables: { - experimental_mte: { - cflags: ["-DANDROID_EXPERIMENTAL_MTE"], + target: { + android: { + header_libs: ["bionic_libc_platform_headers"], + }, + linux_bionic: { + header_libs: ["bionic_libc_platform_headers"], }, }, test_suites: ["general-tests"], + bootstrap: true, +} + +cc_fuzz { + name: "scudo_get_error_info_fuzzer", + host_supported: true, + compile_multilib: "64", + static_libs: ["libscudo"], + include_dirs: [ + "external/scudo/standalone", + "external/scudo/standalone/include", + ], + cflags: [ + "-Wno-unneeded-internal-declaration", + ], + srcs: ["standalone/fuzz/get_error_info_fuzzer.cpp"], + fuzz_config: { + componentid: 87896 + }, } @@ -17,5 +17,6 @@ third_party { value: "https://github.com/llvm/llvm-project.git" } version: "161cca266a9d0b6deb5f1fd2de8ad543649a7fa1" + license_type: NOTICE last_upgrade_date { year: 2019 month: 9 day: 10 } } diff --git a/NOTICE b/NOTICE deleted file mode 120000 index 7a694c9699a..00000000000 --- a/NOTICE +++ /dev/null @@ -1 +0,0 @@ -LICENSE
\ No newline at end of file diff --git a/copybara/copy.bara.sky b/copybara/copy.bara.sky index 4d22c479513..54ca37c0dcc 100644 --- a/copybara/copy.bara.sky +++ b/copybara/copy.bara.sky @@ -2,7 +2,7 @@ core.workflow( name = "default", origin = git.origin( url = "https://github.com/llvm/llvm-project.git", - ref = "master", + ref = "main", ), origin_files = glob( [ @@ -26,16 +26,12 @@ core.workflow( "**/Android.bp" ], ), - mode = "SQUASH", authoring = authoring.pass_thru( "Dynamic Tools Team <dynamic-tools@google.com>" ), + mode = 'ITERATIVE', transformations = [ core.move("compiler-rt/lib/scudo/standalone/", "standalone"), core.move("compiler-rt/LICENSE.TXT", "LICENSE.TXT"), - metadata.squash_notes( - prefix = "Imported Scudo Standalone changes:\n\n", - oldest_first = True, - ), ], ) diff --git a/standalone/allocator_config.h b/standalone/allocator_config.h index ad2a17ef701..8e103f28b1a 100644 --- a/standalone/allocator_config.h +++ b/standalone/allocator_config.h @@ -21,59 +21,138 @@ namespace scudo { +// The combined allocator uses a structure as a template argument that +// specifies the configuration options for the various subcomponents of the +// allocator. +// +// struct ExampleConfig { +// // SizeClasMmap to use with the Primary. +// using SizeClassMap = DefaultSizeClassMap; +// // Indicates possible support for Memory Tagging. +// static const bool MaySupportMemoryTagging = false; +// // Defines the Primary allocator to use. +// typedef SizeClassAllocator64<ExampleConfig> Primary; +// // Log2 of the size of a size class region, as used by the Primary. +// static const uptr PrimaryRegionSizeLog = 30U; +// // Defines the type and scale of a compact pointer. A compact pointer can +// // be understood as the offset of a pointer within the region it belongs +// // to, in increments of a power-of-2 scale. +// // eg: Ptr = Base + (CompactPtr << Scale). +// typedef u32 PrimaryCompactPtrT; +// static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; +// // Defines the minimal & maximal release interval that can be set. +// static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; +// static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; +// // Defines the type of cache used by the Secondary. Some additional +// // configuration entries can be necessary depending on the Cache. +// typedef MapAllocatorNoCache SecondaryCache; +// // Thread-Specific Data Registry used, shared or exclusive. +// template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; +// }; + // Default configurations for various platforms. 
struct DefaultConfig { using SizeClassMap = DefaultSizeClassMap; + static const bool MaySupportMemoryTagging = false; + #if SCUDO_CAN_USE_PRIMARY64 - // 1GB Regions - typedef SizeClassAllocator64<SizeClassMap, 30U> Primary; + typedef SizeClassAllocator64<DefaultConfig> Primary; + static const uptr PrimaryRegionSizeLog = 32U; + typedef uptr PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = 0; #else - // 512KB regions - typedef SizeClassAllocator32<SizeClassMap, 19U> Primary; + typedef SizeClassAllocator32<DefaultConfig> Primary; + static const uptr PrimaryRegionSizeLog = 19U; + typedef uptr PrimaryCompactPtrT; #endif - typedef MapAllocator<MapAllocatorCache<>> Secondary; + static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + + typedef MapAllocatorCache<DefaultConfig> SecondaryCache; + static const u32 SecondaryCacheEntriesArraySize = 32U; + static const u32 SecondaryCacheQuarantineSize = 0U; + static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; + static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 19; + static const s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = INT32_MAX; + template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive }; struct AndroidConfig { using SizeClassMap = AndroidSizeClassMap; + static const bool MaySupportMemoryTagging = true; + #if SCUDO_CAN_USE_PRIMARY64 - // 256MB regions - typedef SizeClassAllocator64<SizeClassMap, 28U, 1000, 1000, - /*MaySupportMemoryTagging=*/true> - Primary; + typedef SizeClassAllocator64<AndroidConfig> Primary; + static const uptr PrimaryRegionSizeLog = 28U; + typedef u32 PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; #else - // 256KB regions - typedef SizeClassAllocator32<SizeClassMap, 18U, 1000, 1000> Primary; + typedef SizeClassAllocator32<AndroidConfig> Primary; + static const uptr PrimaryRegionSizeLog = 18U; + typedef uptr PrimaryCompactPtrT; #endif - // Cache blocks up to 2MB - typedef MapAllocator<MapAllocatorCache<32U, 2UL << 20, 0, 1000>> Secondary; + static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; + static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; + + typedef MapAllocatorCache<AndroidConfig> SecondaryCache; + static const u32 SecondaryCacheEntriesArraySize = 256U; + static const u32 SecondaryCacheQuarantineSize = 32U; + static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; + static const uptr SecondaryCacheDefaultMaxEntrySize = 2UL << 20; + static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; + static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 1000; + template <class A> - using TSDRegistryT = TSDRegistrySharedT<A, 2U>; // Shared, max 2 TSDs. + using TSDRegistryT = TSDRegistrySharedT<A, 8U, 2U>; // Shared, max 8 TSDs. 
}; struct AndroidSvelteConfig { using SizeClassMap = SvelteSizeClassMap; + static const bool MaySupportMemoryTagging = false; + #if SCUDO_CAN_USE_PRIMARY64 - // 128MB regions - typedef SizeClassAllocator64<SizeClassMap, 27U, 1000, 1000> Primary; + typedef SizeClassAllocator64<AndroidSvelteConfig> Primary; + static const uptr PrimaryRegionSizeLog = 27U; + typedef u32 PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; #else - // 64KB regions - typedef SizeClassAllocator32<SizeClassMap, 16U, 1000, 1000> Primary; + typedef SizeClassAllocator32<AndroidSvelteConfig> Primary; + static const uptr PrimaryRegionSizeLog = 16U; + typedef uptr PrimaryCompactPtrT; #endif - typedef MapAllocator<MapAllocatorCache<4U, 1UL << 18, 0, 0>> Secondary; + static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; + static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; + + typedef MapAllocatorCache<AndroidSvelteConfig> SecondaryCache; + static const u32 SecondaryCacheEntriesArraySize = 16U; + static const u32 SecondaryCacheQuarantineSize = 32U; + static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U; + static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 18; + static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; + static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 0; + template <class A> - using TSDRegistryT = TSDRegistrySharedT<A, 1U>; // Shared, only 1 TSD. + using TSDRegistryT = TSDRegistrySharedT<A, 2U, 1U>; // Shared, max 2 TSDs. }; #if SCUDO_CAN_USE_PRIMARY64 struct FuchsiaConfig { - // 1GB Regions - typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary; - typedef MapAllocator<MapAllocatorNoCache> Secondary; + using SizeClassMap = DefaultSizeClassMap; + static const bool MaySupportMemoryTagging = false; + + typedef SizeClassAllocator64<FuchsiaConfig> Primary; + static const uptr PrimaryRegionSizeLog = 30U; + typedef u32 PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + + typedef MapAllocatorNoCache SecondaryCache; template <class A> - using TSDRegistryT = TSDRegistrySharedT<A, 8U>; // Shared, max 8 TSDs. + using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs. }; #endif diff --git a/standalone/atomic_helpers.h b/standalone/atomic_helpers.h index 6c84ba86ed3..d88f5d7be64 100644 --- a/standalone/atomic_helpers.h +++ b/standalone/atomic_helpers.h @@ -51,7 +51,7 @@ struct atomic_u32 { struct atomic_u64 { typedef u64 Type; // On 32-bit platforms u64 is not necessarily aligned on 8 bytes. 
- ALIGNED(8) volatile Type ValDoNotUse; + alignas(8) volatile Type ValDoNotUse; }; struct atomic_uptr { @@ -90,6 +90,20 @@ inline typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, } template <typename T> +inline typename T::Type atomic_fetch_and(volatile T *A, typename T::Type V, + memory_order MO) { + DCHECK(!(reinterpret_cast<uptr>(A) % sizeof(*A))); + return __atomic_fetch_and(&A->ValDoNotUse, V, MO); +} + +template <typename T> +inline typename T::Type atomic_fetch_or(volatile T *A, typename T::Type V, + memory_order MO) { + DCHECK(!(reinterpret_cast<uptr>(A) % sizeof(*A))); + return __atomic_fetch_or(&A->ValDoNotUse, V, MO); +} + +template <typename T> inline typename T::Type atomic_exchange(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast<uptr>(A) % sizeof(*A))); @@ -106,14 +120,6 @@ inline bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, __ATOMIC_RELAXED); } -template <typename T> -inline bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, - typename T::Type Xchg, - memory_order MO) { - return __atomic_compare_exchange(&A->ValDoNotUse, Cmp, &Xchg, true, MO, - __ATOMIC_RELAXED); -} - // Clutter-reducing helpers. template <typename T> diff --git a/standalone/benchmarks/malloc_benchmark.cpp b/standalone/benchmarks/malloc_benchmark.cpp index ce48dc02f7a..661fff45a8d 100644 --- a/standalone/benchmarks/malloc_benchmark.cpp +++ b/standalone/benchmarks/malloc_benchmark.cpp @@ -13,15 +13,22 @@ #include "benchmark/benchmark.h" #include <memory> +#include <vector> + +void *CurrentAllocator; +template <typename Config> void PostInitCallback() { + reinterpret_cast<scudo::Allocator<Config> *>(CurrentAllocator)->initGwpAsan(); +} template <typename Config> static void BM_malloc_free(benchmark::State &State) { - using AllocatorT = scudo::Allocator<Config>; + using AllocatorT = scudo::Allocator<Config, PostInitCallback<Config>>; auto Deleter = [](AllocatorT *A) { A->unmapTestOnly(); delete A; }; std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, Deleter); + CurrentAllocator = Allocator.get(); Allocator->reset(); const size_t NBytes = State.range(0); @@ -55,18 +62,19 @@ BENCHMARK_TEMPLATE(BM_malloc_free, scudo::FuchsiaConfig) template <typename Config> static void BM_malloc_free_loop(benchmark::State &State) { - using AllocatorT = scudo::Allocator<Config>; + using AllocatorT = scudo::Allocator<Config, PostInitCallback<Config>>; auto Deleter = [](AllocatorT *A) { A->unmapTestOnly(); delete A; }; std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, Deleter); + CurrentAllocator = Allocator.get(); Allocator->reset(); const size_t NumIters = State.range(0); size_t PageSize = scudo::getPageSizeCached(); - void *Ptrs[NumIters]; + std::vector<void *> Ptrs(NumIters); for (auto _ : State) { size_t SizeLog2 = 0; diff --git a/standalone/checksum.cpp b/standalone/checksum.cpp index 5de049a0931..05d4ba54bfc 100644 --- a/standalone/checksum.cpp +++ b/standalone/checksum.cpp @@ -31,6 +31,13 @@ Checksum HashAlgorithm = {Checksum::BSD}; #define bit_SSE4_2 bit_SSE42 // clang and gcc have different defines. #endif +#ifndef signature_HYGON_ebx // They are not defined in gcc. +// HYGON: "HygonGenuine". 
+#define signature_HYGON_ebx 0x6f677948 +#define signature_HYGON_edx 0x6e65476e +#define signature_HYGON_ecx 0x656e6975 +#endif + bool hasHardwareCRC32() { u32 Eax, Ebx = 0, Ecx = 0, Edx = 0; __get_cpuid(0, &Eax, &Ebx, &Ecx, &Edx); @@ -39,7 +46,10 @@ bool hasHardwareCRC32() { (Ecx == signature_INTEL_ecx); const bool IsAMD = (Ebx == signature_AMD_ebx) && (Edx == signature_AMD_edx) && (Ecx == signature_AMD_ecx); - if (!IsIntel && !IsAMD) + const bool IsHygon = (Ebx == signature_HYGON_ebx) && + (Edx == signature_HYGON_edx) && + (Ecx == signature_HYGON_ecx); + if (!IsIntel && !IsAMD && !IsHygon) return false; __get_cpuid(1, &Eax, &Ebx, &Ecx, &Edx); return !!(Ecx & bit_SSE4_2); diff --git a/standalone/chunk.h b/standalone/chunk.h index f4d68b3ac6c..69b8e1b12a9 100644 --- a/standalone/chunk.h +++ b/standalone/chunk.h @@ -65,7 +65,8 @@ typedef u64 PackedHeader; struct UnpackedHeader { uptr ClassId : 8; u8 State : 2; - u8 Origin : 2; + // Origin if State == Allocated, or WasZeroed otherwise. + u8 OriginOrWasZeroed : 2; uptr SizeOrUnusedBytes : 20; uptr Offset : 16; uptr Checksum : 16; diff --git a/standalone/combined.h b/standalone/combined.h index 3ed34c21aa5..8080d677d7b 100644 --- a/standalone/combined.h +++ b/standalone/combined.h @@ -13,15 +13,18 @@ #include "common.h" #include "flags.h" #include "flags_parser.h" -#include "interface.h" #include "local_cache.h" #include "memtag.h" +#include "options.h" #include "quarantine.h" #include "report.h" #include "secondary.h" +#include "stack_depot.h" #include "string_utils.h" #include "tsd.h" +#include "scudo/interface.h" + #ifdef GWP_ASAN_HOOKS #include "gwp_asan/guarded_pool_allocator.h" #include "gwp_asan/optional/backtrace.h" @@ -30,9 +33,14 @@ extern "C" inline void EmptyCallback() {} -namespace scudo { +#ifdef HAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE +// This function is not part of the NDK so it does not appear in any public +// header files. We only declare/use it when targeting the platform. +extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf, + size_t num_entries); +#endif -enum class Option { ReleaseInterval }; +namespace scudo { template <class Params, void (*PostInitCallback)(void) = EmptyCallback> class Allocator { @@ -43,8 +51,7 @@ public: typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT; void callPostInitCallback() { - static pthread_once_t OnceControl = PTHREAD_ONCE_INIT; - pthread_once(&OnceControl, PostInitCallback); + pthread_once(&PostInitNonce, PostInitCallback); } struct QuarantineCallback { @@ -63,12 +70,10 @@ public: NewHeader.State = Chunk::State::Available; Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(Ptr); void *BlockBegin = Allocator::getBlockBegin(Ptr, &NewHeader); - const uptr ClassId = NewHeader.ClassId; - if (LIKELY(ClassId)) - Cache.deallocate(ClassId, BlockBegin); - else - Allocator.Secondary.deallocate(BlockBegin); + Cache.deallocate(NewHeader.ClassId, BlockBegin); } // We take a shortcut when allocating a quarantine batch by working with the @@ -90,6 +95,12 @@ public: Header.State = Chunk::State::Allocated; Chunk::storeHeader(Allocator.Cookie, Ptr, &Header); + // Reset tag to 0 as this chunk may have been previously used for a tagged + // user allocation. 
+ if (UNLIKELY(useMemoryTagging<Params>(Allocator.Primary.Options.load()))) + storeTags(reinterpret_cast<uptr>(Ptr), + reinterpret_cast<uptr>(Ptr) + sizeof(QuarantineBatch)); + return Ptr; } @@ -137,11 +148,22 @@ public: reportUnrecognizedFlags(); // Store some flags locally. - Options.MayReturnNull = getFlags()->may_return_null; - Options.ZeroContents = getFlags()->zero_contents; - Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch; - Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch; - Options.QuarantineMaxChunkSize = + if (getFlags()->may_return_null) + Primary.Options.set(OptionBit::MayReturnNull); + if (getFlags()->zero_contents) + Primary.Options.setFillContentsMode(ZeroFill); + else if (getFlags()->pattern_fill_contents) + Primary.Options.setFillContentsMode(PatternOrZeroFill); + if (getFlags()->dealloc_type_mismatch) + Primary.Options.set(OptionBit::DeallocTypeMismatch); + if (getFlags()->delete_size_mismatch) + Primary.Options.set(OptionBit::DeleteSizeMismatch); + if (allocatorSupportsMemoryTagging<Params>() && + systemSupportsMemoryTagging()) + Primary.Options.set(OptionBit::UseMemoryTagging); + Primary.Options.set(OptionBit::UseOddEvenTags); + + QuarantineMaxChunkSize = static_cast<u32>(getFlags()->quarantine_max_chunk_size); Stats.initLinkerInitialized(); @@ -160,11 +182,6 @@ public: #ifdef GWP_ASAN_HOOKS gwp_asan::options::Options Opt; Opt.Enabled = getFlags()->GWP_ASAN_Enabled; - // Bear in mind - Scudo has its own alignment guarantees that are strictly - // enforced. Scudo exposes the same allocation function for everything from - // malloc() to posix_memalign, so in general this flag goes unused, as Scudo - // will always ask GWP-ASan for an aligned amount of bytes. - Opt.PerfectlyRightAlign = getFlags()->GWP_ASAN_PerfectlyRightAlign; Opt.MaxSimultaneousAllocations = getFlags()->GWP_ASAN_MaxSimultaneousAllocations; Opt.SampleRate = getFlags()->GWP_ASAN_SampleRate; @@ -173,16 +190,26 @@ public: // Allocator::disable calling GWPASan.disable). Disable GWP-ASan's atfork // handler. 
Opt.InstallForkHandlers = false; - Opt.Backtrace = gwp_asan::options::getBacktraceFunction(); + Opt.Backtrace = gwp_asan::backtrace::getBacktraceFunction(); GuardedAlloc.init(Opt); if (Opt.InstallSignalHandlers) - gwp_asan::crash_handler::installSignalHandlers( - &GuardedAlloc, Printf, gwp_asan::options::getPrintBacktraceFunction(), - Opt.Backtrace); + gwp_asan::segv_handler::installSignalHandlers( + &GuardedAlloc, Printf, + gwp_asan::backtrace::getPrintBacktraceFunction(), + gwp_asan::backtrace::getSegvBacktraceFunction()); + + GuardedAllocSlotSize = + GuardedAlloc.getAllocatorState()->maximumAllocationSize(); + Stats.add(StatFree, static_cast<uptr>(Opt.MaxSimultaneousAllocations) * + GuardedAllocSlotSize); #endif // GWP_ASAN_HOOKS } + ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) { + TSDRegistry.initThreadMaybe(this, MinimalInit); + } + void reset() { memset(this, 0, sizeof(*this)); } void unmapTestOnly() { @@ -190,7 +217,7 @@ public: Primary.unmapTestOnly(); #ifdef GWP_ASAN_HOOKS if (getFlags()->GWP_ASAN_InstallSignalHandlers) - gwp_asan::crash_handler::uninstallSignalHandlers(); + gwp_asan::segv_handler::uninstallSignalHandlers(); GuardedAlloc.uninitTestOnly(); #endif // GWP_ASAN_HOOKS } @@ -213,11 +240,53 @@ public: TSD->Cache.destroy(&Stats); } - ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) { - if (Primary.SupportsMemoryTagging) - return reinterpret_cast<void *>( - untagPointer(reinterpret_cast<uptr>(Ptr))); - return Ptr; + ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { + if (!allocatorSupportsMemoryTagging<Params>()) + return Ptr; + auto UntaggedPtr = untagPointer(Ptr); + if (UntaggedPtr != Ptr) + return UntaggedPtr; + // Secondary, or pointer allocated while memory tagging is unsupported or + // disabled. The tag mismatch is okay in the latter case because tags will + // not be checked. + return addHeaderTag(Ptr); + } + + ALWAYS_INLINE uptr addHeaderTag(uptr Ptr) { + if (!allocatorSupportsMemoryTagging<Params>()) + return Ptr; + return addFixedTag(Ptr, 2); + } + + ALWAYS_INLINE void *addHeaderTag(void *Ptr) { + return reinterpret_cast<void *>(addHeaderTag(reinterpret_cast<uptr>(Ptr))); + } + + NOINLINE u32 collectStackTrace() { +#ifdef HAVE_ANDROID_UNSAFE_FRAME_POINTER_CHASE + // Discard collectStackTrace() frame and allocator function frame. + constexpr uptr DiscardFrames = 2; + uptr Stack[MaxTraceSize + DiscardFrames]; + uptr Size = + android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames); + Size = Min<uptr>(Size, MaxTraceSize + DiscardFrames); + return Depot.insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size); +#else + return 0; +#endif + } + + uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, + uptr ClassId) { + if (!Options.get(OptionBit::UseOddEvenTags)) + return 0; + + // If a chunk's tag is odd, we want the tags of the surrounding blocks to be + // even, and vice versa. Blocks are laid out Size bytes apart, and adding + // Size to Ptr will flip the least significant set bit of Size in Ptr, so + // that bit will have the pattern 010101... for consecutive blocks, which we + // can use to determine which tag mask to use. 
+ return 0x5555U << ((Ptr >> SizeClassMap::getSizeLSBByClassId(ClassId)) & 1); } NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, @@ -225,23 +294,34 @@ public: bool ZeroContents = false) { initThreadMaybe(); -#ifdef GWP_ASAN_HOOKS - if (UNLIKELY(GuardedAlloc.shouldSample())) { - if (void *Ptr = GuardedAlloc.allocate(roundUpTo(Size, Alignment))) - return Ptr; - } -#endif // GWP_ASAN_HOOKS - - ZeroContents |= static_cast<bool>(Options.ZeroContents); - + const Options Options = Primary.Options.load(); if (UNLIKELY(Alignment > MaxAlignment)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAlignmentTooBig(Alignment, MaxAlignment); } if (Alignment < MinAlignment) Alignment = MinAlignment; +#ifdef GWP_ASAN_HOOKS + if (UNLIKELY(GuardedAlloc.shouldSample())) { + if (void *Ptr = GuardedAlloc.allocate(Size, Alignment)) { + if (UNLIKELY(&__scudo_allocate_hook)) + __scudo_allocate_hook(Ptr, Size); + Stats.lock(); + Stats.add(StatAllocated, GuardedAllocSlotSize); + Stats.sub(StatFree, GuardedAllocSlotSize); + Stats.unlock(); + return Ptr; + } + } +#endif // GWP_ASAN_HOOKS + + const FillContentsMode FillContents = ZeroContents ? ZeroFill + : TSDRegistry.getDisableMemInit() + ? NoFill + : Options.getFillContentsMode(); + // If the requested size happens to be 0 (more common than you might think), // allocate MinAlignment bytes on top of the header. Then add the extra // bytes required to fulfill the alignment requirements: we allocate enough @@ -254,7 +334,7 @@ public: // Takes care of extravagantly large sizes as well as integer overflows. static_assert(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment, ""); if (UNLIKELY(Size >= MaxAllowedMallocSize)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize); } @@ -262,7 +342,7 @@ public: void *Block = nullptr; uptr ClassId = 0; - uptr SecondaryBlockEnd; + uptr SecondaryBlockEnd = 0; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); @@ -274,25 +354,20 @@ public: // larger class until it fits. If it fails to fit in the largest class, // fallback to the Secondary. if (UNLIKELY(!Block)) { - while (ClassId < SizeClassMap::LargestClassId) { + while (ClassId < SizeClassMap::LargestClassId && !Block) Block = TSD->Cache.allocate(++ClassId); - if (LIKELY(Block)) { - break; - } - } - if (UNLIKELY(!Block)) { + if (!Block) ClassId = 0; - } } if (UnlockRequired) TSD->unlock(); } if (UNLIKELY(ClassId == 0)) - Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd, - ZeroContents); + Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, + FillContents); if (UNLIKELY(!Block)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportOutOfMemory(NeededSize); } @@ -303,7 +378,7 @@ public: void *Ptr = reinterpret_cast<void *>(UserPtr); void *TaggedPtr = Ptr; - if (ClassId) { + if (LIKELY(ClassId)) { // We only need to zero or tag the contents for Primary backed // allocations. We only set tags for primary allocations in order to avoid // faulting potentially large numbers of pages for large secondary @@ -315,10 +390,11 @@ public: // // When memory tagging is enabled, zeroing the contents is done as part of // setting the tag. 
- if (UNLIKELY(useMemoryTagging())) { + if (UNLIKELY(useMemoryTagging<Params>(Options))) { uptr PrevUserPtr; Chunk::UnpackedHeader Header; - const uptr BlockEnd = BlockUptr + PrimaryT::getSizeByClassId(ClassId); + const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); + const uptr BlockEnd = BlockUptr + BlockSize; // If possible, try to reuse the UAF tag that was set by deallocate(). // For simplicity, only reuse tags if we have the same start address as // the previous allocation. This handles the majority of cases since @@ -361,14 +437,44 @@ public: if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) PrevEnd = NextPage; TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr); - resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd); + resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd); + if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) { + // If an allocation needs to be zeroed (i.e. calloc) we can normally + // avoid zeroing the memory now since we can rely on memory having + // been zeroed on free, as this is normally done while setting the + // UAF tag. But if tagging was disabled per-thread when the memory + // was freed, it would not have been retagged and thus zeroed, and + // therefore it needs to be zeroed now. + memset(TaggedPtr, 0, + Min(Size, roundUpTo(PrevEnd - TaggedUserPtr, + archMemoryTagGranuleSize()))); + } else if (Size) { + // Clear any stack metadata that may have previously been stored in + // the chunk data. + memset(TaggedPtr, 0, archMemoryTagGranuleSize()); + } } else { - TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd); + const uptr OddEvenMask = + computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId); + TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd); } - } else if (UNLIKELY(ZeroContents)) { - // This condition is not necessarily unlikely, but since memset is - // costly, we might as well mark it as such. - memset(Block, 0, PrimaryT::getSizeByClassId(ClassId)); + storePrimaryAllocationStackMaybe(Options, Ptr); + } else { + Block = addHeaderTag(Block); + Ptr = addHeaderTag(Ptr); + if (UNLIKELY(FillContents != NoFill)) { + // This condition is not necessarily unlikely, but since memset is + // costly, we might as well mark it as such. + memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, + PrimaryT::getSizeByClassId(ClassId)); + } + } + } else { + Block = addHeaderTag(Block); + Ptr = addHeaderTag(Ptr); + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr)); + storeSecondaryAllocationStackMaybe(Options, Ptr, Size); } } @@ -386,13 +492,13 @@ public: } Header.ClassId = ClassId & Chunk::ClassIdMask; Header.State = Chunk::State::Allocated; - Header.Origin = Origin & Chunk::OriginMask; + Header.OriginOrWasZeroed = Origin & Chunk::OriginMask; Header.SizeOrUnusedBytes = (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) & Chunk::SizeOrUnusedBytesMask; Chunk::storeHeader(Cookie, Ptr, &Header); - if (&__scudo_allocate_hook) + if (UNLIKELY(&__scudo_allocate_hook)) __scudo_allocate_hook(TaggedPtr, Size); return TaggedPtr; @@ -408,58 +514,67 @@ public: // being destroyed properly. Any other heap operation will do a full init. 
initThreadMaybe(/*MinimalInit=*/true); + if (UNLIKELY(&__scudo_deallocate_hook)) + __scudo_deallocate_hook(Ptr); + + if (UNLIKELY(!Ptr)) + return; + #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) { GuardedAlloc.deallocate(Ptr); + Stats.lock(); + Stats.add(StatFree, GuardedAllocSlotSize); + Stats.sub(StatAllocated, GuardedAllocSlotSize); + Stats.unlock(); return; } #endif // GWP_ASAN_HOOKS - if (&__scudo_deallocate_hook) - __scudo_deallocate_hook(Ptr); - - if (UNLIKELY(!Ptr)) - return; if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Deallocating, Ptr); - Ptr = untagPointerMaybe(Ptr); + void *TaggedPtr = Ptr; + Ptr = getHeaderTaggedPointer(Ptr); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Deallocating, Ptr); - if (Options.DeallocTypeMismatch) { - if (Header.Origin != Origin) { + + const Options Options = Primary.Options.load(); + if (Options.get(OptionBit::DeallocTypeMismatch)) { + if (UNLIKELY(Header.OriginOrWasZeroed != Origin)) { // With the exception of memalign'd chunks, that can be still be free'd. - if (UNLIKELY(Header.Origin != Chunk::Origin::Memalign || - Origin != Chunk::Origin::Malloc)) + if (Header.OriginOrWasZeroed != Chunk::Origin::Memalign || + Origin != Chunk::Origin::Malloc) reportDeallocTypeMismatch(AllocatorAction::Deallocating, Ptr, - Header.Origin, Origin); + Header.OriginOrWasZeroed, Origin); } } const uptr Size = getSize(Ptr, &Header); - if (DeleteSize && Options.DeleteSizeMismatch) { + if (DeleteSize && Options.get(OptionBit::DeleteSizeMismatch)) { if (UNLIKELY(DeleteSize != Size)) reportDeleteSizeMismatch(Ptr, DeleteSize, Size); } - quarantineOrDeallocateChunk(Ptr, &Header, Size); + quarantineOrDeallocateChunk(Options, TaggedPtr, &Header, Size); } void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { initThreadMaybe(); + const Options Options = Primary.Options.load(); if (UNLIKELY(NewSize >= MaxAllowedMallocSize)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); } void *OldTaggedPtr = OldPtr; - OldPtr = untagPointerMaybe(OldPtr); + OldPtr = getHeaderTaggedPointer(OldPtr); // The following cases are handled by the C wrappers. DCHECK_NE(OldPtr, nullptr); @@ -472,6 +587,10 @@ public: if (NewPtr) memcpy(NewPtr, OldPtr, (NewSize < OldSize) ? NewSize : OldSize); GuardedAlloc.deallocate(OldPtr); + Stats.lock(); + Stats.add(StatFree, GuardedAllocSlotSize); + Stats.sub(StatAllocated, GuardedAllocSlotSize); + Stats.unlock(); return NewPtr; } #endif // GWP_ASAN_HOOKS @@ -488,13 +607,14 @@ public: // Pointer has to be allocated with a malloc-type function. Some // applications think that it is OK to realloc a memalign'ed pointer, which // will trigger this check. It really isn't. 
- if (Options.DeallocTypeMismatch) { - if (UNLIKELY(OldHeader.Origin != Chunk::Origin::Malloc)) + if (Options.get(OptionBit::DeallocTypeMismatch)) { + if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc)) reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr, - OldHeader.Origin, Chunk::Origin::Malloc); + OldHeader.OriginOrWasZeroed, + Chunk::Origin::Malloc); } - void *BlockBegin = getBlockBegin(OldPtr, &OldHeader); + void *BlockBegin = getBlockBegin(OldTaggedPtr, &OldHeader); uptr BlockEnd; uptr OldSize; const uptr ClassId = OldHeader.ClassId; @@ -504,24 +624,31 @@ public: OldSize = OldHeader.SizeOrUnusedBytes; } else { BlockEnd = SecondaryT::getBlockEnd(BlockBegin); - OldSize = BlockEnd - - (reinterpret_cast<uptr>(OldPtr) + OldHeader.SizeOrUnusedBytes); + OldSize = BlockEnd - (reinterpret_cast<uptr>(OldTaggedPtr) + + OldHeader.SizeOrUnusedBytes); } // If the new chunk still fits in the previously allocated block (with a // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. - if (reinterpret_cast<uptr>(OldPtr) + NewSize <= BlockEnd) { + if (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize <= BlockEnd) { if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { Chunk::UnpackedHeader NewHeader = OldHeader; NewHeader.SizeOrUnusedBytes = (ClassId ? NewSize - : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) & + : BlockEnd - + (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(ClassId && useMemoryTagging())) - resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, - reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, - BlockEnd); + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + if (ClassId) { + resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, + reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, + NewSize, BlockEnd); + storePrimaryAllocationStackMaybe(Options, OldPtr); + } else { + storeSecondaryAllocationStackMaybe(Options, OldPtr, NewSize); + } + } return OldTaggedPtr; } } @@ -531,10 +658,9 @@ public: // allow for potential further in-place realloc. The gains of such a trick // are currently unclear. void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); - if (NewPtr) { - const uptr OldSize = getSize(OldPtr, &OldHeader); + if (LIKELY(NewPtr)) { memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); - quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize); + quarantineOrDeallocateChunk(Options, OldTaggedPtr, &OldHeader, OldSize); } return NewPtr; } @@ -607,15 +733,31 @@ public: initThreadMaybe(); const uptr From = Base; const uptr To = Base + Size; - auto Lambda = [this, From, To, Callback, Arg](uptr Block) { + bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging<Params>() && + systemSupportsMemoryTagging(); + auto Lambda = [this, From, To, MayHaveTaggedPrimary, Callback, + Arg](uptr Block) { if (Block < From || Block >= To) return; uptr Chunk; Chunk::UnpackedHeader Header; - if (getChunkFromBlock(Block, &Chunk, &Header) && - Header.State == Chunk::State::Allocated) { + if (MayHaveTaggedPrimary) { + // A chunk header can either have a zero tag (tagged primary) or the + // header tag (secondary, or untagged primary). We don't know which so + // try both. 
+ ScopedDisableMemoryTagChecks x; + if (!getChunkFromBlock(Block, &Chunk, &Header) && + !getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header)) + return; + } else { + if (!getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header)) + return; + } + if (Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; - if (useMemoryTagging()) + if (allocatorSupportsMemoryTagging<Params>()) + TaggedChunk = untagPointer(TaggedChunk); + if (useMemoryTagging<Params>(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header), Arg); @@ -630,14 +772,32 @@ public: bool canReturnNull() { initThreadMaybe(); - return Options.MayReturnNull; + return Primary.Options.load().get(OptionBit::MayReturnNull); } bool setOption(Option O, sptr Value) { - if (O == Option::ReleaseInterval) { - Primary.setReleaseToOsIntervalMs(static_cast<s32>(Value)); - Secondary.setReleaseToOsIntervalMs(static_cast<s32>(Value)); + initThreadMaybe(); + if (O == Option::MemtagTuning) { + // Enabling odd/even tags involves a tradeoff between use-after-free + // detection and buffer overflow detection. Odd/even tags make it more + // likely for buffer overflows to be detected by increasing the size of + // the guaranteed "red zone" around the allocation, but on the other hand + // use-after-free is less likely to be detected because the tag space for + // any particular chunk is cut in half. Therefore we use this tuning + // setting to control whether odd/even tags are enabled. + if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW) + Primary.Options.set(OptionBit::UseOddEvenTags); + else if (Value == M_MEMTAG_TUNING_UAF) + Primary.Options.clear(OptionBit::UseOddEvenTags); return true; + } else { + // We leave it to the various sub-components to decide whether or not they + // want to handle the option, but we do not want to short-circuit + // execution if one of the setOption was to return false. + const bool PrimaryResult = Primary.setOption(O, Value); + const bool SecondaryResult = Secondary.setOption(O, Value); + const bool RegistryResult = TSDRegistry.setOption(O, Value); + return PrimaryResult && SecondaryResult && RegistryResult; } return false; } @@ -657,7 +817,7 @@ public: return GuardedAlloc.getSize(Ptr); #endif // GWP_ASAN_HOOKS - Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); + Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); // Getting the usable size of a chunk only makes sense if it's allocated. @@ -682,18 +842,114 @@ public: #endif // GWP_ASAN_HOOKS if (!Ptr || !isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)) return false; - Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); + Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; return Chunk::isValid(Cookie, Ptr, &Header) && Header.State == Chunk::State::Allocated; } - bool useMemoryTagging() { return Primary.useMemoryTagging(); } + bool useMemoryTaggingTestOnly() const { + return useMemoryTagging<Params>(Primary.Options.load()); + } + void disableMemoryTagging() { + // If we haven't been initialized yet, we need to initialize now in order to + // prevent a future call to initThreadMaybe() from enabling memory tagging + // based on feature detection. But don't call initThreadMaybe() because it + // may end up calling the allocator (via pthread_atfork, via the post-init + // callback), which may cause mappings to be created with memory tagging + // enabled. 
+ TSDRegistry.initOnceMaybe(this); + if (allocatorSupportsMemoryTagging<Params>()) { + Secondary.disableMemoryTagging(); + Primary.Options.clear(OptionBit::UseMemoryTagging); + } + } + + void setTrackAllocationStacks(bool Track) { + initThreadMaybe(); + if (Track) + Primary.Options.set(OptionBit::TrackAllocationStacks); + else + Primary.Options.clear(OptionBit::TrackAllocationStacks); + } + + void setFillContents(FillContentsMode FillContents) { + initThreadMaybe(); + Primary.Options.setFillContentsMode(FillContents); + } + + void setAddLargeAllocationSlack(bool AddSlack) { + initThreadMaybe(); + if (AddSlack) + Primary.Options.set(OptionBit::AddLargeAllocationSlack); + else + Primary.Options.clear(OptionBit::AddLargeAllocationSlack); + } + + const char *getStackDepotAddress() const { + return reinterpret_cast<const char *>(&Depot); + } + + const char *getRegionInfoArrayAddress() const { + return Primary.getRegionInfoArrayAddress(); + } + + static uptr getRegionInfoArraySize() { + return PrimaryT::getRegionInfoArraySize(); + } + + const char *getRingBufferAddress() const { + return reinterpret_cast<const char *>(&RingBuffer); + } + + static uptr getRingBufferSize() { return sizeof(RingBuffer); } + + static const uptr MaxTraceSize = 64; + + static void collectTraceMaybe(const StackDepot *Depot, + uintptr_t (&Trace)[MaxTraceSize], u32 Hash) { + uptr RingPos, Size; + if (!Depot->find(Hash, &RingPos, &Size)) + return; + for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) + Trace[I] = (*Depot)[RingPos + I]; + } + + static void getErrorInfo(struct scudo_error_info *ErrorInfo, + uintptr_t FaultAddr, const char *DepotPtr, + const char *RegionInfoPtr, const char *RingBufferPtr, + const char *Memory, const char *MemoryTags, + uintptr_t MemoryAddr, size_t MemorySize) { + *ErrorInfo = {}; + if (!allocatorSupportsMemoryTagging<Params>() || + MemoryAddr + MemorySize < MemoryAddr) + return; - void disableMemoryTagging() { Primary.disableMemoryTagging(); } + auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr); + size_t NextErrorReport = 0; + + // Check for OOB in the current block and the two surrounding blocks. Beyond + // that, UAF is more likely. + if (extractTag(FaultAddr) != 0) + getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RegionInfoPtr, Memory, MemoryTags, MemoryAddr, + MemorySize, 0, 2); + + // Check the ring buffer. For primary allocations this will only find UAF; + // for secondary allocations we can find either UAF or OOB. + getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RingBufferPtr); + + // Check for OOB in the 28 blocks surrounding the 3 we checked earlier. + // Beyond that we are likely to hit false positives. 
+ if (extractTag(FaultAddr) != 0) + getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RegionInfoPtr, Memory, MemoryTags, MemoryAddr, + MemorySize, 2, 16); + } private: - using SecondaryT = typename Params::Secondary; + using SecondaryT = MapAllocator<Params>; typedef typename PrimaryT::SizeClassMap SizeClassMap; static const uptr MinAlignmentLog = SCUDO_MIN_ALIGNMENT_LOG; @@ -705,32 +961,59 @@ private: static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); - static_assert(!PrimaryT::SupportsMemoryTagging || + static_assert(!allocatorSupportsMemoryTagging<Params>() || MinAlignment >= archMemoryTagGranuleSize(), ""); static const u32 BlockMarker = 0x44554353U; + // These are indexes into an "array" of 32-bit values that store information + // inline with a chunk that is relevant to diagnosing memory tag faults, where + // 0 corresponds to the address of the user memory. This means that only + // negative indexes may be used. The smallest index that may be used is -2, + // which corresponds to 8 bytes before the user memory, because the chunk + // header size is 8 bytes and in allocators that support memory tagging the + // minimum alignment is at least the tag granule size (16 on aarch64). + static const sptr MemTagAllocationTraceIndex = -2; + static const sptr MemTagAllocationTidIndex = -1; + + u32 Cookie = 0; + u32 QuarantineMaxChunkSize = 0; + GlobalStats Stats; - TSDRegistryT TSDRegistry; PrimaryT Primary; SecondaryT Secondary; QuarantineT Quarantine; - - u32 Cookie; - - struct { - u8 MayReturnNull : 1; // may_return_null - u8 ZeroContents : 1; // zero_contents - u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch - u8 DeleteSizeMismatch : 1; // delete_size_mismatch - u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size - } Options; + TSDRegistryT TSDRegistry; + pthread_once_t PostInitNonce = PTHREAD_ONCE_INIT; #ifdef GWP_ASAN_HOOKS gwp_asan::GuardedPoolAllocator GuardedAlloc; + uptr GuardedAllocSlotSize = 0; #endif // GWP_ASAN_HOOKS + StackDepot Depot; + + struct AllocationRingBuffer { + struct Entry { + atomic_uptr Ptr; + atomic_uptr AllocationSize; + atomic_u32 AllocationTrace; + atomic_u32 AllocationTid; + atomic_u32 DeallocationTrace; + atomic_u32 DeallocationTid; + }; + + atomic_uptr Pos; +#ifdef SCUDO_FUZZ + static const uptr NumEntries = 2; +#else + static const uptr NumEntries = 32768; +#endif + Entry Entries[NumEntries]; + }; + AllocationRingBuffer RingBuffer = {}; + // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { // Verify that the header offset field can hold the maximum offset. 
In the @@ -778,30 +1061,50 @@ private: const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(const_cast<void *>(Ptr)); return SecondaryT::getBlockEnd(getBlockBegin(Ptr, Header)) - reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes; } - ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) { - TSDRegistry.initThreadMaybe(this, MinimalInit); - } - - void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header, - uptr Size) { + void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, + Chunk::UnpackedHeader *Header, uptr Size) { + void *Ptr = getHeaderTaggedPointer(TaggedPtr); Chunk::UnpackedHeader NewHeader = *Header; - if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { - uptr TaggedBegin, TaggedEnd; - setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); - } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. - // Logical Or can be short-circuited, which introduces unnecessary - // conditional jumps, so use bitwise Or and let the compiler be clever. - const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size | - (Size > Options.QuarantineMaxChunkSize); - if (BypassQuarantine) { + // This purposefully underflows for Size == 0. + const bool BypassQuarantine = !Quarantine.getCacheSize() || + ((Size - 1) >= QuarantineMaxChunkSize) || + !NewHeader.ClassId; + if (BypassQuarantine) NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + else + NewHeader.State = Chunk::State::Quarantined; + NewHeader.OriginOrWasZeroed = useMemoryTagging<Params>(Options) && + NewHeader.ClassId && + !TSDRegistry.getDisableMemInit(); + Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + u8 PrevTag = extractTag(reinterpret_cast<uptr>(TaggedPtr)); + storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); + if (NewHeader.ClassId) { + if (!TSDRegistry.getDisableMemInit()) { + uptr TaggedBegin, TaggedEnd; + const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( + Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)), + NewHeader.ClassId); + // Exclude the previous tag so that immediate use after free is + // detected 100% of the time. 
+ setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, + &TaggedEnd); + } + } + } + if (BypassQuarantine) { + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(Ptr); void *BlockBegin = getBlockBegin(Ptr, &NewHeader); const uptr ClassId = NewHeader.ClassId; if (LIKELY(ClassId)) { @@ -811,11 +1114,12 @@ private: if (UnlockRequired) TSD->unlock(); } else { - Secondary.deallocate(BlockBegin); + if (UNLIKELY(useMemoryTagging<Params>(Options))) + storeTags(reinterpret_cast<uptr>(BlockBegin), + reinterpret_cast<uptr>(Ptr)); + Secondary.deallocate(Options, BlockBegin); } } else { - NewHeader.State = Chunk::State::Quarantined; - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); Quarantine.put(&TSD->QuarantineCache, @@ -827,13 +1131,293 @@ private: bool getChunkFromBlock(uptr Block, uptr *Chunk, Chunk::UnpackedHeader *Header) { - u32 Offset = 0; - if (reinterpret_cast<u32 *>(Block)[0] == BlockMarker) - Offset = reinterpret_cast<u32 *>(Block)[1]; - *Chunk = Block + Offset + Chunk::getHeaderSize(); + *Chunk = + Block + getChunkOffsetFromBlock(reinterpret_cast<const char *>(Block)); return Chunk::isValid(Cookie, reinterpret_cast<void *>(*Chunk), Header); } + static uptr getChunkOffsetFromBlock(const char *Block) { + u32 Offset = 0; + if (reinterpret_cast<const u32 *>(Block)[0] == BlockMarker) + Offset = reinterpret_cast<const u32 *>(Block)[1]; + return Offset + Chunk::getHeaderSize(); + } + + // Set the tag of the granule past the end of the allocation to 0, to catch + // linear overflows even if a previous larger allocation used the same block + // and tag. Only do this if the granule past the end is in our block, because + // this would otherwise lead to a SEGV if the allocation covers the entire + // block and our block is at the end of a mapping. The tag of the next block's + // header granule will be set to 0, so it will serve the purpose of catching + // linear overflows in this case. + // + // For allocations of size 0 we do not end up storing the address tag to the + // memory tag space, which getInlineErrorInfo() normally relies on to match + // address tags against chunks. To allow matching in this case we store the + // address tag in the first byte of the chunk. + void storeEndMarker(uptr End, uptr Size, uptr BlockEnd) { + uptr UntaggedEnd = untagPointer(End); + if (UntaggedEnd != BlockEnd) { + storeTag(UntaggedEnd); + if (Size == 0) + *reinterpret_cast<u8 *>(UntaggedEnd) = extractTag(End); + } + } + + void *prepareTaggedChunk(void *Ptr, uptr Size, uptr ExcludeMask, + uptr BlockEnd) { + // Prepare the granule before the chunk to store the chunk header by setting + // its tag to 0. Normally its tag will already be 0, but in the case where a + // chunk holding a low alignment allocation is reused for a higher alignment + // allocation, the chunk may already have a non-zero tag from the previous + // allocation. 
+ storeTag(reinterpret_cast<uptr>(Ptr) - archMemoryTagGranuleSize()); + + uptr TaggedBegin, TaggedEnd; + setRandomTag(Ptr, Size, ExcludeMask, &TaggedBegin, &TaggedEnd); + + storeEndMarker(TaggedEnd, Size, BlockEnd); + return reinterpret_cast<void *>(TaggedBegin); + } + + void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr NewSize, + uptr BlockEnd) { + uptr RoundOldPtr = roundUpTo(OldPtr, archMemoryTagGranuleSize()); + uptr RoundNewPtr; + if (RoundOldPtr >= NewPtr) { + // If the allocation is shrinking we just need to set the tag past the end + // of the allocation to 0. See explanation in storeEndMarker() above. + RoundNewPtr = roundUpTo(NewPtr, archMemoryTagGranuleSize()); + } else { + // Set the memory tag of the region + // [RoundOldPtr, roundUpTo(NewPtr, archMemoryTagGranuleSize())) + // to the pointer tag stored in OldPtr. + RoundNewPtr = storeTags(RoundOldPtr, NewPtr); + } + storeEndMarker(RoundNewPtr, NewSize, BlockEnd); + } + + void storePrimaryAllocationStackMaybe(Options Options, void *Ptr) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) + return; + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagAllocationTidIndex] = getThreadID(); + } + + void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid, + uptr AllocationSize, u32 DeallocationTrace, + u32 DeallocationTid) { + uptr Pos = atomic_fetch_add(&RingBuffer.Pos, 1, memory_order_relaxed); + typename AllocationRingBuffer::Entry *Entry = + &RingBuffer.Entries[Pos % AllocationRingBuffer::NumEntries]; + + // First invalidate our entry so that we don't attempt to interpret a + // partially written state in getSecondaryErrorInfo(). The fences below + // ensure that the compiler does not move the stores to Ptr in between the + // stores to the other fields. 
+ atomic_store_relaxed(&Entry->Ptr, 0); + + __atomic_signal_fence(__ATOMIC_SEQ_CST); + atomic_store_relaxed(&Entry->AllocationTrace, AllocationTrace); + atomic_store_relaxed(&Entry->AllocationTid, AllocationTid); + atomic_store_relaxed(&Entry->AllocationSize, AllocationSize); + atomic_store_relaxed(&Entry->DeallocationTrace, DeallocationTrace); + atomic_store_relaxed(&Entry->DeallocationTid, DeallocationTid); + __atomic_signal_fence(__ATOMIC_SEQ_CST); + + atomic_store_relaxed(&Entry->Ptr, reinterpret_cast<uptr>(Ptr)); + } + + void storeSecondaryAllocationStackMaybe(Options Options, void *Ptr, + uptr Size) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) + return; + + u32 Trace = collectStackTrace(); + u32 Tid = getThreadID(); + + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + Ptr32[MemTagAllocationTraceIndex] = Trace; + Ptr32[MemTagAllocationTidIndex] = Tid; + + storeRingBufferEntry(untagPointer(Ptr), Trace, Tid, Size, 0, 0); + } + + void storeDeallocationStackMaybe(Options Options, void *Ptr, u8 PrevTag, + uptr Size) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) + return; + + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + u32 AllocationTrace = Ptr32[MemTagAllocationTraceIndex]; + u32 AllocationTid = Ptr32[MemTagAllocationTidIndex]; + + u32 DeallocationTrace = collectStackTrace(); + u32 DeallocationTid = getThreadID(); + + storeRingBufferEntry(addFixedTag(untagPointer(Ptr), PrevTag), + AllocationTrace, AllocationTid, Size, + DeallocationTrace, DeallocationTid); + } + + static const size_t NumErrorReports = + sizeof(((scudo_error_info *)0)->reports) / + sizeof(((scudo_error_info *)0)->reports[0]); + + static void getInlineErrorInfo(struct scudo_error_info *ErrorInfo, + size_t &NextErrorReport, uintptr_t FaultAddr, + const StackDepot *Depot, + const char *RegionInfoPtr, const char *Memory, + const char *MemoryTags, uintptr_t MemoryAddr, + size_t MemorySize, size_t MinDistance, + size_t MaxDistance) { + uptr UntaggedFaultAddr = untagPointer(FaultAddr); + u8 FaultAddrTag = extractTag(FaultAddr); + BlockInfo Info = + PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr); + + auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool { + if (Addr < MemoryAddr || Addr + archMemoryTagGranuleSize() < Addr || + Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize) + return false; + *Data = &Memory[Addr - MemoryAddr]; + *Tag = static_cast<u8>( + MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]); + return true; + }; + + auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr, + Chunk::UnpackedHeader *Header, const u32 **Data, + u8 *Tag) { + const char *BlockBegin; + u8 BlockBeginTag; + if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag)) + return false; + uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin); + *ChunkAddr = Addr + ChunkOffset; + + const char *ChunkBegin; + if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag)) + return false; + *Header = *reinterpret_cast<const Chunk::UnpackedHeader *>( + ChunkBegin - Chunk::getHeaderSize()); + *Data = reinterpret_cast<const u32 *>(ChunkBegin); + + // Allocations of size 0 will have stashed the tag in the first byte of + // the chunk, see storeEndMarker(). 
+ if (Header->SizeOrUnusedBytes == 0) + *Tag = static_cast<u8>(*ChunkBegin); + + return true; + }; + + if (NextErrorReport == NumErrorReports) + return; + + auto CheckOOB = [&](uptr BlockAddr) { + if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd) + return false; + + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) || + Header.State != Chunk::State::Allocated || Tag != FaultAddrTag) + return false; + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = + UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + collectTraceMaybe(Depot, R->allocation_trace, + Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + return NextErrorReport == NumErrorReports; + }; + + if (MinDistance == 0 && CheckOOB(Info.BlockBegin)) + return; + + for (size_t I = Max<size_t>(MinDistance, 1); I != MaxDistance; ++I) + if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) || + CheckOOB(Info.BlockBegin - I * Info.BlockSize)) + return; + } + + static void getRingBufferErrorInfo(struct scudo_error_info *ErrorInfo, + size_t &NextErrorReport, + uintptr_t FaultAddr, + const StackDepot *Depot, + const char *RingBufferPtr) { + auto *RingBuffer = + reinterpret_cast<const AllocationRingBuffer *>(RingBufferPtr); + uptr Pos = atomic_load_relaxed(&RingBuffer->Pos); + + for (uptr I = Pos - 1; I != Pos - 1 - AllocationRingBuffer::NumEntries && + NextErrorReport != NumErrorReports; + --I) { + auto *Entry = &RingBuffer->Entries[I % AllocationRingBuffer::NumEntries]; + uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr); + if (!EntryPtr) + continue; + + uptr UntaggedEntryPtr = untagPointer(EntryPtr); + uptr EntrySize = atomic_load_relaxed(&Entry->AllocationSize); + u32 AllocationTrace = atomic_load_relaxed(&Entry->AllocationTrace); + u32 AllocationTid = atomic_load_relaxed(&Entry->AllocationTid); + u32 DeallocationTrace = atomic_load_relaxed(&Entry->DeallocationTrace); + u32 DeallocationTid = atomic_load_relaxed(&Entry->DeallocationTid); + + if (DeallocationTid) { + // For UAF we only consider in-bounds fault addresses because + // out-of-bounds UAF is rare and attempting to detect it is very likely + // to result in false positives. + if (FaultAddr < EntryPtr || FaultAddr >= EntryPtr + EntrySize) + continue; + } else { + // Ring buffer OOB is only possible with secondary allocations. In this + // case we are guaranteed a guard region of at least a page on either + // side of the allocation (guard page on the right, guard page + tagged + // region on the left), so ignore any faults outside of that range. + if (FaultAddr < EntryPtr - getPageSizeCached() || + FaultAddr >= EntryPtr + EntrySize + getPageSizeCached()) + continue; + + // For UAF the ring buffer will contain two entries, one for the + // allocation and another for the deallocation. Don't report buffer + // overflow/underflow using the allocation entry if we have already + // collected a report from the deallocation entry. 
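getInlineErrorInfo() above probes candidate blocks at increasing distances on either side of the block nearest the fault. The shape of that scan, reduced to a standalone sketch in which checkBlock() is only a placeholder for the real CheckOOB logic:

    #include <cstddef>
    #include <cstdint>

    // Stand-in for CheckOOB(): report a hit on one particular neighbor block.
    bool checkBlock(uint64_t BlockAddr) { return BlockAddr == 0x0F00; }

    // Try the nearest block first, then walk outwards one block stride at a
    // time, looking both below and above the faulting block.
    void scanNeighbors(uint64_t NearestBlock, uint64_t BlockSize,
                       size_t MinDistance, size_t MaxDistance) {
      if (MinDistance == 0 && checkBlock(NearestBlock))
        return;
      for (size_t I = MinDistance > 1 ? MinDistance : 1; I != MaxDistance; ++I)
        if (checkBlock(NearestBlock + I * BlockSize) ||
            checkBlock(NearestBlock - I * BlockSize))
          return;
    }

    int main() { scanNeighbors(0x1000, 0x100, 0, 16); }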
+ bool Found = false; + for (uptr J = 0; J != NextErrorReport; ++J) { + if (ErrorInfo->reports[J].allocation_address == UntaggedEntryPtr) { + Found = true; + break; + } + } + if (Found) + continue; + } + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + if (DeallocationTid) + R->error_type = USE_AFTER_FREE; + else if (FaultAddr < EntryPtr) + R->error_type = BUFFER_UNDERFLOW; + else + R->error_type = BUFFER_OVERFLOW; + + R->allocation_address = UntaggedEntryPtr; + R->allocation_size = EntrySize; + collectTraceMaybe(Depot, R->allocation_trace, AllocationTrace); + R->allocation_tid = AllocationTid; + collectTraceMaybe(Depot, R->deallocation_trace, DeallocationTrace); + R->deallocation_tid = DeallocationTid; + } + } + uptr getStats(ScopedString *Str) { Primary.getStats(Str); Secondary.getStats(Str); diff --git a/standalone/common.cpp b/standalone/common.cpp index d93bfc59b3c..666f95400c7 100644 --- a/standalone/common.cpp +++ b/standalone/common.cpp @@ -8,6 +8,7 @@ #include "common.h" #include "atomic_helpers.h" +#include "string_utils.h" namespace scudo { @@ -21,11 +22,16 @@ uptr getPageSizeSlow() { } // Fatal internal map() or unmap() error (potentially OOM related). -void NORETURN dieOnMapUnmapError(bool OutOfMemory) { - outputRaw("Scudo ERROR: internal map or unmap failure"); - if (OutOfMemory) - outputRaw(" (OOM)"); - outputRaw("\n"); +void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { + char Error[128] = "Scudo ERROR: internal map or unmap failure\n"; + if (SizeIfOOM) { + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", + SizeIfOOM >> 10); + } + outputRaw(Error); + setAbortMessage(Error); die(); } diff --git a/standalone/common.h b/standalone/common.h index e026e34c004..3f27a3d3e1b 100644 --- a/standalone/common.h +++ b/standalone/common.h @@ -133,6 +133,8 @@ const char *getEnv(const char *Name); u64 getMonotonicTime(); +u32 getThreadID(); + // Our randomness gathering function is limited to 256 bytes to ensure we get // as many bytes as requested, and avoid interruptions (on Linux). constexpr uptr MaxRandomLength = 256U; @@ -163,16 +165,46 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags = 0, void unmap(void *Addr, uptr Size, uptr Flags = 0, MapPlatformData *Data = nullptr); +void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, + MapPlatformData *Data = nullptr); + void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data = nullptr); -// Internal map & unmap fatal error. This must not call map(). -void NORETURN dieOnMapUnmapError(bool OutOfMemory = false); +// Internal map & unmap fatal error. This must not call map(). SizeIfOOM shall +// hold the requested size on an out-of-memory error, 0 otherwise. +void NORETURN dieOnMapUnmapError(uptr SizeIfOOM = 0); // Logging related functions. void setAbortMessage(const char *Message); +struct BlockInfo { + uptr BlockBegin; + uptr BlockSize; + uptr RegionBegin; + uptr RegionEnd; +}; + +enum class Option : u8 { + ReleaseInterval, // Release to OS interval in milliseconds. + MemtagTuning, // Whether to tune tagging for UAF or overflow. + ThreadDisableMemInit, // Whether to disable automatic heap initialization and, + // where possible, memory tagging, on this thread. + MaxCacheEntriesCount, // Maximum number of blocks that can be cached. + MaxCacheEntrySize, // Maximum size of a block that can be cached. + MaxTSDsCount, // Number of usable TSDs for the shared registry. 
+}; + +constexpr unsigned char PatternFillByte = 0xAB; + +enum FillContentsMode { + NoFill = 0, + ZeroFill = 1, + PatternOrZeroFill = 2 // Pattern fill unless the memory is known to be + // zero-initialized already. +}; + } // namespace scudo #endif // SCUDO_COMMON_H_ diff --git a/standalone/flags.cpp b/standalone/flags.cpp index dd9f050a2d2..de5153b288b 100644 --- a/standalone/flags.cpp +++ b/standalone/flags.cpp @@ -9,7 +9,8 @@ #include "flags.h" #include "common.h" #include "flags_parser.h" -#include "interface.h" + +#include "scudo/interface.h" namespace scudo { diff --git a/standalone/flags.inc b/standalone/flags.inc index 342af1c79ad..b5cab473416 100644 --- a/standalone/flags.inc +++ b/standalone/flags.inc @@ -34,6 +34,9 @@ SCUDO_FLAG(bool, delete_size_mismatch, true, SCUDO_FLAG(bool, zero_contents, false, "Zero chunk contents on allocation.") +SCUDO_FLAG(bool, pattern_fill_contents, false, + "Pattern fill chunk contents on allocation.") + SCUDO_FLAG(int, rss_limit_mb, -1, "Enforce an upper limit (in megabytes) to the process RSS. The " "allocator will terminate or return NULL when allocations are " diff --git a/standalone/flags_parser.h b/standalone/flags_parser.h index 32511f768c6..ba832adbd90 100644 --- a/standalone/flags_parser.h +++ b/standalone/flags_parser.h @@ -29,7 +29,7 @@ public: void printFlagDescriptions(); private: - static const u32 MaxFlags = 16; + static const u32 MaxFlags = 20; struct Flag { const char *Name; const char *Desc; diff --git a/standalone/fuchsia.cpp b/standalone/fuchsia.cpp index b3d72de158c..3b473bc9e22 100644 --- a/standalone/fuchsia.cpp +++ b/standalone/fuchsia.cpp @@ -15,7 +15,6 @@ #include "string_utils.h" #include <lib/sync/mutex.h> // for sync_mutex_t -#include <limits.h> // for PAGE_SIZE #include <stdlib.h> // for getenv() #include <zircon/compiler.h> #include <zircon/sanitizer.h> @@ -23,7 +22,7 @@ namespace scudo { -uptr getPageSize() { return PAGE_SIZE; } +uptr getPageSize() { return _zx_system_get_page_size(); } void NORETURN die() { __builtin_trap(); } @@ -42,7 +41,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { Size, &Data->Vmar, &Data->VmarBase); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } return reinterpret_cast<void *>(Data->VmarBase); @@ -50,7 +49,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { void *map(void *Addr, uptr Size, const char *Name, uptr Flags, MapPlatformData *Data) { - DCHECK_EQ(Size % PAGE_SIZE, 0); + DCHECK_EQ(Size % getPageSizeCached(), 0); const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); // For MAP_NOACCESS, just allocate a Vmar and return. @@ -72,7 +71,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_set_size(Vmo, VmoSize + Size); if (Status != ZX_OK) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } } else { @@ -80,7 +79,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_create(Size, ZX_VMO_RESIZABLE, &Vmo); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? 
Size : 0); return nullptr; } _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, strlen(Name)); @@ -97,14 +96,16 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, // No need to track the Vmo if we don't intend on resizing it. Close it. if (Flags & MAP_RESIZABLE) { DCHECK(Data); - DCHECK_EQ(Data->Vmo, ZX_HANDLE_INVALID); - Data->Vmo = Vmo; + if (Data->Vmo == ZX_HANDLE_INVALID) + Data->Vmo = Vmo; + else + DCHECK_EQ(Data->Vmo, Vmo); } else { CHECK_EQ(_zx_handle_close(Vmo), ZX_OK); } if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } if (Data) @@ -135,6 +136,16 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { } } +void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, + UNUSED MapPlatformData *Data) { + const zx_vm_option_t Prot = + (Flags & MAP_NOACCESS) ? 0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE); + DCHECK(Data); + DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); + if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK) + dieOnMapUnmapError(); +} + void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data) { DCHECK(Data); @@ -170,6 +181,8 @@ u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } +u32 getThreadID() { return 0; } + bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, ""); if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength)) diff --git a/standalone/fuzz/get_error_info_fuzzer.cpp b/standalone/fuzz/get_error_info_fuzzer.cpp new file mode 100644 index 00000000000..078e44b0dfc --- /dev/null +++ b/standalone/fuzz/get_error_info_fuzzer.cpp @@ -0,0 +1,60 @@ +//===-- get_error_info_fuzzer.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SCUDO_FUZZ +#include "allocator_config.h" +#include "combined.h" + +#include <fuzzer/FuzzedDataProvider.h> + +#include <string> +#include <vector> + +extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { + using AllocatorT = scudo::Allocator<scudo::AndroidConfig>; + FuzzedDataProvider FDP(Data, Size); + + uintptr_t FaultAddr = FDP.ConsumeIntegral<uintptr_t>(); + uintptr_t MemoryAddr = FDP.ConsumeIntegral<uintptr_t>(); + + std::string MemoryAndTags = + FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + const char *Memory = MemoryAndTags.c_str(); + // Assume 16-byte alignment. 
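In the fuzzer above, a single input string carries both the memory contents and the tag bytes: every 16-byte granule of memory is paired with one tag byte at the end of the buffer, which is where the (length / 17) * 16 split comes from. A standalone check of that arithmetic, using a hypothetical input length:

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t InputLen = 170;                    // hypothetical fuzzer input length
      const size_t MemorySize = (InputLen / 17) * 16; // bytes of memory contents
      const size_t NumTagBytes = MemorySize / 16;     // one tag byte per 16-byte granule
      assert(MemorySize == 160);
      assert(NumTagBytes == 10);
      assert(MemorySize + NumTagBytes <= InputLen);   // both parts fit in the input
      return 0;
    }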
+ size_t MemorySize = (MemoryAndTags.length() / 17) * 16; + const char *MemoryTags = Memory + MemorySize; + + std::string StackDepotBytes = + FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + std::vector<char> StackDepot(sizeof(scudo::StackDepot), 0); + for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size(); + ++i) { + StackDepot[i] = StackDepotBytes[i]; + } + + std::string RegionInfoBytes = + FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + std::vector<char> RegionInfo(AllocatorT::getRegionInfoArraySize(), 0); + for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size(); + ++i) { + RegionInfo[i] = RegionInfoBytes[i]; + } + + std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString(); + std::vector<char> RingBuffer(AllocatorT::getRingBufferSize(), 0); + for (size_t i = 0; i < RingBufferBytes.length() && i < RingBuffer.size(); + ++i) { + RingBuffer[i] = RingBufferBytes[i]; + } + + scudo_error_info ErrorInfo; + AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), + RegionInfo.data(), RingBuffer.data(), Memory, + MemoryTags, MemoryAddr, MemorySize); + return 0; +} diff --git a/standalone/include/scudo/interface.h b/standalone/include/scudo/interface.h new file mode 100644 index 00000000000..0e6cf3d4e25 --- /dev/null +++ b/standalone/include/scudo/interface.h @@ -0,0 +1,158 @@ +//===-- scudo/interface.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_INTERFACE_H_ +#define SCUDO_INTERFACE_H_ + +#include <stddef.h> +#include <stdint.h> + +extern "C" { + +__attribute__((weak)) const char *__scudo_default_options(); + +// Post-allocation & pre-deallocation hooks. +// They must be thread-safe and not use heap related functions. +__attribute__((weak)) void __scudo_allocate_hook(void *ptr, size_t size); +__attribute__((weak)) void __scudo_deallocate_hook(void *ptr); + +void __scudo_print_stats(void); + +typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); + +// Determine the likely cause of a tag check fault or other memory protection +// error on a system with memory tagging support. The results are returned via +// the error_info data structure. Up to three possible causes are returned in +// the reports array, in decreasing order of probability. The remaining elements +// of reports are zero-initialized. +// +// This function may be called from a different process from the one that +// crashed. In this case, various data structures must be copied from the +// crashing process to the process that analyzes the crash. +// +// This interface is not guaranteed to be stable and may change at any time. +// Furthermore, the version of scudo in the crashing process must be the same as +// the version in the process that analyzes the crash. +// +// fault_addr is the fault address. On aarch64 this is available in the system +// register FAR_ELx, or siginfo.si_addr in Linux 5.11 or above. This address +// must include the pointer tag; this is available if SA_EXPOSE_TAGBITS was set +// in sigaction.sa_flags when the signal handler was registered. Note that the +// kernel strips the tag from the field sigcontext.fault_address, so this +// address is not suitable to be passed as fault_addr. 
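The fault_addr paragraph above relies on the crash handler having been registered with SA_EXPOSE_TAGBITS so that si_addr keeps the pointer tag. A sketch of such a registration, not part of this patch; the handler and function names are made up, and the flag value is defined locally in case the libc headers predate Linux 5.11:

    #include <signal.h>
    #include <stdint.h>

    #ifndef SA_EXPOSE_TAGBITS
    #define SA_EXPOSE_TAGBITS 0x00000800 // Linux 5.11+; value from the UAPI headers
    #endif

    static void tagFaultHandler(int, siginfo_t *Info, void *) {
      // With SA_EXPOSE_TAGBITS, bits 56-59 of si_addr still hold the pointer
      // tag, so this value is usable as fault_addr for __scudo_get_error_info().
      const uintptr_t FaultAddr = reinterpret_cast<uintptr_t>(Info->si_addr);
      (void)FaultAddr;
    }

    void installTagFaultHandler() {
      struct sigaction SA = {};
      SA.sa_sigaction = tagFaultHandler;
      SA.sa_flags = SA_SIGINFO | SA_EXPOSE_TAGBITS;
      sigaction(SIGSEGV, &SA, nullptr); // error handling omitted in this sketch
    }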
+// +// stack_depot is a pointer to the stack depot data structure, which may be +// obtained by calling the function __scudo_get_stack_depot_addr() in the +// crashing process. The size of the stack depot is available by calling the +// function __scudo_get_stack_depot_size(). +// +// region_info is a pointer to the region info data structure, which may be +// obtained by calling the function __scudo_get_region_info_addr() in the +// crashing process. The size of the region info is available by calling the +// function __scudo_get_region_info_size(). +// +// memory is a pointer to a region of memory surrounding the fault address. +// The more memory available via this pointer, the more likely it is that the +// function will be able to analyze a crash correctly. It is recommended to +// provide an amount of memory equal to 16 * the primary allocator's largest +// size class either side of the fault address. +// +// memory_tags is a pointer to an array of memory tags for the memory accessed +// via memory. Each byte of this array corresponds to a region of memory of size +// equal to the architecturally defined memory tag granule size (16 on aarch64). +// +// memory_addr is the start address of memory in the crashing process's address +// space. +// +// memory_size is the size of the memory region referred to by the memory +// pointer. +void __scudo_get_error_info(struct scudo_error_info *error_info, + uintptr_t fault_addr, const char *stack_depot, + const char *region_info, const char *ring_buffer, + const char *memory, const char *memory_tags, + uintptr_t memory_addr, size_t memory_size); + +enum scudo_error_type { + UNKNOWN, + USE_AFTER_FREE, + BUFFER_OVERFLOW, + BUFFER_UNDERFLOW, +}; + +struct scudo_error_report { + enum scudo_error_type error_type; + + uintptr_t allocation_address; + uintptr_t allocation_size; + + uint32_t allocation_tid; + uintptr_t allocation_trace[64]; + + uint32_t deallocation_tid; + uintptr_t deallocation_trace[64]; +}; + +struct scudo_error_info { + struct scudo_error_report reports[3]; +}; + +const char *__scudo_get_stack_depot_addr(); +size_t __scudo_get_stack_depot_size(); + +const char *__scudo_get_region_info_addr(); +size_t __scudo_get_region_info_size(); + +const char *__scudo_get_ring_buffer_addr(); +size_t __scudo_get_ring_buffer_size(); + +#ifndef M_DECAY_TIME +#define M_DECAY_TIME -100 +#endif + +#ifndef M_PURGE +#define M_PURGE -101 +#endif + +// Tune the allocator's choice of memory tags to make it more likely that +// a certain class of memory errors will be detected. The value argument should +// be one of the enumerators of the scudo_memtag_tuning enum below. +#ifndef M_MEMTAG_TUNING +#define M_MEMTAG_TUNING -102 +#endif + +// Per-thread memory initialization tuning. The value argument should be one of: +// 1: Disable automatic heap initialization and, where possible, memory tagging, +// on this thread. +// 0: Normal behavior. +#ifndef M_THREAD_DISABLE_MEM_INIT +#define M_THREAD_DISABLE_MEM_INIT -103 +#endif + +#ifndef M_CACHE_COUNT_MAX +#define M_CACHE_COUNT_MAX -200 +#endif + +#ifndef M_CACHE_SIZE_MAX +#define M_CACHE_SIZE_MAX -201 +#endif + +#ifndef M_TSDS_COUNT_MAX +#define M_TSDS_COUNT_MAX -202 +#endif + +enum scudo_memtag_tuning { + // Tune for buffer overflows. + M_MEMTAG_TUNING_BUFFER_OVERFLOW, + + // Tune for use-after-free. 
+ M_MEMTAG_TUNING_UAF, +}; + +} // extern "C" + +#endif // SCUDO_INTERFACE_H_ diff --git a/standalone/interface.h b/standalone/interface.h deleted file mode 100644 index e2639823f42..00000000000 --- a/standalone/interface.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- interface.h ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SCUDO_INTERFACE_H_ -#define SCUDO_INTERFACE_H_ - -#include "internal_defs.h" - -extern "C" { - -WEAK INTERFACE const char *__scudo_default_options(); - -// Post-allocation & pre-deallocation hooks. -// They must be thread-safe and not use heap related functions. -WEAK INTERFACE void __scudo_allocate_hook(void *ptr, size_t size); -WEAK INTERFACE void __scudo_deallocate_hook(void *ptr); - -WEAK INTERFACE void __scudo_print_stats(void); - -typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); - -} // extern "C" - -#endif // SCUDO_INTERFACE_H_ diff --git a/standalone/internal_defs.h b/standalone/internal_defs.h index c61f8e6c71b..bbf7631be18 100644 --- a/standalone/internal_defs.h +++ b/standalone/internal_defs.h @@ -33,13 +33,9 @@ #define WEAK __attribute__((weak)) #define ALWAYS_INLINE inline __attribute__((always_inline)) #define ALIAS(X) __attribute__((alias(X))) -// Please only use the ALIGNED macro before the type. Using ALIGNED after the -// variable declaration is not portable. -#define ALIGNED(X) __attribute__((aligned(X))) #define FORMAT(F, A) __attribute__((format(printf, F, A))) #define NOINLINE __attribute__((noinline)) #define NORETURN __attribute__((noreturn)) -#define THREADLOCAL __thread #define LIKELY(X) __builtin_expect(!!(X), 1) #define UNLIKELY(X) __builtin_expect(!!(X), 0) #if defined(__i386__) || defined(__x86_64__) @@ -52,6 +48,34 @@ #define USED __attribute__((used)) #define NOEXCEPT noexcept +// This check is only available on Clang. This is essentially an alias of +// C++20's 'constinit' specifier which will take care of this when (if?) we can +// ask all libc's that use Scudo to compile us with C++20. Dynamic +// initialization is bad; Scudo is designed to be lazy-initializated on the +// first call to malloc/free (and friends), and this generally happens in the +// loader somewhere in libdl's init. After the loader is done, control is +// transferred to libc's initialization, and the dynamic initializers are run. +// If there's a dynamic initializer for Scudo, then it will clobber the +// already-initialized Scudo, and re-initialize all its members back to default +// values, causing various explosions. Unfortunately, marking +// scudo::Allocator<>'s constructor as 'constexpr' isn't sufficient to prevent +// dynamic initialization, as default initialization is fine under 'constexpr' +// (but not 'constinit'). Clang at -O0, and gcc at all opt levels will emit a +// dynamic initializer for any constant-initialized variables if there is a mix +// of default-initialized and constant-initialized variables. +// +// If you're looking at this because your build failed, you probably introduced +// a new member to scudo::Allocator<> (possibly transiently) that didn't have an +// initializer. The fix is easy - just add one. 
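The long comment above motivates the SCUDO_REQUIRE_CONSTANT_INITIALIZATION macro defined just after it. A minimal example of what the attribute accepts and rejects, assuming Clang and separate from the patch itself:

    // Constant-initialized: constexpr default constructor and a constant member
    // initializer, so the attribute is satisfied and no dynamic initializer is
    // emitted.
    struct Allocatorish {
      int State = 0;
      constexpr Allocatorish() = default;
    };
    __attribute__((__require_constant_initialization__)) static Allocatorish Good;

    // Rejected: the initializer is a runtime call, so Clang reports
    // "variable does not have a constant initializer" instead of silently
    // emitting a dynamic initializer.
    //
    //   int runtimeValue();
    //   __attribute__((__require_constant_initialization__))
    //   static int Bad = runtimeValue();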
+#if defined(__has_attribute) +#if __has_attribute(require_constant_initialization) +#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION \ + __attribute__((__require_constant_initialization__)) +#else +#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION +#endif +#endif + namespace scudo { typedef unsigned long uptr; @@ -110,13 +134,27 @@ void NORETURN reportCheckFailed(const char *File, int Line, #define DCHECK_GT(A, B) CHECK_GT(A, B) #define DCHECK_GE(A, B) CHECK_GE(A, B) #else -#define DCHECK(A) -#define DCHECK_EQ(A, B) -#define DCHECK_NE(A, B) -#define DCHECK_LT(A, B) -#define DCHECK_LE(A, B) -#define DCHECK_GT(A, B) -#define DCHECK_GE(A, B) +#define DCHECK(A) \ + do { \ + } while (false) +#define DCHECK_EQ(A, B) \ + do { \ + } while (false) +#define DCHECK_NE(A, B) \ + do { \ + } while (false) +#define DCHECK_LT(A, B) \ + do { \ + } while (false) +#define DCHECK_LE(A, B) \ + do { \ + } while (false) +#define DCHECK_GT(A, B) \ + do { \ + } while (false) +#define DCHECK_GE(A, B) \ + do { \ + } while (false) #endif // The superfluous die() call effectively makes this macro NORETURN. diff --git a/standalone/linux.cpp b/standalone/linux.cpp index 0ab96836fc4..301bdcd34da 100644 --- a/standalone/linux.cpp +++ b/standalone/linux.cpp @@ -10,6 +10,7 @@ #if SCUDO_LINUX +#include "atomic_helpers.h" #include "common.h" #include "linux.h" #include "mutex.h" @@ -35,10 +36,6 @@ #define ANDROID_PR_SET_VMA_ANON_NAME 0 #endif -#ifdef ANDROID_EXPERIMENTAL_MTE -#include <bionic/mte_kernel.h> -#endif - namespace scudo { uptr getPageSize() { return static_cast<uptr>(sysconf(_SC_PAGESIZE)); } @@ -54,11 +51,14 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, MmapProt = PROT_NONE; } else { MmapProt = PROT_READ | PROT_WRITE; -#if defined(__aarch64__) && defined(ANDROID_EXPERIMENTAL_MTE) - if (Flags & MAP_MEMTAG) - MmapProt |= PROT_MTE; -#endif } +#if defined(__aarch64__) +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; +#endif if (Addr) { // Currently no scenario for a noaccess mapping with a fixed address. DCHECK_EQ(Flags & MAP_NOACCESS, 0); @@ -67,11 +67,11 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM); + dieOnMapUnmapError(errno == ENOMEM ? Size : 0); return nullptr; } #if SCUDO_ANDROID - if (!(Flags & MAP_NOACCESS)) + if (Name) prctl(ANDROID_PR_SET_VMA, ANDROID_PR_SET_VMA_ANON_NAME, P, Size, Name); #endif return P; @@ -83,9 +83,48 @@ void unmap(void *Addr, uptr Size, UNUSED uptr Flags, dieOnMapUnmapError(); } +void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, + UNUSED MapPlatformData *Data) { + int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); + if (mprotect(reinterpret_cast<void *>(Addr), Size, Prot) != 0) + dieOnMapUnmapError(); +} + +static bool madviseNeedsMemset() { + const uptr Size = getPageSizeCached(); + char *P = reinterpret_cast<char *>(mmap(0, Size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (!P) + dieOnMapUnmapError(errno == ENOMEM ? 
Size : 0); + *P = 1; + while (madvise(P, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) { + } + const bool R = (*P != 0); + if (munmap(P, Size) != 0) + dieOnMapUnmapError(); + return R; +} + +static bool madviseNeedsMemsetCached() { + static atomic_u8 Cache; + enum State : u8 { Unknown = 0, Yes = 1, No = 2 }; + State NeedsMemset = static_cast<State>(atomic_load_relaxed(&Cache)); + if (NeedsMemset == Unknown) { + NeedsMemset = madviseNeedsMemset() ? Yes : No; + atomic_store_relaxed(&Cache, NeedsMemset); + } + return NeedsMemset == Yes; +} + void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, UNUSED MapPlatformData *Data) { void *Addr = reinterpret_cast<void *>(BaseAddress + Offset); + if (madviseNeedsMemsetCached()) { + // Workaround for QEMU-user ignoring MADV_DONTNEED. + // https://github.com/qemu/qemu/blob/b1cffefa1b163bce9aebc3416f562c1d3886eeaa/linux-user/syscall.c#L11941 + // https://bugs.launchpad.net/qemu/+bug/1926521 + memset(Addr, 0, Size); + } while (madvise(Addr, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) { } } @@ -139,6 +178,14 @@ u32 getNumberOfCPUs() { return static_cast<u32>(CPU_COUNT(&CPUs)); } +u32 getThreadID() { +#if SCUDO_ANDROID + return static_cast<u32>(gettid()); +#else + return static_cast<u32>(syscall(SYS_gettid)); +#endif +} + // Blocking is possibly unused if the getrandom block is not compiled in. bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { if (!Buffer || !Length || Length > MaxRandomLength) @@ -190,7 +237,7 @@ void outputRaw(const char *Buffer) { } async_safe_write_log(AndroidLogInfo, "scudo", Buffer); } else { - write(2, Buffer, strlen(Buffer)); + (void)write(2, Buffer, strlen(Buffer)); } } diff --git a/standalone/linux.h b/standalone/linux.h index c8e41484c85..72acb6da83a 100644 --- a/standalone/linux.h +++ b/standalone/linux.h @@ -18,51 +18,6 @@ namespace scudo { // MapPlatformData is unused on Linux, define it as a minimally sized structure. struct MapPlatformData {}; -#if SCUDO_ANDROID - -#if defined(__aarch64__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mrs %0, tpidr_el0" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__arm__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__i386__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("movl %%gs:0, %0" : "=r"(__v)); \ - __v; \ - }) -#elif defined(__x86_64__) -#define __get_tls() \ - ({ \ - void **__v; \ - __asm__("mov %%fs:0, %0" : "=r"(__v)); \ - __v; \ - }) -#else -#error "Unsupported architecture." -#endif - -// The Android Bionic team has allocated a TLS slot for sanitizers starting -// with Q, given that Android currently doesn't support ELF TLS. It is used to -// store sanitizer thread specific data. 
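madviseNeedsMemsetCached() above memoizes a one-time runtime probe in an atomic tri-state so the mmap/madvise experiment runs at most once per process (two racing threads may both probe, which is harmless). The same pattern as a standalone sketch, with expensiveProbe() standing in for the real check:

    #include <atomic>
    #include <cstdint>

    enum class State : uint8_t { Unknown = 0, Yes = 1, No = 2 };

    // Placeholder for the expensive check (the real code maps a page and tests
    // whether MADV_DONTNEED actually zeroes it).
    bool expensiveProbe() { return false; }

    bool probeCached() {
      static std::atomic<State> Cache{State::Unknown};
      State S = Cache.load(std::memory_order_relaxed);
      if (S == State::Unknown) {
        S = expensiveProbe() ? State::Yes : State::No;
        Cache.store(S, std::memory_order_relaxed);
      }
      return S == State::Yes;
    }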
-static const int TLS_SLOT_SANITIZER = 6; - -ALWAYS_INLINE uptr *getAndroidTlsPtr() { - return reinterpret_cast<uptr *>(&__get_tls()[TLS_SLOT_SANITIZER]); -} - -#endif // SCUDO_ANDROID - } // namespace scudo #endif // SCUDO_LINUX diff --git a/standalone/list.h b/standalone/list.h index c3b898a328c..1ac93c2f65d 100644 --- a/standalone/list.h +++ b/standalone/list.h @@ -57,9 +57,9 @@ template <class T> struct IntrusiveList { void checkConsistency() const; protected: - uptr Size; - T *First; - T *Last; + uptr Size = 0; + T *First = nullptr; + T *Last = nullptr; }; template <class T> void IntrusiveList<T>::checkConsistency() const { diff --git a/standalone/local_cache.h b/standalone/local_cache.h index 089aeb93962..50039379fa0 100644 --- a/standalone/local_cache.h +++ b/standalone/local_cache.h @@ -17,24 +17,25 @@ namespace scudo { template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; + typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; struct TransferBatch { static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint; - void setFromArray(void **Array, u32 N) { + void setFromArray(CompactPtrT *Array, u32 N) { DCHECK_LE(N, MaxNumCached); Count = N; - memcpy(Batch, Array, sizeof(void *) * Count); + memcpy(Batch, Array, sizeof(Batch[0]) * Count); } void clear() { Count = 0; } - void add(void *P) { + void add(CompactPtrT P) { DCHECK_LT(Count, MaxNumCached); Batch[Count++] = P; } - void copyToArray(void **Array) const { - memcpy(Array, Batch, sizeof(void *) * Count); + void copyToArray(CompactPtrT *Array) const { + memcpy(Array, Batch, sizeof(Batch[0]) * Count); } u32 getCount() const { return Count; } - void *get(u32 I) const { + CompactPtrT get(u32 I) const { DCHECK_LE(I, Count); return Batch[I]; } @@ -45,7 +46,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { private: u32 Count; - void *Batch[MaxNumCached]; + CompactPtrT Batch[MaxNumCached]; }; void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) { @@ -78,13 +79,10 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { // Count, while Chunks might be further off (depending on Count). That keeps // the memory accesses in close quarters. const uptr ClassSize = C->ClassSize; - void *P = C->Chunks[--C->Count]; - // The jury is still out as to whether any kind of PREFETCH here increases - // performance. It definitely decreases performance on Android though. - // if (!SCUDO_ANDROID) PREFETCH(P); + CompactPtrT CompactP = C->Chunks[--C->Count]; Stats.add(StatAllocated, ClassSize); Stats.sub(StatFree, ClassSize); - return P; + return Allocator->decompactPtr(ClassId, CompactP); } void deallocate(uptr ClassId, void *P) { @@ -97,22 +95,35 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { drain(C, ClassId); // See comment in allocate() about memory accesses. const uptr ClassSize = C->ClassSize; - C->Chunks[C->Count++] = P; + C->Chunks[C->Count++] = + Allocator->compactPtr(ClassId, reinterpret_cast<uptr>(P)); Stats.sub(StatAllocated, ClassSize); Stats.add(StatFree, ClassSize); } + bool isEmpty() const { + for (uptr I = 0; I < NumClasses; ++I) + if (PerClassArray[I].Count) + return false; + return true; + } + void drain() { - for (uptr I = 0; I < NumClasses; I++) { - PerClass *C = &PerClassArray[I]; - while (C->Count > 0) - drain(C, I); + // Drain BatchClassId last as createBatch can refill it. 
+ for (uptr I = 0; I < NumClasses; ++I) { + if (I == BatchClassId) + continue; + while (PerClassArray[I].Count > 0) + drain(&PerClassArray[I], I); } + while (PerClassArray[BatchClassId].Count > 0) + drain(&PerClassArray[BatchClassId], BatchClassId); + DCHECK(isEmpty()); } TransferBatch *createBatch(uptr ClassId, void *B) { - if (ClassId != SizeClassMap::BatchClassId) - B = allocate(SizeClassMap::BatchClassId); + if (ClassId != BatchClassId) + B = allocate(BatchClassId); return reinterpret_cast<TransferBatch *>(B); } @@ -120,15 +131,17 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { private: static const uptr NumClasses = SizeClassMap::NumClasses; + static const uptr BatchClassId = SizeClassMap::BatchClassId; struct PerClass { u32 Count; u32 MaxCount; + // Note: ClassSize is zero for the transfer batch. uptr ClassSize; - void *Chunks[2 * TransferBatch::MaxNumCached]; + CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; }; - PerClass PerClassArray[NumClasses]; + PerClass PerClassArray[NumClasses] = {}; LocalStats Stats; - SizeClassAllocator *Allocator; + SizeClassAllocator *Allocator = nullptr; ALWAYS_INLINE void initCacheMaybe(PerClass *C) { if (LIKELY(C->MaxCount)) @@ -142,13 +155,19 @@ private: PerClass *P = &PerClassArray[I]; const uptr Size = SizeClassAllocator::getSizeByClassId(I); P->MaxCount = 2 * TransferBatch::getMaxCached(Size); - P->ClassSize = Size; + if (I != BatchClassId) { + P->ClassSize = Size; + } else { + // ClassSize in this struct is only used for malloc/free stats, which + // should only track user allocations, not internal movements. + P->ClassSize = 0; + } } } void destroyBatch(uptr ClassId, void *B) { - if (ClassId != SizeClassMap::BatchClassId) - deallocate(SizeClassMap::BatchClassId, B); + if (ClassId != BatchClassId) + deallocate(BatchClassId, B); } NOINLINE bool refill(PerClass *C, uptr ClassId) { @@ -166,10 +185,10 @@ private: NOINLINE void drain(PerClass *C, uptr ClassId) { const u32 Count = Min(C->MaxCount / 2, C->Count); - TransferBatch *B = createBatch(ClassId, C->Chunks[0]); + TransferBatch *B = + createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0])); if (UNLIKELY(!B)) - reportOutOfMemory( - SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId)); + reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); B->setFromArray(&C->Chunks[0], Count); C->Count -= Count; for (uptr I = 0; I < C->Count; I++) diff --git a/standalone/memtag.h b/standalone/memtag.h index 76271333754..4bdce16faea 100644 --- a/standalone/memtag.h +++ b/standalone/memtag.h @@ -14,163 +14,228 @@ #if SCUDO_LINUX #include <sys/auxv.h> #include <sys/prctl.h> -#if defined(ANDROID_EXPERIMENTAL_MTE) -#include <bionic/mte_kernel.h> -#endif #endif namespace scudo { -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(SCUDO_FUZZ) +// We assume that Top-Byte Ignore is enabled if the architecture supports memory +// tagging. Not all operating systems enable TBI, so we only claim architectural +// support for memory tagging if the operating system enables TBI. 
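The memtag helpers that follow keep the MTE tag in bits 56-59 of the pointer, which Top-Byte Ignore lets ordinary loads and stores ignore. The bit manipulation restated as a standalone sketch, with untag()/tagOf()/withTag() mirroring the untagPointer()/extractTag()/addFixedTag() definitions in this file:

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t untag(uint64_t Ptr) { return Ptr & ((1ULL << 56) - 1); }
    constexpr uint8_t tagOf(uint64_t Ptr) { return (Ptr >> 56) & 0xf; }
    constexpr uint64_t withTag(uint64_t Ptr, uint64_t Tag) { return Ptr | (Tag << 56); }

    int main() {
      const uint64_t Tagged = withTag(0x700000001000, 0xa); // tag 0xa in the top byte
      assert(tagOf(Tagged) == 0xa);
      assert(untag(Tagged) == 0x700000001000);              // address bits unchanged
      return 0;
    }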
+#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) inline constexpr bool archSupportsMemoryTagging() { return true; } +#else +inline constexpr bool archSupportsMemoryTagging() { return false; } +#endif + inline constexpr uptr archMemoryTagGranuleSize() { return 16; } -inline bool systemSupportsMemoryTagging() { -#if defined(ANDROID_EXPERIMENTAL_MTE) - return getauxval(AT_HWCAP2) & HWCAP2_MTE; +inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } + +inline uint8_t extractTag(uptr Ptr) { return (Ptr >> 56) & 0xf; } + #else - return false; + +inline constexpr bool archSupportsMemoryTagging() { return false; } + +inline uptr archMemoryTagGranuleSize() { + UNREACHABLE("memory tagging not supported"); +} + +inline uptr untagPointer(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +inline uint8_t extractTag(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + #endif + +#if defined(__aarch64__) + +#if SCUDO_LINUX + +inline bool systemSupportsMemoryTagging() { +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + return getauxval(AT_HWCAP2) & HWCAP2_MTE; } inline bool systemDetectsMemoryTagFaultsTestOnly() { -#if defined(ANDROID_EXPERIMENTAL_MTE) - return (prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) & PR_MTE_TCF_MASK) != - PR_MTE_TCF_NONE; -#else - return false; +#ifndef PR_GET_TAGGED_ADDR_CTRL +#define PR_GET_TAGGED_ADDR_CTRL 56 +#endif +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_MASK +#define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) #endif + return (static_cast<unsigned long>( + prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) & + PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE; } +#else // !SCUDO_LINUX + +inline bool systemSupportsMemoryTagging() { return false; } + +inline bool systemDetectsMemoryTagFaultsTestOnly() { return false; } + +#endif // SCUDO_LINUX + inline void disableMemoryTagChecksTestOnly() { - __asm__ __volatile__(".arch_extension mte; msr tco, #1"); + __asm__ __volatile__( + R"( + .arch_extension memtag + msr tco, #1 + )"); } inline void enableMemoryTagChecksTestOnly() { - __asm__ __volatile__(".arch_extension mte; msr tco, #0"); + __asm__ __volatile__( + R"( + .arch_extension memtag + msr tco, #0 + )"); } -inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } +class ScopedDisableMemoryTagChecks { + size_t PrevTCO; + +public: + ScopedDisableMemoryTagChecks() { + __asm__ __volatile__( + R"( + .arch_extension memtag + mrs %0, tco + msr tco, #1 + )" + : "=r"(PrevTCO)); + } -inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, - uptr *TaggedEnd) { - void *End; + ~ScopedDisableMemoryTagChecks() { + __asm__ __volatile__( + R"( + .arch_extension memtag + msr tco, %0 + )" + : + : "r"(PrevTCO)); + } +}; + +inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { + uptr TaggedPtr; __asm__ __volatile__( R"( - .arch_extension mte - - // Set a random tag for Ptr in TaggedPtr. This needs to happen even if - // Size = 0 so that TaggedPtr ends up pointing at a valid address. - irg %[TaggedPtr], %[Ptr] - mov %[Cur], %[TaggedPtr] - - // Skip the loop if Size = 0. We don't want to do any tagging in this case. 
- cbz %[Size], 2f + .arch_extension memtag + irg %[TaggedPtr], %[Ptr], %[ExcludeMask] + )" + : [TaggedPtr] "=r"(TaggedPtr) + : [Ptr] "r"(Ptr), [ExcludeMask] "r"(ExcludeMask)); + return TaggedPtr; +} - // Set the memory tag of the region - // [TaggedPtr, TaggedPtr + roundUpTo(Size, 16)) - // to the pointer tag stored in TaggedPtr. - add %[End], %[TaggedPtr], %[Size] +inline uptr addFixedTag(uptr Ptr, uptr Tag) { return Ptr | (Tag << 56); } +inline uptr storeTags(uptr Begin, uptr End) { + DCHECK(Begin % 16 == 0); + uptr LineSize, Next, Tmp; + __asm__ __volatile__( + R"( + .arch_extension memtag + + // Compute the cache line size in bytes (DCZID_EL0 stores it as the log2 + // of the number of 4-byte words) and bail out to the slow path if DCZID_EL0 + // indicates that the DC instructions are unavailable. + DCZID .req %[Tmp] + mrs DCZID, dczid_el0 + tbnz DCZID, #4, 3f + and DCZID, DCZID, #15 + mov %[LineSize], #4 + lsl %[LineSize], %[LineSize], DCZID + .unreq DCZID + + // Our main loop doesn't handle the case where we don't need to perform any + // DC GZVA operations. If the size of our tagged region is less than + // twice the cache line size, bail out to the slow path since it's not + // guaranteed that we'll be able to do a DC GZVA. + Size .req %[Tmp] + sub Size, %[End], %[Cur] + cmp Size, %[LineSize], lsl #1 + b.lt 3f + .unreq Size + + LineMask .req %[Tmp] + sub LineMask, %[LineSize], #1 + + // STZG until the start of the next cache line. + orr %[Next], %[Cur], LineMask 1: stzg %[Cur], [%[Cur]], #16 - cmp %[Cur], %[End] + cmp %[Cur], %[Next] b.lt 1b + // DC GZVA cache lines until we have no more full cache lines. + bic %[Next], %[End], LineMask + .unreq LineMask 2: + dc gzva, %[Cur] + add %[Cur], %[Cur], %[LineSize] + cmp %[Cur], %[Next] + b.lt 2b + + // STZG until the end of the tagged region. This loop is also used to handle + // slow path cases. + 3: + cmp %[Cur], %[End] + b.ge 4f + stzg %[Cur], [%[Cur]], #16 + b 3b + + 4: )" - : [ TaggedPtr ] "=&r"(*TaggedBegin), [ Cur ] "=&r"(*TaggedEnd), - [ End ] "=&r"(End) - : [ Ptr ] "r"(Ptr), [ Size ] "r"(Size) + : [Cur] "+&r"(Begin), [LineSize] "=&r"(LineSize), [Next] "=&r"(Next), + [Tmp] "=&r"(Tmp) + : [End] "r"(End) : "memory"); + return Begin; } -inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) { - // Prepare the granule before the chunk to store the chunk header by setting - // its tag to 0. Normally its tag will already be 0, but in the case where a - // chunk holding a low alignment allocation is reused for a higher alignment - // allocation, the chunk may already have a non-zero tag from the previous - // allocation. - __asm__ __volatile__(".arch_extension mte; stg %0, [%0, #-16]" - : - : "r"(Ptr) - : "memory"); - - uptr TaggedBegin, TaggedEnd; - setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); - - // Finally, set the tag of the granule past the end of the allocation to 0, - // to catch linear overflows even if a previous larger allocation used the - // same block and tag. Only do this if the granule past the end is in our - // block, because this would otherwise lead to a SEGV if the allocation - // covers the entire block and our block is at the end of a mapping. The tag - // of the next block's header granule will be set to 0, so it will serve the - // purpose of catching linear overflows in this case. 
- uptr UntaggedEnd = untagPointer(TaggedEnd); - if (UntaggedEnd != BlockEnd) - __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" - : - : "r"(UntaggedEnd) - : "memory"); - return reinterpret_cast<void *>(TaggedBegin); -} - -inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { - uptr RoundOldPtr = roundUpTo(OldPtr, 16); - if (RoundOldPtr >= NewPtr) { - // If the allocation is shrinking we just need to set the tag past the end - // of the allocation to 0. See explanation in prepareTaggedChunk above. - uptr RoundNewPtr = untagPointer(roundUpTo(NewPtr, 16)); - if (RoundNewPtr != BlockEnd) - __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" - : - : "r"(RoundNewPtr) - : "memory"); - return; - } - +inline void storeTag(uptr Ptr) { __asm__ __volatile__(R"( - .arch_extension mte - - // Set the memory tag of the region - // [roundUpTo(OldPtr, 16), roundUpTo(NewPtr, 16)) - // to the pointer tag stored in OldPtr. - 1: - stzg %[Cur], [%[Cur]], #16 - cmp %[Cur], %[End] - b.lt 1b - - // Finally, set the tag of the granule past the end of the allocation to 0. - and %[Cur], %[Cur], #(1 << 56) - 1 - cmp %[Cur], %[BlockEnd] - b.eq 2f - stg %[Cur], [%[Cur]] - - 2: + .arch_extension memtag + stg %0, [%0] )" - : [ Cur ] "+&r"(RoundOldPtr), [ End ] "+&r"(NewPtr) - : [ BlockEnd ] "r"(BlockEnd) + : + : "r"(Ptr) : "memory"); } -inline uptr tagPointer(uptr UntaggedPtr, uptr Tag) { - return UntaggedPtr | (Tag & (0xfUL << 56)); -} - inline uptr loadTag(uptr Ptr) { uptr TaggedPtr = Ptr; - __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" - : "+r"(TaggedPtr) - : - : "memory"); + __asm__ __volatile__( + R"( + .arch_extension memtag + ldg %0, [%0] + )" + : "+r"(TaggedPtr) + : + : "memory"); return TaggedPtr; } #else -inline constexpr bool archSupportsMemoryTagging() { return false; } - inline bool systemSupportsMemoryTagging() { UNREACHABLE("memory tagging not supported"); } @@ -179,10 +244,6 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() { UNREACHABLE("memory tagging not supported"); } -inline uptr archMemoryTagGranuleSize() { - UNREACHABLE("memory tagging not supported"); -} - inline void disableMemoryTagChecksTestOnly() { UNREACHABLE("memory tagging not supported"); } @@ -191,31 +252,30 @@ inline void enableMemoryTagChecksTestOnly() { UNREACHABLE("memory tagging not supported"); } -inline uptr untagPointer(uptr Ptr) { +struct ScopedDisableMemoryTagChecks { + ScopedDisableMemoryTagChecks() {} +}; + +inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { (void)Ptr; + (void)ExcludeMask; UNREACHABLE("memory tagging not supported"); } -inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, - uptr *TaggedEnd) { +inline uptr addFixedTag(uptr Ptr, uptr Tag) { (void)Ptr; - (void)Size; - (void)TaggedBegin; - (void)TaggedEnd; + (void)Tag; UNREACHABLE("memory tagging not supported"); } -inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) { - (void)Ptr; - (void)Size; - (void)BlockEnd; +inline uptr storeTags(uptr Begin, uptr End) { + (void)Begin; + (void)End; UNREACHABLE("memory tagging not supported"); } -inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { - (void)OldPtr; - (void)NewPtr; - (void)BlockEnd; +inline void storeTag(uptr Ptr) { + (void)Ptr; UNREACHABLE("memory tagging not supported"); } @@ -226,6 +286,30 @@ inline uptr loadTag(uptr Ptr) { #endif +inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, + uptr *TaggedBegin, uptr *TaggedEnd) { + *TaggedBegin = selectRandomTag(reinterpret_cast<uptr>(Ptr), ExcludeMask); 
+ *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size); +} + +inline void *untagPointer(void *Ptr) { + return reinterpret_cast<void *>(untagPointer(reinterpret_cast<uptr>(Ptr))); +} + +inline void *loadTag(void *Ptr) { + return reinterpret_cast<void *>(loadTag(reinterpret_cast<uptr>(Ptr))); +} + +inline void *addFixedTag(void *Ptr, uptr Tag) { + return reinterpret_cast<void *>( + addFixedTag(reinterpret_cast<uptr>(Ptr), Tag)); +} + +template <typename Config> +inline constexpr bool allocatorSupportsMemoryTagging() { + return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging; +} + } // namespace scudo #endif diff --git a/standalone/mutex.h b/standalone/mutex.h index b26b2df0662..a654d35c5a7 100644 --- a/standalone/mutex.h +++ b/standalone/mutex.h @@ -22,7 +22,7 @@ namespace scudo { class HybridMutex { public: - void init() { memset(this, 0, sizeof(*this)); } + void init() { M = {}; } bool tryLock(); NOINLINE void lock() { if (LIKELY(tryLock())) @@ -48,9 +48,9 @@ private: static constexpr u8 NumberOfYields = 8U; #if SCUDO_LINUX - atomic_u32 M; + atomic_u32 M = {}; #elif SCUDO_FUCHSIA - sync_mutex_t M; + sync_mutex_t M = {}; #endif void lockSlow(); diff --git a/standalone/options.h b/standalone/options.h new file mode 100644 index 00000000000..4e678651333 --- /dev/null +++ b/standalone/options.h @@ -0,0 +1,74 @@ +//===-- options.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_OPTIONS_H_ +#define SCUDO_OPTIONS_H_ + +#include "atomic_helpers.h" +#include "common.h" +#include "memtag.h" + +namespace scudo { + +enum class OptionBit { + MayReturnNull, + FillContents0of2, + FillContents1of2, + DeallocTypeMismatch, + DeleteSizeMismatch, + TrackAllocationStacks, + UseOddEvenTags, + UseMemoryTagging, + AddLargeAllocationSlack, +}; + +struct Options { + u32 Val; + + bool get(OptionBit Opt) const { return Val & (1U << static_cast<u32>(Opt)); } + + FillContentsMode getFillContentsMode() const { + return static_cast<FillContentsMode>( + (Val >> static_cast<u32>(OptionBit::FillContents0of2)) & 3); + } +}; + +template <typename Config> bool useMemoryTagging(Options Options) { + return allocatorSupportsMemoryTagging<Config>() && + Options.get(OptionBit::UseMemoryTagging); +} + +struct AtomicOptions { + atomic_u32 Val = {}; + + Options load() const { return Options{atomic_load_relaxed(&Val)}; } + + void clear(OptionBit Opt) { + atomic_fetch_and(&Val, ~(1U << static_cast<u32>(Opt)), + memory_order_relaxed); + } + + void set(OptionBit Opt) { + atomic_fetch_or(&Val, 1U << static_cast<u32>(Opt), memory_order_relaxed); + } + + void setFillContentsMode(FillContentsMode FillContents) { + u32 Opts = atomic_load_relaxed(&Val), NewOpts; + do { + NewOpts = Opts; + NewOpts &= ~(3U << static_cast<u32>(OptionBit::FillContents0of2)); + NewOpts |= static_cast<u32>(FillContents) + << static_cast<u32>(OptionBit::FillContents0of2); + } while (!atomic_compare_exchange_strong(&Val, &Opts, NewOpts, + memory_order_relaxed)); + } +}; + +} // namespace scudo + +#endif // SCUDO_OPTIONS_H_ diff --git a/standalone/primary32.h b/standalone/primary32.h index 7d061e2cbcc..33d81754fb5 100644 --- a/standalone/primary32.h +++ b/standalone/primary32.h @@ -13,6 +13,7 @@ #include "common.h" #include 
"list.h" #include "local_cache.h" +#include "options.h" #include "release.h" #include "report.h" #include "stats.h" @@ -38,23 +39,18 @@ namespace scudo { // Memory used by this allocator is never unmapped but can be partially // reclaimed if the platform allows for it. -template <class SizeClassMapT, uptr RegionSizeLog, - s32 MinReleaseToOsIntervalMs = INT32_MIN, - s32 MaxReleaseToOsIntervalMs = INT32_MAX> -class SizeClassAllocator32 { +template <typename Config> class SizeClassAllocator32 { public: - typedef SizeClassMapT SizeClassMap; + typedef typename Config::PrimaryCompactPtrT CompactPtrT; + typedef typename Config::SizeClassMap SizeClassMap; // The bytemap can only track UINT8_MAX - 1 classes. static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. - static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); - typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog, - MinReleaseToOsIntervalMs, - MaxReleaseToOsIntervalMs> - ThisT; + static_assert((1UL << Config::PrimaryRegionSizeLog) >= SizeClassMap::MaxSize, + ""); + typedef SizeClassAllocator32<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; - static const bool SupportsMemoryTagging = false; static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) @@ -69,24 +65,20 @@ public: reportError("SizeClassAllocator32 is not supported on Fuchsia"); PossibleRegions.initLinkerInitialized(); - MinRegionIndex = NumRegions; // MaxRegionIndex is already initialized to 0. u32 Seed; const u64 Time = getMonotonicTime(); - if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) + if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>( Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); - const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); Sci->RandState = getRandomU32(&Seed); - // See comment in the 64-bit primary about releasing smaller size classes. - Sci->CanRelease = (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 32)); - if (Sci->CanRelease) - Sci->ReleaseInfo.LastReleaseAtNs = Time; + // Sci->MaxRegionIndex is already initialized to 0. 
+ Sci->MinRegionIndex = NumRegions; + Sci->ReleaseInfo.LastReleaseAtNs = Time; } - setReleaseToOsIntervalMs(ReleaseToOsInterval); + setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -97,12 +89,28 @@ public: while (NumberOfStashedRegions > 0) unmap(reinterpret_cast<void *>(RegionsStash[--NumberOfStashedRegions]), RegionSize); - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + if (Sci->MinRegionIndex < MinRegionIndex) + MinRegionIndex = Sci->MinRegionIndex; + if (Sci->MaxRegionIndex > MaxRegionIndex) + MaxRegionIndex = Sci->MaxRegionIndex; + } + for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++) if (PossibleRegions[I]) unmap(reinterpret_cast<void *>(I * RegionSize), RegionSize); PossibleRegions.unmapTestOnly(); } + CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const { + return static_cast<CompactPtrT>(Ptr); + } + + void *decompactPtr(UNUSED uptr ClassId, CompactPtrT CompactPtr) const { + return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr)); + } + TransferBatch *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); @@ -127,7 +135,7 @@ public: ScopedLock L(Sci->Mutex); Sci->FreeList.push_front(B); Sci->Stats.PushedBlocks += B->getCount(); - if (Sci->CanRelease) + if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Sci, ClassId); } @@ -155,6 +163,14 @@ public: } template <typename F> void iterateOverBlocks(F Callback) { + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + if (Sci->MinRegionIndex < MinRegionIndex) + MinRegionIndex = Sci->MinRegionIndex; + if (Sci->MaxRegionIndex > MaxRegionIndex) + MaxRegionIndex = Sci->MaxRegionIndex; + } for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) if (PossibleRegions[I] && (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { @@ -184,18 +200,23 @@ public: getStats(Str, I, 0); } - void setReleaseToOsIntervalMs(s32 Interval) { - if (Interval >= MaxReleaseToOsIntervalMs) { - Interval = MaxReleaseToOsIntervalMs; - } else if (Interval <= MinReleaseToOsIntervalMs) { - Interval = MinReleaseToOsIntervalMs; + bool setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + const s32 Interval = Max( + Min(static_cast<s32>(Value), Config::PrimaryMaxReleaseToOsIntervalMs), + Config::PrimaryMinReleaseToOsIntervalMs); + atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); + return true; } - atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + // Not supported by the Primary, but not an error either. 
+ return true; } uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); @@ -203,13 +224,21 @@ public: return TotalReleasedBytes; } - bool useMemoryTagging() { return false; } - void disableMemoryTagging() {} + const char *getRegionInfoArrayAddress() const { return nullptr; } + static uptr getRegionInfoArraySize() { return 0; } + + static BlockInfo findNearestBlock(UNUSED const char *RegionInfoData, + UNUSED uptr Ptr) { + return {}; + } + + AtomicOptions Options; private: static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr RegionSize = 1UL << RegionSizeLog; - static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> RegionSizeLog; + static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; + static const uptr NumRegions = + SCUDO_MMAP_RANGE_SIZE >> Config::PrimaryRegionSizeLog; static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; typedef FlatByteMap<NumRegions> ByteMap; @@ -225,21 +254,24 @@ private: u64 LastReleaseAtNs; }; - struct ALIGNED(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { + struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; SinglyLinkedList<TransferBatch> FreeList; uptr CurrentRegion; uptr CurrentRegionAllocated; SizeClassStats Stats; - bool CanRelease; u32 RandState; uptr AllocatedUser; + // Lowest & highest region index allocated for this size class, to avoid + // looping through the whole NumRegions. + uptr MinRegionIndex; + uptr MaxRegionIndex; ReleaseToOsInfo ReleaseInfo; }; static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { - const uptr Id = Mem >> RegionSizeLog; + const uptr Id = Mem >> Config::PrimaryRegionSizeLog; CHECK_LT(Id, NumRegions); return Id; } @@ -248,7 +280,7 @@ private: uptr MapSize = 2 * RegionSize; const uptr MapBase = reinterpret_cast<uptr>( map(nullptr, MapSize, "scudo:primary", MAP_ALLOWNOMEM)); - if (UNLIKELY(!MapBase)) + if (!MapBase) return 0; const uptr MapEnd = MapBase + MapSize; uptr Region = MapBase; @@ -269,7 +301,7 @@ private: return Region; } - uptr allocateRegion(uptr ClassId) { + uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); uptr Region = 0; { @@ -280,11 +312,12 @@ private: if (!Region) Region = allocateRegionSlow(); if (LIKELY(Region)) { + // Sci->Mutex is held by the caller, updating the Min/Max is safe. 
const uptr RegionIndex = computeRegionId(Region); - if (RegionIndex < MinRegionIndex) - MinRegionIndex = RegionIndex; - if (RegionIndex > MaxRegionIndex) - MaxRegionIndex = RegionIndex; + if (RegionIndex < Sci->MinRegionIndex) + Sci->MinRegionIndex = RegionIndex; + if (RegionIndex > Sci->MaxRegionIndex) + Sci->MaxRegionIndex = RegionIndex; PossibleRegions.set(RegionIndex, static_cast<u8>(ClassId + 1U)); } return Region; @@ -295,29 +328,6 @@ private: return &SizeClassInfoArray[ClassId]; } - bool populateBatches(CacheT *C, SizeClassInfo *Sci, uptr ClassId, - TransferBatch **CurrentBatch, u32 MaxCount, - void **PointersArray, u32 Count) { - if (ClassId != SizeClassMap::BatchClassId) - shuffle(PointersArray, Count, &Sci->RandState); - TransferBatch *B = *CurrentBatch; - for (uptr I = 0; I < Count; I++) { - if (B && B->getCount() == MaxCount) { - Sci->FreeList.push_back(B); - B = nullptr; - } - if (!B) { - B = C->createBatch(ClassId, PointersArray[I]); - if (UNLIKELY(!B)) - return false; - B->clear(); - } - B->add(PointersArray[I]); - } - *CurrentBatch = B; - return true; - } - NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) { uptr Region; @@ -332,7 +342,7 @@ private: Offset = Sci->CurrentRegionAllocated; } else { DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); - Region = allocateRegion(ClassId); + Region = allocateRegion(Sci, ClassId); if (UNLIKELY(!Region)) return nullptr; C->getStats().add(StatMapped, RegionSize); @@ -353,38 +363,36 @@ private: static_cast<u32>((RegionSize - Offset) / Size)); DCHECK_GT(NumberOfBlocks, 0U); - TransferBatch *B = nullptr; constexpr u32 ShuffleArraySize = MaxNumBatches * TransferBatch::MaxNumCached; // Fill the transfer batches and put them in the size-class freelist. We // need to randomize the blocks for security purposes, so we first fill a // local array that we then shuffle before populating the batches. - void *ShuffleArray[ShuffleArraySize]; - u32 Count = 0; - const uptr AllocatedUser = Size * NumberOfBlocks; - for (uptr I = Region + Offset; I < Region + Offset + AllocatedUser; - I += Size) { - ShuffleArray[Count++] = reinterpret_cast<void *>(I); - if (Count == ShuffleArraySize) { - if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount, - ShuffleArray, Count))) - return nullptr; - Count = 0; - } - } - if (Count) { - if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount, ShuffleArray, - Count))) + CompactPtrT ShuffleArray[ShuffleArraySize]; + DCHECK_LE(NumberOfBlocks, ShuffleArraySize); + + uptr P = Region + Offset; + for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) + ShuffleArray[I] = reinterpret_cast<CompactPtrT>(P); + // No need to shuffle the batches size class. 
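// Illustrative sketch, not part of this patch: the loop below carves the
// shuffled array of compact pointers into TransferBatches of at most MaxCount
// entries (via setFromArray) and pushes them onto the free list. A simplified
// stand-in follows, with std::vector replacing the fixed arrays and the
// intrusive list.
#include <algorithm>
#include <cstdint>
#include <vector>

using CompactPtr = uint32_t; // stand-in for CompactPtrT

std::vector<std::vector<CompactPtr>>
splitIntoBatches(const std::vector<CompactPtr> &Blocks, size_t MaxCount) {
  std::vector<std::vector<CompactPtr>> Batches;
  for (size_t I = 0; I < Blocks.size();) {
    const size_t N = std::min(MaxCount, Blocks.size() - I);
    Batches.emplace_back(Blocks.begin() + I, Blocks.begin() + I + N);
    I += N;
  }
  return Batches;
}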
+ if (ClassId != SizeClassMap::BatchClassId) + shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState); + for (u32 I = 0; I < NumberOfBlocks;) { + TransferBatch *B = + C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I])); + if (UNLIKELY(!B)) return nullptr; - } - DCHECK(B); - if (!Sci->FreeList.empty()) { + const u32 N = Min(MaxCount, NumberOfBlocks - I); + B->setFromArray(&ShuffleArray[I], N); Sci->FreeList.push_back(B); - B = Sci->FreeList.front(); - Sci->FreeList.pop_front(); + I += N; } + TransferBatch *B = Sci->FreeList.front(); + Sci->FreeList.pop_front(); + DCHECK(B); DCHECK_GT(B->getCount(), 0); + const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize); // If there is not enough room in the region currently associated to fit @@ -414,16 +422,12 @@ private: AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased); } - s32 getReleaseToOsIntervalMs() { - return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); - } - NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, bool Force = false) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); - CHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks); + DCHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks); const uptr BytesInFreeList = Sci->AllocatedUser - (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; @@ -441,14 +445,14 @@ private: if (BlockSize < PageSize / 16U) { if (!Force && BytesPushed < Sci->AllocatedUser / 16U) return 0; - // We want 8x% to 9x% free bytes (the larger the bock, the lower the %). + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). if ((BytesInFreeList * 100U) / Sci->AllocatedUser < (100U - 1U - BlockSize / 16U)) return 0; } if (!Force) { - const s32 IntervalMs = getReleaseToOsIntervalMs(); + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) return 0; if (Sci->ReleaseInfo.LastReleaseAtNs + @@ -458,54 +462,44 @@ private: } } - DCHECK_GT(MinRegionIndex, 0U); - uptr First = 0; - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { - if (PossibleRegions[I] - 1U == ClassId) { - First = I; - break; - } - } - uptr Last = 0; - for (uptr I = MaxRegionIndex; I >= MinRegionIndex; I--) { - if (PossibleRegions[I] - 1U == ClassId) { - Last = I; - break; - } - } + const uptr First = Sci->MinRegionIndex; + const uptr Last = Sci->MaxRegionIndex; + DCHECK_NE(Last, 0U); + DCHECK_LE(First, Last); uptr TotalReleasedBytes = 0; - if (First != 0U && Last != 0U) { - const uptr Base = First * RegionSize; - const uptr NumberOfRegions = Last - First + 1U; - ReleaseRecorder Recorder(Base); - releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions, - BlockSize, &Recorder); - if (Recorder.getReleasedRangesCount() > 0) { - Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; - Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); - Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; - } + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + ReleaseRecorder Recorder(Base); + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + auto DecompactPtr = [](CompactPtrT CompactPtr) { + return reinterpret_cast<uptr>(CompactPtr); + }; + 
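// Illustrative arithmetic, not part of this patch: for small blocks
// (BlockSize < PageSize / 16) the heuristic earlier in this function only
// attempts a release once (100 - 1 - BlockSize / 16) percent of the class's
// allocated bytes sit in the free list:
//   BlockSize =  32  ->  100 - 1 - 2  = 97% free required
//   BlockSize = 128  ->  100 - 1 - 8  = 91%
//   BlockSize = 240  ->  100 - 1 - 15 = 84%
// which is the "8x% to 9x%" range mentioned in the comment above.
#include <cstdint>

bool freeRatioHighEnough(uint64_t BytesInFreeList, uint64_t AllocatedUser,
                         uint64_t BlockSize) {
  // AllocatedUser is assumed to be non-zero here, as it is at the call site.
  return (BytesInFreeList * 100u) / AllocatedUser >=
         (100u - 1u - BlockSize / 16u);
}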
releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize, + &Recorder, DecompactPtr, SkipRegion); + if (Recorder.getReleasedRangesCount() > 0) { + Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; + Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); + Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; } Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + return TotalReleasedBytes; } - SizeClassInfo SizeClassInfoArray[NumClasses]; + SizeClassInfo SizeClassInfoArray[NumClasses] = {}; // Track the regions in use, 0 is unused, otherwise store ClassId + 1. - ByteMap PossibleRegions; - // Keep track of the lowest & highest regions allocated to avoid looping - // through the whole NumRegions. - uptr MinRegionIndex; - uptr MaxRegionIndex; - atomic_s32 ReleaseToOsIntervalMs; + ByteMap PossibleRegions = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. static constexpr uptr MaxStashedRegions = 4; HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions; - uptr RegionsStash[MaxStashedRegions]; + uptr NumberOfStashedRegions = 0; + uptr RegionsStash[MaxStashedRegions] = {}; }; } // namespace scudo diff --git a/standalone/primary64.h b/standalone/primary64.h index 7bdb7ae6e49..94375fceee1 100644 --- a/standalone/primary64.h +++ b/standalone/primary64.h @@ -14,6 +14,7 @@ #include "list.h" #include "local_cache.h" #include "memtag.h" +#include "options.h" #include "release.h" #include "stats.h" #include "string_utils.h" @@ -39,25 +40,18 @@ namespace scudo { // The memory used by this allocator is never unmapped, but can be partially // released if the platform allows for it. -template <class SizeClassMapT, uptr RegionSizeLog, - s32 MinReleaseToOsIntervalMs = INT32_MIN, - s32 MaxReleaseToOsIntervalMs = INT32_MAX, - bool MaySupportMemoryTagging = false> -class SizeClassAllocator64 { +template <typename Config> class SizeClassAllocator64 { public: - typedef SizeClassMapT SizeClassMap; - typedef SizeClassAllocator64< - SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs, - MaxReleaseToOsIntervalMs, MaySupportMemoryTagging> - ThisT; + typedef typename Config::PrimaryCompactPtrT CompactPtrT; + static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale; + typedef typename Config::SizeClassMap SizeClassMap; + typedef SizeClassAllocator64<Config> ThisT; typedef SizeClassAllocatorLocalCache<ThisT> CacheT; typedef typename CacheT::TransferBatch TransferBatch; - static const bool SupportsMemoryTagging = - MaySupportMemoryTagging && archSupportsMemoryTagging(); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? sizeof(TransferBatch) + ? roundUpTo(sizeof(TransferBatch), 1U << CompactPtrScale) : SizeClassMap::getSizeByClassId(ClassId); } @@ -66,11 +60,11 @@ public: void initLinkerInitialized(s32 ReleaseToOsInterval) { // Reserve the space required for the Primary. 
PrimaryBase = reinterpret_cast<uptr>( - map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data)); + map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data)); u32 Seed; const u64 Time = getMonotonicTime(); - if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) + if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { @@ -79,22 +73,9 @@ public: Region->RegionBeg = getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize; Region->RandState = getRandomU32(&Seed); - // Releasing smaller size classes doesn't necessarily yield to a - // meaningful RSS impact: there are more blocks per page, they are - // randomized around, and thus pages are less likely to be entirely empty. - // On top of this, attempting to release those require more iterations and - // memory accesses which ends up being fairly costly. The current lower - // limit is mostly arbitrary and based on empirical observations. - // TODO(kostyak): make the lower limit a runtime option - Region->CanRelease = (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 32)); - if (Region->CanRelease) - Region->ReleaseInfo.LastReleaseAtNs = Time; + Region->ReleaseInfo.LastReleaseAtNs = Time; } - setReleaseToOsIntervalMs(ReleaseToOsInterval); - - if (SupportsMemoryTagging) - UseMemoryTagging = systemSupportsMemoryTagging(); + setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -128,7 +109,7 @@ public: ScopedLock L(Region->Mutex); Region->FreeList.push_front(B); Region->Stats.PushedBlocks += B->getCount(); - if (Region->CanRelease) + if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Region, ClassId); } @@ -185,18 +166,23 @@ public: getStats(Str, I, 0); } - void setReleaseToOsIntervalMs(s32 Interval) { - if (Interval >= MaxReleaseToOsIntervalMs) { - Interval = MaxReleaseToOsIntervalMs; - } else if (Interval <= MinReleaseToOsIntervalMs) { - Interval = MinReleaseToOsIntervalMs; + bool setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + const s32 Interval = Max( + Min(static_cast<s32>(Value), Config::PrimaryMaxReleaseToOsIntervalMs), + Config::PrimaryMinReleaseToOsIntervalMs); + atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); + return true; } - atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + // Not supported by the Primary, but not an error either. + return true; } uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; RegionInfo *Region = getRegionInfo(I); ScopedLock L(Region->Mutex); TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); @@ -204,13 +190,78 @@ public: return TotalReleasedBytes; } - bool useMemoryTagging() const { - return SupportsMemoryTagging && UseMemoryTagging; + const char *getRegionInfoArrayAddress() const { + return reinterpret_cast<const char *>(RegionInfoArray); } - void disableMemoryTagging() { UseMemoryTagging = false; } + + static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } + + uptr getCompactPtrBaseByClassId(uptr ClassId) { + // If we are not compacting pointers, base everything off of 0. 
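// Illustrative sketch, not part of this patch: when pointers are compacted
// (a 32-bit PrimaryCompactPtrT with a non-zero PrimaryCompactPtrScale), a
// block address is stored as its region-relative offset shifted right by the
// scale, and decompaction reverses it, mirroring compactPtrInternal() /
// decompactPtrInternal() later in this file. Types and values below are
// stand-ins; this is a 64-bit example.
#include <cassert>
#include <cstdint>

using uptr = uintptr_t;
using CompactPtrT = uint32_t;        // hypothetical Config::PrimaryCompactPtrT
constexpr uptr CompactPtrScale = 4;  // hypothetical Config::PrimaryCompactPtrScale

CompactPtrT compact(uptr Base, uptr Ptr) {
  return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
}

uptr decompact(uptr Base, CompactPtrT C) {
  return Base + (static_cast<uptr>(C) << CompactPtrScale);
}

void roundTripExample() {
  const uptr Base = 0x7000000000;  // hypothetical RegionBeg
  const uptr Ptr = Base + 0x1230;  // 16-byte aligned offset within the region
  assert(decompact(Base, compact(Base, Ptr)) == Ptr);
}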
+ if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0) + return 0; + return getRegionInfo(ClassId)->RegionBeg; + } + + CompactPtrT compactPtr(uptr ClassId, uptr Ptr) { + DCHECK_LE(ClassId, SizeClassMap::LargestClassId); + return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr); + } + + void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) { + DCHECK_LE(ClassId, SizeClassMap::LargestClassId); + return reinterpret_cast<void *>( + decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); + } + + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + const RegionInfo *RegionInfoArray = + reinterpret_cast<const RegionInfo *>(RegionInfoData); + uptr ClassId; + uptr MinDistance = -1UL; + for (uptr I = 0; I != NumClasses; ++I) { + if (I == SizeClassMap::BatchClassId) + continue; + uptr Begin = RegionInfoArray[I].RegionBeg; + uptr End = Begin + RegionInfoArray[I].AllocatedUser; + if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) + continue; + uptr RegionDistance; + if (Begin <= Ptr) { + if (Ptr < End) + RegionDistance = 0; + else + RegionDistance = Ptr - End; + } else { + RegionDistance = Begin - Ptr; + } + + if (RegionDistance < MinDistance) { + MinDistance = RegionDistance; + ClassId = I; + } + } + + BlockInfo B = {}; + if (MinDistance <= 8192) { + B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; + B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser; + B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); + B.BlockBegin = + B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * + sptr(B.BlockSize)); + while (B.BlockBegin < B.RegionBegin) + B.BlockBegin += B.BlockSize; + while (B.RegionEnd < B.BlockBegin + B.BlockSize) + B.BlockBegin -= B.BlockSize; + } + return B; + } + + AtomicOptions Options; private: - static const uptr RegionSize = 1UL << RegionSizeLog; + static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr PrimarySize = RegionSize * NumClasses; @@ -231,26 +282,28 @@ private: u64 LastReleaseAtNs; }; - struct ALIGNED(SCUDO_CACHE_LINE_SIZE) RegionInfo { + struct UnpaddedRegionInfo { HybridMutex Mutex; SinglyLinkedList<TransferBatch> FreeList; - RegionStats Stats; - bool CanRelease; - bool Exhausted; - u32 RandState; - uptr RegionBeg; - uptr MappedUser; // Bytes mapped for user memory. - uptr AllocatedUser; // Bytes allocated for user memory. - MapPlatformData Data; - ReleaseToOsInfo ReleaseInfo; + uptr RegionBeg = 0; + RegionStats Stats = {}; + u32 RandState = 0; + uptr MappedUser = 0; // Bytes mapped for user memory. + uptr AllocatedUser = 0; // Bytes allocated for user memory. 
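// Illustrative arithmetic, not part of this patch: findNearestBlock() above
// recovers the start of the block containing (or nearest to) a fault address
// by rounding the offset within the chosen region down to a multiple of the
// block size. Worked example: RegionBeg = 0x1000, BlockSize = 48, Ptr = 0x1234:
//   offset = 0x234 = 564,  564 / 48 = 11,  11 * 48 = 528 = 0x210,
//   BlockBegin = 0x1000 + 0x210 = 0x1210.
// The sketch below assumes Ptr >= RegionBeg; the real code also handles
// addresses just outside the region with the signed division and fix-up loops.
#include <cstdint>

using uptr = uintptr_t;

uptr blockBegin(uptr RegionBeg, uptr BlockSize, uptr Ptr) {
  return RegionBeg + ((Ptr - RegionBeg) / BlockSize) * BlockSize;
}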
+ MapPlatformData Data = {}; + ReleaseToOsInfo ReleaseInfo = {}; + bool Exhausted = false; + }; + struct RegionInfo : UnpaddedRegionInfo { + char Padding[SCUDO_CACHE_LINE_SIZE - + (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {}; }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); - uptr PrimaryBase; - MapPlatformData Data; - atomic_s32 ReleaseToOsIntervalMs; - bool UseMemoryTagging; - RegionInfo RegionInfoArray[NumClasses]; + uptr PrimaryBase = 0; + MapPlatformData Data = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); @@ -258,31 +311,15 @@ private: } uptr getRegionBaseByClassId(uptr ClassId) const { - return PrimaryBase + (ClassId << RegionSizeLog); + return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); } - bool populateBatches(CacheT *C, RegionInfo *Region, uptr ClassId, - TransferBatch **CurrentBatch, u32 MaxCount, - void **PointersArray, u32 Count) { - // No need to shuffle the batches size class. - if (ClassId != SizeClassMap::BatchClassId) - shuffle(PointersArray, Count, &Region->RandState); - TransferBatch *B = *CurrentBatch; - for (uptr I = 0; I < Count; I++) { - if (B && B->getCount() == MaxCount) { - Region->FreeList.push_back(B); - B = nullptr; - } - if (!B) { - B = C->createBatch(ClassId, PointersArray[I]); - if (UNLIKELY(!B)) - return false; - B->clear(); - } - B->add(PointersArray[I]); - } - *CurrentBatch = B; - return true; + static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { + return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale); + } + + static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) { + return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale); } NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, @@ -296,31 +333,32 @@ private: // Map more space for blocks, if necessary. if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. - const uptr UserMapSize = + const uptr MapSize = roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement); const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); - if (UNLIKELY(RegionBase + MappedUser + UserMapSize > RegionSize)) { + if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { if (!Region->Exhausted) { Region->Exhausted = true; ScopedString Str(1024); getStats(&Str); Str.append( - "Scudo OOM: The process has Exhausted %zuM for size class %zu.\n", + "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", RegionSize >> 20, Size); Str.output(); } return nullptr; } - if (UNLIKELY(MappedUser == 0)) + if (MappedUser == 0) Region->Data = Data; - if (UNLIKELY(!map(reinterpret_cast<void *>(RegionBeg + MappedUser), - UserMapSize, "scudo:primary", - MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging() ? MAP_MEMTAG : 0), - &Region->Data))) + if (UNLIKELY(!map( + reinterpret_cast<void *>(RegionBeg + MappedUser), MapSize, + "scudo:primary", + MAP_ALLOWNOMEM | MAP_RESIZABLE | + (useMemoryTagging<Config>(Options.load()) ? 
MAP_MEMTAG : 0), + &Region->Data))) return nullptr; - Region->MappedUser += UserMapSize; - C->getStats().add(StatMapped, UserMapSize); + Region->MappedUser += MapSize; + C->getStats().add(StatMapped, MapSize); } const u32 NumberOfBlocks = Min( @@ -328,38 +366,37 @@ private: static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size)); DCHECK_GT(NumberOfBlocks, 0); - TransferBatch *B = nullptr; constexpr u32 ShuffleArraySize = MaxNumBatches * TransferBatch::MaxNumCached; - void *ShuffleArray[ShuffleArraySize]; - u32 Count = 0; - const uptr P = RegionBeg + Region->AllocatedUser; - const uptr AllocatedUser = Size * NumberOfBlocks; - for (uptr I = P; I < P + AllocatedUser; I += Size) { - ShuffleArray[Count++] = reinterpret_cast<void *>(I); - if (Count == ShuffleArraySize) { - if (UNLIKELY(!populateBatches(C, Region, ClassId, &B, MaxCount, - ShuffleArray, Count))) - return nullptr; - Count = 0; - } - } - if (Count) { - if (UNLIKELY(!populateBatches(C, Region, ClassId, &B, MaxCount, - ShuffleArray, Count))) + CompactPtrT ShuffleArray[ShuffleArraySize]; + DCHECK_LE(NumberOfBlocks, ShuffleArraySize); + + const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); + uptr P = RegionBeg + Region->AllocatedUser; + for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) + ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); + // No need to shuffle the batches size class. + if (ClassId != SizeClassMap::BatchClassId) + shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState); + for (u32 I = 0; I < NumberOfBlocks;) { + TransferBatch *B = + C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal( + CompactPtrBase, ShuffleArray[I]))); + if (UNLIKELY(!B)) return nullptr; - } - DCHECK(B); - if (!Region->FreeList.empty()) { + const u32 N = Min(MaxCount, NumberOfBlocks - I); + B->setFromArray(&ShuffleArray[I], N); Region->FreeList.push_back(B); - B = Region->FreeList.front(); - Region->FreeList.pop_front(); + I += N; } + TransferBatch *B = Region->FreeList.front(); + Region->FreeList.pop_front(); + DCHECK(B); DCHECK_GT(B->getCount(), 0); + const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); Region->AllocatedUser += AllocatedUser; - Region->Exhausted = false; return B; } @@ -381,16 +418,12 @@ private: getRegionBaseByClassId(ClassId)); } - s32 getReleaseToOsIntervalMs() { - return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); - } - NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, bool Force = false) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); - CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); + DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); const uptr BytesInFreeList = Region->AllocatedUser - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; @@ -408,14 +441,14 @@ private: if (BlockSize < PageSize / 16U) { if (!Force && BytesPushed < Region->AllocatedUser / 16U) return 0; - // We want 8x% to 9x% free bytes (the larger the bock, the lower the %). + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). 
if ((BytesInFreeList * 100U) / Region->AllocatedUser < (100U - 1U - BlockSize / 16U)) return 0; } if (!Force) { - const s32 IntervalMs = getReleaseToOsIntervalMs(); + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) return 0; if (Region->ReleaseInfo.LastReleaseAtNs + @@ -426,8 +459,13 @@ private: } ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); - releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg, - Region->AllocatedUser, 1U, BlockSize, &Recorder); + const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); + auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { + return decompactPtrInternal(CompactPtrBase, CompactPtr); + }; + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U, + BlockSize, &Recorder, DecompactPtr, SkipRegion); if (Recorder.getReleasedRangesCount() > 0) { Region->ReleaseInfo.PushedBlocksAtLastRelease = diff --git a/standalone/quarantine.h b/standalone/quarantine.h index 406a0e23804..8d4b38e21fc 100644 --- a/standalone/quarantine.h +++ b/standalone/quarantine.h @@ -161,7 +161,7 @@ public: private: SinglyLinkedList<QuarantineBatch> List; - atomic_uptr Size; + atomic_uptr Size = {}; void addToSize(uptr add) { atomic_store_relaxed(&Size, getSize() + add); } void subFromSize(uptr sub) { atomic_store_relaxed(&Size, getSize() - sub); } @@ -187,7 +187,12 @@ public: Cache.initLinkerInitialized(); } void init(uptr Size, uptr CacheSize) { - memset(this, 0, sizeof(*this)); + CacheMutex.init(); + Cache.init(); + RecycleMutex.init(); + MinSize = {}; + MaxSize = {}; + MaxCacheSize = {}; initLinkerInitialized(Size, CacheSize); } @@ -241,9 +246,9 @@ private: alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; CacheT Cache; alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; - atomic_uptr MinSize; - atomic_uptr MaxSize; - alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize; + atomic_uptr MinSize = {}; + atomic_uptr MaxSize = {}; + alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; void NOINLINE recycle(uptr MinSize, Callback Cb) { CacheT Tmp; diff --git a/standalone/release.h b/standalone/release.h index b50f36fa0c0..293a8bc27ba 100644 --- a/standalone/release.h +++ b/standalone/release.h @@ -17,17 +17,19 @@ namespace scudo { class ReleaseRecorder { public: - ReleaseRecorder(uptr BaseAddress, MapPlatformData *Data = nullptr) - : BaseAddress(BaseAddress), Data(Data) {} + ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr) + : Base(Base), Data(Data) {} uptr getReleasedRangesCount() const { return ReleasedRangesCount; } uptr getReleasedBytes() const { return ReleasedBytes; } + uptr getBase() const { return Base; } + // Releases [From, To) range of pages back to OS. 
void releasePageRangeToOS(uptr From, uptr To) { const uptr Size = To - From; - releasePagesToOS(BaseAddress, From, Size, Data); + releasePagesToOS(Base, From, Size, Data); ReleasedRangesCount++; ReleasedBytes += Size; } @@ -35,7 +37,7 @@ public: private: uptr ReleasedRangesCount = 0; uptr ReleasedBytes = 0; - uptr BaseAddress = 0; + uptr Base = 0; MapPlatformData *Data = nullptr; }; @@ -52,20 +54,20 @@ public: PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) : Regions(NumberOfRegions), NumCounters(CountersPerRegion) { - CHECK_GT(Regions, 0); - CHECK_GT(NumCounters, 0); - CHECK_GT(MaxValue, 0); + DCHECK_GT(Regions, 0); + DCHECK_GT(NumCounters, 0); + DCHECK_GT(MaxValue, 0); constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; // Rounding counter storage size up to the power of two allows for using // bit shifts calculating particular counter's Index and offset. const uptr CounterSizeBits = roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); - CHECK_LE(CounterSizeBits, MaxCounterBits); + DCHECK_LE(CounterSizeBits, MaxCounterBits); CounterSizeBitsLog = getLog2(CounterSizeBits); CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits); const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog; - CHECK_GT(PackingRatio, 0); + DCHECK_GT(PackingRatio, 0); PackingRatioLog = getLog2(PackingRatio); BitOffsetMask = PackingRatio - 1; @@ -79,7 +81,8 @@ public: memset(Buffer, 0, BufferSize); } else { Buffer = reinterpret_cast<uptr *>( - map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + map(nullptr, roundUpTo(BufferSize, getPageSizeCached()), + "scudo:counters", MAP_ALLOWNOMEM)); } } ~PackedCounterArray() { @@ -88,12 +91,12 @@ public: if (Buffer == &StaticBuffer[0]) Mutex.unlock(); else - unmap(reinterpret_cast<void *>(Buffer), BufferSize); + unmap(reinterpret_cast<void *>(Buffer), + roundUpTo(BufferSize, getPageSizeCached())); } bool isAllocated() const { return !!Buffer; } - uptr getCount() const { return NumCounters; } uptr get(uptr Region, uptr I) const { @@ -157,6 +160,11 @@ public: CurrentPage++; } + void skipPages(uptr N) { + closeOpenedRange(); + CurrentPage += N; + } + void finish() { closeOpenedRange(); } private: @@ -175,11 +183,13 @@ private: uptr CurrentRangeStatePage = 0; }; -template <class TransferBatchT, class ReleaseRecorderT> +template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT, + typename SkipRegionT> NOINLINE void -releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, +releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, - ReleaseRecorderT *Recorder) { + ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr, + SkipRegionT SkipRegion) { const uptr PageSize = getPageSizeCached(); // Figure out the number of chunks per page and whether we can take a fast @@ -223,44 +233,45 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, return; const uptr PageSizeLog = getLog2(PageSize); - const uptr RoundedSize = NumberOfRegions * (PagesCount << PageSizeLog); + const uptr RoundedRegionSize = PagesCount << PageSizeLog; + const uptr RoundedSize = NumberOfRegions * RoundedRegionSize; // Iterate over free chunks and count how many free chunks affect each // allocated page. if (BlockSize <= PageSize && PageSize % BlockSize == 0) { // Each chunk affects one page only. 
for (const auto &It : FreeList) { - // If dealing with a TransferBatch, the first pointer of the batch will - // point to the batch itself, we do not want to mark this for release as - // the batch is in use, so skip the first entry. - const bool IsTransferBatch = - (It.getCount() != 0) && - (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It)); - for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; - // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) { - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; - Counters.inc(RegionIndex, PInRegion >> PageSizeLog); - } + for (u32 I = 0; I < It.getCount(); I++) { + const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); + if (P >= RoundedSize) + continue; + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + const uptr PInRegion = P - RegionIndex * RegionSize; + Counters.inc(RegionIndex, PInRegion >> PageSizeLog); } } } else { // In all other cases chunks might affect more than one page. + DCHECK_GE(RegionSize, BlockSize); + const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; for (const auto &It : FreeList) { - // See TransferBatch comment above. - const bool IsTransferBatch = - (It.getCount() != 0) && - (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It)); - for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; - // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) { - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); + for (u32 I = 0; I < It.getCount(); I++) { + const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); + if (P >= RoundedSize) + continue; + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + uptr PInRegion = P - RegionIndex * RegionSize; + Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, + (PInRegion + BlockSize - 1) >> PageSizeLog); + // The last block in a region might straddle a page, so if it's + // free, we mark the following "pretend" memory block(s) as free. + if (PInRegion == LastBlockInRegion) { + PInRegion += BlockSize; + while (PInRegion < RoundedRegionSize) { + Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, + (PInRegion + BlockSize - 1) >> PageSizeLog); + PInRegion += BlockSize; + } } } } @@ -271,10 +282,15 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder); if (SameBlockCountPerPage) { // Fast path, every page has the same number of chunks affecting it. - for (uptr I = 0; I < NumberOfRegions; I++) + for (uptr I = 0; I < NumberOfRegions; I++) { + if (SkipRegion(I)) { + RangeTracker.skipPages(PagesCount); + continue; + } for (uptr J = 0; J < PagesCount; J++) RangeTracker.processNextPage(Counters.get(I, J) == FullPagesBlockCountMax); + } } else { // Slow path, go through the pages keeping count how many chunks affect // each page. @@ -286,6 +302,10 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, // up the number of chunks on the current page and checking on every step // whether the page boundary was crossed. 
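// Illustrative sketch, not part of this patch: in the fast case above
// (BlockSize divides PageSize), each free block increments the counter of
// exactly one page, and a page becomes releasable once its counter reaches
// PageSize / BlockSize, i.e. every block on it is free (e.g. 4096 / 64 = 64).
// A simplified stand-in using a plain vector instead of PackedCounterArray.
#include <cstdint>
#include <vector>

using uptr = uintptr_t;

std::vector<bool> fullyFreePages(const std::vector<uptr> &FreeOffsets,
                                 uptr RegionSize, uptr PageSize,
                                 uptr BlockSize) {
  const uptr PagesCount = (RegionSize + PageSize - 1) / PageSize;
  const uptr FullPageBlockCount = PageSize / BlockSize;
  std::vector<uptr> Counters(PagesCount, 0);
  for (const uptr Offset : FreeOffsets) // region-relative offsets of free blocks
    Counters[Offset / PageSize]++;
  std::vector<bool> Releasable(PagesCount);
  for (uptr I = 0; I < PagesCount; I++)
    Releasable[I] = (Counters[I] == FullPageBlockCount);
  return Releasable;
}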
for (uptr I = 0; I < NumberOfRegions; I++) { + if (SkipRegion(I)) { + RangeTracker.skipPages(PagesCount); + continue; + } uptr PrevPageBoundary = 0; uptr CurrentBoundary = 0; for (uptr J = 0; J < PagesCount; J++) { @@ -301,7 +321,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, } } PrevPageBoundary = PageBoundary; - RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage); } } diff --git a/standalone/secondary.h b/standalone/secondary.h index 9d5f130f2d4..ea5d6808aec 100644 --- a/standalone/secondary.h +++ b/standalone/secondary.h @@ -9,9 +9,12 @@ #ifndef SCUDO_SECONDARY_H_ #define SCUDO_SECONDARY_H_ +#include "chunk.h" #include "common.h" #include "list.h" +#include "memtag.h" #include "mutex.h" +#include "options.h" #include "stats.h" #include "string_utils.h" @@ -28,134 +31,292 @@ namespace LargeBlock { struct Header { LargeBlock::Header *Prev; LargeBlock::Header *Next; - uptr BlockEnd; + uptr CommitBase; + uptr CommitSize; uptr MapBase; uptr MapSize; - MapPlatformData Data; + [[no_unique_address]] MapPlatformData Data; }; constexpr uptr getHeaderSize() { return roundUpTo(sizeof(Header), 1U << SCUDO_MIN_ALIGNMENT_LOG); } -static Header *getHeader(uptr Ptr) { - return reinterpret_cast<Header *>(Ptr - getHeaderSize()); +template <typename Config> static uptr addHeaderTag(uptr Ptr) { + if (allocatorSupportsMemoryTagging<Config>()) + return addFixedTag(Ptr, 1); + return Ptr; } -static Header *getHeader(const void *Ptr) { - return getHeader(reinterpret_cast<uptr>(Ptr)); +template <typename Config> static Header *getHeader(uptr Ptr) { + return reinterpret_cast<Header *>(addHeaderTag<Config>(Ptr) - + getHeaderSize()); +} + +template <typename Config> static Header *getHeader(const void *Ptr) { + return getHeader<Config>(reinterpret_cast<uptr>(Ptr)); } } // namespace LargeBlock +static void unmap(LargeBlock::Header *H) { + MapPlatformData Data = H->Data; + unmap(reinterpret_cast<void *>(H->MapBase), H->MapSize, UNMAP_ALL, &Data); +} + class MapAllocatorNoCache { public: void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {} void init(UNUSED s32 ReleaseToOsInterval) {} - bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H) { + bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment, + UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) { return false; } - bool store(UNUSED LargeBlock::Header *H) { return false; } - static bool canCache(UNUSED uptr Size) { return false; } + void store(UNUSED Options Options, LargeBlock::Header *H) { unmap(H); } + bool canCache(UNUSED uptr Size) { return false; } void disable() {} void enable() {} void releaseToOS() {} - void setReleaseToOsIntervalMs(UNUSED s32 Interval) {} + void disableMemoryTagging() {} + bool setOption(Option O, UNUSED sptr Value) { + if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount || + O == Option::MaxCacheEntrySize) + return false; + // Not supported by the Secondary Cache, but not an error either. 
+ return true; + } }; -template <uptr MaxEntriesCount = 32U, uptr MaxEntrySize = 1UL << 19, - s32 MinReleaseToOsIntervalMs = INT32_MIN, - s32 MaxReleaseToOsIntervalMs = INT32_MAX> -class MapAllocatorCache { +static const uptr MaxUnusedCachePages = 4U; + +template <typename Config> +void mapSecondary(Options Options, uptr CommitBase, uptr CommitSize, + uptr AllocPos, uptr Flags, MapPlatformData *Data) { + const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached(); + if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) { + const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); + map(reinterpret_cast<void *>(CommitBase), UntaggedPos - CommitBase, + "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG | Flags, Data); + map(reinterpret_cast<void *>(UntaggedPos), + CommitBase + CommitSize - UntaggedPos, "scudo:secondary", + MAP_RESIZABLE | Flags, Data); + } else { + map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary", + MAP_RESIZABLE | (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0) | + Flags, + Data); + } +} + +template <typename Config> class MapAllocatorCache { public: - // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length - // arrays are an extension for some compilers. - // FIXME(kostyak): support (partially) the cache on Fuchsia. - static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, ""); + // Ensure the default maximum specified fits the array. + static_assert(Config::SecondaryCacheDefaultMaxEntriesCount <= + Config::SecondaryCacheEntriesArraySize, + ""); void initLinkerInitialized(s32 ReleaseToOsInterval) { - setReleaseToOsIntervalMs(ReleaseToOsInterval); + setOption(Option::MaxCacheEntriesCount, + static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntriesCount)); + setOption(Option::MaxCacheEntrySize, + static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntrySize)); + setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); initLinkerInitialized(ReleaseToOsInterval); } - bool store(LargeBlock::Header *H) { + void store(Options Options, LargeBlock::Header *H) { + if (!canCache(H->CommitSize)) + return unmap(H); + bool EntryCached = false; bool EmptyCache = false; + const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); const u64 Time = getMonotonicTime(); - { + const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + CachedBlock Entry; + Entry.CommitBase = H->CommitBase; + Entry.CommitSize = H->CommitSize; + Entry.MapBase = H->MapBase; + Entry.MapSize = H->MapSize; + Entry.BlockBegin = reinterpret_cast<uptr>(H + 1); + Entry.Data = H->Data; + Entry.Time = Time; + if (useMemoryTagging<Config>(Options)) { + if (Interval == 0 && !SCUDO_FUCHSIA) { + // Release the memory and make it inaccessible at the same time by + // creating a new MAP_NOACCESS mapping on top of the existing mapping. + // Fuchsia does not support replacing mappings by creating a new mapping + // on top so we just do the two syscalls there. 
+ Entry.Time = 0; + mapSecondary<Config>(Options, Entry.CommitBase, Entry.CommitSize, + Entry.CommitBase, MAP_NOACCESS, &Entry.Data); + } else { + setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS, + &Entry.Data); + } + } else if (Interval == 0) { + releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.Time = 0; + } + do { ScopedLock L(Mutex); - if (EntriesCount == MaxEntriesCount) { + if (useMemoryTagging<Config>(Options) && QuarantinePos == -1U) { + // If we get here then memory tagging was disabled in between when we + // read Options and when we locked Mutex. We can't insert our entry into + // the quarantine or the cache because the permissions would be wrong so + // just unmap it. + break; + } + if (Config::SecondaryCacheQuarantineSize && + useMemoryTagging<Config>(Options)) { + QuarantinePos = + (QuarantinePos + 1) % Max(Config::SecondaryCacheQuarantineSize, 1u); + if (!Quarantine[QuarantinePos].CommitBase) { + Quarantine[QuarantinePos] = Entry; + return; + } + CachedBlock PrevEntry = Quarantine[QuarantinePos]; + Quarantine[QuarantinePos] = Entry; + if (OldestTime == 0) + OldestTime = Entry.Time; + Entry = PrevEntry; + } + if (EntriesCount >= MaxCount) { if (IsFullEvents++ == 4U) EmptyCache = true; } else { - for (uptr I = 0; I < MaxEntriesCount; I++) { - if (Entries[I].Block) + for (u32 I = 0; I < MaxCount; I++) { + if (Entries[I].CommitBase) continue; if (I != 0) Entries[I] = Entries[0]; - Entries[0].Block = reinterpret_cast<uptr>(H); - Entries[0].BlockEnd = H->BlockEnd; - Entries[0].MapBase = H->MapBase; - Entries[0].MapSize = H->MapSize; - Entries[0].Data = H->Data; - Entries[0].Time = Time; + Entries[0] = Entry; EntriesCount++; + if (OldestTime == 0) + OldestTime = Entry.Time; EntryCached = true; break; } } - } - s32 Interval; + } while (0); if (EmptyCache) empty(); - else if ((Interval = getReleaseToOsIntervalMs()) >= 0) + else if (Interval >= 0) releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); - return EntryCached; + if (!EntryCached) + unmap(reinterpret_cast<void *>(Entry.MapBase), Entry.MapSize, UNMAP_ALL, + &Entry.Data); } - bool retrieve(uptr Size, LargeBlock::Header **H) { + bool retrieve(Options Options, uptr Size, uptr Alignment, + LargeBlock::Header **H, bool *Zeroed) { const uptr PageSize = getPageSizeCached(); - ScopedLock L(Mutex); - if (EntriesCount == 0) - return false; - for (uptr I = 0; I < MaxEntriesCount; I++) { - if (!Entries[I].Block) - continue; - const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block; - if (Size > BlockSize) - continue; - if (Size < BlockSize - PageSize * 4U) - continue; - *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block); - Entries[I].Block = 0; - (*H)->BlockEnd = Entries[I].BlockEnd; - (*H)->MapBase = Entries[I].MapBase; - (*H)->MapSize = Entries[I].MapSize; - (*H)->Data = Entries[I].Data; + const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + bool Found = false; + CachedBlock Entry; + uptr HeaderPos; + { + ScopedLock L(Mutex); + if (EntriesCount == 0) + return false; + for (u32 I = 0; I < MaxCount; I++) { + const uptr CommitBase = Entries[I].CommitBase; + if (!CommitBase) + continue; + const uptr CommitSize = Entries[I].CommitSize; + const uptr AllocPos = + roundDownTo(CommitBase + CommitSize - Size, Alignment); + HeaderPos = + AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + if (HeaderPos > CommitBase + CommitSize) + continue; + if (HeaderPos < CommitBase || + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) + continue; + 
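// Illustrative sketch, not part of this patch: the lookup above places the new
// allocation at the top of a cached committed range (rounded down to the
// requested alignment) and accepts the entry only if both headers still fit in
// front of it and no more than MaxUnusedCachePages pages would sit unused at
// the start. The combined header size below is a made-up stand-in for
// Chunk::getHeaderSize() + LargeBlock::getHeaderSize(); Alignment is assumed
// to be a power of two.
#include <cstdint>

using uptr = uintptr_t;

constexpr uptr HeaderBytes = 64;  // hypothetical combined header size
constexpr uptr MaxUnusedCachePages = 4;

bool cachedEntryFits(uptr CommitBase, uptr CommitSize, uptr Size,
                     uptr Alignment, uptr PageSize) {
  const uptr AllocPos = (CommitBase + CommitSize - Size) & ~(Alignment - 1);
  const uptr HeaderPos = AllocPos - HeaderBytes;
  if (HeaderPos > CommitBase + CommitSize) // guards the wrap-around when Size is too large
    return false;
  return HeaderPos >= CommitBase &&
         AllocPos <= CommitBase + PageSize * MaxUnusedCachePages;
}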
Found = true; + Entry = Entries[I]; + Entries[I].CommitBase = 0; + break; + } + } + if (Found) { + *H = reinterpret_cast<LargeBlock::Header *>( + LargeBlock::addHeaderTag<Config>(HeaderPos)); + *Zeroed = Entry.Time == 0; + if (useMemoryTagging<Config>(Options)) + setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data); + uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); + if (useMemoryTagging<Config>(Options)) { + if (*Zeroed) + storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), + NewBlockBegin); + else if (Entry.BlockBegin < NewBlockBegin) + storeTags(Entry.BlockBegin, NewBlockBegin); + else + storeTags(untagPointer(NewBlockBegin), + untagPointer(Entry.BlockBegin)); + } + (*H)->CommitBase = Entry.CommitBase; + (*H)->CommitSize = Entry.CommitSize; + (*H)->MapBase = Entry.MapBase; + (*H)->MapSize = Entry.MapSize; + (*H)->Data = Entry.Data; EntriesCount--; - return true; } - return false; + return Found; } - static bool canCache(uptr Size) { - return MaxEntriesCount != 0U && Size <= MaxEntrySize; + bool canCache(uptr Size) { + return atomic_load_relaxed(&MaxEntriesCount) != 0U && + Size <= atomic_load_relaxed(&MaxEntrySize); } - void setReleaseToOsIntervalMs(s32 Interval) { - if (Interval >= MaxReleaseToOsIntervalMs) { - Interval = MaxReleaseToOsIntervalMs; - } else if (Interval <= MinReleaseToOsIntervalMs) { - Interval = MinReleaseToOsIntervalMs; + bool setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + const s32 Interval = + Max(Min(static_cast<s32>(Value), + Config::SecondaryCacheMaxReleaseToOsIntervalMs), + Config::SecondaryCacheMinReleaseToOsIntervalMs); + atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); + return true; } - atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed); + if (O == Option::MaxCacheEntriesCount) { + const u32 MaxCount = static_cast<u32>(Value); + if (MaxCount > Config::SecondaryCacheEntriesArraySize) + return false; + atomic_store_relaxed(&MaxEntriesCount, MaxCount); + return true; + } + if (O == Option::MaxCacheEntrySize) { + atomic_store_relaxed(&MaxEntrySize, static_cast<uptr>(Value)); + return true; + } + // Not supported by the Secondary Cache, but not an error either. 
+ return true; } void releaseToOS() { releaseOlderThan(UINT64_MAX); } + void disableMemoryTagging() { + ScopedLock L(Mutex); + for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { + if (Quarantine[I].CommitBase) { + unmap(reinterpret_cast<void *>(Quarantine[I].MapBase), + Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data); + Quarantine[I].CommitBase = 0; + } + } + const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + for (u32 I = 0; I < MaxCount; I++) + if (Entries[I].CommitBase) + setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0, + &Entries[I].Data); + QuarantinePos = -1U; + } + void disable() { Mutex.lock(); } void enable() { Mutex.unlock(); } @@ -166,17 +327,17 @@ private: void *MapBase; uptr MapSize; MapPlatformData Data; - } MapInfo[MaxEntriesCount]; + } MapInfo[Config::SecondaryCacheEntriesArraySize]; uptr N = 0; { ScopedLock L(Mutex); - for (uptr I = 0; I < MaxEntriesCount; I++) { - if (!Entries[I].Block) + for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { + if (!Entries[I].CommitBase) continue; MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase); MapInfo[N].MapSize = Entries[I].MapSize; MapInfo[N].Data = Entries[I].Data; - Entries[I].Block = 0; + Entries[I].CommitBase = 0; N++; } EntriesCount = 0; @@ -187,42 +348,53 @@ private: &MapInfo[I].Data); } - void releaseOlderThan(u64 Time) { - ScopedLock L(Mutex); - if (!EntriesCount) - return; - for (uptr I = 0; I < MaxEntriesCount; I++) { - if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time) - continue; - releasePagesToOS(Entries[I].Block, 0, - Entries[I].BlockEnd - Entries[I].Block, - &Entries[I].Data); - Entries[I].Time = 0; - } - } - - s32 getReleaseToOsIntervalMs() { - return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed); - } - struct CachedBlock { - uptr Block; - uptr BlockEnd; + uptr CommitBase; + uptr CommitSize; uptr MapBase; uptr MapSize; - MapPlatformData Data; + uptr BlockBegin; + [[no_unique_address]] MapPlatformData Data; u64 Time; }; + void releaseIfOlderThan(CachedBlock &Entry, u64 Time) { + if (!Entry.CommitBase || !Entry.Time) + return; + if (Entry.Time > Time) { + if (OldestTime == 0 || Entry.Time < OldestTime) + OldestTime = Entry.Time; + return; + } + releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.Time = 0; + } + + void releaseOlderThan(u64 Time) { + ScopedLock L(Mutex); + if (!EntriesCount || OldestTime == 0 || OldestTime > Time) + return; + OldestTime = 0; + for (uptr I = 0; I < Config::SecondaryCacheQuarantineSize; I++) + releaseIfOlderThan(Quarantine[I], Time); + for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) + releaseIfOlderThan(Entries[I], Time); + } + HybridMutex Mutex; - CachedBlock Entries[MaxEntriesCount]; - u32 EntriesCount; - uptr LargestSize; - u32 IsFullEvents; - atomic_s32 ReleaseToOsIntervalMs; + u32 EntriesCount = 0; + u32 QuarantinePos = 0; + atomic_u32 MaxEntriesCount = {}; + atomic_uptr MaxEntrySize = {}; + u64 OldestTime = 0; + u32 IsFullEvents = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + + CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; + CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize] = {}; }; -template <class CacheT> class MapAllocator { +template <typename Config> class MapAllocator { public: void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) { Cache.initLinkerInitialized(ReleaseToOsInterval); @@ -235,13 +407,15 @@ public: initLinkerInitialized(S, ReleaseToOsInterval); } - void 
*allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, - bool ZeroContents = false); + void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0, + uptr *BlockEnd = nullptr, + FillContentsMode FillContents = NoFill); - void deallocate(void *Ptr); + void deallocate(Options Options, void *Ptr); static uptr getBlockEnd(void *Ptr) { - return LargeBlock::getHeader(Ptr)->BlockEnd; + auto *B = LargeBlock::getHeader<Config>(Ptr); + return B->CommitBase + B->CommitSize; } static uptr getBlockSize(void *Ptr) { @@ -261,28 +435,32 @@ public: } template <typename F> void iterateOverBlocks(F Callback) const { - for (const auto &H : InUseBlocks) - Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize()); + for (const auto &H : InUseBlocks) { + uptr Ptr = reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize(); + if (allocatorSupportsMemoryTagging<Config>()) + Ptr = untagPointer(Ptr); + Callback(Ptr); + } } - static uptr canCache(uptr Size) { return CacheT::canCache(Size); } + uptr canCache(uptr Size) { return Cache.canCache(Size); } - void setReleaseToOsIntervalMs(s32 Interval) { - Cache.setReleaseToOsIntervalMs(Interval); - } + bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); } void releaseToOS() { Cache.releaseToOS(); } + void disableMemoryTagging() { Cache.disableMemoryTagging(); } + private: - CacheT Cache; + typename Config::SecondaryCache Cache; HybridMutex Mutex; DoublyLinkedList<LargeBlock::Header> InUseBlocks; - uptr AllocatedBytes; - uptr FreedBytes; - uptr LargestSize; - u32 NumberOfAllocs; - u32 NumberOfFrees; + uptr AllocatedBytes = 0; + uptr FreedBytes = 0; + uptr LargestSize = 0; + u32 NumberOfAllocs = 0; + u32 NumberOfFrees = 0; LocalStats Stats; }; @@ -297,24 +475,37 @@ private: // For allocations requested with an alignment greater than or equal to a page, // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). 
-template <class CacheT> -void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, - uptr *BlockEnd, bool ZeroContents) { - DCHECK_GE(Size, AlignmentHint); +template <typename Config> +void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment, + uptr *BlockEndPtr, + FillContentsMode FillContents) { + if (Options.get(OptionBit::AddLargeAllocationSlack)) + Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG; + Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG); const uptr PageSize = getPageSizeCached(); - const uptr RoundedSize = - roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize); - - if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) { + uptr RoundedSize = + roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() + + Chunk::getHeaderSize(), + PageSize); + if (Alignment > PageSize) + RoundedSize += Alignment - PageSize; + + if (Alignment < PageSize && Cache.canCache(RoundedSize)) { LargeBlock::Header *H; - if (Cache.retrieve(RoundedSize, &H)) { - if (BlockEnd) - *BlockEnd = H->BlockEnd; - void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) + - LargeBlock::getHeaderSize()); - if (ZeroContents) - memset(Ptr, 0, H->BlockEnd - reinterpret_cast<uptr>(Ptr)); - const uptr BlockSize = H->BlockEnd - reinterpret_cast<uptr>(H); + bool Zeroed; + if (Cache.retrieve(Options, Size, Alignment, &H, &Zeroed)) { + const uptr BlockEnd = H->CommitBase + H->CommitSize; + if (BlockEndPtr) + *BlockEndPtr = BlockEnd; + uptr HInt = reinterpret_cast<uptr>(H); + if (allocatorSupportsMemoryTagging<Config>()) + HInt = untagPointer(HInt); + const uptr PtrInt = HInt + LargeBlock::getHeaderSize(); + void *Ptr = reinterpret_cast<void *>(PtrInt); + if (FillContents && !Zeroed) + memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte, + BlockEnd - PtrInt); + const uptr BlockSize = BlockEnd - HInt; { ScopedLock L(Mutex); InUseBlocks.push_back(H); @@ -329,9 +520,8 @@ void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, MapPlatformData Data = {}; const uptr MapSize = RoundedSize + 2 * PageSize; - uptr MapBase = - reinterpret_cast<uptr>(map(nullptr, MapSize, "scudo:secondary", - MAP_NOACCESS | MAP_ALLOWNOMEM, &Data)); + uptr MapBase = reinterpret_cast<uptr>( + map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data)); if (UNLIKELY(!MapBase)) return nullptr; uptr CommitBase = MapBase + PageSize; @@ -339,11 +529,11 @@ void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, // In the unlikely event of alignments larger than a page, adjust the amount // of memory we want to commit, and trim the extra memory. - if (UNLIKELY(AlignmentHint >= PageSize)) { + if (UNLIKELY(Alignment >= PageSize)) { // For alignments greater than or equal to a page, the user pointer (eg: the // pointer that is returned by the C or C++ allocation APIs) ends up on a // page boundary , and our headers will live in the preceding page. 
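// Illustrative arithmetic, not part of this patch: how the mapping for a
// secondary allocation is sized above. Header sizes are stand-ins; boundaries
// are assumed to be powers of two.
//   Size = 100000, Alignment = 16, PageSize = 4096, headers = 64 bytes
//   roundUpTo(100000, 16)                 = 100000
//   + headers                             = 100064
//   roundUpTo(100064, 4096)               = 102400  (RoundedSize, 25 pages)
//   MapSize = RoundedSize + 2 * PageSize  = 110592  (one guard page each side)
#include <cstdint>

using uptr = uintptr_t;

uptr roundUpTo(uptr X, uptr Boundary) { return (X + Boundary - 1) & ~(Boundary - 1); }

uptr roundedSecondarySize(uptr Size, uptr Alignment, uptr HeaderBytes,
                          uptr PageSize) {
  uptr Rounded = roundUpTo(roundUpTo(Size, Alignment) + HeaderBytes, PageSize);
  if (Alignment > PageSize)
    Rounded += Alignment - PageSize; // slack to realign the commit base later
  return Rounded;
}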
- CommitBase = roundUpTo(MapBase + PageSize + 1, AlignmentHint) - PageSize; + CommitBase = roundUpTo(MapBase + PageSize + 1, Alignment) - PageSize; const uptr NewMapBase = CommitBase - PageSize; DCHECK_GE(NewMapBase, MapBase); // We only trim the extra memory on 32-bit platforms: 64-bit platforms @@ -352,9 +542,8 @@ void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, unmap(reinterpret_cast<void *>(MapBase), NewMapBase - MapBase, 0, &Data); MapBase = NewMapBase; } - const uptr NewMapEnd = CommitBase + PageSize + - roundUpTo((Size - AlignmentHint), PageSize) + - PageSize; + const uptr NewMapEnd = + CommitBase + PageSize + roundUpTo(Size, PageSize) + PageSize; DCHECK_LE(NewMapEnd, MapEnd); if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) { unmap(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd, 0, &Data); @@ -363,16 +552,22 @@ void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, } const uptr CommitSize = MapEnd - PageSize - CommitBase; - const uptr Ptr = - reinterpret_cast<uptr>(map(reinterpret_cast<void *>(CommitBase), - CommitSize, "scudo:secondary", 0, &Data)); - LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>(Ptr); + const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment); + mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, &Data); + const uptr HeaderPos = + AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>( + LargeBlock::addHeaderTag<Config>(HeaderPos)); + if (useMemoryTagging<Config>(Options)) + storeTags(LargeBlock::addHeaderTag<Config>(CommitBase), + reinterpret_cast<uptr>(H + 1)); H->MapBase = MapBase; H->MapSize = MapEnd - MapBase; - H->BlockEnd = CommitBase + CommitSize; + H->CommitBase = CommitBase; + H->CommitSize = CommitSize; H->Data = Data; - if (BlockEnd) - *BlockEnd = CommitBase + CommitSize; + if (BlockEndPtr) + *BlockEndPtr = CommitBase + CommitSize; { ScopedLock L(Mutex); InUseBlocks.push_back(H); @@ -383,13 +578,13 @@ void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint, Stats.add(StatAllocated, CommitSize); Stats.add(StatMapped, H->MapSize); } - return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize()); + return reinterpret_cast<void *>(HeaderPos + LargeBlock::getHeaderSize()); } -template <class CacheT> void MapAllocator<CacheT>::deallocate(void *Ptr) { - LargeBlock::Header *H = LargeBlock::getHeader(Ptr); - const uptr Block = reinterpret_cast<uptr>(H); - const uptr CommitSize = H->BlockEnd - Block; +template <typename Config> +void MapAllocator<Config>::deallocate(Options Options, void *Ptr) { + LargeBlock::Header *H = LargeBlock::getHeader<Config>(Ptr); + const uptr CommitSize = H->CommitSize; { ScopedLock L(Mutex); InUseBlocks.remove(H); @@ -398,16 +593,11 @@ template <class CacheT> void MapAllocator<CacheT>::deallocate(void *Ptr) { Stats.sub(StatAllocated, CommitSize); Stats.sub(StatMapped, H->MapSize); } - if (CacheT::canCache(CommitSize) && Cache.store(H)) - return; - void *Addr = reinterpret_cast<void *>(H->MapBase); - const uptr Size = H->MapSize; - MapPlatformData Data = H->Data; - unmap(Addr, Size, UNMAP_ALL, &Data); + Cache.store(Options, H); } -template <class CacheT> -void MapAllocator<CacheT>::getStats(ScopedString *Str) const { +template <typename Config> +void MapAllocator<Config>::getStats(ScopedString *Str) const { Str->append( "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times " "(%zuK), remains %zu (%zuK) max %zuM\n", diff --git 
a/standalone/size_class_map.h b/standalone/size_class_map.h index 5ed8e2845b3..1948802df0b 100644 --- a/standalone/size_class_map.h +++ b/standalone/size_class_map.h @@ -85,6 +85,14 @@ public: return T + (T >> S) * (ClassId & M) + SizeDelta; } + static u8 getSizeLSBByClassId(uptr ClassId) { + return u8(getLeastSignificantSetBitIndex(getSizeByClassId(ClassId))); + } + + static constexpr bool usesCompressedLSBFormat() { + return false; + } + static uptr getClassIdBySize(uptr Size) { if (Size <= SizeDelta + (1 << Config::MinSizeLog)) return 1; @@ -137,7 +145,41 @@ class TableSizeClassMap : public SizeClassMapBase<Config> { u8 Tab[getTableSize()] = {}; }; - static constexpr SizeTable Table = {}; + static constexpr SizeTable SzTable = {}; + + struct LSBTable { + constexpr LSBTable() { + u8 Min = 255, Max = 0; + for (uptr I = 0; I != ClassesSize; ++I) { + for (u8 Bit = 0; Bit != 64; ++Bit) { + if (Config::Classes[I] & (1 << Bit)) { + Tab[I] = Bit; + if (Bit < Min) + Min = Bit; + if (Bit > Max) + Max = Bit; + break; + } + } + } + + if (Max - Min > 3 || ClassesSize > 32) + return; + + UseCompressedFormat = true; + CompressedMin = Min; + for (uptr I = 0; I != ClassesSize; ++I) + CompressedValue |= u64(Tab[I] - Min) << (I * 2); + } + + u8 Tab[ClassesSize] = {}; + + bool UseCompressedFormat = false; + u8 CompressedMin = 0; + u64 CompressedValue = 0; + }; + + static constexpr LSBTable LTable = {}; public: static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; @@ -152,6 +194,18 @@ public: return Config::Classes[ClassId - 1]; } + static u8 getSizeLSBByClassId(uptr ClassId) { + if (LTable.UseCompressedFormat) + return ((LTable.CompressedValue >> ((ClassId - 1) * 2)) & 3) + + LTable.CompressedMin; + else + return LTable.Tab[ClassId - 1]; + } + + static constexpr bool usesCompressedLSBFormat() { + return LTable.UseCompressedFormat; + } + static uptr getClassIdBySize(uptr Size) { if (Size <= Config::Classes[0]) return 1; @@ -159,7 +213,7 @@ public: DCHECK_LE(Size, MaxSize); if (Size <= (1 << Config::MidSizeLog)) return ((Size - 1) >> Config::MinSizeLog) + 1; - return Table.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; + return SzTable.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; } static u32 getMaxCachedHint(uptr Size) { @@ -168,13 +222,24 @@ public: } }; +struct DefaultSizeClassConfig { + static const uptr NumBits = 3; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 8; + static const uptr MaxSizeLog = 17; + static const u32 MaxNumCachedHint = 10; + static const uptr MaxBytesCachedLog = 10; +}; + +typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap; + struct AndroidSizeClassConfig { #if SCUDO_WORDSIZE == 64U static const uptr NumBits = 7; static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 6; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 14; + static const u32 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 13; static constexpr u32 Classes[] = { @@ -208,16 +273,9 @@ struct AndroidSizeClassConfig { typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap; -struct DefaultSizeClassConfig { - static const uptr NumBits = 3; - static const uptr MinSizeLog = 5; - static const uptr MidSizeLog = 8; - static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 8; - static const uptr MaxBytesCachedLog = 10; -}; - -typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap; +#if SCUDO_WORDSIZE == 64U && defined(__clang__) 
+static_assert(AndroidSizeClassMap::usesCompressedLSBFormat(), ""); +#endif struct SvelteSizeClassConfig { #if SCUDO_WORDSIZE == 64U @@ -225,14 +283,14 @@ struct SvelteSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 4; + static const u32 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 10; #else static const uptr NumBits = 4; static const uptr MinSizeLog = 3; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 5; + static const u32 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 10; #endif }; diff --git a/standalone/stack_depot.h b/standalone/stack_depot.h new file mode 100644 index 00000000000..458198fcb7a --- /dev/null +++ b/standalone/stack_depot.h @@ -0,0 +1,144 @@ +//===-- stack_depot.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_STACK_DEPOT_H_ +#define SCUDO_STACK_DEPOT_H_ + +#include "atomic_helpers.h" +#include "mutex.h" + +namespace scudo { + +class MurMur2HashBuilder { + static const u32 M = 0x5bd1e995; + static const u32 Seed = 0x9747b28c; + static const u32 R = 24; + u32 H; + +public: + explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; } + void add(u32 K) { + K *= M; + K ^= K >> R; + K *= M; + H *= M; + H ^= K; + } + u32 get() { + u32 X = H; + X ^= X >> 13; + X *= M; + X ^= X >> 15; + return X; + } +}; + +class StackDepot { + HybridMutex RingEndMu; + u32 RingEnd = 0; + + // This data structure stores a stack trace for each allocation and + // deallocation when stack trace recording is enabled, that may be looked up + // using a hash of the stack trace. The lower bits of the hash are an index + // into the Tab array, which stores an index into the Ring array where the + // stack traces are stored. As the name implies, Ring is a ring buffer, so a + // stack trace may wrap around to the start of the array. + // + // Each stack trace in Ring is prefixed by a stack trace marker consisting of + // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames + // and stack trace markers in the case where instruction pointers are 4-byte + // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the + // size of the stack trace in bits 33-63. + // + // The insert() function is potentially racy in its accesses to the Tab and + // Ring arrays, but find() is resilient to races in the sense that, barring + // hash collisions, it will either return the correct stack trace or no stack + // trace at all, even if two instances of insert() raced with one another. + // This is achieved by re-checking the hash of the stack trace before + // returning the trace. + +#ifdef SCUDO_FUZZ + // Use smaller table sizes for fuzzing in order to reduce input size. 
+ static const uptr TabBits = 4; +#else + static const uptr TabBits = 16; +#endif + static const uptr TabSize = 1 << TabBits; + static const uptr TabMask = TabSize - 1; + atomic_u32 Tab[TabSize] = {}; + +#ifdef SCUDO_FUZZ + static const uptr RingBits = 4; +#else + static const uptr RingBits = 19; +#endif + static const uptr RingSize = 1 << RingBits; + static const uptr RingMask = RingSize - 1; + atomic_u64 Ring[RingSize] = {}; + +public: + // Insert hash of the stack trace [Begin, End) into the stack depot, and + // return the hash. + u32 insert(uptr *Begin, uptr *End) { + MurMur2HashBuilder B; + for (uptr *I = Begin; I != End; ++I) + B.add(u32(*I) >> 2); + u32 Hash = B.get(); + + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1; + if (Entry == Id) + return Hash; + + ScopedLock Lock(RingEndMu); + RingPos = RingEnd; + atomic_store_relaxed(&Tab[Pos], RingPos); + atomic_store_relaxed(&Ring[RingPos], Id); + for (uptr *I = Begin; I != End; ++I) { + RingPos = (RingPos + 1) & RingMask; + atomic_store_relaxed(&Ring[RingPos], *I); + } + RingEnd = (RingPos + 1) & RingMask; + return Hash; + } + + // Look up a stack trace by hash. Returns true if successful. The trace may be + // accessed via operator[] passing indexes between *RingPosPtr and + // *RingPosPtr + *SizePtr. + bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const { + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + if (RingPos >= RingSize) + return false; + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 HashWithTagBit = (u64(Hash) << 1) | 1; + if ((Entry & 0x1ffffffff) != HashWithTagBit) + return false; + u32 Size = u32(Entry >> 33); + if (Size >= RingSize) + return false; + *RingPosPtr = (RingPos + 1) & RingMask; + *SizePtr = Size; + MurMur2HashBuilder B; + for (uptr I = 0; I != Size; ++I) { + RingPos = (RingPos + 1) & RingMask; + B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2); + } + return B.get() == Hash; + } + + u64 operator[](uptr RingPos) const { + return atomic_load_relaxed(&Ring[RingPos & RingMask]); + } +}; + +} // namespace scudo + +#endif // SCUDO_STACK_DEPOT_H_ diff --git a/standalone/stats.h b/standalone/stats.h index 38481e98e48..e15c0569497 100644 --- a/standalone/stats.h +++ b/standalone/stats.h @@ -46,11 +46,11 @@ public: uptr get(StatType I) const { return atomic_load_relaxed(&StatsArray[I]); } - LocalStats *Next; - LocalStats *Prev; + LocalStats *Next = nullptr; + LocalStats *Prev = nullptr; private: - atomic_uptr StatsArray[StatCount]; + atomic_uptr StatsArray[StatCount] = {}; }; // Global stats, used for aggregation and querying. @@ -58,7 +58,9 @@ class GlobalStats : public LocalStats { public: void initLinkerInitialized() {} void init() { - memset(this, 0, sizeof(*this)); + LocalStats::init(); + Mutex.init(); + StatsList = {}; initLinkerInitialized(); } @@ -87,8 +89,11 @@ public: S[I] = static_cast<sptr>(S[I]) >= 0 ? 
S[I] : 0; } - void disable() { Mutex.lock(); } - void enable() { Mutex.unlock(); } + void lock() { Mutex.lock(); } + void unlock() { Mutex.unlock(); } + + void disable() { lock(); } + void enable() { unlock(); } private: mutable HybridMutex Mutex; diff --git a/standalone/string_utils.cpp b/standalone/string_utils.cpp index 5de8b57bfcd..25bddbce34d 100644 --- a/standalone/string_utils.cpp +++ b/standalone/string_utils.cpp @@ -78,10 +78,11 @@ static int appendUnsigned(char **Buffer, const char *BufferEnd, u64 Num, static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num, u8 MinNumberLength, bool PadWithZero) { const bool Negative = (Num < 0); - return appendNumber(Buffer, BufferEnd, - static_cast<u64>(Negative ? -Num : Num), 10, - MinNumberLength, PadWithZero, Negative, - /*Upper=*/false); + const u64 UnsignedNum = (Num == INT64_MIN) + ? static_cast<u64>(INT64_MAX) + 1 + : static_cast<u64>(Negative ? -Num : Num); + return appendNumber(Buffer, BufferEnd, UnsignedNum, 10, MinNumberLength, + PadWithZero, Negative, /*Upper=*/false); } // Use the fact that explicitly requesting 0 Width (%0s) results in UB and @@ -114,8 +115,8 @@ static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) { return Res; } -int formatString(char *Buffer, uptr BufferLength, const char *Format, - va_list Args) { +static int formatString(char *Buffer, uptr BufferLength, const char *Format, + va_list Args) { static const char *PrintfFormatsHelp = "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; " "%[-]([0-9]*)?(\\.\\*)?s; %c\n"; @@ -158,16 +159,18 @@ int formatString(char *Buffer, uptr BufferLength, const char *Format, CHECK(!((Precision >= 0 || LeftJustified) && *Cur != 's')); switch (*Cur) { case 'd': { - DVal = HaveLL ? va_arg(Args, s64) - : HaveZ ? va_arg(Args, sptr) : va_arg(Args, int); + DVal = HaveLL ? va_arg(Args, s64) + : HaveZ ? va_arg(Args, sptr) + : va_arg(Args, int); Res += appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); break; } case 'u': case 'x': case 'X': { - UVal = HaveLL ? va_arg(Args, u64) - : HaveZ ? va_arg(Args, uptr) : va_arg(Args, unsigned); + UVal = HaveLL ? va_arg(Args, u64) + : HaveZ ? va_arg(Args, uptr) + : va_arg(Args, unsigned); const bool Upper = (*Cur == 'X'); Res += appendUnsigned(&Buffer, BufferEnd, UVal, (*Cur == 'u') ? 10 : 16, Width, PadWithZero, Upper); @@ -207,6 +210,14 @@ int formatString(char *Buffer, uptr BufferLength, const char *Format, return Res; } +int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) 
{ + va_list Args; + va_start(Args, Format); + int Res = formatString(Buffer, BufferLength, Format, Args); + va_end(Args); + return Res; +} + void ScopedString::append(const char *Format, va_list Args) { DCHECK_LT(Length, String.size()); va_list ArgsCopy; @@ -219,6 +230,7 @@ void ScopedString::append(const char *Format, va_list Args) { static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1; String.resize(Length + AdditionalLength); formatString(String.data() + Length, AdditionalLength, Format, ArgsCopy); + va_end(ArgsCopy); Length = strlen(String.data()); CHECK_LT(Length, String.size()); } diff --git a/standalone/string_utils.h b/standalone/string_utils.h index acd60bda9d8..4880fa1e7cf 100644 --- a/standalone/string_utils.h +++ b/standalone/string_utils.h @@ -36,6 +36,7 @@ private: uptr Length; }; +int formatString(char *Buffer, uptr BufferLength, const char *Format, ...); void Printf(const char *Format, ...); } // namespace scudo diff --git a/standalone/tests/atomic_test.cpp b/standalone/tests/atomic_test.cpp index 103cd24624b..e90a642fd35 100644 --- a/standalone/tests/atomic_test.cpp +++ b/standalone/tests/atomic_test.cpp @@ -80,26 +80,14 @@ TEST(ScudoAtomicTest, AtomicStoreLoad) { template <typename T> void checkAtomicCompareExchange() { typedef typename T::Type Type; - { - Type OldVal = 42; - Type NewVal = 24; - Type V = OldVal; - EXPECT_TRUE(atomic_compare_exchange_strong( - reinterpret_cast<T *>(&V), &OldVal, NewVal, memory_order_relaxed)); - EXPECT_FALSE(atomic_compare_exchange_strong( - reinterpret_cast<T *>(&V), &OldVal, NewVal, memory_order_relaxed)); - EXPECT_EQ(NewVal, OldVal); - } - { - Type OldVal = 42; - Type NewVal = 24; - Type V = OldVal; - EXPECT_TRUE(atomic_compare_exchange_weak(reinterpret_cast<T *>(&V), &OldVal, + Type OldVal = 42; + Type NewVal = 24; + Type V = OldVal; + EXPECT_TRUE(atomic_compare_exchange_strong(reinterpret_cast<T *>(&V), &OldVal, NewVal, memory_order_relaxed)); - EXPECT_FALSE(atomic_compare_exchange_weak( - reinterpret_cast<T *>(&V), &OldVal, NewVal, memory_order_relaxed)); - EXPECT_EQ(NewVal, OldVal); - } + EXPECT_FALSE(atomic_compare_exchange_strong( + reinterpret_cast<T *>(&V), &OldVal, NewVal, memory_order_relaxed)); + EXPECT_EQ(NewVal, OldVal); } TEST(ScudoAtomicTest, AtomicCompareExchangeTest) { diff --git a/standalone/tests/checksum_test.cpp b/standalone/tests/checksum_test.cpp index 361d33c7e46..781f990ecb7 100644 --- a/standalone/tests/checksum_test.cpp +++ b/standalone/tests/checksum_test.cpp @@ -41,10 +41,10 @@ template <ComputeChecksum F> void verifyChecksumFunctionBitFlip() { scudo::u8 IdenticalChecksums = 0; for (scudo::uptr I = 0; I < ArraySize; I++) { for (scudo::uptr J = 0; J < SCUDO_WORDSIZE; J++) { - Array[I] ^= 1U << J; + Array[I] ^= scudo::uptr{1} << J; if (F(Seed, Array, ArraySize) == Reference) IdenticalChecksums++; - Array[I] ^= 1U << J; + Array[I] ^= scudo::uptr{1} << J; } } // Allow for a couple of identical checksums over the whole set of flips. 
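The widened literal in the checksum bit-flip hunk above is a correctness fix, not a style change: with SCUDO_WORDSIZE == 64 the old expression 1U << J shifts a 32-bit literal by up to 63 bits, which is undefined behavior, and since the shifted value only has type unsigned int it could never flip any of the upper 32 bits of the 64-bit array element anyway. A minimal standalone sketch of the difference, assuming a 64-bit target (illustrative only, not the scudo test itself):

    #include <cstdint>

    using uptr = uintptr_t; // assumed to be 64 bits wide here, as on arm64/x86_64

    void flipEveryBitTwice(uptr &Word) {
      for (unsigned J = 0; J < 64; J++) {
        // Word ^= 1U << J;   // undefined once J >= 32: 1U is a 32-bit int
        Word ^= uptr{1} << J; // well-defined: the literal is 64 bits wide
        Word ^= uptr{1} << J; // flip the bit back, as the test does
      }
    }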
diff --git a/standalone/tests/chunk_test.cpp b/standalone/tests/chunk_test.cpp index 13da70eff85..6458e23e142 100644 --- a/standalone/tests/chunk_test.cpp +++ b/standalone/tests/chunk_test.cpp @@ -41,7 +41,7 @@ TEST(ScudoChunkTest, ChunkCmpXchg) { initChecksum(); const scudo::uptr Size = 0x100U; scudo::Chunk::UnpackedHeader OldHeader = {}; - OldHeader.Origin = scudo::Chunk::Origin::Malloc; + OldHeader.OriginOrWasZeroed = scudo::Chunk::Origin::Malloc; OldHeader.ClassId = 0x42U; OldHeader.SizeOrUnusedBytes = Size; OldHeader.State = scudo::Chunk::State::Allocated; diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp index a2c06182a68..5db249d0a85 100644 --- a/standalone/tests/combined_test.cpp +++ b/standalone/tests/combined_test.cpp @@ -12,17 +12,18 @@ #include "combined.h" #include <condition_variable> +#include <memory> #include <mutex> +#include <set> +#include <stdlib.h> #include <thread> #include <vector> -static std::mutex Mutex; -static std::condition_variable Cv; -static bool Ready = false; - static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; +static constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U); -static void disableDebuggerdMaybe() { +// Fuchsia complains that the function is not used. +UNUSED static void disableDebuggerdMaybe() { #if SCUDO_ANDROID // Disable the debuggerd signal handler on Android, without this we can end // up spending a significant amount of time creating tombstones. @@ -31,12 +32,7 @@ static void disableDebuggerdMaybe() { } template <class AllocatorT> -bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size, - scudo::uptr Alignment) { - if (!Allocator->useMemoryTagging() || - !scudo::systemDetectsMemoryTagFaultsTestOnly()) - return false; - +bool isPrimaryAllocation(scudo::uptr Size, scudo::uptr Alignment) { const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG; if (Alignment < MinAlignment) Alignment = MinAlignment; @@ -49,46 +45,110 @@ bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size, template <class AllocatorT> void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, scudo::uptr Alignment) { - if (!isTaggedAllocation(Allocator, Size, Alignment)) - return; - - Size = scudo::roundUpTo(Size, scudo::archMemoryTagGranuleSize()); - EXPECT_DEATH( - { - disableDebuggerdMaybe(); - reinterpret_cast<char *>(P)[-1] = 0xaa; - }, - ""); - EXPECT_DEATH( - { - disableDebuggerdMaybe(); - reinterpret_cast<char *>(P)[Size] = 0xaa; - }, - ""); + const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG; + Size = scudo::roundUpTo(Size, MinAlignment); + if (Allocator->useMemoryTaggingTestOnly()) + EXPECT_DEATH( + { + disableDebuggerdMaybe(); + reinterpret_cast<char *>(P)[-1] = 0xaa; + }, + ""); + if (isPrimaryAllocation<AllocatorT>(Size, Alignment) + ? 
Allocator->useMemoryTaggingTestOnly() + : Alignment == MinAlignment) { + EXPECT_DEATH( + { + disableDebuggerdMaybe(); + reinterpret_cast<char *>(P)[Size] = 0xaa; + }, + ""); + } } -template <class Config> static void testAllocator() { - using AllocatorT = scudo::Allocator<Config>; - auto Deleter = [](AllocatorT *A) { - A->unmapTestOnly(); - delete A; - }; - std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, - Deleter); - Allocator->reset(); +template <typename Config> struct TestAllocator : scudo::Allocator<Config> { + TestAllocator() { + this->reset(); + this->initThreadMaybe(); + if (scudo::archSupportsMemoryTagging() && + !scudo::systemDetectsMemoryTagFaultsTestOnly()) + this->disableMemoryTagging(); + } + ~TestAllocator() { this->unmapTestOnly(); } - EXPECT_FALSE(Allocator->isOwned(&Mutex)); - EXPECT_FALSE(Allocator->isOwned(&Allocator)); - scudo::u64 StackVariable = 0x42424242U; - EXPECT_FALSE(Allocator->isOwned(&StackVariable)); - EXPECT_EQ(StackVariable, 0x42424242U); + void *operator new(size_t size) { + void *p = nullptr; + EXPECT_EQ(0, posix_memalign(&p, alignof(TestAllocator), size)); + return p; + } - constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U); + void operator delete(void *ptr) { free(ptr); } +}; + +template <class TypeParam> struct ScudoCombinedTest : public Test { + ScudoCombinedTest() { + UseQuarantine = std::is_same<TypeParam, scudo::AndroidConfig>::value; + Allocator = std::make_unique<AllocatorT>(); + } + ~ScudoCombinedTest() { + Allocator->releaseToOS(); + UseQuarantine = true; + } + + void RunTest(); + + void BasicTest(scudo::uptr SizeLogMin, scudo::uptr SizeLogMax); + + using AllocatorT = TestAllocator<TypeParam>; + std::unique_ptr<AllocatorT> Allocator; +}; + +#if SCUDO_FUCHSIA +#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, FuchsiaConfig) +#else +#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, DefaultConfig) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig) +#endif + +#define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ + using FIXTURE##NAME##_##TYPE = FIXTURE##NAME<scudo::TYPE>; \ + TEST_F(FIXTURE##NAME##_##TYPE, NAME) { Run(); } + +#define SCUDO_TYPED_TEST(FIXTURE, NAME) \ + template <class TypeParam> \ + struct FIXTURE##NAME : public FIXTURE<TypeParam> { \ + void Run(); \ + }; \ + SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run() + +SCUDO_TYPED_TEST(ScudoCombinedTest, IsOwned) { + auto *Allocator = this->Allocator.get(); + static scudo::u8 StaticBuffer[scudo::Chunk::getHeaderSize() + 1]; + EXPECT_FALSE( + Allocator->isOwned(&StaticBuffer[scudo::Chunk::getHeaderSize()])); + + scudo::u8 StackBuffer[scudo::Chunk::getHeaderSize() + 1]; + for (scudo::uptr I = 0; I < sizeof(StackBuffer); I++) + StackBuffer[I] = 0x42U; + EXPECT_FALSE(Allocator->isOwned(&StackBuffer[scudo::Chunk::getHeaderSize()])); + for (scudo::uptr I = 0; I < sizeof(StackBuffer); I++) + EXPECT_EQ(StackBuffer[I], 0x42U); +} + +template <class Config> +void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLogMin, + scudo::uptr SizeLogMax) { + auto *Allocator = this->Allocator.get(); // This allocates and deallocates a bunch of chunks, with a wide range of // sizes and alignments, with a focus on sizes that could trigger weird // behaviors (plus or minus a small delta of a power of two 
for example). - for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + for (scudo::uptr SizeLog = SizeLogMin; SizeLog <= SizeLogMax; SizeLog++) { for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) { const scudo::uptr Align = 1U << AlignLog; for (scudo::sptr Delta = -32; Delta <= 32; Delta++) { @@ -101,12 +161,20 @@ template <class Config> static void testAllocator() { EXPECT_TRUE(scudo::isAligned(reinterpret_cast<scudo::uptr>(P), Align)); EXPECT_LE(Size, Allocator->getUsableSize(P)); memset(P, 0xaa, Size); - checkMemoryTaggingMaybe(Allocator.get(), P, Size, Align); + checkMemoryTaggingMaybe(Allocator, P, Size, Align); Allocator->deallocate(P, Origin, Size); } } } - Allocator->releaseToOS(); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined0) { this->BasicTest(0, 16); } +SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined1) { this->BasicTest(17, 18); } +SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined2) { this->BasicTest(19, 19); } +SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined3) { this->BasicTest(20, 20); } + +SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroContents) { + auto *Allocator = this->Allocator.get(); // Ensure that specifying ZeroContents returns a zero'd out block. for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { @@ -115,12 +183,60 @@ template <class Config> static void testAllocator() { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - EXPECT_EQ((reinterpret_cast<char *>(P))[I], 0); + ASSERT_EQ((reinterpret_cast<char *>(P))[I], 0); memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } } - Allocator->releaseToOS(); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroFill) { + auto *Allocator = this->Allocator.get(); + + // Ensure that specifying ZeroContents returns a zero'd out block. + Allocator->setFillContents(scudo::ZeroFill); + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + for (scudo::uptr Delta = 0U; Delta <= 4U; Delta++) { + const scudo::uptr Size = (1U << SizeLog) + Delta * 128U; + void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); + EXPECT_NE(P, nullptr); + for (scudo::uptr I = 0; I < Size; I++) + ASSERT_EQ((reinterpret_cast<char *>(P))[I], 0); + memset(P, 0xaa, Size); + Allocator->deallocate(P, Origin, Size); + } + } +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, PatternOrZeroFill) { + auto *Allocator = this->Allocator.get(); + + // Ensure that specifying PatternOrZeroFill returns a pattern or zero filled + // block. The primary allocator only produces pattern filled blocks if MTE + // is disabled, so we only require pattern filled blocks in that case. 
+ Allocator->setFillContents(scudo::PatternOrZeroFill); + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + for (scudo::uptr Delta = 0U; Delta <= 4U; Delta++) { + const scudo::uptr Size = (1U << SizeLog) + Delta * 128U; + void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); + EXPECT_NE(P, nullptr); + for (scudo::uptr I = 0; I < Size; I++) { + unsigned char V = (reinterpret_cast<unsigned char *>(P))[I]; + if (isPrimaryAllocation<TestAllocator<TypeParam>>(Size, + 1U << MinAlignLog) && + !Allocator->useMemoryTaggingTestOnly()) + ASSERT_EQ(V, scudo::PatternFillByte); + else + ASSERT_TRUE(V == scudo::PatternFillByte || V == 0); + } + memset(P, 0xaa, Size); + Allocator->deallocate(P, Origin, Size); + } + } +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, BlockReuse) { + auto *Allocator = this->Allocator.get(); // Verify that a chunk will end up being reused, at some point. const scudo::uptr NeedleSize = 1024U; @@ -129,18 +245,20 @@ template <class Config> static void testAllocator() { bool Found = false; for (scudo::uptr I = 0; I < 1024U && !Found; I++) { void *P = Allocator->allocate(NeedleSize, Origin); - if (Allocator->untagPointerMaybe(P) == - Allocator->untagPointerMaybe(NeedleP)) + if (Allocator->getHeaderTaggedPointer(P) == + Allocator->getHeaderTaggedPointer(NeedleP)) Found = true; Allocator->deallocate(P, Origin); } EXPECT_TRUE(Found); +} - constexpr scudo::uptr MaxSize = Config::Primary::SizeClassMap::MaxSize; +SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLarge) { + auto *Allocator = this->Allocator.get(); // Reallocate a large chunk all the way down to a byte, verifying that we // preserve the data in the process. - scudo::uptr Size = MaxSize * 2; + scudo::uptr Size = TypeParam::Primary::SizeClassMap::MaxSize * 2; const scudo::uptr DataSize = 2048U; void *P = Allocator->allocate(Size, Origin); const char Marker = 0xab; @@ -154,13 +272,19 @@ template <class Config> static void testAllocator() { P = NewP; } Allocator->deallocate(P, Origin); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateSame) { + auto *Allocator = this->Allocator.get(); // Check that reallocating a chunk to a slightly smaller or larger size // returns the same chunk. This requires that all the sizes we iterate on use // the same block size, but that should be the case for MaxSize - 64 with our // default class size maps. - constexpr scudo::uptr ReallocSize = MaxSize - 64; - P = Allocator->allocate(ReallocSize, Origin); + constexpr scudo::uptr ReallocSize = + TypeParam::Primary::SizeClassMap::MaxSize - 64; + void *P = Allocator->allocate(ReallocSize, Origin); + const char Marker = 0xab; memset(P, Marker, ReallocSize); for (scudo::sptr Delta = -32; Delta < 32; Delta += 8) { const scudo::uptr NewSize = ReallocSize + Delta; @@ -168,17 +292,24 @@ template <class Config> static void testAllocator() { EXPECT_EQ(NewP, P); for (scudo::uptr I = 0; I < ReallocSize - 32; I++) EXPECT_EQ((reinterpret_cast<char *>(NewP))[I], Marker); - checkMemoryTaggingMaybe(Allocator.get(), NewP, NewSize, 0); + checkMemoryTaggingMaybe(Allocator, NewP, NewSize, 0); } Allocator->deallocate(P, Origin); +} +SCUDO_TYPED_TEST(ScudoCombinedTest, IterateOverChunks) { + auto *Allocator = this->Allocator.get(); // Allocates a bunch of chunks, then iterate over all the chunks, ensuring // they are the ones we allocated. This requires the allocator to not have any // other allocated chunk at this point (eg: won't work with the Quarantine). + // FIXME: Make it work with UseQuarantine and tagging enabled. 
Internals of + // iterateOverChunks reads header by tagged and non-tagger pointers so one of + // them will fail. if (!UseQuarantine) { std::vector<void *> V; for (scudo::uptr I = 0; I < 64U; I++) - V.push_back(Allocator->allocate(rand() % (MaxSize / 2U), Origin)); + V.push_back(Allocator->allocate( + rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); Allocator->disable(); Allocator->iterateOverChunks( 0U, static_cast<scudo::uptr>(SCUDO_MMAP_RANGE_SIZE - 1), @@ -189,46 +320,42 @@ template <class Config> static void testAllocator() { }, reinterpret_cast<void *>(&V)); Allocator->enable(); - while (!V.empty()) { - Allocator->deallocate(V.back(), Origin); - V.pop_back(); - } + for (auto P : V) + Allocator->deallocate(P, Origin); } +} - Allocator->releaseToOS(); +SCUDO_TYPED_TEST(ScudoCombinedTest, UseAfterFree) { + auto *Allocator = this->Allocator.get(); - if (Allocator->useMemoryTagging() && - scudo::systemDetectsMemoryTagFaultsTestOnly()) { - // Check that use-after-free is detected. - for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { - const scudo::uptr Size = 1U << SizeLog; - if (!isTaggedAllocation(Allocator.get(), Size, 1)) - continue; - // UAF detection is probabilistic, so we repeat the test up to 256 times - // if necessary. With 15 possible tags this means a 1 in 15^256 chance of - // a false positive. - EXPECT_DEATH( - { - disableDebuggerdMaybe(); - for (unsigned I = 0; I != 256; ++I) { - void *P = Allocator->allocate(Size, Origin); - Allocator->deallocate(P, Origin); - reinterpret_cast<char *>(P)[0] = 0xaa; - } - }, - ""); - EXPECT_DEATH( - { - disableDebuggerdMaybe(); - for (unsigned I = 0; I != 256; ++I) { - void *P = Allocator->allocate(Size, Origin); - Allocator->deallocate(P, Origin); - reinterpret_cast<char *>(P)[Size - 1] = 0xaa; - } - }, - ""); - } + // Check that use-after-free is detected. + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + const scudo::uptr Size = 1U << SizeLog; + if (!Allocator->useMemoryTaggingTestOnly()) + continue; + EXPECT_DEATH( + { + disableDebuggerdMaybe(); + void *P = Allocator->allocate(Size, Origin); + Allocator->deallocate(P, Origin); + reinterpret_cast<char *>(P)[0] = 0xaa; + }, + ""); + EXPECT_DEATH( + { + disableDebuggerdMaybe(); + void *P = Allocator->allocate(Size, Origin); + Allocator->deallocate(P, Origin); + reinterpret_cast<char *>(P)[Size - 1] = 0xaa; + }, + ""); + } +} +SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemoryTagging) { + auto *Allocator = this->Allocator.get(); + + if (Allocator->useMemoryTaggingTestOnly()) { // Check that disabling memory tagging works correctly. void *P = Allocator->allocate(2048, Origin); EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 0xaa, ""); @@ -238,7 +365,7 @@ template <class Config> static void testAllocator() { Allocator->deallocate(P, Origin); P = Allocator->allocate(2048, Origin); - EXPECT_EQ(Allocator->untagPointerMaybe(P), P); + EXPECT_EQ(scudo::untagPointer(P), P); reinterpret_cast<char *>(P)[2048] = 0xaa; Allocator->deallocate(P, Origin); @@ -248,6 +375,10 @@ template <class Config> static void testAllocator() { // Re-enable them now. 
scudo::enableMemoryTagChecksTestOnly(); } +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) { + auto *Allocator = this->Allocator.get(); scudo::uptr BufferSize = 8192; std::vector<char> Buffer(BufferSize); @@ -265,63 +396,52 @@ template <class Config> static void testAllocator() { EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos); } -// Test that multiple instantiations of the allocator have not messed up the -// process's signal handlers (GWP-ASan used to do this). -void testSEGV() { - const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); - EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); -} +SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) { + auto *Allocator = this->Allocator.get(); -TEST(ScudoCombinedTest, BasicCombined) { - UseQuarantine = false; - testAllocator<scudo::AndroidSvelteConfig>(); -#if SCUDO_FUCHSIA - testAllocator<scudo::FuchsiaConfig>(); -#else - testAllocator<scudo::DefaultConfig>(); - UseQuarantine = true; - testAllocator<scudo::AndroidConfig>(); - testSEGV(); -#endif -} + std::vector<void *> V; + for (scudo::uptr I = 0; I < 64U; I++) + V.push_back(Allocator->allocate( + rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + for (auto P : V) + Allocator->deallocate(P, Origin); -template <typename AllocatorT> static void stressAllocator(AllocatorT *A) { - { - std::unique_lock<std::mutex> Lock(Mutex); - while (!Ready) - Cv.wait(Lock); - } - std::vector<std::pair<void *, scudo::uptr>> V; - for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = std::rand() % 4096U; - void *P = A->allocate(Size, Origin); - // A region could have ran out of memory, resulting in a null P. - if (P) - V.push_back(std::make_pair(P, Size)); - } - while (!V.empty()) { - auto Pair = V.back(); - A->deallocate(Pair.first, Origin, Pair.second); - V.pop_back(); - } + bool UnlockRequired; + auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + EXPECT_TRUE(!TSD->Cache.isEmpty()); + TSD->Cache.drain(); + EXPECT_TRUE(TSD->Cache.isEmpty()); + if (UnlockRequired) + TSD->unlock(); } -template <class Config> static void testAllocatorThreaded() { - using AllocatorT = scudo::Allocator<Config>; - auto Deleter = [](AllocatorT *A) { - A->unmapTestOnly(); - delete A; - }; - std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, - Deleter); - Allocator->reset(); +SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { + std::mutex Mutex; + std::condition_variable Cv; + bool Ready = false; + auto *Allocator = this->Allocator.get(); std::thread Threads[32]; for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) - Threads[I] = std::thread(stressAllocator<AllocatorT>, Allocator.get()); + Threads[I] = std::thread([&]() { + { + std::unique_lock<std::mutex> Lock(Mutex); + while (!Ready) + Cv.wait(Lock); + } + std::vector<std::pair<void *, scudo::uptr>> V; + for (scudo::uptr I = 0; I < 256U; I++) { + const scudo::uptr Size = std::rand() % 4096U; + void *P = Allocator->allocate(Size, Origin); + // A region could have ran out of memory, resulting in a null P. 
+ if (P) + V.push_back(std::make_pair(P, Size)); + } + while (!V.empty()) { + auto Pair = V.back(); + Allocator->deallocate(Pair.first, Origin, Pair.second); + V.pop_back(); + } + }); { std::unique_lock<std::mutex> Lock(Mutex); Ready = true; @@ -332,16 +452,21 @@ template <class Config> static void testAllocatorThreaded() { Allocator->releaseToOS(); } -TEST(ScudoCombinedTest, ThreadedCombined) { - UseQuarantine = false; - testAllocatorThreaded<scudo::AndroidSvelteConfig>(); #if SCUDO_FUCHSIA - testAllocatorThreaded<scudo::FuchsiaConfig>(); +#define SKIP_ON_FUCHSIA(T) DISABLED_##T #else - testAllocatorThreaded<scudo::DefaultConfig>(); - UseQuarantine = true; - testAllocatorThreaded<scudo::AndroidConfig>(); +#define SKIP_ON_FUCHSIA(T) T #endif + +// Test that multiple instantiations of the allocator have not messed up the +// process's signal handlers (GWP-ASan used to do this). +TEST(ScudoCombinedTest, SKIP_ON_FUCHSIA(testSEGV)) { + const scudo::uptr Size = 4 * scudo::getPageSizeCached(); + scudo::MapPlatformData Data = {}; + void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); + EXPECT_NE(P, nullptr); + EXPECT_DEATH(memset(P, 0xaa, Size), ""); + scudo::unmap(P, Size, UNMAP_ALL, &Data); } struct DeathSizeClassConfig { @@ -355,23 +480,24 @@ struct DeathSizeClassConfig { static const scudo::uptr DeathRegionSizeLog = 20U; struct DeathConfig { + static const bool MaySupportMemoryTagging = false; + // Tiny allocator, its Primary only serves chunks of four sizes. - using DeathSizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>; - typedef scudo::SizeClassAllocator64<DeathSizeClassMap, DeathRegionSizeLog> - Primary; - typedef scudo::MapAllocator<scudo::MapAllocatorNoCache> Secondary; - template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>; + using SizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>; + typedef scudo::SizeClassAllocator64<DeathConfig> Primary; + static const scudo::uptr PrimaryRegionSizeLog = DeathRegionSizeLog; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr PrimaryCompactPtrT; + static const scudo::uptr PrimaryCompactPtrScale = 0; + + typedef scudo::MapAllocatorNoCache SecondaryCache; + template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>; }; TEST(ScudoCombinedTest, DeathCombined) { - using AllocatorT = scudo::Allocator<DeathConfig>; - auto Deleter = [](AllocatorT *A) { - A->unmapTestOnly(); - delete A; - }; - std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, - Deleter); - Allocator->reset(); + using AllocatorT = TestAllocator<DeathConfig>; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); const scudo::uptr Size = 1000U; void *P = Allocator->allocate(Size, Origin); @@ -405,14 +531,8 @@ TEST(ScudoCombinedTest, DeathCombined) { // Ensure that releaseToOS can be called prior to any other allocator // operation without issue. 
TEST(ScudoCombinedTest, ReleaseToOS) { - using AllocatorT = scudo::Allocator<DeathConfig>; - auto Deleter = [](AllocatorT *A) { - A->unmapTestOnly(); - delete A; - }; - std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, - Deleter); - Allocator->reset(); + using AllocatorT = TestAllocator<DeathConfig>; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); Allocator->releaseToOS(); } @@ -420,25 +540,19 @@ TEST(ScudoCombinedTest, ReleaseToOS) { // Verify that when a region gets full, the allocator will still manage to // fulfill the allocation through a larger size class. TEST(ScudoCombinedTest, FullRegion) { - using AllocatorT = scudo::Allocator<DeathConfig>; - auto Deleter = [](AllocatorT *A) { - A->unmapTestOnly(); - delete A; - }; - std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, - Deleter); - Allocator->reset(); + using AllocatorT = TestAllocator<DeathConfig>; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); std::vector<void *> V; scudo::uptr FailedAllocationsCount = 0; for (scudo::uptr ClassId = 1U; - ClassId <= DeathConfig::DeathSizeClassMap::LargestClassId; ClassId++) { + ClassId <= DeathConfig::SizeClassMap::LargestClassId; ClassId++) { const scudo::uptr Size = - DeathConfig::DeathSizeClassMap::getSizeByClassId(ClassId); + DeathConfig::SizeClassMap::getSizeByClassId(ClassId); // Allocate enough to fill all of the regions above this one. const scudo::uptr MaxNumberOfChunks = ((1U << DeathRegionSizeLog) / Size) * - (DeathConfig::DeathSizeClassMap::LargestClassId - ClassId + 1); + (DeathConfig::SizeClassMap::LargestClassId - ClassId + 1); void *P; for (scudo::uptr I = 0; I <= MaxNumberOfChunks; I++) { P = Allocator->allocate(Size - 64U, Origin); @@ -454,3 +568,83 @@ TEST(ScudoCombinedTest, FullRegion) { } EXPECT_EQ(FailedAllocationsCount, 0U); } + +TEST(ScudoCombinedTest, OddEven) { + using AllocatorT = TestAllocator<scudo::AndroidConfig>; + using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); + + if (!Allocator->useMemoryTaggingTestOnly()) + return; + + auto CheckOddEven = [](scudo::uptr P1, scudo::uptr P2) { + scudo::uptr Tag1 = scudo::extractTag(scudo::loadTag(P1)); + scudo::uptr Tag2 = scudo::extractTag(scudo::loadTag(P2)); + EXPECT_NE(Tag1 % 2, Tag2 % 2); + }; + + for (scudo::uptr ClassId = 1U; ClassId <= SizeClassMap::LargestClassId; + ClassId++) { + const scudo::uptr Size = SizeClassMap::getSizeByClassId(ClassId); + + std::set<scudo::uptr> Ptrs; + bool Found = false; + for (unsigned I = 0; I != 65536; ++I) { + scudo::uptr P = scudo::untagPointer(reinterpret_cast<scudo::uptr>( + Allocator->allocate(Size - scudo::Chunk::getHeaderSize(), Origin))); + if (Ptrs.count(P - Size)) { + Found = true; + CheckOddEven(P, P - Size); + break; + } + if (Ptrs.count(P + Size)) { + Found = true; + CheckOddEven(P, P + Size); + break; + } + Ptrs.insert(P); + } + EXPECT_TRUE(Found); + } +} + +TEST(ScudoCombinedTest, DisableMemInit) { + using AllocatorT = TestAllocator<scudo::AndroidConfig>; + using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap; + auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT()); + + std::vector<void *> Ptrs(65536, nullptr); + + Allocator->setOption(scudo::Option::ThreadDisableMemInit, 1); + + constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U); + + // Test that if mem-init is disabled on a thread, calloc should still work as + // expected. 
This is tricky to ensure when MTE is enabled, so this test tries + // to exercise the relevant code on our MTE path. + for (scudo::uptr ClassId = 1U; ClassId <= 8; ClassId++) { + const scudo::uptr Size = + SizeClassMap::getSizeByClassId(ClassId) - scudo::Chunk::getHeaderSize(); + if (Size < 8) + continue; + for (unsigned I = 0; I != Ptrs.size(); ++I) { + Ptrs[I] = Allocator->allocate(Size, Origin); + memset(Ptrs[I], 0xaa, Size); + } + for (unsigned I = 0; I != Ptrs.size(); ++I) + Allocator->deallocate(Ptrs[I], Origin, Size); + for (unsigned I = 0; I != Ptrs.size(); ++I) { + Ptrs[I] = Allocator->allocate(Size - 8, Origin); + memset(Ptrs[I], 0xbb, Size - 8); + } + for (unsigned I = 0; I != Ptrs.size(); ++I) + Allocator->deallocate(Ptrs[I], Origin, Size - 8); + for (unsigned I = 0; I != Ptrs.size(); ++I) { + Ptrs[I] = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); + for (scudo::uptr J = 0; J < Size; ++J) + ASSERT_EQ((reinterpret_cast<char *>(Ptrs[I]))[J], 0); + } + } + + Allocator->setOption(scudo::Option::ThreadDisableMemInit, 0); +} diff --git a/standalone/tests/mutex_test.cpp b/standalone/tests/mutex_test.cpp index ce715a19332..ed56cb5219e 100644 --- a/standalone/tests/mutex_test.cpp +++ b/standalone/tests/mutex_test.cpp @@ -52,7 +52,7 @@ private: static const scudo::u32 Size = 64U; typedef scudo::u64 T; scudo::HybridMutex &Mutex; - ALIGNED(SCUDO_CACHE_LINE_SIZE) T Data[Size]; + alignas(SCUDO_CACHE_LINE_SIZE) T Data[Size]; }; const scudo::u32 NumberOfThreads = 8; diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp index 010bf84490e..e7aa6f795b6 100644 --- a/standalone/tests/primary_test.cpp +++ b/standalone/tests/primary_test.cpp @@ -14,6 +14,7 @@ #include <condition_variable> #include <mutex> +#include <stdlib.h> #include <thread> #include <vector> @@ -21,16 +22,90 @@ // 32-bit architectures. It's not something we want to encourage, but we still // should ensure the tests pass. 
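Both the combined_test.cpp changes above and the primary_test.cpp changes that follow drop their per-config helper functions in favor of the SCUDO_TYPED_TEST macros, which stamp out one gtest TEST_F per allocator config. For a single fixture/test/config triple the macros shown above roughly expand to the following (sketched here for the combined test with DefaultConfig; the primary-test variant is identical except that the config type is not qualified with scudo::):

    template <class TypeParam>
    struct ScudoCombinedTestZeroFill : public ScudoCombinedTest<TypeParam> {
      void Run();
    };
    // One alias plus one TEST_F per entry in SCUDO_TYPED_TEST_ALL_TYPES:
    using ScudoCombinedTestZeroFill_DefaultConfig =
        ScudoCombinedTestZeroFill<scudo::DefaultConfig>;
    TEST_F(ScudoCombinedTestZeroFill_DefaultConfig, ZeroFill) { Run(); }
    // The braces following the SCUDO_TYPED_TEST(...) invocation become the
    // body of this member function:
    template <class TypeParam>
    void ScudoCombinedTestZeroFill<TypeParam>::Run() { /* test body */ }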
-template <typename Primary> static void testPrimary() { - const scudo::uptr NumberOfAllocations = 32U; - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +struct TestConfig1 { + static const scudo::uptr PrimaryRegionSizeLog = 18U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; + typedef scudo::uptr PrimaryCompactPtrT; + static const scudo::uptr PrimaryCompactPtrScale = 0; +}; + +struct TestConfig2 { + static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; + typedef scudo::uptr PrimaryCompactPtrT; + static const scudo::uptr PrimaryCompactPtrScale = 0; +}; + +struct TestConfig3 { + static const scudo::uptr PrimaryRegionSizeLog = 24U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = true; + typedef scudo::uptr PrimaryCompactPtrT; + static const scudo::uptr PrimaryCompactPtrScale = 0; +}; + +template <typename BaseConfig, typename SizeClassMapT> +struct Config : public BaseConfig { + using SizeClassMap = SizeClassMapT; +}; + +template <typename BaseConfig, typename SizeClassMapT> +struct SizeClassAllocator + : public scudo::SizeClassAllocator64<Config<BaseConfig, SizeClassMapT>> {}; +template <typename SizeClassMapT> +struct SizeClassAllocator<TestConfig1, SizeClassMapT> + : public scudo::SizeClassAllocator32<Config<TestConfig1, SizeClassMapT>> {}; + +template <typename BaseConfig, typename SizeClassMapT> +struct TestAllocator : public SizeClassAllocator<BaseConfig, SizeClassMapT> { + ~TestAllocator() { this->unmapTestOnly(); } + + void *operator new(size_t size) { + void *p = nullptr; + EXPECT_EQ(0, posix_memalign(&p, alignof(TestAllocator), size)); + return p; + } + + void operator delete(void *ptr) { free(ptr); } +}; + +template <class BaseConfig> struct ScudoPrimaryTest : public Test {}; + +#if SCUDO_FUCHSIA +#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) +#else +#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) +#endif + +#define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ + using FIXTURE##NAME##_##TYPE = FIXTURE##NAME<TYPE>; \ + TEST_F(FIXTURE##NAME##_##TYPE, NAME) { Run(); } + +#define SCUDO_TYPED_TEST(FIXTURE, NAME) \ + template <class TypeParam> \ + struct FIXTURE##NAME : public FIXTURE<TypeParam> { \ + void Run(); \ + }; \ + SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ + template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run() + +SCUDO_TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); + const scudo::uptr NumberOfAllocations = 32U; for (scudo::uptr I = 0; I <= 16U; I++) { const scudo::uptr Size = 1UL << I; if 
(!Primary::canAllocate(Size)) @@ -52,19 +127,20 @@ template <typename Primary> static void testPrimary() { Str.output(); } -TEST(ScudoPrimaryTest, BasicPrimary) { +struct SmallRegionsConfig { using SizeClassMap = scudo::DefaultSizeClassMap; -#if !SCUDO_FUCHSIA - testPrimary<scudo::SizeClassAllocator32<SizeClassMap, 18U>>(); -#endif - testPrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U>>(); - testPrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>(); -} + static const scudo::uptr PrimaryRegionSizeLog = 20U; + static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + static const bool MaySupportMemoryTagging = false; + typedef scudo::uptr PrimaryCompactPtrT; + static const scudo::uptr PrimaryCompactPtrScale = 0; +}; // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. // For the 32-bit one, it requires actually exhausting memory, so we skip it. TEST(ScudoPrimaryTest, Primary64OOM) { - using Primary = scudo::SizeClassAllocator64<scudo::DefaultSizeClassMap, 20U>; + using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>; using TransferBatch = Primary::CacheT::TransferBatch; Primary Allocator; Allocator.init(/*ReleaseToOsInterval=*/-1); @@ -83,7 +159,7 @@ TEST(ScudoPrimaryTest, Primary64OOM) { break; } for (scudo::u32 J = 0; J < B->getCount(); J++) - memset(B->get(J), 'B', Size); + memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size); Batches.push_back(B); } while (!Batches.empty()) { @@ -99,12 +175,9 @@ TEST(ScudoPrimaryTest, Primary64OOM) { Allocator.unmapTestOnly(); } -template <typename Primary> static void testIteratePrimary() { - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); @@ -138,53 +211,40 @@ template <typename Primary> static void testIteratePrimary() { Str.output(); } -TEST(ScudoPrimaryTest, PrimaryIterate) { - using SizeClassMap = scudo::DefaultSizeClassMap; -#if !SCUDO_FUCHSIA - testIteratePrimary<scudo::SizeClassAllocator32<SizeClassMap, 18U>>(); -#endif - testIteratePrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U>>(); - testIteratePrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>(); -} - -static std::mutex Mutex; -static std::condition_variable Cv; -static bool Ready = false; - -template <typename Primary> static void performAllocations(Primary *Allocator) { - static THREADLOCAL typename Primary::CacheT Cache; - Cache.init(nullptr, Allocator); - std::vector<std::pair<scudo::uptr, void *>> V; - { - std::unique_lock<std::mutex> Lock(Mutex); - while (!Ready) - Cv.wait(Lock); - } - for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = std::rand() % Primary::SizeClassMap::MaxSize / 4; - const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); - void *P = Cache.allocate(ClassId); - if (P) - V.push_back(std::make_pair(ClassId, P)); - } - while (!V.empty()) { - auto Pair = V.back(); - Cache.deallocate(Pair.first, Pair.second); - V.pop_back(); - } - Cache.destroy(nullptr); -} - -template <typename Primary> static void testPrimaryThreaded() { - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - 
}; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { + using Primary = TestAllocator<TypeParam, scudo::SvelteSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); + std::mutex Mutex; + std::condition_variable Cv; + bool Ready = false; std::thread Threads[32]; for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) - Threads[I] = std::thread(performAllocations<Primary>, Allocator.get()); + Threads[I] = std::thread([&]() { + static thread_local typename Primary::CacheT Cache; + Cache.init(nullptr, Allocator.get()); + std::vector<std::pair<scudo::uptr, void *>> V; + { + std::unique_lock<std::mutex> Lock(Mutex); + while (!Ready) + Cv.wait(Lock); + } + for (scudo::uptr I = 0; I < 256U; I++) { + const scudo::uptr Size = + std::rand() % Primary::SizeClassMap::MaxSize / 4; + const scudo::uptr ClassId = + Primary::SizeClassMap::getClassIdBySize(Size); + void *P = Cache.allocate(ClassId); + if (P) + V.push_back(std::make_pair(ClassId, P)); + } + while (!V.empty()) { + auto Pair = V.back(); + Cache.deallocate(Pair.first, Pair.second); + V.pop_back(); + } + Cache.destroy(nullptr); + }); { std::unique_lock<std::mutex> Lock(Mutex); Ready = true; @@ -198,24 +258,12 @@ template <typename Primary> static void testPrimaryThreaded() { Str.output(); } -TEST(ScudoPrimaryTest, PrimaryThreaded) { - using SizeClassMap = scudo::SvelteSizeClassMap; -#if !SCUDO_FUCHSIA - testPrimaryThreaded<scudo::SizeClassAllocator32<SizeClassMap, 18U>>(); -#endif - testPrimaryThreaded<scudo::SizeClassAllocator64<SizeClassMap, 24U>>(); - testPrimaryThreaded<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>(); -} - // Through a simple allocation that spans two pages, verify that releaseToOS // actually releases some bytes (at least one page worth). This is a regression // test for an error in how the release criteria were computed. 
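The "at least one page worth" claim follows from the block size alone: if a freed block occupies [P, P + 2 * PageSize), then roundUpTo(P, PageSize) <= P + PageSize - 1, so the aligned page [roundUpTo(P, PageSize), roundUpTo(P, PageSize) + PageSize) fits entirely inside the block because roundUpTo(P, PageSize) + PageSize <= P + 2 * PageSize - 1. With a 4 KiB page, for example, an 8192-byte block always covers one full aligned page whatever its starting alignment, so once the cache is drained releaseToOS() should be able to report at least PageSize bytes released.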
-template <typename Primary> static void testReleaseToOS() { - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +SCUDO_TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); @@ -228,12 +276,3 @@ template <typename Primary> static void testReleaseToOS() { Cache.destroy(nullptr); EXPECT_GT(Allocator->releaseToOS(), 0U); } - -TEST(ScudoPrimaryTest, ReleaseToOS) { - using SizeClassMap = scudo::DefaultSizeClassMap; -#if !SCUDO_FUCHSIA - testReleaseToOS<scudo::SizeClassAllocator32<SizeClassMap, 18U>>(); -#endif - testReleaseToOS<scudo::SizeClassAllocator64<SizeClassMap, 24U>>(); - testReleaseToOS<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>(); -} diff --git a/standalone/tests/quarantine_test.cpp b/standalone/tests/quarantine_test.cpp index 0422c2ff373..91de56a78c9 100644 --- a/standalone/tests/quarantine_test.cpp +++ b/standalone/tests/quarantine_test.cpp @@ -219,12 +219,17 @@ TEST(ScudoQuarantineTest, GlobalQuarantine) { Str.output(); } -void *populateQuarantine(void *Param) { +struct PopulateQuarantineThread { + pthread_t Thread; + QuarantineT *Quarantine; CacheT Cache; - Cache.init(); - QuarantineT *Quarantine = reinterpret_cast<QuarantineT *>(Param); +}; + +void *populateQuarantine(void *Param) { + PopulateQuarantineThread *P = static_cast<PopulateQuarantineThread *>(Param); + P->Cache.init(); for (scudo::uptr I = 0; I < 128UL; I++) - Quarantine->put(&Cache, Cb, FakePtr, LargeBlockSize); + P->Quarantine->put(&P->Cache, Cb, FakePtr, LargeBlockSize); return 0; } @@ -233,13 +238,18 @@ TEST(ScudoQuarantineTest, ThreadedGlobalQuarantine) { Quarantine.init(MaxQuarantineSize, MaxCacheSize); const scudo::uptr NumberOfThreads = 32U; - pthread_t T[NumberOfThreads]; - for (scudo::uptr I = 0; I < NumberOfThreads; I++) - pthread_create(&T[I], 0, populateQuarantine, &Quarantine); + PopulateQuarantineThread T[NumberOfThreads]; + for (scudo::uptr I = 0; I < NumberOfThreads; I++) { + T[I].Quarantine = &Quarantine; + pthread_create(&T[I].Thread, 0, populateQuarantine, &T[I]); + } for (scudo::uptr I = 0; I < NumberOfThreads; I++) - pthread_join(T[I], 0); + pthread_join(T[I].Thread, 0); scudo::ScopedString Str(1024); Quarantine.getStats(&Str); Str.output(); + + for (scudo::uptr I = 0; I < NumberOfThreads; I++) + Quarantine.drainAndRecycle(&T[I].Cache, Cb); } diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp index 8907520d30c..04c02891e91 100644 --- a/standalone/tests/release_test.cpp +++ b/standalone/tests/release_test.cpp @@ -38,7 +38,8 @@ TEST(ScudoReleaseTest, PackedCounterArray) { // Make sure counters request one memory page for the buffer. 
const scudo::uptr NumCounters = (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I); - scudo::PackedCounterArray Counters(1U, NumCounters, 1UL << ((1UL << I) - 1)); + scudo::PackedCounterArray Counters(1U, NumCounters, + 1UL << ((1UL << I) - 1)); Counters.inc(0U, 0U); for (scudo::uptr C = 1; C < NumCounters - 1; C++) { EXPECT_EQ(0UL, Counters.get(0U, C)); @@ -48,7 +49,7 @@ TEST(ScudoReleaseTest, PackedCounterArray) { EXPECT_EQ(0UL, Counters.get(0U, NumCounters - 1)); Counters.inc(0U, NumCounters - 1); if (I > 0) { - Counters.incRange(0U, 0U, NumCounters - 1); + Counters.incRange(0u, 0U, NumCounters - 1); for (scudo::uptr C = 0; C < NumCounters; C++) EXPECT_EQ(2UL, Counters.get(0U, C)); } @@ -123,6 +124,8 @@ public: for (scudo::uptr I = From; I < To; I += PageSize) ReportedPages.insert(I); } + + scudo::uptr getBase() const { return 0; } }; // Simplified version of a TransferBatch. @@ -189,9 +192,11 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { } // Release the memory. + auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; }; + auto DecompactPtr = [](scudo::uptr P) { return P; }; ReleasedPagesRecorder Recorder; - releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, 1U, BlockSize, - &Recorder); + releaseFreeMemoryToOS(FreeList, MaxBlocks * BlockSize, 1U, BlockSize, + &Recorder, DecompactPtr, SkipRegion); // Verify that there are no released pages touched by used chunks and all // ranges of free chunks big enough to contain the entire memory pages had @@ -240,7 +245,9 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { if (InFreeRange) { scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize); - while (P + PageSize <= MaxBlocks * BlockSize) { + const scudo::uptr EndPage = + scudo::roundUpTo(MaxBlocks * BlockSize, PageSize); + while (P + PageSize <= EndPage) { const bool PageReleased = Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end(); EXPECT_EQ(true, PageReleased); diff --git a/standalone/tests/scudo_unit_test.h b/standalone/tests/scudo_unit_test.h index 55d039ef77c..555a935254c 100644 --- a/standalone/tests/scudo_unit_test.h +++ b/standalone/tests/scudo_unit_test.h @@ -10,16 +10,23 @@ #if SCUDO_FUCHSIA #include <zxtest/zxtest.h> +using Test = ::zxtest::Test; #else #include "gtest/gtest.h" +using Test = ::testing::Test; #endif // If EXPECT_DEATH isn't defined, make it a no-op. #ifndef EXPECT_DEATH +// If ASSERT_DEATH is defined, make EXPECT_DEATH a wrapper to it. +#ifdef ASSERT_DEATH +#define EXPECT_DEATH(X, Y) ASSERT_DEATH(([&] { X; }), "") +#else #define EXPECT_DEATH(X, Y) \ do { \ } while (0) -#endif +#endif // ASSERT_DEATH +#endif // EXPECT_DEATH // If EXPECT_STREQ isn't defined, define our own simple one. #ifndef EXPECT_STREQ diff --git a/standalone/tests/scudo_unit_test_main.cpp b/standalone/tests/scudo_unit_test_main.cpp index 20deca998d9..9bbf6e75a5c 100644 --- a/standalone/tests/scudo_unit_test_main.cpp +++ b/standalone/tests/scudo_unit_test_main.cpp @@ -29,11 +29,11 @@ __scudo_default_options() { "dealloc_type_mismatch=" DEALLOC_TYPE_MISMATCH; } -int main(int argc, char **argv) { +// The zxtest library provides a default main function that does the same thing +// for Fuchsia builds. 
#if !SCUDO_FUCHSIA +int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -#else - return RUN_ALL_TESTS(argc, argv); -#endif } +#endif diff --git a/standalone/tests/secondary_test.cpp b/standalone/tests/secondary_test.cpp index d2260b9c15b..a55704297de 100644 --- a/standalone/tests/secondary_test.cpp +++ b/standalone/tests/secondary_test.cpp @@ -8,6 +8,7 @@ #include "tests/scudo_unit_test.h" +#include "allocator_config.h" #include "secondary.h" #include <stdio.h> @@ -18,35 +19,37 @@ #include <thread> #include <vector> -template <class SecondaryT> static void testSecondaryBasic(void) { +template <typename Config> static void testSecondaryBasic(void) { + using SecondaryT = scudo::MapAllocator<Config>; + scudo::GlobalStats S; S.init(); - SecondaryT *L = new SecondaryT; + std::unique_ptr<SecondaryT> L(new SecondaryT); L->init(&S); const scudo::uptr Size = 1U << 16; - void *P = L->allocate(Size); + void *P = L->allocate(scudo::Options{}, Size); EXPECT_NE(P, nullptr); memset(P, 'A', Size); EXPECT_GE(SecondaryT::getBlockSize(P), Size); - L->deallocate(P); + L->deallocate(scudo::Options{}, P); // If the Secondary can't cache that pointer, it will be unmapped. - if (!SecondaryT::canCache(Size)) + if (!L->canCache(Size)) EXPECT_DEATH(memset(P, 'A', Size), ""); const scudo::uptr Align = 1U << 16; - P = L->allocate(Size + Align, Align); + P = L->allocate(scudo::Options{}, Size + Align, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast<void *>( scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align)); memset(AlignedP, 'A', Size); - L->deallocate(P); + L->deallocate(scudo::Options{}, P); std::vector<void *> V; for (scudo::uptr I = 0; I < 32U; I++) - V.push_back(L->allocate(Size)); + V.push_back(L->allocate(scudo::Options{}, Size)); std::shuffle(V.begin(), V.end(), std::mt19937(std::random_device()())); while (!V.empty()) { - L->deallocate(V.back()); + L->deallocate(scudo::Options{}, V.back()); V.pop_back(); } scudo::ScopedString Str(1024); @@ -54,20 +57,29 @@ template <class SecondaryT> static void testSecondaryBasic(void) { Str.output(); } +struct NoCacheConfig { + typedef scudo::MapAllocatorNoCache SecondaryCache; + static const bool MaySupportMemoryTagging = false; +}; + +struct TestConfig { + typedef scudo::MapAllocatorCache<TestConfig> SecondaryCache; + static const bool MaySupportMemoryTagging = false; + static const scudo::u32 SecondaryCacheEntriesArraySize = 128U; + static const scudo::u32 SecondaryCacheQuarantineSize = 0U; + static const scudo::u32 SecondaryCacheDefaultMaxEntriesCount = 64U; + static const scudo::uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 20; + static const scudo::s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 SecondaryCacheMaxReleaseToOsIntervalMs = INT32_MAX; +}; + TEST(ScudoSecondaryTest, SecondaryBasic) { - testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorNoCache>>(); -#if !SCUDO_FUCHSIA - testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorCache<>>>(); - testSecondaryBasic< - scudo::MapAllocator<scudo::MapAllocatorCache<64U, 1UL << 20>>>(); -#endif + testSecondaryBasic<NoCacheConfig>(); + testSecondaryBasic<scudo::DefaultConfig>(); + testSecondaryBasic<TestConfig>(); } -#if SCUDO_FUCHSIA -using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorNoCache>; -#else -using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorCache<>>; -#endif +using LargeAllocator = scudo::MapAllocator<scudo::DefaultConfig>; // This exercises a variety of combinations of 
size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the @@ -75,7 +87,7 @@ using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorCache<>>; TEST(ScudoSecondaryTest, SecondaryCombinations) { constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16); constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign); - LargeAllocator *L = new LargeAllocator; + std::unique_ptr<LargeAllocator> L(new LargeAllocator); L->init(nullptr); for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) { for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16; @@ -88,12 +100,12 @@ TEST(ScudoSecondaryTest, SecondaryCombinations) { scudo::roundUpTo((1U << SizeLog) + Delta, MinAlign); const scudo::uptr Size = HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0); - void *P = L->allocate(Size, Align); + void *P = L->allocate(scudo::Options{}, Size, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast<void *>( scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align)); memset(AlignedP, 0xff, UserSize); - L->deallocate(P); + L->deallocate(scudo::Options{}, P); } } } @@ -103,12 +115,12 @@ TEST(ScudoSecondaryTest, SecondaryCombinations) { } TEST(ScudoSecondaryTest, SecondaryIterate) { - LargeAllocator *L = new LargeAllocator; + std::unique_ptr<LargeAllocator> L(new LargeAllocator); L->init(nullptr); std::vector<void *> V; const scudo::uptr PageSize = scudo::getPageSizeCached(); for (scudo::uptr I = 0; I < 32U; I++) - V.push_back(L->allocate((std::rand() % 16) * PageSize)); + V.push_back(L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize)); auto Lambda = [V](scudo::uptr Block) { EXPECT_NE(std::find(V.begin(), V.end(), reinterpret_cast<void *>(Block)), V.end()); @@ -117,7 +129,7 @@ TEST(ScudoSecondaryTest, SecondaryIterate) { L->iterateOverBlocks(Lambda); L->enable(); while (!V.empty()) { - L->deallocate(V.back()); + L->deallocate(scudo::Options{}, V.back()); V.pop_back(); } scudo::ScopedString Str(1024); @@ -125,9 +137,32 @@ TEST(ScudoSecondaryTest, SecondaryIterate) { Str.output(); } +TEST(ScudoSecondaryTest, SecondaryOptions) { + std::unique_ptr<LargeAllocator> L(new LargeAllocator); + L->init(nullptr); + // Attempt to set a maximum number of entries higher than the array size. + EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); + // A negative number will be cast to a scudo::u32, and fail. + EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, -1)); + if (L->canCache(0U)) { + // Various valid combinations. + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); + EXPECT_TRUE(L->canCache(1UL << 18)); + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17)); + EXPECT_FALSE(L->canCache(1UL << 18)); + EXPECT_TRUE(L->canCache(1UL << 16)); + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 0U)); + EXPECT_FALSE(L->canCache(1UL << 16)); + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); + EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); + EXPECT_TRUE(L->canCache(1UL << 16)); + } +} + static std::mutex Mutex; static std::condition_variable Cv; -static bool Ready = false; +static bool Ready; static void performAllocations(LargeAllocator *L) { std::vector<void *> V; @@ -140,24 +175,25 @@ static void performAllocations(LargeAllocator *L) { for (scudo::uptr I = 0; I < 128U; I++) { // Deallocate 75% of the blocks. 
const bool Deallocate = (rand() & 3) != 0; - void *P = L->allocate((std::rand() % 16) * PageSize); + void *P = L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize); if (Deallocate) - L->deallocate(P); + L->deallocate(scudo::Options{}, P); else V.push_back(P); } while (!V.empty()) { - L->deallocate(V.back()); + L->deallocate(scudo::Options{}, V.back()); V.pop_back(); } } TEST(ScudoSecondaryTest, SecondaryThreadsRace) { - LargeAllocator *L = new LargeAllocator; + Ready = false; + std::unique_ptr<LargeAllocator> L(new LargeAllocator); L->init(nullptr, /*ReleaseToOsInterval=*/0); std::thread Threads[16]; for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) - Threads[I] = std::thread(performAllocations, L); + Threads[I] = std::thread(performAllocations, L.get()); { std::unique_lock<std::mutex> Lock(Mutex); Ready = true; diff --git a/standalone/tests/tsd_test.cpp b/standalone/tests/tsd_test.cpp index 4a3cf1cd0fc..58ac9e74b98 100644 --- a/standalone/tests/tsd_test.cpp +++ b/standalone/tests/tsd_test.cpp @@ -13,6 +13,7 @@ #include <condition_variable> #include <mutex> +#include <set> #include <thread> // We mock out an allocator with a TSD registry, mostly using empty stubs. The @@ -47,12 +48,12 @@ private: struct OneCache { template <class Allocator> - using TSDRegistryT = scudo::TSDRegistrySharedT<Allocator, 1U>; + using TSDRegistryT = scudo::TSDRegistrySharedT<Allocator, 1U, 1U>; }; struct SharedCaches { template <class Allocator> - using TSDRegistryT = scudo::TSDRegistrySharedT<Allocator, 16U>; + using TSDRegistryT = scudo::TSDRegistrySharedT<Allocator, 16U, 8U>; }; struct ExclusiveCaches { @@ -116,7 +117,7 @@ TEST(ScudoTSDTest, TSDRegistryBasic) { static std::mutex Mutex; static std::condition_variable Cv; -static bool Ready = false; +static bool Ready; template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { auto Registry = Allocator->getTSDRegistry(); @@ -145,6 +146,7 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) { } template <class AllocatorT> static void testRegistryThreaded() { + Ready = false; auto Deleter = [](AllocatorT *A) { A->unmapTestOnly(); delete A; @@ -171,3 +173,74 @@ TEST(ScudoTSDTest, TSDRegistryThreaded) { testRegistryThreaded<MockAllocator<ExclusiveCaches>>(); #endif } + +static std::set<void *> Pointers; + +static void stressSharedRegistry(MockAllocator<SharedCaches> *Allocator) { + std::set<void *> Set; + auto Registry = Allocator->getTSDRegistry(); + { + std::unique_lock<std::mutex> Lock(Mutex); + while (!Ready) + Cv.wait(Lock); + } + Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false); + bool UnlockRequired; + for (scudo::uptr I = 0; I < 4096U; I++) { + auto TSD = Registry->getTSDAndLock(&UnlockRequired); + EXPECT_NE(TSD, nullptr); + Set.insert(reinterpret_cast<void *>(TSD)); + if (UnlockRequired) + TSD->unlock(); + } + { + std::unique_lock<std::mutex> Lock(Mutex); + Pointers.insert(Set.begin(), Set.end()); + } +} + +TEST(ScudoTSDTest, TSDRegistryTSDsCount) { + Ready = false; + Pointers.clear(); + using AllocatorT = MockAllocator<SharedCaches>; + auto Deleter = [](AllocatorT *A) { + A->unmapTestOnly(); + delete A; + }; + std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT, + Deleter); + Allocator->reset(); + // We attempt to use as many TSDs as the shared cache offers by creating a + // decent amount of threads that will be run concurrently and attempt to get + // and lock TSDs. We put them all in a set and count the number of entries + // after we are done. 
+ std::thread Threads[32]; + for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) + Threads[I] = std::thread(stressSharedRegistry, Allocator.get()); + { + std::unique_lock<std::mutex> Lock(Mutex); + Ready = true; + Cv.notify_all(); + } + for (auto &T : Threads) + T.join(); + // The initial number of TSDs we get will be the minimum of the default count + // and the number of CPUs. + EXPECT_LE(Pointers.size(), 8U); + Pointers.clear(); + auto Registry = Allocator->getTSDRegistry(); + // Increase the number of TSDs to 16. + Registry->setOption(scudo::Option::MaxTSDsCount, 16); + Ready = false; + for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) + Threads[I] = std::thread(stressSharedRegistry, Allocator.get()); + { + std::unique_lock<std::mutex> Lock(Mutex); + Ready = true; + Cv.notify_all(); + } + for (auto &T : Threads) + T.join(); + // We should get 16 distinct TSDs back. + EXPECT_EQ(Pointers.size(), 16U); +} diff --git a/standalone/tests/wrappers_c_test.cpp b/standalone/tests/wrappers_c_test.cpp index 8b2bc6ecbd5..eed8f031933 100644 --- a/standalone/tests/wrappers_c_test.cpp +++ b/standalone/tests/wrappers_c_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "scudo/interface.h" #include "tests/scudo_unit_test.h" #include <errno.h> @@ -41,8 +42,19 @@ TEST(ScudoWrappersCTest, Malloc) { EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); EXPECT_EQ(reinterpret_cast<uintptr_t>(P) % FIRST_32_SECOND_64(8U, 16U), 0U); + + // An update to this warning in Clang now triggers in this line, but it's ok + // because the check is expecting a bad pointer and should fail. +#if defined(__has_warning) && __has_warning("-Wfree-nonheap-object") +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfree-nonheap-object" +#endif EXPECT_DEATH( free(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(P) | 1U)), ""); +#if defined(__has_warning) && __has_warning("-Wfree-nonheap-object") +#pragma GCC diagnostic pop +#endif + free(P); EXPECT_DEATH(free(P), ""); @@ -82,6 +94,18 @@ TEST(ScudoWrappersCTest, Calloc) { EXPECT_EQ(errno, ENOMEM); } +TEST(ScudoWrappersCTest, SmallAlign) { + void *P; + for (size_t Size = 1; Size <= 0x10000; Size <<= 1) { + for (size_t Align = 1; Align <= 0x10000; Align <<= 1) { + for (size_t Count = 0; Count < 3; ++Count) { + P = memalign(Align, Size); + EXPECT_TRUE(reinterpret_cast<uintptr_t>(P) % Align == 0); + } + } + } +} + TEST(ScudoWrappersCTest, Memalign) { void *P; for (size_t I = FIRST_32_SECOND_64(2U, 3U); I <= 18U; I++) { @@ -188,14 +212,6 @@ TEST(ScudoWrappersCTest, Realloc) { } } -#ifndef M_DECAY_TIME -#define M_DECAY_TIME -100 -#endif - -#ifndef M_PURGE -#define M_PURGE -101 -#endif - #if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallOpt) { errno = 0; @@ -209,6 +225,12 @@ TEST(ScudoWrappersCTest, MallOpt) { EXPECT_EQ(mallopt(M_DECAY_TIME, 0), 1); EXPECT_EQ(mallopt(M_DECAY_TIME, 1), 1); EXPECT_EQ(mallopt(M_DECAY_TIME, 0), 1); + + if (SCUDO_ANDROID) { + EXPECT_EQ(mallopt(M_CACHE_COUNT_MAX, 100), 1); + EXPECT_EQ(mallopt(M_CACHE_SIZE_MAX, 1024 * 1024 * 2), 1); + EXPECT_EQ(mallopt(M_TSDS_COUNT_MAX, 10), 1); + } } #endif @@ -304,8 +326,10 @@ TEST(ScudoWrappersCTest, MallocIterateBoundary) { } } -// We expect heap operations within a disable/enable scope to deadlock. +// Fuchsia doesn't have alarm, fork or malloc_info. +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallocDisableDeadlock) { + // We expect heap operations within a disable/enable scope to deadlock. 
EXPECT_DEATH( { void *P = malloc(Size); @@ -319,9 +343,6 @@ TEST(ScudoWrappersCTest, MallocDisableDeadlock) { ""); } -// Fuchsia doesn't have fork or malloc_info. -#if !SCUDO_FUCHSIA - TEST(ScudoWrappersCTest, MallocInfo) { // Use volatile so that the allocations don't get optimized away. void *volatile P1 = malloc(1234); @@ -372,6 +393,7 @@ TEST(ScudoWrappersCTest, Fork) { static pthread_mutex_t Mutex; static pthread_cond_t Conditional = PTHREAD_COND_INITIALIZER; +static bool Ready; static void *enableMalloc(void *Unused) { // Initialize the allocator for this thread. @@ -382,6 +404,7 @@ static void *enableMalloc(void *Unused) { // Signal the main thread we are ready. pthread_mutex_lock(&Mutex); + Ready = true; pthread_cond_signal(&Conditional); pthread_mutex_unlock(&Mutex); @@ -394,11 +417,13 @@ static void *enableMalloc(void *Unused) { TEST(ScudoWrappersCTest, DisableForkEnable) { pthread_t ThreadId; + Ready = false; EXPECT_EQ(pthread_create(&ThreadId, nullptr, &enableMalloc, nullptr), 0); // Wait for the thread to be warmed up. pthread_mutex_lock(&Mutex); - pthread_cond_wait(&Conditional, &Mutex); + while (!Ready) + pthread_cond_wait(&Conditional, &Mutex); pthread_mutex_unlock(&Mutex); // Disable the allocator and fork. fork should succeed after malloc_enable. diff --git a/standalone/tests/wrappers_cpp_test.cpp b/standalone/tests/wrappers_cpp_test.cpp index 4ccef5bb0de..9df06dcdf14 100644 --- a/standalone/tests/wrappers_cpp_test.cpp +++ b/standalone/tests/wrappers_cpp_test.cpp @@ -66,6 +66,10 @@ public: }; TEST(ScudoWrappersCppTest, New) { + if (getenv("SKIP_TYPE_MISMATCH")) { + printf("Skipped type mismatch tests.\n"); + return; + } testCxxNew<bool>(); testCxxNew<uint8_t>(); testCxxNew<uint16_t>(); @@ -79,7 +83,7 @@ TEST(ScudoWrappersCppTest, New) { static std::mutex Mutex; static std::condition_variable Cv; -static bool Ready = false; +static bool Ready; static void stressNew() { std::vector<uintptr_t *> V; @@ -103,6 +107,7 @@ static void stressNew() { } TEST(ScudoWrappersCppTest, ThreadedNew) { + Ready = false; std::thread Threads[32]; for (size_t I = 0U; I < sizeof(Threads) / sizeof(Threads[0]); I++) Threads[I] = std::thread(stressNew); diff --git a/standalone/tools/compute_size_class_config.cpp b/standalone/tools/compute_size_class_config.cpp index 82f37b6647e..8b17be0e965 100644 --- a/standalone/tools/compute_size_class_config.cpp +++ b/standalone/tools/compute_size_class_config.cpp @@ -19,9 +19,8 @@ struct Alloc { }; size_t measureWastage(const std::vector<Alloc> &allocs, - const std::vector<size_t> &classes, - size_t pageSize, - size_t headerSize) { + const std::vector<size_t> &classes, size_t pageSize, + size_t headerSize) { size_t totalWastage = 0; for (auto &a : allocs) { size_t sizePlusHeader = a.size + headerSize; @@ -55,7 +54,8 @@ void readAllocs(std::vector<Alloc> &allocs, const char *path) { } Alloc a; - while (fscanf(f, "<alloc size=\"%zu\" count=\"%zu\"/>\n", &a.size, &a.count) == 2) + while (fscanf(f, "<alloc size=\"%zu\" count=\"%zu\"/>\n", &a.size, + &a.count) == 2) allocs.push_back(a); fclose(f); } @@ -157,5 +157,6 @@ struct MySizeClassConfig { }; static const uptr SizeDelta = %zu; }; -)", headerSize); +)", + headerSize); } diff --git a/standalone/tsd.h b/standalone/tsd.h index 20f0d69cabf..a6e669b66e6 100644 --- a/standalone/tsd.h +++ b/standalone/tsd.h @@ -23,10 +23,10 @@ namespace scudo { -template <class Allocator> struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { +template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { typename Allocator::CacheT 
Cache; typename Allocator::QuarantineCacheT QuarantineCache; - u8 DestructorIterations; + u8 DestructorIterations = 0; void initLinkerInitialized(Allocator *Instance) { Instance->initCache(&Cache); @@ -59,7 +59,7 @@ template <class Allocator> struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { private: HybridMutex Mutex; - atomic_uptr Precedence; + atomic_uptr Precedence = {}; }; } // namespace scudo diff --git a/standalone/tsd_exclusive.h b/standalone/tsd_exclusive.h index 3492509b5a8..a907ed4684a 100644 --- a/standalone/tsd_exclusive.h +++ b/standalone/tsd_exclusive.h @@ -13,10 +13,13 @@ namespace scudo { -enum class ThreadState : u8 { - NotInitialized = 0, - Initialized, - TornDown, +struct ThreadState { + bool DisableMemInit : 1; + enum { + NotInitialized = 0, + Initialized, + TornDown, + } InitState : 2; }; template <class Allocator> void teardownThread(void *Ptr); @@ -33,16 +36,30 @@ template <class Allocator> struct TSDRegistryExT { initLinkerInitialized(Instance); } - void unmapTestOnly() {} + void initOnceMaybe(Allocator *Instance) { + ScopedLock L(Mutex); + if (LIKELY(Initialized)) + return; + initLinkerInitialized(Instance); // Sets Initialized. + } + + void unmapTestOnly() { + Allocator *Instance = + reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey)); + if (!Instance) + return; + ThreadTSD.commitBack(Instance); + State = {}; + } ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { - if (LIKELY(State != ThreadState::NotInitialized)) + if (LIKELY(State.InitState != ThreadState::NotInitialized)) return; initThread(Instance, MinimalInit); } ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) { - if (LIKELY(State == ThreadState::Initialized && + if (LIKELY(State.InitState == ThreadState::Initialized && !atomic_load(&Disabled, memory_order_acquire))) { *UnlockRequired = false; return &ThreadTSD; @@ -66,14 +83,17 @@ template <class Allocator> struct TSDRegistryExT { Mutex.unlock(); } -private: - void initOnceMaybe(Allocator *Instance) { - ScopedLock L(Mutex); - if (LIKELY(Initialized)) - return; - initLinkerInitialized(Instance); // Sets Initialized. + bool setOption(Option O, UNUSED sptr Value) { + if (O == Option::ThreadDisableMemInit) + State.DisableMemInit = Value; + if (O == Option::MaxTSDsCount) + return false; + return true; } + bool getDisableMemInit() { return State.DisableMemInit; } + +private: // Using minimal initialization allows for global initialization while keeping // the thread specific structure untouched. The fallback structure will be // used instead. 
@@ -84,25 +104,25 @@ private: CHECK_EQ( pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0); ThreadTSD.initLinkerInitialized(Instance); - State = ThreadState::Initialized; + State.InitState = ThreadState::Initialized; Instance->callPostInitCallback(); } - pthread_key_t PThreadKey; - bool Initialized; - atomic_u8 Disabled; + pthread_key_t PThreadKey = {}; + bool Initialized = false; + atomic_u8 Disabled = {}; TSD<Allocator> FallbackTSD; HybridMutex Mutex; - static THREADLOCAL ThreadState State; - static THREADLOCAL TSD<Allocator> ThreadTSD; + static thread_local ThreadState State; + static thread_local TSD<Allocator> ThreadTSD; friend void teardownThread<Allocator>(void *Ptr); }; template <class Allocator> -THREADLOCAL TSD<Allocator> TSDRegistryExT<Allocator>::ThreadTSD; +thread_local TSD<Allocator> TSDRegistryExT<Allocator>::ThreadTSD; template <class Allocator> -THREADLOCAL ThreadState TSDRegistryExT<Allocator>::State; +thread_local ThreadState TSDRegistryExT<Allocator>::State; template <class Allocator> void teardownThread(void *Ptr) { typedef TSDRegistryExT<Allocator> TSDRegistryT; @@ -120,7 +140,7 @@ template <class Allocator> void teardownThread(void *Ptr) { return; } TSDRegistryT::ThreadTSD.commitBack(Instance); - TSDRegistryT::State = ThreadState::TornDown; + TSDRegistryT::State.InitState = ThreadState::TornDown; } } // namespace scudo diff --git a/standalone/tsd_shared.h b/standalone/tsd_shared.h index 038a5905ff4..afe3623ce40 100644 --- a/standalone/tsd_shared.h +++ b/standalone/tsd_shared.h @@ -9,36 +9,28 @@ #ifndef SCUDO_TSD_SHARED_H_ #define SCUDO_TSD_SHARED_H_ -#include "linux.h" // for getAndroidTlsPtr() #include "tsd.h" +#if SCUDO_HAS_PLATFORM_TLS_SLOT +// This is a platform-provided header that needs to be on the include path when +// Scudo is compiled. It must declare a function with the prototype: +// uintptr_t *getPlatformAllocatorTlsSlot() +// that returns the address of a thread-local word of storage reserved for +// Scudo, that must be zero-initialized in newly created threads. +#include "scudo_platform_tls_slot.h" +#endif + namespace scudo { -template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { +template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount> +struct TSDRegistrySharedT { void initLinkerInitialized(Allocator *Instance) { Instance->initLinkerInitialized(); - CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS - const u32 NumberOfCPUs = getNumberOfCPUs(); - NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0) - ? MaxTSDCount - : Min(NumberOfCPUs, MaxTSDCount); - for (u32 I = 0; I < NumberOfTSDs; I++) + for (u32 I = 0; I < TSDsArraySize; I++) TSDs[I].initLinkerInitialized(Instance); - // Compute all the coprimes of NumberOfTSDs. This will be used to walk the - // array of TSDs in a random order. For details, see: - // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/ - for (u32 I = 0; I < NumberOfTSDs; I++) { - u32 A = I + 1; - u32 B = NumberOfTSDs; - // Find the GCD between I + 1 and NumberOfTSDs. If 1, they are coprimes. - while (B != 0) { - const u32 T = A; - A = B; - B = T % B; - } - if (A == 1) - CoPrimes[NumberOfCoPrimes++] = I + 1; - } + const u32 NumberOfCPUs = getNumberOfCPUs(); + setNumberOfTSDs((NumberOfCPUs == 0) ? 
DefaultTSDCount + : Min(NumberOfCPUs, DefaultTSDCount)); Initialized = true; } void init(Allocator *Instance) { @@ -46,11 +38,15 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { initLinkerInitialized(Instance); } - void unmapTestOnly() { - setCurrentTSD(nullptr); - pthread_key_delete(PThreadKey); + void initOnceMaybe(Allocator *Instance) { + ScopedLock L(Mutex); + if (LIKELY(Initialized)) + return; + initLinkerInitialized(Instance); // Sets Initialized. } + void unmapTestOnly() { setCurrentTSD(nullptr); } + ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, UNUSED bool MinimalInit) { if (LIKELY(getCurrentTSD())) @@ -66,49 +62,88 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT { if (TSD->tryLock()) return TSD; // If that fails, go down the slow path. + if (TSDsArraySize == 1U) { + // Only 1 TSD, not need to go any further. + // The compiler will optimize this one way or the other. + TSD->lock(); + return TSD; + } return getTSDAndLockSlow(TSD); } void disable() { Mutex.lock(); - for (u32 I = 0; I < NumberOfTSDs; I++) + for (u32 I = 0; I < TSDsArraySize; I++) TSDs[I].lock(); } void enable() { - for (s32 I = static_cast<s32>(NumberOfTSDs - 1); I >= 0; I--) + for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--) TSDs[I].unlock(); Mutex.unlock(); } + bool setOption(Option O, sptr Value) { + if (O == Option::MaxTSDsCount) + return setNumberOfTSDs(static_cast<u32>(Value)); + if (O == Option::ThreadDisableMemInit) + setDisableMemInit(Value); + // Not supported by the TSD Registry, but not an error either. + return true; + } + + bool getDisableMemInit() const { return *getTlsPtr() & 1; } + private: - ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) { -#if _BIONIC - *getAndroidTlsPtr() = reinterpret_cast<uptr>(CurrentTSD); -#elif SCUDO_LINUX - ThreadTSD = CurrentTSD; + ALWAYS_INLINE uptr *getTlsPtr() const { +#if SCUDO_HAS_PLATFORM_TLS_SLOT + return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot()); #else - CHECK_EQ( - pthread_setspecific(PThreadKey, reinterpret_cast<void *>(CurrentTSD)), - 0); + static thread_local uptr ThreadTSD; + return &ThreadTSD; #endif } + static_assert(alignof(TSD<Allocator>) >= 2, ""); + + ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) { + *getTlsPtr() &= 1; + *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD); + } + ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() { -#if _BIONIC - return reinterpret_cast<TSD<Allocator> *>(*getAndroidTlsPtr()); -#elif SCUDO_LINUX - return ThreadTSD; -#else - return reinterpret_cast<TSD<Allocator> *>(pthread_getspecific(PThreadKey)); -#endif + return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL); } - void initOnceMaybe(Allocator *Instance) { - ScopedLock L(Mutex); - if (LIKELY(Initialized)) - return; - initLinkerInitialized(Instance); // Sets Initialized. + bool setNumberOfTSDs(u32 N) { + ScopedLock L(MutexTSDs); + if (N < NumberOfTSDs) + return false; + if (N > TSDsArraySize) + N = TSDsArraySize; + NumberOfTSDs = N; + NumberOfCoPrimes = 0; + // Compute all the coprimes of NumberOfTSDs. This will be used to walk the + // array of TSDs in a random order. For details, see: + // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/ + for (u32 I = 0; I < N; I++) { + u32 A = I + 1; + u32 B = N; + // Find the GCD between I + 1 and N. If 1, they are coprimes. 
+ while (B != 0) { + const u32 T = A; + A = B; + B = T % B; + } + if (A == 1) + CoPrimes[NumberOfCoPrimes++] = I + 1; + } + return true; + } + + void setDisableMemInit(bool B) { + *getTlsPtr() &= ~1ULL; + *getTlsPtr() |= B; } NOINLINE void initThread(Allocator *Instance) { @@ -120,17 +155,23 @@ private: } NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) { - if (MaxTSDCount > 1U && NumberOfTSDs > 1U) { - // Use the Precedence of the current TSD as our random seed. Since we are - // in the slow path, it means that tryLock failed, and as a result it's - // very likely that said Precedence is non-zero. - const u32 R = static_cast<u32>(CurrentTSD->getPrecedence()); - const u32 Inc = CoPrimes[R % NumberOfCoPrimes]; - u32 Index = R % NumberOfTSDs; + // Use the Precedence of the current TSD as our random seed. Since we are + // in the slow path, it means that tryLock failed, and as a result it's + // very likely that said Precedence is non-zero. + const u32 R = static_cast<u32>(CurrentTSD->getPrecedence()); + u32 N, Inc; + { + ScopedLock L(MutexTSDs); + N = NumberOfTSDs; + DCHECK_NE(NumberOfCoPrimes, 0U); + Inc = CoPrimes[R % NumberOfCoPrimes]; + } + if (N > 1U) { + u32 Index = R % N; uptr LowestPrecedence = UINTPTR_MAX; TSD<Allocator> *CandidateTSD = nullptr; // Go randomly through at most 4 contexts and find a candidate. - for (u32 I = 0; I < Min(4U, NumberOfTSDs); I++) { + for (u32 I = 0; I < Min(4U, N); I++) { if (TSDs[Index].tryLock()) { setCurrentTSD(&TSDs[Index]); return &TSDs[Index]; @@ -142,8 +183,8 @@ private: LowestPrecedence = Precedence; } Index += Inc; - if (Index >= NumberOfTSDs) - Index -= NumberOfTSDs; + if (Index >= N) + Index -= N; } if (CandidateTSD) { CandidateTSD->lock(); @@ -156,25 +197,16 @@ private: return CurrentTSD; } - pthread_key_t PThreadKey; - atomic_u32 CurrentIndex; - u32 NumberOfTSDs; - u32 NumberOfCoPrimes; - u32 CoPrimes[MaxTSDCount]; - bool Initialized; + atomic_u32 CurrentIndex = {}; + u32 NumberOfTSDs = 0; + u32 NumberOfCoPrimes = 0; + u32 CoPrimes[TSDsArraySize] = {}; + bool Initialized = false; HybridMutex Mutex; - TSD<Allocator> TSDs[MaxTSDCount]; -#if SCUDO_LINUX && !_BIONIC - static THREADLOCAL TSD<Allocator> *ThreadTSD; -#endif + HybridMutex MutexTSDs; + TSD<Allocator> TSDs[TSDsArraySize]; }; -#if SCUDO_LINUX && !_BIONIC -template <class Allocator, u32 MaxTSDCount> -THREADLOCAL TSD<Allocator> - *TSDRegistrySharedT<Allocator, MaxTSDCount>::ThreadTSD; -#endif - } // namespace scudo #endif // SCUDO_TSD_SHARED_H_ diff --git a/standalone/wrappers_c.cpp b/standalone/wrappers_c.cpp index 098cc089a1c..81c7dd60ee3 100644 --- a/standalone/wrappers_c.cpp +++ b/standalone/wrappers_c.cpp @@ -26,6 +26,7 @@ extern "C" void SCUDO_PREFIX(malloc_postinit)(); // Export the static allocator so that the C++ wrappers can access it. // Technically we could have a completely separated heap for C & C++ but in // reality the amount of cross pollination between the two is staggering. 
+SCUDO_REQUIRE_CONSTANT_INITIALIZATION scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; #include "wrappers_c.inc" diff --git a/standalone/wrappers_c.h b/standalone/wrappers_c.h index 33a0c53cec0..6d0cecdc4b4 100644 --- a/standalone/wrappers_c.h +++ b/standalone/wrappers_c.h @@ -41,12 +41,4 @@ struct __scudo_mallinfo { #define SCUDO_MALLINFO __scudo_mallinfo #endif -#ifndef M_DECAY_TIME -#define M_DECAY_TIME -100 -#endif - -#ifndef M_PURGE -#define M_PURGE -101 -#endif - #endif // SCUDO_WRAPPERS_C_H_ diff --git a/standalone/wrappers_c.inc b/standalone/wrappers_c.inc index 5a6c1a8d408..43efb02cb86 100644 --- a/standalone/wrappers_c.inc +++ b/standalone/wrappers_c.inc @@ -155,7 +155,7 @@ void SCUDO_PREFIX(malloc_postinit)() { SCUDO_PREFIX(malloc_enable)); } -INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) { +INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, int value) { if (param == M_DECAY_TIME) { if (SCUDO_ANDROID) { if (value == 0) { @@ -173,8 +173,29 @@ INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) { } else if (param == M_PURGE) { SCUDO_ALLOCATOR.releaseToOS(); return 1; + } else { + scudo::Option option; + switch (param) { + case M_MEMTAG_TUNING: + option = scudo::Option::MemtagTuning; + break; + case M_THREAD_DISABLE_MEM_INIT: + option = scudo::Option::ThreadDisableMemInit; + break; + case M_CACHE_COUNT_MAX: + option = scudo::Option::MaxCacheEntriesCount; + break; + case M_CACHE_SIZE_MAX: + option = scudo::Option::MaxCacheEntrySize; + break; + case M_TSDS_COUNT_MAX: + option = scudo::Option::MaxTSDsCount; + break; + default: + return 0; + } + return SCUDO_ALLOCATOR.setOption(option, static_cast<scudo::sptr>(value)); } - return 0; } INTERFACE WEAK void *SCUDO_PREFIX(aligned_alloc)(size_t alignment, @@ -213,10 +234,38 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { // Disable memory tagging for the heap. The caller must disable memory tag // checks globally (e.g. by clearing TCF0 on aarch64) before calling this -// function, and may not re-enable them after calling the function. The program -// must be single threaded at the point when the function is called. +// function, and may not re-enable them after calling the function. INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() { SCUDO_ALLOCATOR.disableMemoryTagging(); } +// Sets whether scudo records stack traces and other metadata for allocations +// and deallocations. This function only has an effect if the allocator and +// hardware support memory tagging. +INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) { + SCUDO_ALLOCATOR.setTrackAllocationStacks(track); +} + +// Sets whether scudo zero-initializes all allocated memory. +INTERFACE WEAK void SCUDO_PREFIX(malloc_set_zero_contents)(int zero_contents) { + SCUDO_ALLOCATOR.setFillContents(zero_contents ? scudo::ZeroFill + : scudo::NoFill); +} + +// Sets whether scudo pattern-initializes all allocated memory. +INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_pattern_fill_contents)(int pattern_fill_contents) { + SCUDO_ALLOCATOR.setFillContents( + pattern_fill_contents ? scudo::PatternOrZeroFill : scudo::NoFill); +} + +// Sets whether scudo adds a small amount of slack at the end of large +// allocations, before the guard page. This can be enabled to work around buggy +// applications that read a few bytes past the end of their allocation. 
+INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_add_large_allocation_slack)(int add_slack) { + SCUDO_ALLOCATOR.setAddLargeAllocationSlack(add_slack); +} + } // extern "C" diff --git a/standalone/wrappers_c_bionic.cpp b/standalone/wrappers_c_bionic.cpp index 7a012a23bcf..18c3bf2c0ed 100644 --- a/standalone/wrappers_c_bionic.cpp +++ b/standalone/wrappers_c_bionic.cpp @@ -23,6 +23,7 @@ #define SCUDO_ALLOCATOR Allocator extern "C" void SCUDO_PREFIX(malloc_postinit)(); +SCUDO_REQUIRE_CONSTANT_INITIALIZATION static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; @@ -36,6 +37,7 @@ static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> #define SCUDO_ALLOCATOR SvelteAllocator extern "C" void SCUDO_PREFIX(malloc_postinit)(); +SCUDO_REQUIRE_CONSTANT_INITIALIZATION static scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; @@ -48,4 +50,39 @@ static scudo::Allocator<scudo::AndroidSvelteConfig, // TODO(kostyak): support both allocators. INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); } +INTERFACE void +__scudo_get_error_info(struct scudo_error_info *error_info, + uintptr_t fault_addr, const char *stack_depot, + const char *region_info, const char *ring_buffer, + const char *memory, const char *memory_tags, + uintptr_t memory_addr, size_t memory_size) { + Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info, + ring_buffer, memory, memory_tags, memory_addr, + memory_size); +} + +INTERFACE const char *__scudo_get_stack_depot_addr() { + return Allocator.getStackDepotAddress(); +} + +INTERFACE size_t __scudo_get_stack_depot_size() { + return sizeof(scudo::StackDepot); +} + +INTERFACE const char *__scudo_get_region_info_addr() { + return Allocator.getRegionInfoArrayAddress(); +} + +INTERFACE size_t __scudo_get_region_info_size() { + return Allocator.getRegionInfoArraySize(); +} + +INTERFACE const char *__scudo_get_ring_buffer_addr() { + return Allocator.getRingBufferAddress(); +} + +INTERFACE size_t __scudo_get_ring_buffer_size() { + return Allocator.getRingBufferSize(); +} + #endif // SCUDO_ANDROID && _BIONIC |
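For reference, the mallopt() hunk in standalone/wrappers_c.inc above now routes several Bionic-specific parameters (M_CACHE_COUNT_MAX, M_CACHE_SIZE_MAX, M_TSDS_COUNT_MAX, M_THREAD_DISABLE_MEM_INIT, M_MEMTAG_TUNING) to scudo::Option values that are handled by the secondary cache and the shared TSD registry, and the MallOpt test exercises them on Android. The following sketch is illustrative only and is not part of the change above; it assumes an Android/Bionic <malloc.h> that defines these M_* constants as macros, and main() and the error messages are placeholders.

  // Illustrative sketch: runtime tuning of Scudo through the Bionic
  // mallopt() extensions wired up by this change. Each call returns 1 on
  // success and 0 if the allocator rejects the option.
  #include <malloc.h>
  #include <stdio.h>

  int main() {
  #if defined(M_CACHE_COUNT_MAX) && defined(M_CACHE_SIZE_MAX) && defined(M_TSDS_COUNT_MAX)
    // Secondary cache: keep at most 4 cached mappings of up to 1 MiB each
    // (scudo::Option::MaxCacheEntriesCount / MaxCacheEntrySize). A count
    // larger than the compile-time cache array size is rejected, which is
    // what the SecondaryOptions test checks with 4096.
    if (mallopt(M_CACHE_COUNT_MAX, 4) == 0 ||
        mallopt(M_CACHE_SIZE_MAX, 1 << 20) == 0)
      fprintf(stderr, "secondary cache tuning rejected\n");
    // Shared TSD registry: raise the TSD count to 8
    // (scudo::Option::MaxTSDsCount). Per setNumberOfTSDs() above, the
    // registry clamps the value to its compile-time array size and refuses
    // to shrink below the current count.
    if (mallopt(M_TSDS_COUNT_MAX, 8) == 0)
      fprintf(stderr, "TSD count tuning rejected\n");
  #endif
    // M_DECAY_TIME and M_PURGE predate this change; their local fallback
    // definitions were removed, so the values now come from <malloc.h>.
    mallopt(M_DECAY_TIME, 1); // adjust the release-to-OS decay behaviour
    mallopt(M_PURGE, 0);      // release free memory back to the OS now
    return 0;
  }

The MallOpt test in standalone/tests/wrappers_c_test.cpp shows the same calls succeeding on Android (mallopt(M_CACHE_COUNT_MAX, 100), mallopt(M_CACHE_SIZE_MAX, 1024 * 1024 * 2), mallopt(M_TSDS_COUNT_MAX, 10)); values exceeding the configured limits are the cases expected to fail.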