diff options
Diffstat (limited to 'libfuzzer/FuzzerMerge.cpp')
-rw-r--r-- | libfuzzer/FuzzerMerge.cpp | 211 |
1 files changed, 171 insertions, 40 deletions
diff --git a/libfuzzer/FuzzerMerge.cpp b/libfuzzer/FuzzerMerge.cpp index 162453c..24bd119 100644 --- a/libfuzzer/FuzzerMerge.cpp +++ b/libfuzzer/FuzzerMerge.cpp @@ -77,8 +77,8 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) { size_t ExpectedStartMarker = 0; const size_t kInvalidStartMarker = -1; size_t LastSeenStartMarker = kInvalidStartMarker; - Vector<uint32_t> TmpFeatures; - Set<uint32_t> PCs; + std::vector<uint32_t> TmpFeatures; + std::set<uint32_t> PCs; while (std::getline(IS, Line, '\n')) { std::istringstream ISS1(Line); std::string Marker; @@ -132,15 +132,16 @@ size_t Merger::ApproximateMemoryConsumption() const { // Decides which files need to be merged (add those to NewFiles). // Returns the number of new features added. -size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, - Set<uint32_t> *NewFeatures, - const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, - Vector<std::string> *NewFiles) { +size_t Merger::Merge(const std::set<uint32_t> &InitialFeatures, + std::set<uint32_t> *NewFeatures, + const std::set<uint32_t> &InitialCov, + std::set<uint32_t> *NewCov, + std::vector<std::string> *NewFiles) { NewFiles->clear(); NewFeatures->clear(); NewCov->clear(); assert(NumFilesInFirstCorpus <= Files.size()); - Set<uint32_t> AllFeatures = InitialFeatures; + std::set<uint32_t> AllFeatures = InitialFeatures; // What features are in the initial corpus? for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { @@ -150,7 +151,7 @@ size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, // Remove all features that we already know from all other inputs. 
for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { auto &Cur = Files[i].Features; - Vector<uint32_t> Tmp; + std::vector<uint32_t> Tmp; std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); Cur.swap(Tmp); @@ -188,15 +189,16 @@ size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, return NewFeatures->size(); } -Set<uint32_t> Merger::AllFeatures() const { - Set<uint32_t> S; +std::set<uint32_t> Merger::AllFeatures() const { + std::set<uint32_t> S; for (auto &File : Files) S.insert(File.Features.begin(), File.Features.end()); return S; } // Inner process. May crash if the target crashes. -void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { +void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath, + bool IsSetCoverMerge) { Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); Merger M; std::ifstream IF(CFPath); @@ -212,11 +214,11 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { M.Files.size() - M.FirstNotProcessedFile); std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); - Set<size_t> AllFeatures; + std::set<size_t> AllFeatures; auto PrintStatsWrapper = [this, &AllFeatures](const char* Where) { this->PrintStats(Where, "\n", 0, AllFeatures.size()); }; - Set<const TracePC::PCTableEntry *> AllPCs; + std::set<const TracePC::PCTableEntry *> AllPCs; for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { Fuzzer::MaybeExitGracefully(); auto U = FileToVector(M.Files[i].Name); @@ -234,13 +236,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { // Collect coverage. We are iterating over the files in this order: // * First, files in the initial corpus ordered by size, smallest first. // * Then, all other files, smallest first. - // So it makes no sense to record all features for all files, instead we - // only record features that were not seen before. 
- Set<size_t> UniqFeatures; - TPC.CollectFeatures([&](size_t Feature) { - if (AllFeatures.insert(Feature).second) - UniqFeatures.insert(Feature); - }); + std::set<size_t> Features; + if (IsSetCoverMerge) + TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); }); + else + TPC.CollectFeatures([&](size_t Feature) { + if (AllFeatures.insert(Feature).second) + Features.insert(Feature); + }); TPC.UpdateObservedPCs(); // Show stats. if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) @@ -249,7 +252,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { PrintStatsWrapper("LOADED"); // Write the post-run marker and the coverage. OF << "FT " << i; - for (size_t F : UniqFeatures) + for (size_t F : Features) OF << " " << F; OF << "\n"; OF << "COV " << i; @@ -263,15 +266,137 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { PrintStatsWrapper("DONE "); } -static size_t WriteNewControlFile(const std::string &CFPath, - const Vector<SizedFile> &OldCorpus, - const Vector<SizedFile> &NewCorpus, - const Vector<MergeFileInfo> &KnownFiles) { +// Merges all corpora into the first corpus. A file is added into +// the first corpus only if it adds new features. Unlike `Merger::Merge`, +// this implementation calculates an approximation of the minimum set +// of corpora files, that cover all known features (set cover problem). +// Generally, this means that files with more features are preferred for +// merge into the first corpus. When two files have the same number of +// features, the smaller one is preferred. +size_t Merger::SetCoverMerge(const std::set<uint32_t> &InitialFeatures, + std::set<uint32_t> *NewFeatures, + const std::set<uint32_t> &InitialCov, + std::set<uint32_t> *NewCov, + std::vector<std::string> *NewFiles) { + assert(NumFilesInFirstCorpus <= Files.size()); + NewFiles->clear(); + NewFeatures->clear(); + NewCov->clear(); + std::set<uint32_t> AllFeatures; + // 1 << 21 - 1 is the maximum feature index. 
+ // See 'kFeatureSetSize' in 'FuzzerCorpus.h'. + const uint32_t kFeatureSetSize = 1 << 21; + std::vector<bool> Covered(kFeatureSetSize, false); + size_t NumCovered = 0; + + std::set<uint32_t> ExistingFeatures = InitialFeatures; + for (size_t i = 0; i < NumFilesInFirstCorpus; ++i) + ExistingFeatures.insert(Files[i].Features.begin(), Files[i].Features.end()); + + // Mark the existing features as covered. + for (const auto &F : ExistingFeatures) { + if (!Covered[F % kFeatureSetSize]) { + ++NumCovered; + Covered[F % kFeatureSetSize] = true; + } + // Calculate an underestimation of the set of covered features + // since the `Covered` bitvector is smaller than the feature range. + AllFeatures.insert(F % kFeatureSetSize); + } + + std::set<size_t> RemainingFiles; + for (size_t i = NumFilesInFirstCorpus; i < Files.size(); ++i) { + // Construct an incremental sequence which represent the + // indices to all files (excluding those in the initial corpus). + // RemainingFiles = range(NumFilesInFirstCorpus..Files.size()). + RemainingFiles.insert(i); + // Insert this file's unique features to all features. + for (const auto &F : Files[i].Features) + AllFeatures.insert(F % kFeatureSetSize); + } + + // Integrate files into Covered until set is complete. + while (NumCovered != AllFeatures.size()) { + // Index to file with largest number of unique features. + size_t MaxFeaturesIndex = NumFilesInFirstCorpus; + // Indices to remove from RemainingFiles. + std::set<size_t> RemoveIndices; + // Running max unique feature count. + // Updated upon finding a file with more features. + size_t MaxNumFeatures = 0; + + // Iterate over all files not yet integrated into Covered, + // to find the file which has the largest number of + // features that are not already in Covered. + for (const auto &i : RemainingFiles) { + const auto &File = Files[i]; + size_t CurrentUnique = 0; + // Count number of features in this file + // which are not yet in Covered. 
+ for (const auto &F : File.Features) + if (!Covered[F % kFeatureSetSize]) + ++CurrentUnique; + + if (CurrentUnique == 0) { + // All features in this file are already in Covered: skip next time. + RemoveIndices.insert(i); + } else if (CurrentUnique > MaxNumFeatures || + (CurrentUnique == MaxNumFeatures && + File.Size < Files[MaxFeaturesIndex].Size)) { + // Update the max features file based on unique features. + // Break ties by selecting smaller files. + MaxNumFeatures = CurrentUnique; + MaxFeaturesIndex = i; + } + } + // Must be a valid index. + assert(MaxFeaturesIndex < Files.size()); + // Remove any feature-less files found. + for (const auto &i : RemoveIndices) + RemainingFiles.erase(i); + if (MaxNumFeatures == 0) { + // Did not find a file that adds unique features. + // This means that we should have no remaining files. + assert(RemainingFiles.size() == 0); + assert(NumCovered == AllFeatures.size()); + break; + } + + // MaxFeaturesIndex must be an element of Remaining. + assert(RemainingFiles.find(MaxFeaturesIndex) != RemainingFiles.end()); + // Remove the file with the most features from Remaining. + RemainingFiles.erase(MaxFeaturesIndex); + const auto &MaxFeatureFile = Files[MaxFeaturesIndex]; + // Add the features of the max feature file to Covered. + for (const auto &F : MaxFeatureFile.Features) { + if (!Covered[F % kFeatureSetSize]) { + ++NumCovered; + Covered[F % kFeatureSetSize] = true; + NewFeatures->insert(F); + } + } + // Add the index to this file to the result. + NewFiles->push_back(MaxFeatureFile.Name); + // Update NewCov with the additional coverage + // that MaxFeatureFile provides. 
+ for (const auto &C : MaxFeatureFile.Cov) + if (InitialCov.find(C) == InitialCov.end()) + NewCov->insert(C); + } + + return NewFeatures->size(); +} + +static size_t +WriteNewControlFile(const std::string &CFPath, + const std::vector<SizedFile> &OldCorpus, + const std::vector<SizedFile> &NewCorpus, + const std::vector<MergeFileInfo> &KnownFiles) { std::unordered_set<std::string> FilesToSkip; for (auto &SF: KnownFiles) FilesToSkip.insert(SF.Name); - Vector<std::string> FilesToUse; + std::vector<std::string> FilesToUse; auto MaybeUseFile = [=, &FilesToUse](std::string Name) { if (FilesToSkip.find(Name) == FilesToSkip.end()) FilesToUse.push_back(Name); @@ -299,19 +424,19 @@ static size_t WriteNewControlFile(const std::string &CFPath, } // Outer process. Does not call the target code and thus should not fail. -void CrashResistantMerge(const Vector<std::string> &Args, - const Vector<SizedFile> &OldCorpus, - const Vector<SizedFile> &NewCorpus, - Vector<std::string> *NewFiles, - const Set<uint32_t> &InitialFeatures, - Set<uint32_t> *NewFeatures, - const Set<uint32_t> &InitialCov, - Set<uint32_t> *NewCov, - const std::string &CFPath, - bool V /*Verbose*/) { +void CrashResistantMerge(const std::vector<std::string> &Args, + const std::vector<SizedFile> &OldCorpus, + const std::vector<SizedFile> &NewCorpus, + std::vector<std::string> *NewFiles, + const std::set<uint32_t> &InitialFeatures, + std::set<uint32_t> *NewFeatures, + const std::set<uint32_t> &InitialCov, + std::set<uint32_t> *NewCov, const std::string &CFPath, + bool V, /*Verbose*/ + bool IsSetCoverMerge) { if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge. size_t NumAttempts = 0; - Vector<MergeFileInfo> KnownFiles; + std::vector<MergeFileInfo> KnownFiles; if (FileSize(CFPath)) { VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n", CFPath.c_str()); @@ -363,6 +488,7 @@ void CrashResistantMerge(const Vector<std::string> &Args, // Every inner process should execute at least one input. 
Command BaseCmd(Args); BaseCmd.removeFlag("merge"); + BaseCmd.removeFlag("set_cover_merge"); BaseCmd.removeFlag("fork"); BaseCmd.removeFlag("collect_data_flow"); for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { @@ -370,14 +496,16 @@ void CrashResistantMerge(const Vector<std::string> &Args, VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt); Command Cmd(BaseCmd); Cmd.addFlag("merge_control_file", CFPath); - Cmd.addFlag("merge_inner", "1"); + // If we are going to use the set cover implementation for + // minimization add the merge_inner=2 internal flag. + Cmd.addFlag("merge_inner", IsSetCoverMerge ? "2" : "1"); if (!V) { Cmd.setOutputFile(getDevNull()); Cmd.combineOutAndErr(); } auto ExitCode = ExecuteCommand(Cmd); if (!ExitCode) { - VPrintf(V, "MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt); + VPrintf(V, "MERGE-OUTER: successful in %zd attempt(s)\n", Attempt); break; } } @@ -395,7 +523,10 @@ void CrashResistantMerge(const Vector<std::string> &Args, M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end()); - M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); + if (IsSetCoverMerge) + M.SetCoverMerge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); + else + M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " "%zd new coverage edges\n", NewFiles->size(), NewFeatures->size(), NewCov->size()); |