1 1.1 kamil //===- FuzzerMerge.cpp - merging corpora ----------------------------------===// 2 1.1 kamil // 3 1.1 kamil // The LLVM Compiler Infrastructure 4 1.1 kamil // 5 1.1 kamil // This file is distributed under the University of Illinois Open Source 6 1.1 kamil // License. See LICENSE.TXT for details. 7 1.1 kamil // 8 1.1 kamil //===----------------------------------------------------------------------===// 9 1.1 kamil // Merging corpora. 10 1.1 kamil //===----------------------------------------------------------------------===// 11 1.1 kamil 12 1.1 kamil #include "FuzzerCommand.h" 13 1.1 kamil #include "FuzzerMerge.h" 14 1.1 kamil #include "FuzzerIO.h" 15 1.1 kamil #include "FuzzerInternal.h" 16 1.1 kamil #include "FuzzerTracePC.h" 17 1.1 kamil #include "FuzzerUtil.h" 18 1.1 kamil 19 1.1 kamil #include <fstream> 20 1.1 kamil #include <iterator> 21 1.1 kamil #include <set> 22 1.1 kamil #include <sstream> 23 1.1 kamil 24 1.1 kamil namespace fuzzer { 25 1.1 kamil 26 1.1 kamil bool Merger::Parse(const std::string &Str, bool ParseCoverage) { 27 1.1 kamil std::istringstream SS(Str); 28 1.1 kamil return Parse(SS, ParseCoverage); 29 1.1 kamil } 30 1.1 kamil 31 1.1 kamil void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { 32 1.1 kamil if (!Parse(IS, ParseCoverage)) { 33 1.1 kamil Printf("MERGE: failed to parse the control file (unexpected error)\n"); 34 1.1 kamil exit(1); 35 1.1 kamil } 36 1.1 kamil } 37 1.1 kamil 38 1.1 kamil // The control file example: 39 1.1 kamil // 40 1.1 kamil // 3 # The number of inputs 41 1.1 kamil // 1 # The number of inputs in the first corpus, <= the previous number 42 1.1 kamil // file0 43 1.1 kamil // file1 44 1.1 kamil // file2 # One file name per line. 45 1.1 kamil // STARTED 0 123 # FileID, file size 46 1.1 kamil // DONE 0 1 4 6 8 # FileID COV1 COV2 ... 47 1.1 kamil // STARTED 1 456 # If DONE is missing, the input crashed while processing. 48 1.1 kamil // STARTED 2 567 49 1.1 kamil // DONE 2 8 9 50 1.1 kamil bool Merger::Parse(std::istream &IS, bool ParseCoverage) { 51 1.1 kamil LastFailure.clear(); 52 1.1 kamil std::string Line; 53 1.1 kamil 54 1.1 kamil // Parse NumFiles. 55 1.1 kamil if (!std::getline(IS, Line, '\n')) return false; 56 1.1 kamil std::istringstream L1(Line); 57 1.1 kamil size_t NumFiles = 0; 58 1.1 kamil L1 >> NumFiles; 59 1.1 kamil if (NumFiles == 0 || NumFiles > 10000000) return false; 60 1.1 kamil 61 1.1 kamil // Parse NumFilesInFirstCorpus. 62 1.1 kamil if (!std::getline(IS, Line, '\n')) return false; 63 1.1 kamil std::istringstream L2(Line); 64 1.1 kamil NumFilesInFirstCorpus = NumFiles + 1; 65 1.1 kamil L2 >> NumFilesInFirstCorpus; 66 1.1 kamil if (NumFilesInFirstCorpus > NumFiles) return false; 67 1.1 kamil 68 1.1 kamil // Parse file names. 69 1.1 kamil Files.resize(NumFiles); 70 1.1 kamil for (size_t i = 0; i < NumFiles; i++) 71 1.1 kamil if (!std::getline(IS, Files[i].Name, '\n')) 72 1.1 kamil return false; 73 1.1 kamil 74 1.1 kamil // Parse STARTED and DONE lines. 75 1.1 kamil size_t ExpectedStartMarker = 0; 76 1.1 kamil const size_t kInvalidStartMarker = -1; 77 1.1 kamil size_t LastSeenStartMarker = kInvalidStartMarker; 78 1.1 kamil Vector<uint32_t> TmpFeatures; 79 1.1 kamil while (std::getline(IS, Line, '\n')) { 80 1.1 kamil std::istringstream ISS1(Line); 81 1.1 kamil std::string Marker; 82 1.1 kamil size_t N; 83 1.1 kamil ISS1 >> Marker; 84 1.1 kamil ISS1 >> N; 85 1.1 kamil if (Marker == "STARTED") { 86 1.1 kamil // STARTED FILE_ID FILE_SIZE 87 1.1 kamil if (ExpectedStartMarker != N) 88 1.1 kamil return false; 89 1.1 kamil ISS1 >> Files[ExpectedStartMarker].Size; 90 1.1 kamil LastSeenStartMarker = ExpectedStartMarker; 91 1.1 kamil assert(ExpectedStartMarker < Files.size()); 92 1.1 kamil ExpectedStartMarker++; 93 1.1 kamil } else if (Marker == "DONE") { 94 1.1 kamil // DONE FILE_ID COV1 COV2 COV3 ... 95 1.1 kamil size_t CurrentFileIdx = N; 96 1.1 kamil if (CurrentFileIdx != LastSeenStartMarker) 97 1.1 kamil return false; 98 1.1 kamil LastSeenStartMarker = kInvalidStartMarker; 99 1.1 kamil if (ParseCoverage) { 100 1.1 kamil TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. 101 1.1 kamil while (ISS1 >> std::hex >> N) 102 1.1 kamil TmpFeatures.push_back(N); 103 1.1 kamil std::sort(TmpFeatures.begin(), TmpFeatures.end()); 104 1.1 kamil Files[CurrentFileIdx].Features = TmpFeatures; 105 1.1 kamil } 106 1.1 kamil } else { 107 1.1 kamil return false; 108 1.1 kamil } 109 1.1 kamil } 110 1.1 kamil if (LastSeenStartMarker != kInvalidStartMarker) 111 1.1 kamil LastFailure = Files[LastSeenStartMarker].Name; 112 1.1 kamil 113 1.1 kamil FirstNotProcessedFile = ExpectedStartMarker; 114 1.1 kamil return true; 115 1.1 kamil } 116 1.1 kamil 117 1.1 kamil size_t Merger::ApproximateMemoryConsumption() const { 118 1.1 kamil size_t Res = 0; 119 1.1 kamil for (const auto &F: Files) 120 1.1 kamil Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); 121 1.1 kamil return Res; 122 1.1 kamil } 123 1.1 kamil 124 1.1 kamil // Decides which files need to be merged (add thost to NewFiles). 125 1.1 kamil // Returns the number of new features added. 126 1.1 kamil size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, 127 1.1 kamil Vector<std::string> *NewFiles) { 128 1.1 kamil NewFiles->clear(); 129 1.1 kamil assert(NumFilesInFirstCorpus <= Files.size()); 130 1.1 kamil Set<uint32_t> AllFeatures(InitialFeatures); 131 1.1 kamil 132 1.1 kamil // What features are in the initial corpus? 133 1.1 kamil for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { 134 1.1 kamil auto &Cur = Files[i].Features; 135 1.1 kamil AllFeatures.insert(Cur.begin(), Cur.end()); 136 1.1 kamil } 137 1.1 kamil size_t InitialNumFeatures = AllFeatures.size(); 138 1.1 kamil 139 1.1 kamil // Remove all features that we already know from all other inputs. 140 1.1 kamil for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { 141 1.1 kamil auto &Cur = Files[i].Features; 142 1.1 kamil Vector<uint32_t> Tmp; 143 1.1 kamil std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), 144 1.1 kamil AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); 145 1.1 kamil Cur.swap(Tmp); 146 1.1 kamil } 147 1.1 kamil 148 1.1 kamil // Sort. Give preference to 149 1.1 kamil // * smaller files 150 1.1 kamil // * files with more features. 151 1.1 kamil std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), 152 1.1 kamil [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { 153 1.1 kamil if (a.Size != b.Size) 154 1.1 kamil return a.Size < b.Size; 155 1.1 kamil return a.Features.size() > b.Features.size(); 156 1.1 kamil }); 157 1.1 kamil 158 1.1 kamil // One greedy pass: add the file's features to AllFeatures. 159 1.1 kamil // If new features were added, add this file to NewFiles. 160 1.1 kamil for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { 161 1.1 kamil auto &Cur = Files[i].Features; 162 1.1 kamil // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), 163 1.1 kamil // Files[i].Size, Cur.size()); 164 1.1 kamil size_t OldSize = AllFeatures.size(); 165 1.1 kamil AllFeatures.insert(Cur.begin(), Cur.end()); 166 1.1 kamil if (AllFeatures.size() > OldSize) 167 1.1 kamil NewFiles->push_back(Files[i].Name); 168 1.1 kamil } 169 1.1 kamil return AllFeatures.size() - InitialNumFeatures; 170 1.1 kamil } 171 1.1 kamil 172 1.1 kamil void Merger::PrintSummary(std::ostream &OS) { 173 1.1 kamil for (auto &File : Files) { 174 1.1 kamil OS << std::hex; 175 1.1 kamil OS << File.Name << " size: " << File.Size << " features: "; 176 1.1 kamil for (auto Feature : File.Features) 177 1.1 kamil OS << " " << Feature; 178 1.1 kamil OS << "\n"; 179 1.1 kamil } 180 1.1 kamil } 181 1.1 kamil 182 1.1 kamil Set<uint32_t> Merger::AllFeatures() const { 183 1.1 kamil Set<uint32_t> S; 184 1.1 kamil for (auto &File : Files) 185 1.1 kamil S.insert(File.Features.begin(), File.Features.end()); 186 1.1 kamil return S; 187 1.1 kamil } 188 1.1 kamil 189 1.1 kamil Set<uint32_t> Merger::ParseSummary(std::istream &IS) { 190 1.1 kamil std::string Line, Tmp; 191 1.1 kamil Set<uint32_t> Res; 192 1.1 kamil while (std::getline(IS, Line, '\n')) { 193 1.1 kamil size_t N; 194 1.1 kamil std::istringstream ISS1(Line); 195 1.1 kamil ISS1 >> Tmp; // Name 196 1.1 kamil ISS1 >> Tmp; // size: 197 1.1 kamil assert(Tmp == "size:" && "Corrupt summary file"); 198 1.1 kamil ISS1 >> std::hex; 199 1.1 kamil ISS1 >> N; // File Size 200 1.1 kamil ISS1 >> Tmp; // features: 201 1.1 kamil assert(Tmp == "features:" && "Corrupt summary file"); 202 1.1 kamil while (ISS1 >> std::hex >> N) 203 1.1 kamil Res.insert(N); 204 1.1 kamil } 205 1.1 kamil return Res; 206 1.1 kamil } 207 1.1 kamil 208 1.1 kamil // Inner process. May crash if the target crashes. 209 1.1 kamil void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { 210 1.1 kamil Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); 211 1.1 kamil Merger M; 212 1.1 kamil std::ifstream IF(CFPath); 213 1.1 kamil M.ParseOrExit(IF, false); 214 1.1 kamil IF.close(); 215 1.1 kamil if (!M.LastFailure.empty()) 216 1.1 kamil Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", 217 1.1 kamil M.LastFailure.c_str()); 218 1.1 kamil 219 1.1 kamil Printf("MERGE-INNER: %zd total files;" 220 1.1 kamil " %zd processed earlier; will process %zd files now\n", 221 1.1 kamil M.Files.size(), M.FirstNotProcessedFile, 222 1.1 kamil M.Files.size() - M.FirstNotProcessedFile); 223 1.1 kamil 224 1.1 kamil std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); 225 1.1 kamil Set<size_t> AllFeatures; 226 1.1 kamil for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { 227 1.1 kamil MaybeExitGracefully(); 228 1.1 kamil auto U = FileToVector(M.Files[i].Name); 229 1.1 kamil if (U.size() > MaxInputLen) { 230 1.1 kamil U.resize(MaxInputLen); 231 1.1 kamil U.shrink_to_fit(); 232 1.1 kamil } 233 1.1 kamil std::ostringstream StartedLine; 234 1.1 kamil // Write the pre-run marker. 235 1.1 kamil OF << "STARTED " << std::dec << i << " " << U.size() << "\n"; 236 1.1 kamil OF.flush(); // Flush is important since Command::Execute may crash. 237 1.1 kamil // Run. 238 1.1 kamil TPC.ResetMaps(); 239 1.1 kamil ExecuteCallback(U.data(), U.size()); 240 1.1 kamil // Collect coverage. We are iterating over the files in this order: 241 1.1 kamil // * First, files in the initial corpus ordered by size, smallest first. 242 1.1 kamil // * Then, all other files, smallest first. 243 1.1 kamil // So it makes no sense to record all features for all files, instead we 244 1.1 kamil // only record features that were not seen before. 245 1.1 kamil Set<size_t> UniqFeatures; 246 1.1 kamil TPC.CollectFeatures([&](size_t Feature) { 247 1.1 kamil if (AllFeatures.insert(Feature).second) 248 1.1 kamil UniqFeatures.insert(Feature); 249 1.1 kamil }); 250 1.1 kamil // Show stats. 251 1.1 kamil if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) 252 1.1 kamil PrintStats("pulse "); 253 1.1 kamil // Write the post-run marker and the coverage. 254 1.1 kamil OF << "DONE " << i; 255 1.1 kamil for (size_t F : UniqFeatures) 256 1.1 kamil OF << " " << std::hex << F; 257 1.1 kamil OF << "\n"; 258 1.1 kamil OF.flush(); 259 1.1 kamil } 260 1.1 kamil } 261 1.1 kamil 262 1.1 kamil static void WriteNewControlFile(const std::string &CFPath, 263 1.1 kamil const Vector<SizedFile> &AllFiles, 264 1.1 kamil size_t NumFilesInFirstCorpus) { 265 1.1 kamil RemoveFile(CFPath); 266 1.1 kamil std::ofstream ControlFile(CFPath); 267 1.1 kamil ControlFile << AllFiles.size() << "\n"; 268 1.1 kamil ControlFile << NumFilesInFirstCorpus << "\n"; 269 1.1 kamil for (auto &SF: AllFiles) 270 1.1 kamil ControlFile << SF.File << "\n"; 271 1.1 kamil if (!ControlFile) { 272 1.1 kamil Printf("MERGE-OUTER: failed to write to the control file: %s\n", 273 1.1 kamil CFPath.c_str()); 274 1.1 kamil exit(1); 275 1.1 kamil } 276 1.1 kamil } 277 1.1 kamil 278 1.1 kamil // Outer process. Does not call the target code and thus sohuld not fail. 279 1.1 kamil void Fuzzer::CrashResistantMerge(const Vector<std::string> &Args, 280 1.1 kamil const Vector<std::string> &Corpora, 281 1.1 kamil const char *CoverageSummaryInputPathOrNull, 282 1.1 kamil const char *CoverageSummaryOutputPathOrNull, 283 1.1 kamil const char *MergeControlFilePathOrNull) { 284 1.1 kamil if (Corpora.size() <= 1) { 285 1.1 kamil Printf("Merge requires two or more corpus dirs\n"); 286 1.1 kamil return; 287 1.1 kamil } 288 1.1 kamil auto CFPath = 289 1.1 kamil MergeControlFilePathOrNull 290 1.1 kamil ? MergeControlFilePathOrNull 291 1.1 kamil : DirPlusFile(TmpDir(), 292 1.1 kamil "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); 293 1.1 kamil 294 1.1 kamil size_t NumAttempts = 0; 295 1.1 kamil if (MergeControlFilePathOrNull && FileSize(MergeControlFilePathOrNull)) { 296 1.1 kamil Printf("MERGE-OUTER: non-empty control file provided: '%s'\n", 297 1.1 kamil MergeControlFilePathOrNull); 298 1.1 kamil Merger M; 299 1.1 kamil std::ifstream IF(MergeControlFilePathOrNull); 300 1.1 kamil if (M.Parse(IF, /*ParseCoverage=*/false)) { 301 1.1 kamil Printf("MERGE-OUTER: control file ok, %zd files total," 302 1.1 kamil " first not processed file %zd\n", 303 1.1 kamil M.Files.size(), M.FirstNotProcessedFile); 304 1.1 kamil if (!M.LastFailure.empty()) 305 1.1 kamil Printf("MERGE-OUTER: '%s' will be skipped as unlucky " 306 1.1 kamil "(merge has stumbled on it the last time)\n", 307 1.1 kamil M.LastFailure.c_str()); 308 1.1 kamil if (M.FirstNotProcessedFile >= M.Files.size()) { 309 1.1 kamil Printf("MERGE-OUTER: nothing to do, merge has been completed before\n"); 310 1.1 kamil exit(0); 311 1.1 kamil } 312 1.1 kamil 313 1.1 kamil NumAttempts = M.Files.size() - M.FirstNotProcessedFile; 314 1.1 kamil } else { 315 1.1 kamil Printf("MERGE-OUTER: bad control file, will overwrite it\n"); 316 1.1 kamil } 317 1.1 kamil } 318 1.1 kamil 319 1.1 kamil if (!NumAttempts) { 320 1.1 kamil // The supplied control file is empty or bad, create a fresh one. 321 1.1 kamil Vector<SizedFile> AllFiles; 322 1.1 kamil GetSizedFilesFromDir(Corpora[0], &AllFiles); 323 1.1 kamil size_t NumFilesInFirstCorpus = AllFiles.size(); 324 1.1 kamil std::sort(AllFiles.begin(), AllFiles.end()); 325 1.1 kamil for (size_t i = 1; i < Corpora.size(); i++) 326 1.1 kamil GetSizedFilesFromDir(Corpora[i], &AllFiles); 327 1.1 kamil std::sort(AllFiles.begin() + NumFilesInFirstCorpus, AllFiles.end()); 328 1.1 kamil Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n", 329 1.1 kamil AllFiles.size(), NumFilesInFirstCorpus); 330 1.1 kamil WriteNewControlFile(CFPath, AllFiles, NumFilesInFirstCorpus); 331 1.1 kamil NumAttempts = AllFiles.size(); 332 1.1 kamil } 333 1.1 kamil 334 1.1 kamil // Execute the inner process until it passes. 335 1.1 kamil // Every inner process should execute at least one input. 336 1.1 kamil Command BaseCmd(Args); 337 1.1 kamil BaseCmd.removeFlag("merge"); 338 1.1 kamil bool Success = false; 339 1.1 kamil for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { 340 1.1 kamil MaybeExitGracefully(); 341 1.1 kamil Printf("MERGE-OUTER: attempt %zd\n", Attempt); 342 1.1 kamil Command Cmd(BaseCmd); 343 1.1 kamil Cmd.addFlag("merge_control_file", CFPath); 344 1.1 kamil Cmd.addFlag("merge_inner", "1"); 345 1.1 kamil auto ExitCode = ExecuteCommand(Cmd); 346 1.1 kamil if (!ExitCode) { 347 1.1 kamil Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt); 348 1.1 kamil Success = true; 349 1.1 kamil break; 350 1.1 kamil } 351 1.1 kamil } 352 1.1 kamil if (!Success) { 353 1.1 kamil Printf("MERGE-OUTER: zero succesfull attempts, exiting\n"); 354 1.1 kamil exit(1); 355 1.1 kamil } 356 1.1 kamil // Read the control file and do the merge. 357 1.1 kamil Merger M; 358 1.1 kamil std::ifstream IF(CFPath); 359 1.1 kamil IF.seekg(0, IF.end); 360 1.1 kamil Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg()); 361 1.1 kamil IF.seekg(0, IF.beg); 362 1.1 kamil M.ParseOrExit(IF, true); 363 1.1 kamil IF.close(); 364 1.1 kamil Printf("MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", 365 1.1 kamil M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); 366 1.1 kamil if (CoverageSummaryOutputPathOrNull) { 367 1.1 kamil Printf("MERGE-OUTER: writing coverage summary for %zd files to %s\n", 368 1.1 kamil M.Files.size(), CoverageSummaryOutputPathOrNull); 369 1.1 kamil std::ofstream SummaryOut(CoverageSummaryOutputPathOrNull); 370 1.1 kamil M.PrintSummary(SummaryOut); 371 1.1 kamil } 372 1.1 kamil Vector<std::string> NewFiles; 373 1.1 kamil Set<uint32_t> InitialFeatures; 374 1.1 kamil if (CoverageSummaryInputPathOrNull) { 375 1.1 kamil std::ifstream SummaryIn(CoverageSummaryInputPathOrNull); 376 1.1 kamil InitialFeatures = M.ParseSummary(SummaryIn); 377 1.1 kamil Printf("MERGE-OUTER: coverage summary loaded from %s, %zd features found\n", 378 1.1 kamil CoverageSummaryInputPathOrNull, InitialFeatures.size()); 379 1.1 kamil } 380 1.1 kamil size_t NumNewFeatures = M.Merge(InitialFeatures, &NewFiles); 381 1.1 kamil Printf("MERGE-OUTER: %zd new files with %zd new features added\n", 382 1.1 kamil NewFiles.size(), NumNewFeatures); 383 1.1 kamil for (auto &F: NewFiles) 384 1.1 kamil WriteToOutputCorpus(FileToVector(F, MaxInputLen)); 385 1.1 kamil // We are done, delete the control file if it was a temporary one. 386 1.1 kamil if (!MergeControlFilePathOrNull) 387 1.1 kamil RemoveFile(CFPath); 388 1.1 kamil } 389 1.1 kamil 390 1.1 kamil } // namespace fuzzer 391