#!/usr/bin/env python
#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
# Runs the data-flow tracer several times on the same input in order to collect
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
#
#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
#   collect_data_flow.py BINARY INPUT [OUTPUT]
#
#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
import hashlib
import sys
import os
import subprocess
import tempfile
import shutil

tmpdir = ""


def cleanup(d):
    """Recursively delete directory `d`, announcing the removal on stdout."""
    print("removing: %s" % d)
    shutil.rmtree(d)


def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
    """Collect dataflow for every file found under `corpus_dir`.

    Args:
      self: path of this script; it is re-invoked once per corpus file.
      exe: path of the binary passed through to each per-file invocation.
      corpus_dir: directory that is walked recursively for input files.
      output_dir: directory to create for the results; must not exist yet.

    Each result is stored in `output_dir` under the SHA-1 hex digest of the
    input's contents. Afterwards `exe` is run without arguments and its stdout
    is stored as `output_dir`/functions.txt.
    """
    print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                                 output_dir))
    assert not os.path.exists(output_dir)
    os.mkdir(output_dir)
    for root, dirs, files in os.walk(corpus_dir):
        for f in files:
            path = os.path.join(root, f)
            # Hash the raw bytes: corpus inputs are arbitrary binary data and
            # hashlib only accepts bytes on Python 3.
            with open(path, "rb") as corpus_file:
                sha1 = hashlib.sha1(corpus_file.read()).hexdigest()
            output = os.path.join(output_dir, sha1)
            subprocess.call([self, exe, path, output])
    with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
        subprocess.call([exe], stdout=functions_txt)


def main(argv):
    """Collect the dataflow trace for one input file, or for a whole corpus.

    Args:
      argv: [SCRIPT, BINARY, INPUT, [OUTPUT]]. If INPUT is a directory the
        work is delegated to collect_dataflow_for_corpus and OUTPUT is
        required. Otherwise OUTPUT is optional and defaults to stdout.

    BINARY is invoked as `BINARY BEGIN END INPUT TMPFILE` on the byte range
    [BEGIN, END). Whenever that invocation exits non-zero the range is halved
    and both halves are retried. The per-range files are finally combined by
    the merge_data_flow.py script located next to SCRIPT.
    """
    exe = argv[1]
    inp = argv[2]
    if os.path.isdir(inp):
        return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
    size = os.path.getsize(inp)
    q = [[0, size]]
    tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
    atexit.register(cleanup, tmpdir)
    print("tmpdir:  %s" % tmpdir)
    outputs = []
    while q:
        r = q.pop()
        begin, end = r
        print("******* Trying:  %s" % (r,))
        tmpfile = os.path.join(tmpdir, str(begin) + "-" + str(end))
        ret = subprocess.call([exe, str(begin), str(end), inp, tmpfile])
        if ret and end - begin >= 2:
            # Floor division keeps the bounds integral; true division would
            # turn them into floats ("1.5") on Python 3.
            mid = (end + begin) // 2
            q.append([begin, mid])
            q.append([mid, end])
        else:
            # A range that still fails but is too small to split is recorded
            # as well; this keeps the script's long-standing behaviour.
            outputs.append(tmpfile)
            print("******* Success:  %s" % (r,))
    merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
    # The merge subprocess writes straight to the file descriptor, so push out
    # our own buffered progress messages first to keep the ordering sane.
    sys.stdout.flush()
    if len(argv) >= 4:
        with open(argv[3], "w") as f:
            subprocess.call([merge] + outputs, stdout=f)
    else:
        subprocess.call([merge] + outputs, stdout=sys.stdout)


if __name__ == '__main__':
    main(sys.argv)