Home | History | Annotate | Line # | Download | only in scripts
      1 #!/usr/bin/env python
      2 #===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
      3 #
      4 #                     The LLVM Compiler Infrastructure
      5 #
      6 # This file is distributed under the University of Illinois Open Source
      7 # License. See LICENSE.TXT for details.
      8 #
      9 #===------------------------------------------------------------------------===#
     10 # Runs the data-flow tracer several times on the same input in order to collect
     11 # the complete trace for all input bytes (running it on all bytes at once
     12 # may fail if DFSan runs out of labels).
     13 # Usage:
     14 #
     15 #   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
     16 #   collect_data_flow.py BINARY INPUT [OUTPUT]
     17 #
     18 #   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
     19 #   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
     20 #===------------------------------------------------------------------------===#
     21 import atexit
     22 import hashlib
     23 import sys
     24 import os
     25 import subprocess
     26 import tempfile
     27 import shutil
     28 
# NOTE: main() binds its own local `tmpdir` (it has no `global` statement), so
# nothing visible in this file ever updates this module-level value.
tmpdir = ""
     30 
def cleanup(d):
  """Announce and then recursively delete the directory tree rooted at d.

  Registered through atexit by main() so the temporary working directory is
  removed when the interpreter exits.
  """
  message = "removing: %s" % d
  print(message)
  shutil.rmtree(d)
     34 
def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
  """Collect dataflow for every file under corpus_dir into output_dir.

  For each file found by walking corpus_dir, runs `self exe FILE OUTPUT` as a
  subprocess, where OUTPUT is output_dir/<sha1 hex digest of FILE's bytes>.
  Afterwards runs `exe` with no arguments and stores its stdout in
  output_dir/functions.txt.

  Args:
    self: path of the script to re-invoke per input (main() passes argv[0]).
    exe: path of the binary to run.
    corpus_dir: directory that is walked recursively for input files.
    output_dir: directory to create; it must not exist yet.

  Raises:
    AssertionError: if output_dir already exists.

  Subprocess exit codes are not checked.
  """
  print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                               output_dir))
  assert not os.path.exists(output_dir)
  os.mkdir(output_dir)
  for root, _dirs, files in os.walk(corpus_dir):
    for f in files:
      path = os.path.join(root, f)
      # Hash the raw bytes: text mode breaks hashlib on Python 3 and makes the
      # digest depend on newline translation. `with` closes the handle.
      with open(path, "rb") as corpus_file:
        sha1 = hashlib.sha1(corpus_file.read()).hexdigest()
      output = os.path.join(output_dir, sha1)
      subprocess.call([self, exe, path, output])
  # Close functions.txt deterministically once the subprocess has finished.
  with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
    subprocess.call([exe], stdout=functions_txt)
     48 
     49 
def main(argv):
  """Collect the dataflow trace for one input file, or for a whole corpus.

  Args:
    argv: [SCRIPT, BINARY, INPUT] optionally followed by OUTPUT. If INPUT is a
      directory, argv[3] is required and the work is delegated to
      collect_dataflow_for_corpus().

  For a single input, BINARY is run as `BINARY BEG END INPUT TMPFILE` over the
  byte range [0, size). Whenever a run returns non-zero and the range spans at
  least two bytes, the range is split in half and both halves are retried.
  The collected temp files are then passed to merge_data_flow.py (looked up
  next to argv[0]); its stdout goes to OUTPUT if given, otherwise to stdout.

  Returns:
    The result of collect_dataflow_for_corpus() in corpus mode, else None.
  """
  exe = argv[1]
  inp = argv[2]
  if os.path.isdir(inp):
    return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
  size = os.path.getsize(inp)
  q = [[0, size]]
  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
  atexit.register(cleanup, tmpdir)
  # %-formatting keeps the exact text the old print statements produced while
  # being valid on both Python 2 and Python 3.
  print("tmpdir:  %s" % tmpdir)
  outputs = []
  while q:
    r = q.pop()
    print("******* Trying:   %s" % r)
    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
    if ret and r[1] - r[0] >= 2:
      # Floor division keeps byte offsets integral on Python 3 as well.
      mid = (r[1] + r[0]) // 2
      q.append([r[0], mid])
      q.append([mid, r[1]])
    else:
      # NOTE(review): a failing range shorter than two bytes also lands here,
      # so its tmpfile is handed to the merge step regardless of `ret`.
      outputs.append(tmpfile)
      print("******* Success:  %s" % r)
  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
  if len(argv) >= 4:
    # Close the output file once the merge step has finished writing to it.
    with open(argv[3], "w") as f:
      subprocess.call([merge] + outputs, stdout=f)
  else:
    subprocess.call([merge] + outputs, stdout=sys.stdout)
     77 
# Script entry point. The full argv is forwarded because main() uses argv[0]
# to re-invoke this script and to locate merge_data_flow.py next to it.
if __name__ == '__main__':
  main(sys.argv)
     80