Home | History | Annotate | Line # | Download | only in fuzz
fuzz.py revision 1.1.1.1
      1 #!/usr/bin/env python
      2 
      3 # ################################################################
      4 # Copyright (c) Meta Platforms, Inc. and affiliates.
      5 # All rights reserved.
      6 #
      7 # This source code is licensed under both the BSD-style license (found in the
      8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
      9 # in the COPYING file in the root directory of this source tree).
     10 # You may select, at your option, one of the above-listed licenses.
     11 # ##########################################################################
     12 
     13 import argparse
     14 import contextlib
     15 import os
     16 import re
     17 import shlex
     18 import shutil
     19 import subprocess
     20 import sys
     21 import tempfile
     22 
     23 
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return the absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
     26 
     27 
class InputType(object):
    """Integer tags naming the kind of input data a fuzz target is fed."""
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
     32 
     33 
class FrameType(object):
    """Integer tags naming the frame format a fuzz target works with."""
    ZSTD, BLOCK = 1, 2
     37 
     38 
class TargetInfo(object):
    """Pairs a target's input type with its frame type (ZSTD by default)."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.frame_type = frame_type
        self.input_type = input_type
     43 
     44 
# Constants
# Directory containing this script; fuzz target binaries are looked up here.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory under which the per-target corpus directories live.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to the kind of input it consumes and the frame
# type it uses (FrameType.ZSTD unless stated otherwise).
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
# All real target names, plus the pseudo-target 'all' that parse_targets()
# expands to every entry of TARGETS.
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
# Number of random seed bytes per fuzz input; same value as the
# --fuzz-rng-seed-size defaults.
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
# Each value is read once at import time; the string after the name is the
# fallback used when the variable is not set.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default tool locations are resolved relative to this script's directory.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags that build() appends only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
     97 
     98 
def create(r):
    """Return the absolute form of ``r``, creating the directory if missing."""
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
    104 
    105 
def check(r):
    """Return the absolute form of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
    111 
    112 
@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a fresh temporary directory, removed on exit."""
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Best-effort cleanup: errors while removing the tree are ignored.
        shutil.rmtree(scratch, ignore_errors=True)
    120 
    121 
def parse_targets(in_targets):
    """
    Validate and de-duplicate a sequence of target names.

    Empty entries are skipped and 'all' expands to every name in TARGETS.
    Returns a list built from a set, so its order is unspecified.
    Raises RuntimeError for a name that is neither 'all' nor in TARGETS.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected.update(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
    134 
    135 
def targets_parser(args, description):
    """
    Parse a command line consisting of zero or more TARGET names.

    The first element of ``args`` is removed and used as the program name.
    Unrecognized arguments are stored on the result as ``extra`` and TARGET
    is replaced with the validated list from parse_targets().
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
    149 
    150 
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans ``flags`` for -fsanitize=... and -fno-sanitize=... options and
    updates args.asan, args.msan and args.ubsan accordingly; a sanitizer not
    mentioned in ``flags`` keeps its current value. args.sanitize is set to
    whether any of the three ended up enabled. Raises RuntimeError when the
    same sanitizer is both enabled and disabled. Returns ``args``.
    """
    def collect(pattern):
        # Every match is a comma-separated list; merge them all into one set.
        return set(','.join(re.findall(pattern, flags)).split(','))

    enabled = collect('-fsanitize=((?:[a-z]+,?)+)')
    disabled = collect('-fno-sanitize=((?:[a-z]+,?)+)')

    def resolve(sanitizer, default):
        turned_on = sanitizer in enabled
        turned_off = sanitizer in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if turned_on:
            return True
        if turned_off:
            return False
        return default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
    178 
    179 
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and inspects the output.
    Returns a ``(compiler, version)`` tuple where ``compiler`` is 'clang',
    'gcc' or None (not recognized) and ``version`` is a tuple of three ints,
    or None when the compiler was not recognized.
    Raises RuntimeError if ``cc`` and ``cxx`` do not belong to the same
    compiler family, or if a recognized compiler prints no x.y.z version.
    Errors from running the compilers propagate from subprocess.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only logged, so non-ASCII bytes (e.g. a localized
    # copyright line) are dropped rather than allowed to abort the build.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'ignore')))
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    if compiler is not None:
        # Raw bytes literal: '\.' is not a valid string escape sequence.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'could not find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
    201 
    202 
def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flag list matching the detected compiler.

    gcc older than 8.0.0 gets signed-integer-overflow disabled; any other gcc
    and clang 5.0.0 or newer get pointer-overflow disabled; everything else
    gets an empty list.
    """
    compiler, version = compiler_version(cc, cxx)
    is_gcc = compiler == 'gcc'
    if is_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if is_gcc:
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
    210 
    211 
def build_parser(args):
    """
    Parse and validate the command line of the build command.

    The first element of ``args`` is removed and used as the program name.
    Most options default to the module-level values read from the
    environment. After parsing, sanitizer settings found in the compiler and
    linker flag strings are merged in via parse_env_flags().

    Returns the argparse namespace. Raises RuntimeError when the sanitizer
    options are inconsistent with each other.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings count as enabled too.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
    392 
    393 
def build(args):
    """
    Entry point of the build command: run ``make clean`` and then ``make``.

    The command line is parsed with build_parser(); if that raises, the
    error is printed and 1 is returned. Otherwise the compiler, preprocessor,
    linker and make flags are assembled from the options, printed, and
    passed to make. Returns 0 once both make invocations succeeded; a failing
    make raises subprocess.CalledProcessError.
    """
    try:
        opts = build_parser(args)
    except Exception as err:
        print(err)
        return 1

    # The compilation flags we are setting
    make_targets = opts.TARGET
    cc, cxx = opts.cc, opts.cxx
    cppflags = shlex.split(opts.cppflags)
    cflags = shlex.split(opts.cflags)
    ldflags = shlex.split(opts.ldflags)
    cxxflags = shlex.split(opts.cxxflags)
    mflags = shlex.split(opts.mflags)

    # Flags that end up in both CFLAGS and CXXFLAGS
    shared_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated',  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(opts.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(opts.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(opts.fuzz_rng_seed_size),
    ])

    # Translate the options into flags
    assert not (opts.fuzzer and opts.coverage)
    if opts.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if opts.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # The clang fuzzer replaces any external fuzzing engine library.
        opts.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(opts.lib_fuzzing_engine))

    if opts.sanitize:
        shared_flags.append('-fsanitize-recover=all' if opts.sanitize_recover
                            else '-fno-sanitize-recover=all')

    if opts.msan:
        shared_flags.append('-fsanitize=memory')
        if opts.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(opts.msan_extra_cppflags)
        cflags.append(opts.msan_extra_cflags)
        cxxflags.append(opts.msan_extra_cxxflags)
        ldflags.append(opts.msan_extra_ldflags)

    if opts.asan:
        shared_flags.append('-fsanitize=address')

    if opts.ubsan:
        shared_flags.append('-fsanitize=undefined')
        if not opts.ubsan_pointer_overflow:
            shared_flags.extend(overflow_ubsan_flags(cc, cxx))

    if opts.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if opts.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            opts.third_party_seq_prod_obj))

    if opts.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    if opts.lib_fuzzing_engine == 'libregression.a':
        # The regression driver library is built ahead of the targets.
        make_targets = ['libregression.a'] + make_targets

    # Append the common flags
    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # One NAME=value string per variable handed to Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = ['make', '-j'] + make_vars + mflags + make_targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
    513 
    514 
def libfuzzer_parser(args):
    """
    Parse the command line of the libfuzzer command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognized arguments are stored on the result as ``extra``.
    Raises RuntimeError if TARGET is not one of TARGETS.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # The three directory overrides differ only in name and default location.
    dir_options = (
        ('corpora', 'TARGET'),
        ('artifact', 'TARGET-crash'),
        ('seed', 'TARGET-seed'),
    )
    for kind, placeholder in dir_options:
        parser.add_argument(
            '--' + kind,
            type=str,
            help='Override the default {} dir (default: {})'.format(
                kind, abs_join(CORPORA_DIR, placeholder)))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
    553 
    554 
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the fuzzer binary for ``target`` and raise if it exits non-zero.

    Directories left as None default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora and
    artifact directories are created when missing; the seed directory is
    only passed along if it already exists. ``extra_args`` are appended to
    the command line unchanged.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # Directories handed to the fuzzer: corpus first, then artifacts, then
    # the optional seed directory.
    input_dirs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        input_dirs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd = cmd + input_dirs + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)
    579 
    580 
def libfuzzer_cmd(args):
    """
    Entry point of the libfuzzer command.

    Returns 1 after printing the error if parsing raised, otherwise runs the
    fuzzer through libfuzzer() and returns 0.
    """
    try:
        opts = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(opts.TARGET, opts.corpora, opts.artifact, opts.seed, opts.extra)
    return 0
    589 
    590 
def afl_parser(args):
    """
    Parse the command line of the afl command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognized arguments are stored on the result as ``extra``. Missing
    --corpora / --output values are filled in with the per-target defaults
    under CORPORA_DIR. Raises RuntimeError if TARGET is not one of TARGETS.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    afl_fuzz_help = 'AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--corpora', type=str, help=corpora_help)
    parser.add_argument('--output', type=str, help=output_help)
    parser.add_argument(
        '--afl-fuzz', type=str, default=AFL_FUZZ, help=afl_fuzz_help)
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    # Fall back to the per-target directories when no override was given.
    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
    632 
    633 
def afl(args):
    """
    Entry point of the afl command: run afl-fuzz on one target.

    Returns 1 after printing the error if parsing raised. Otherwise the
    corpora and output directories are created when missing, afl-fuzz is
    run, and 0 is returned.
    """
    try:
        opts = afl_parser(args)
    except Exception as err:
        print(err)
        return 1

    binary = abs_join(FUZZ_DIR, opts.TARGET)
    input_dir = create(opts.corpora)
    output_dir = create(opts.output)

    cmd = [opts.afl_fuzz, '-i', input_dir, '-o', output_dir]
    cmd.extend(opts.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    # The exit status of afl-fuzz is not checked; 0 is returned regardless.
    subprocess.call(cmd)
    return 0
    650 
    651 
def regression(args):
    """
    Entry point of the regression command.

    Runs every requested target binary on its corpus directory (created if
    missing). Returns 1 after printing the error if parsing raised, 0 when
    all targets ran; a failing target raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        opts = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in opts.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        cmd = [abs_join(FUZZ_DIR, name), corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
    671 
    672 
def gen_parser(args):
    """
    Parse and validate the command line of the gen command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognized arguments are stored on the result as ``extra`` and a
    missing --seed is replaced with CORPORA_DIR/<TARGET>-seed.
    Raises RuntimeError if TARGET is not one of TARGETS or if the
    decodecorpus binary is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        # Use the shared constant instead of repeating its value here.
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used to generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file; run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args
    735 
    736 
def gen(args):
    """
    Entry point of the gen command: build a seed corpus for one target.

    Runs decodecorpus into temporary directories, picks the decompressed
    data, the compressed data, or dictionaries trained with zstd depending
    on the target's input type, and copies the resulting files into the
    seed directory (created if missing).

    Returns 1 after printing the error if parsing raised, 0 on success; a
    failing decodecorpus or zstd run raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples to train on.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged; must happen
        # before the temporary directories are removed.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
    806 
    807 
def minimize(args):
    """Merge each target's corpus into its seed corpus and add crashes.

    ``args`` is the raw argument list, parsed by ``targets_parser``. For
    every selected target, libfuzzer is run with ``-merge=1`` so the
    ``<target>`` corpus is folded into ``<target>_seed_corpus``. Files
    from ``<target>-crash`` whose names are not yet in the seed corpus
    are then copied in.

    Returns 1 if argument parsing fails; otherwise falls off the end
    (implicitly returning None).
    """
    description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
    try:
        args = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for target in args.TARGET:
        # Fold the working corpus (plus any extra args) into the seed corpus.
        working_dir = abs_join(CORPORA_DIR, target)
        seed_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        merge_args = [working_dir, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_dir, extra_args=merge_args)

        # Add crash reproducers that are not already present by name.
        known = set(os.listdir(seed_dir))
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for entry in os.listdir(crash_dir):
            if entry in known:
                continue
            shutil.copy(abs_join(crash_dir, entry), seed_dir)
            known.add(entry)
    832 
    833 
def zip_cmd(args):
    """Zip each selected target's seed corpus into ``<seed_corpus>.zip``.

    ``args`` is the raw argument list, parsed by ``targets_parser``. The
    external ``zip`` tool is invoked from inside the seed corpus
    directory; a failing invocation raises
    ``subprocess.CalledProcessError``.

    Returns 1 if argument parsing fails; otherwise falls off the end
    (implicitly returning None).
    """
    description = """
        Zips up the seed corpus.
        """
    try:
        args = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for target in args.TARGET:
        corpus_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        archive = corpus_dir + ".zip"
        zip_argv = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(zip_argv))
        # Run inside the corpus directory so "." refers to its contents.
        subprocess.check_call(zip_argv, cwd=corpus_dir)
    851 
    852 
def list_cmd(args):
    """Print every entry of TARGETS, one per line. ``args`` is unused."""
    listing = "\n".join(TARGETS)
    print(listing)
    855 
    856 
def short_help(args):
    """Print the usage banner, taking the program name from ``args[0]``."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
    860 
    861 
def help(args):
    """Print the usage banner followed by the option and command list.

    ``args`` is forwarded to ``short_help``, which reads the program name
    from ``args[0]``.
    """
    short_help(args)
    body = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in body:
        print(line)
    877 
    878 
def main():
    """Dispatch ``sys.argv`` to the selected sub-command.

    The command name is removed from the argument list and folded into
    ``args[0]`` before the handler is called. ``args`` is ``sys.argv``
    itself, so this rewrite happens in place on ``sys.argv``.

    Returns the handler's result, or 1 when help is shown or the command
    is not recognised.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
    908 
    909 
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
    912