Home | History | Annotate | Line # | Download | only in dataflow
      1  1.1  kamil /*===- DataFlow.cpp - a standalone DataFlow tracer                  -------===//
      2  1.1  kamil //
      3  1.1  kamil //                     The LLVM Compiler Infrastructure
      4  1.1  kamil //
      5  1.1  kamil // This file is distributed under the University of Illinois Open Source
      6  1.1  kamil // License. See LICENSE.TXT for details.
      7  1.1  kamil //
      8  1.1  kamil //===----------------------------------------------------------------------===//
      9  1.1  kamil // An experimental data-flow tracer for fuzz targets.
     10  1.1  kamil // It is based on DFSan and SanitizerCoverage.
     11  1.1  kamil // https://clang.llvm.org/docs/DataFlowSanitizer.html
     12  1.1  kamil // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
     13  1.1  kamil //
     14  1.1  kamil // It executes the fuzz target on the given input while monitoring the
     15  1.1  kamil // data flow for every instrumented comparison instruction.
     16  1.1  kamil //
     17  1.1  kamil // The output shows which functions depend on which bytes of the input.
     18  1.1  kamil //
     19  1.1  kamil // Build:
     20  1.1  kamil //   1. Compile this file with -fsanitize=dataflow
     21  1.1  kamil //   2. Build the fuzz target with -g -fsanitize=dataflow
     22  1.1  kamil //       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
     23  1.1  kamil //   3. Link those together with -fsanitize=dataflow
     24  1.1  kamil //
     25  1.1  kamil //  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
     26  1.1  kamil //  instruction, DFSan modifies the calls to pass the data flow labels.
     27  1.1  kamil //  The callbacks update the data flow label for the current function.
     28  1.1  kamil //  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
     29  1.1  kamil //
     30  1.1  kamil //  -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
     31  1.1  kamil //  entries so that the comparison callback knows the current function.
     32  1.1  kamil //
     33  1.1  kamil //
     34  1.1  kamil // Run:
     35  1.1  kamil //   # Track bytes [BEG, END) of INPUT_FILE, write to OUTPUT_FILE (default: stdout)
     36  1.1  kamil //   ./a.out BEG END INPUT_FILE [OUTPUT_FILE]
     37  1.1  kamil //
     38  1.1  kamil //   # Print all instrumented functions. llvm-symbolizer must be present in PATH
     39  1.1  kamil //   ./a.out
     40  1.1  kamil //
     41  1.1  kamil // Example output:
     42  1.1  kamil // ===============
     43  1.1  kamil //  F0 11111111111111
     44  1.1  kamil //  F1 10000000000000
     45  1.1  kamil //  ===============
     46  1.1  kamil // "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
     47  1.1  kamil //    The byte string is LEN+1 bytes. The last byte is set if the function
     48  1.1  kamil //    depends on the input length.
     49  1.1  kamil //===----------------------------------------------------------------------===*/
     50  1.1  kamil 
     51  1.1  kamil #include <assert.h>
     52  1.1  kamil #include <stdio.h>
     53  1.1  kamil #include <stdlib.h>
     54  1.1  kamil #include <stdint.h>
     55  1.1  kamil #include <string.h>
     56  1.1  kamil 
     57  1.1  kamil #include <execinfo.h>  // backtrace_symbols_fd
     58  1.1  kamil 
     59  1.1  kamil #include <sanitizer/dfsan_interface.h>
     60  1.1  kamil 
     61  1.1  kamil extern "C" {
                    // Fuzz target entry point; main calls it once with the input file contents.
     62  1.1  kamil extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
                    // Optional initializer (weak symbol); main tests it for null before calling.
     63  1.1  kamil __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
     64  1.1  kamil } // extern "C"
     65  1.1  kamil 
     66  1.1  kamil static size_t InputLen;  // Input size in bytes; set in main from ftell.
     67  1.1  kamil static size_t NumFuncs;  // Number of guards numbered in pc_guard_init.
     68  1.1  kamil static const uintptr_t *FuncsBeg;  // Start of the PC table; set in pcs_init.
     69  1.1  kamil static __thread size_t CurrentFunc;  // Index last stored by trace_pc_guard.
     70  1.1  kamil static dfsan_label *FuncLabels;  // Array of NumFuncs elements.
     71  1.1  kamil static char *PrintableStringForLabel;  // InputLen + 2 bytes.
     72  1.1  kamil static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];  // One flag per label value.
     73  1.1  kamil 
     74  1.1  kamil // Prints all instrumented functions.
     75  1.1  kamil static int PrintFunctions() {
     76  1.1  kamil   // We don't have the symbolizer integrated with dfsan yet.
     77  1.1  kamil   // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
     78  1.1  kamil   // TODO(kcc): this is pretty ugly and may break in lots of ways.
     79  1.1  kamil   //      We'll need to make a proper in-process symbolizer work with DFSan.
     80  1.1  kamil   FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
     81  1.1  kamil                      "| llvm-symbolizer "
     82  1.1  kamil                      "| grep 'dfs\\$' "
     83  1.1  kamil                      "| sed 's/dfs\\$//g'", "w");
     84  1.1  kamil   for (size_t I = 0; I < NumFuncs; I++) {
     85  1.1  kamil     uintptr_t PC = FuncsBeg[I * 2];
     86  1.1  kamil     void *const Buf[1] = {(void*)PC};
     87  1.1  kamil     backtrace_symbols_fd(Buf, 1, fileno(Pipe));
     88  1.1  kamil   }
     89  1.1  kamil   pclose(Pipe);
     90  1.1  kamil   return 0;
     91  1.1  kamil }
     92  1.1  kamil 
     93  1.1  kamil extern "C"
     94  1.1  kamil void SetBytesForLabel(dfsan_label L, char *Bytes) {
     95  1.1  kamil   if (LabelSeen[L])
     96  1.1  kamil     return;
     97  1.1  kamil   LabelSeen[L] = true;
     98  1.1  kamil   assert(L);
     99  1.1  kamil   if (L <= InputLen + 1) {
    100  1.1  kamil     Bytes[L - 1] = '1';
    101  1.1  kamil   } else {
    102  1.1  kamil     auto *DLI = dfsan_get_label_info(L);
    103  1.1  kamil     SetBytesForLabel(DLI->l1, Bytes);
    104  1.1  kamil     SetBytesForLabel(DLI->l2, Bytes);
    105  1.1  kamil   }
    106  1.1  kamil }
    107  1.1  kamil 
    108  1.1  kamil static char *GetPrintableStringForLabel(dfsan_label L) {
    109  1.1  kamil   memset(PrintableStringForLabel, '0', InputLen + 1);
    110  1.1  kamil   PrintableStringForLabel[InputLen + 1] = 0;
    111  1.1  kamil   memset(LabelSeen, 0, sizeof(LabelSeen));
    112  1.1  kamil   SetBytesForLabel(L, PrintableStringForLabel);
    113  1.1  kamil   return PrintableStringForLabel;
    114  1.1  kamil }
    115  1.1  kamil 
    116  1.1  kamil static void PrintDataFlow(FILE *Out) {
    117  1.1  kamil   for (size_t I = 0; I < NumFuncs; I++)
    118  1.1  kamil     if (FuncLabels[I])
    119  1.1  kamil       fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
    120  1.1  kamil }
    121  1.1  kamil 
    122  1.1  kamil int main(int argc, char **argv) {
    123  1.1  kamil   if (LLVMFuzzerInitialize)
    124  1.1  kamil     LLVMFuzzerInitialize(&argc, &argv);
    125  1.1  kamil   if (argc == 1)
    126  1.1  kamil     return PrintFunctions();
    127  1.1  kamil   assert(argc == 4 || argc == 5);
    128  1.1  kamil   size_t Beg = atoi(argv[1]);
    129  1.1  kamil   size_t End = atoi(argv[2]);
    130  1.1  kamil   assert(Beg < End);
    131  1.1  kamil 
    132  1.1  kamil   const char *Input = argv[3];
    133  1.1  kamil   fprintf(stderr, "INFO: reading '%s'\n", Input);
    134  1.1  kamil   FILE *In = fopen(Input, "r");
    135  1.1  kamil   assert(In);
    136  1.1  kamil   fseek(In, 0, SEEK_END);
    137  1.1  kamil   InputLen = ftell(In);
    138  1.1  kamil   fseek(In, 0, SEEK_SET);
    139  1.1  kamil   unsigned char *Buf = (unsigned char*)malloc(InputLen);
    140  1.1  kamil   size_t NumBytesRead = fread(Buf, 1, InputLen, In);
    141  1.1  kamil   assert(NumBytesRead == InputLen);
    142  1.1  kamil   PrintableStringForLabel = (char*)malloc(InputLen + 2);
    143  1.1  kamil   fclose(In);
    144  1.1  kamil 
    145  1.1  kamil   fprintf(stderr, "INFO: running '%s'\n", Input);
    146  1.1  kamil   for (size_t I = 1; I <= InputLen; I++) {
    147  1.1  kamil     dfsan_label L = dfsan_create_label("", nullptr);
    148  1.1  kamil     assert(L == I);
    149  1.1  kamil     size_t Idx = I - 1;
    150  1.1  kamil     if (Idx >= Beg && Idx < End)
    151  1.1  kamil       dfsan_set_label(L, Buf + Idx, 1);
    152  1.1  kamil   }
    153  1.1  kamil   dfsan_label SizeL = dfsan_create_label("", nullptr);
    154  1.1  kamil   assert(SizeL == InputLen + 1);
    155  1.1  kamil   dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
    156  1.1  kamil 
    157  1.1  kamil   LLVMFuzzerTestOneInput(Buf, InputLen);
    158  1.1  kamil   free(Buf);
    159  1.1  kamil 
    160  1.1  kamil   bool OutIsStdout = argc == 4;
    161  1.1  kamil   fprintf(stderr, "INFO: writing dataflow to %s\n",
    162  1.1  kamil           OutIsStdout ? "<stdout>" : argv[4]);
    163  1.1  kamil   FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
    164  1.1  kamil   PrintDataFlow(Out);
    165  1.1  kamil   if (!OutIsStdout) fclose(Out);
    166  1.1  kamil }
    167  1.1  kamil 
    168  1.1  kamil extern "C" {
    169  1.1  kamil 
    170  1.1  kamil void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
    171  1.1  kamil                                          uint32_t *stop) {
    172  1.1  kamil   assert(NumFuncs == 0 && "This tool does not support DSOs");
    173  1.1  kamil   assert(start < stop && "The code is not instrumented for coverage");
    174  1.1  kamil   if (start == stop || *start) return;  // Initialize only once.
    175  1.1  kamil   for (uint32_t *x = start; x < stop; x++)
    176  1.1  kamil     *x = ++NumFuncs;  // The first index is 1.
    177  1.1  kamil   FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
    178  1.1  kamil   fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs);
    179  1.1  kamil }
    180  1.1  kamil 
    181  1.1  kamil void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
    182  1.1  kamil                               const uintptr_t *pcs_end) {
    183  1.1  kamil   assert(NumFuncs == (pcs_end - pcs_beg) / 2);
    184  1.1  kamil   FuncsBeg = pcs_beg;
    185  1.1  kamil }
    186  1.1  kamil 
// Indirect-call callback; this tool does not use it, so it is a no-op.
void __sanitizer_cov_trace_pc_indir(uint64_t x) {
  (void)x;  // unused.
}
    188  1.1  kamil 
    189  1.1  kamil void __sanitizer_cov_trace_pc_guard(uint32_t *guard){
    190  1.1  kamil   uint32_t FuncNum = *guard - 1;  // Guards start from 1.
    191  1.1  kamil   assert(FuncNum < NumFuncs);
    192  1.1  kamil   CurrentFunc = FuncNum;
    193  1.1  kamil }
    194  1.1  kamil 
    195  1.1  kamil void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
    196  1.1  kamil                                          dfsan_label L1, dfsan_label UnusedL) {
    197  1.1  kamil   assert(CurrentFunc < NumFuncs);
    198  1.1  kamil   FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
    199  1.1  kamil }
    200  1.1  kamil 
    201  1.1  kamil #define HOOK(Name, Type)                                                       \
    202  1.1  kamil   void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) {            \
    203  1.1  kamil     assert(CurrentFunc < NumFuncs);                                            \
    204  1.1  kamil     FuncLabels[CurrentFunc] =                                                  \
    205  1.1  kamil         dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2));             \
    206  1.1  kamil   }
    207  1.1  kamil 
    208  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
    209  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
    210  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
    211  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
    212  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
    213  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
    214  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
    215  1.1  kamil HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
    216  1.1  kamil 
    217  1.1  kamil } // extern "C"
    218