1 1.1 kamil /*===- DataFlow.cpp - a standalone DataFlow tracer -------===// 2 1.1 kamil // 3 1.1 kamil // The LLVM Compiler Infrastructure 4 1.1 kamil // 5 1.1 kamil // This file is distributed under the University of Illinois Open Source 6 1.1 kamil // License. See LICENSE.TXT for details. 7 1.1 kamil // 8 1.1 kamil //===----------------------------------------------------------------------===// 9 1.1 kamil // An experimental data-flow tracer for fuzz targets. 10 1.1 kamil // It is based on DFSan and SanitizerCoverage. 11 1.1 kamil // https://clang.llvm.org/docs/DataFlowSanitizer.html 12 1.1 kamil // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow 13 1.1 kamil // 14 1.1 kamil // It executes the fuzz target on the given input while monitoring the 15 1.1 kamil // data flow for every instrumented comparison instruction. 16 1.1 kamil // 17 1.1 kamil // The output shows which functions depend on which bytes of the input. 18 1.1 kamil // 19 1.1 kamil // Build: 20 1.1 kamil // 1. Compile this file with -fsanitize=dataflow 21 1.1 kamil // 2. Build the fuzz target with -g -fsanitize=dataflow 22 1.1 kamil // -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp 23 1.1 kamil // 3. Link those together with -fsanitize=dataflow 24 1.1 kamil // 25 1.1 kamil // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison 26 1.1 kamil // instruction, DFSan modifies the calls to pass the data flow labels. 27 1.1 kamil // The callbacks update the data flow label for the current function. 28 1.1 kamil // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below. 29 1.1 kamil // 30 1.1 kamil // -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function 31 1.1 kamil // entries so that the comparison callback knows that current function. 32 1.1 kamil // 33 1.1 kamil // 34 1.1 kamil // Run: 35 1.1 kamil // # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout) 36 1.1 kamil // ./a.out INPUT_FILE [OUTPUT_FILE] 37 1.1 kamil // 38 1.1 kamil // # Print all instrumented functions. llvm-symbolizer must be present in PATH 39 1.1 kamil // ./a.out 40 1.1 kamil // 41 1.1 kamil // Example output: 42 1.1 kamil // =============== 43 1.1 kamil // F0 11111111111111 44 1.1 kamil // F1 10000000000000 45 1.1 kamil // =============== 46 1.1 kamil // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. 47 1.1 kamil // The byte string is LEN+1 bytes. The last byte is set if the function 48 1.1 kamil // depends on the input length. 49 1.1 kamil //===----------------------------------------------------------------------===*/ 50 1.1 kamil 51 1.1 kamil #include <assert.h> 52 1.1 kamil #include <stdio.h> 53 1.1 kamil #include <stdlib.h> 54 1.1 kamil #include <stdint.h> 55 1.1 kamil #include <string.h> 56 1.1 kamil 57 1.1 kamil #include <execinfo.h> // backtrace_symbols_fd 58 1.1 kamil 59 1.1 kamil #include <sanitizer/dfsan_interface.h> 60 1.1 kamil 61 1.1 kamil extern "C" { 62 1.1 kamil extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); 63 1.1 kamil __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); 64 1.1 kamil } // extern "C" 65 1.1 kamil 66 1.1 kamil static size_t InputLen; 67 1.1 kamil static size_t NumFuncs; 68 1.1 kamil static const uintptr_t *FuncsBeg; 69 1.1 kamil static __thread size_t CurrentFunc; 70 1.1 kamil static dfsan_label *FuncLabels; // Array of NumFuncs elements. 71 1.1 kamil static char *PrintableStringForLabel; // InputLen + 2 bytes. 72 1.1 kamil static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; 73 1.1 kamil 74 1.1 kamil // Prints all instrumented functions. 75 1.1 kamil static int PrintFunctions() { 76 1.1 kamil // We don't have the symbolizer integrated with dfsan yet. 77 1.1 kamil // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. 78 1.1 kamil // TODO(kcc): this is pretty ugly and may break in lots of ways. 79 1.1 kamil // We'll need to make a proper in-process symbolizer work with DFSan. 80 1.1 kamil FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " 81 1.1 kamil "| llvm-symbolizer " 82 1.1 kamil "| grep 'dfs\\$' " 83 1.1 kamil "| sed 's/dfs\\$//g'", "w"); 84 1.1 kamil for (size_t I = 0; I < NumFuncs; I++) { 85 1.1 kamil uintptr_t PC = FuncsBeg[I * 2]; 86 1.1 kamil void *const Buf[1] = {(void*)PC}; 87 1.1 kamil backtrace_symbols_fd(Buf, 1, fileno(Pipe)); 88 1.1 kamil } 89 1.1 kamil pclose(Pipe); 90 1.1 kamil return 0; 91 1.1 kamil } 92 1.1 kamil 93 1.1 kamil extern "C" 94 1.1 kamil void SetBytesForLabel(dfsan_label L, char *Bytes) { 95 1.1 kamil if (LabelSeen[L]) 96 1.1 kamil return; 97 1.1 kamil LabelSeen[L] = true; 98 1.1 kamil assert(L); 99 1.1 kamil if (L <= InputLen + 1) { 100 1.1 kamil Bytes[L - 1] = '1'; 101 1.1 kamil } else { 102 1.1 kamil auto *DLI = dfsan_get_label_info(L); 103 1.1 kamil SetBytesForLabel(DLI->l1, Bytes); 104 1.1 kamil SetBytesForLabel(DLI->l2, Bytes); 105 1.1 kamil } 106 1.1 kamil } 107 1.1 kamil 108 1.1 kamil static char *GetPrintableStringForLabel(dfsan_label L) { 109 1.1 kamil memset(PrintableStringForLabel, '0', InputLen + 1); 110 1.1 kamil PrintableStringForLabel[InputLen + 1] = 0; 111 1.1 kamil memset(LabelSeen, 0, sizeof(LabelSeen)); 112 1.1 kamil SetBytesForLabel(L, PrintableStringForLabel); 113 1.1 kamil return PrintableStringForLabel; 114 1.1 kamil } 115 1.1 kamil 116 1.1 kamil static void PrintDataFlow(FILE *Out) { 117 1.1 kamil for (size_t I = 0; I < NumFuncs; I++) 118 1.1 kamil if (FuncLabels[I]) 119 1.1 kamil fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); 120 1.1 kamil } 121 1.1 kamil 122 1.1 kamil int main(int argc, char **argv) { 123 1.1 kamil if (LLVMFuzzerInitialize) 124 1.1 kamil LLVMFuzzerInitialize(&argc, &argv); 125 1.1 kamil if (argc == 1) 126 1.1 kamil return PrintFunctions(); 127 1.1 kamil assert(argc == 4 || argc == 5); 128 1.1 kamil size_t Beg = atoi(argv[1]); 129 1.1 kamil size_t End = atoi(argv[2]); 130 1.1 kamil assert(Beg < End); 131 1.1 kamil 132 1.1 kamil const char *Input = argv[3]; 133 1.1 kamil fprintf(stderr, "INFO: reading '%s'\n", Input); 134 1.1 kamil FILE *In = fopen(Input, "r"); 135 1.1 kamil assert(In); 136 1.1 kamil fseek(In, 0, SEEK_END); 137 1.1 kamil InputLen = ftell(In); 138 1.1 kamil fseek(In, 0, SEEK_SET); 139 1.1 kamil unsigned char *Buf = (unsigned char*)malloc(InputLen); 140 1.1 kamil size_t NumBytesRead = fread(Buf, 1, InputLen, In); 141 1.1 kamil assert(NumBytesRead == InputLen); 142 1.1 kamil PrintableStringForLabel = (char*)malloc(InputLen + 2); 143 1.1 kamil fclose(In); 144 1.1 kamil 145 1.1 kamil fprintf(stderr, "INFO: running '%s'\n", Input); 146 1.1 kamil for (size_t I = 1; I <= InputLen; I++) { 147 1.1 kamil dfsan_label L = dfsan_create_label("", nullptr); 148 1.1 kamil assert(L == I); 149 1.1 kamil size_t Idx = I - 1; 150 1.1 kamil if (Idx >= Beg && Idx < End) 151 1.1 kamil dfsan_set_label(L, Buf + Idx, 1); 152 1.1 kamil } 153 1.1 kamil dfsan_label SizeL = dfsan_create_label("", nullptr); 154 1.1 kamil assert(SizeL == InputLen + 1); 155 1.1 kamil dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); 156 1.1 kamil 157 1.1 kamil LLVMFuzzerTestOneInput(Buf, InputLen); 158 1.1 kamil free(Buf); 159 1.1 kamil 160 1.1 kamil bool OutIsStdout = argc == 4; 161 1.1 kamil fprintf(stderr, "INFO: writing dataflow to %s\n", 162 1.1 kamil OutIsStdout ? "<stdout>" : argv[4]); 163 1.1 kamil FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); 164 1.1 kamil PrintDataFlow(Out); 165 1.1 kamil if (!OutIsStdout) fclose(Out); 166 1.1 kamil } 167 1.1 kamil 168 1.1 kamil extern "C" { 169 1.1 kamil 170 1.1 kamil void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, 171 1.1 kamil uint32_t *stop) { 172 1.1 kamil assert(NumFuncs == 0 && "This tool does not support DSOs"); 173 1.1 kamil assert(start < stop && "The code is not instrumented for coverage"); 174 1.1 kamil if (start == stop || *start) return; // Initialize only once. 175 1.1 kamil for (uint32_t *x = start; x < stop; x++) 176 1.1 kamil *x = ++NumFuncs; // The first index is 1. 177 1.1 kamil FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); 178 1.1 kamil fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs); 179 1.1 kamil } 180 1.1 kamil 181 1.1 kamil void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, 182 1.1 kamil const uintptr_t *pcs_end) { 183 1.1 kamil assert(NumFuncs == (pcs_end - pcs_beg) / 2); 184 1.1 kamil FuncsBeg = pcs_beg; 185 1.1 kamil } 186 1.1 kamil 187 1.1 kamil void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused. 188 1.1 kamil 189 1.1 kamil void __sanitizer_cov_trace_pc_guard(uint32_t *guard){ 190 1.1 kamil uint32_t FuncNum = *guard - 1; // Guards start from 1. 191 1.1 kamil assert(FuncNum < NumFuncs); 192 1.1 kamil CurrentFunc = FuncNum; 193 1.1 kamil } 194 1.1 kamil 195 1.1 kamil void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, 196 1.1 kamil dfsan_label L1, dfsan_label UnusedL) { 197 1.1 kamil assert(CurrentFunc < NumFuncs); 198 1.1 kamil FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); 199 1.1 kamil } 200 1.1 kamil 201 1.1 kamil #define HOOK(Name, Type) \ 202 1.1 kamil void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ 203 1.1 kamil assert(CurrentFunc < NumFuncs); \ 204 1.1 kamil FuncLabels[CurrentFunc] = \ 205 1.1 kamil dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ 206 1.1 kamil } 207 1.1 kamil 208 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) 209 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) 210 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) 211 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) 212 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) 213 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) 214 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) 215 1.1 kamil HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) 216 1.1 kamil 217 1.1 kamil } // extern "C" 218