1 1.1 mrg /* Handling inline asm in the analyzer. 2 1.1 mrg Copyright (C) 2021-2022 Free Software Foundation, Inc. 3 1.1 mrg Contributed by David Malcolm <dmalcolm (at) redhat.com>. 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but 13 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of 14 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 1.1 mrg General Public License for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU General Public License 18 1.1 mrg along with GCC; see the file COPYING3. If not see 19 1.1 mrg <http://www.gnu.org/licenses/>. */ 20 1.1 mrg 21 1.1 mrg #include "config.h" 22 1.1 mrg #include "system.h" 23 1.1 mrg #include "coretypes.h" 24 1.1 mrg #include "tree.h" 25 1.1 mrg #include "function.h" 26 1.1 mrg #include "basic-block.h" 27 1.1 mrg #include "gimple.h" 28 1.1 mrg #include "gimple-iterator.h" 29 1.1 mrg #include "diagnostic-core.h" 30 1.1 mrg #include "pretty-print.h" 31 1.1 mrg #include "tristate.h" 32 1.1 mrg #include "selftest.h" 33 1.1 mrg #include "json.h" 34 1.1 mrg #include "analyzer/analyzer.h" 35 1.1 mrg #include "analyzer/analyzer-logging.h" 36 1.1 mrg #include "options.h" 37 1.1 mrg #include "analyzer/call-string.h" 38 1.1 mrg #include "analyzer/program-point.h" 39 1.1 mrg #include "analyzer/store.h" 40 1.1 mrg #include "analyzer/region-model.h" 41 1.1 mrg #include "analyzer/region-model-reachability.h" 42 1.1 mrg #include "stmt.h" 43 1.1 mrg 44 1.1 mrg #if ENABLE_ANALYZER 45 1.1 mrg 46 1.1 mrg namespace ana { 47 1.1 mrg 48 1.1 mrg /* Minimal asm support for the analyzer. 49 1.1 mrg 50 1.1 mrg The objective of this code is to: 51 1.1 mrg - minimize false positives from the analyzer on the Linux kernel 52 1.1 mrg (which makes heavy use of inline asm), whilst 53 1.1 mrg - avoiding having to "teach" the compiler anything about specific strings 54 1.1 mrg in asm statements. 55 1.1 mrg 56 1.1 mrg Specifically, we want to: 57 1.1 mrg 58 1.1 mrg (a) mark asm outputs and certain other regions as having been written to, 59 1.1 mrg to avoid false postives from -Wanalyzer-use-of-uninitialized-value. 60 1.1 mrg 61 1.1 mrg (b) identify some of these stmts as "deterministic" so that we can 62 1.1 mrg write consistent outputs given consistent inputs, so that we can 63 1.1 mrg avoid false positives for paths in which an asm is invoked twice 64 1.1 mrg with the same inputs and is expected to emit the same output. 65 1.1 mrg 66 1.1 mrg This file implements heuristics for achieving the above. */ 67 1.1 mrg 68 1.1 mrg /* Determine if ASM_STMT is deterministic, in the sense of (b) above. 69 1.1 mrg 70 1.1 mrg Consider this x86 function taken from the Linux kernel 71 1.1 mrg (arch/x86/include/asm/barrier.h): 72 1.1 mrg 73 1.1 mrg static inline unsigned long array_index_mask_nospec(unsigned long index, 74 1.1 mrg unsigned long size) 75 1.1 mrg { 76 1.1 mrg unsigned long mask; 77 1.1 mrg 78 1.1 mrg asm volatile ("cmp %1,%2; sbb %0,%0;" 79 1.1 mrg :"=r" (mask) 80 1.1 mrg :"g"(size),"r" (index) 81 1.1 mrg :"cc"); 82 1.1 mrg return mask; 83 1.1 mrg } 84 1.1 mrg 85 1.1 mrg The above is a mitigation for Spectre-variant-1 attacks, for clamping 86 1.1 mrg an array access to within the range of [0, size] if the CPU speculates 87 1.1 mrg past the array bounds. 88 1.1 mrg 89 1.1 mrg However, it is ultimately used to implement wdev_to_wvif: 90 1.1 mrg 91 1.1 mrg static inline struct wfx_vif * 92 1.1 mrg wdev_to_wvif(struct wfx_dev *wdev, int vif_id) 93 1.1 mrg { 94 1.1 mrg vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif)); 95 1.1 mrg if (!wdev->vif[vif_id]) { 96 1.1 mrg return NULL; 97 1.1 mrg } 98 1.1 mrg return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv; 99 1.1 mrg } 100 1.1 mrg 101 1.1 mrg which is used by: 102 1.1 mrg 103 1.1 mrg if (wdev_to_wvif(wvif->wdev, 1)) 104 1.1 mrg return wdev_to_wvif(wvif->wdev, 1)->vif; 105 1.1 mrg 106 1.1 mrg The code has been written to assume that wdev_to_wvif is deterministic, 107 1.1 mrg and won't change from returning non-NULL at the "if" clause to 108 1.1 mrg returning NULL at the "->vif" dereference. 109 1.1 mrg 110 1.1 mrg By treating the above specific "asm volatile" as deterministic we avoid 111 1.1 mrg a false positive from -Wanalyzer-null-dereference. */ 112 1.1 mrg 113 1.1 mrg static bool 114 1.1 mrg deterministic_p (const gasm *asm_stmt) 115 1.1 mrg { 116 1.1 mrg /* Assume something volatile with no inputs is querying 117 1.1 mrg changeable state e.g. rdtsc. */ 118 1.1 mrg if (gimple_asm_ninputs (asm_stmt) == 0 119 1.1 mrg && gimple_asm_volatile_p (asm_stmt)) 120 1.1 mrg return false; 121 1.1 mrg 122 1.1 mrg /* Otherwise assume it's purely a function of its inputs. */ 123 1.1 mrg return true; 124 1.1 mrg } 125 1.1 mrg 126 1.1 mrg /* Update this model for the asm STMT, using CTXT to report any 127 1.1 mrg diagnostics. 128 1.1 mrg 129 1.1 mrg Compare with cfgexpand.cc: expand_asm_stmt. */ 130 1.1 mrg 131 1.1 mrg void 132 1.1 mrg region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt) 133 1.1 mrg { 134 1.1 mrg logger *logger = ctxt ? ctxt->get_logger () : NULL; 135 1.1 mrg LOG_SCOPE (logger); 136 1.1 mrg 137 1.1 mrg const unsigned noutputs = gimple_asm_noutputs (stmt); 138 1.1 mrg const unsigned ninputs = gimple_asm_ninputs (stmt); 139 1.1 mrg 140 1.1 mrg auto_vec<tree> output_tvec; 141 1.1 mrg auto_vec<tree> input_tvec; 142 1.1 mrg auto_vec<const char *> constraints; 143 1.1 mrg 144 1.1 mrg /* Copy the gimple vectors into new vectors that we can manipulate. */ 145 1.1 mrg output_tvec.safe_grow (noutputs, true); 146 1.1 mrg input_tvec.safe_grow (ninputs, true); 147 1.1 mrg constraints.safe_grow (noutputs + ninputs, true); 148 1.1 mrg 149 1.1 mrg for (unsigned i = 0; i < noutputs; ++i) 150 1.1 mrg { 151 1.1 mrg tree t = gimple_asm_output_op (stmt, i); 152 1.1 mrg output_tvec[i] = TREE_VALUE (t); 153 1.1 mrg constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); 154 1.1 mrg } 155 1.1 mrg for (unsigned i = 0; i < ninputs; i++) 156 1.1 mrg { 157 1.1 mrg tree t = gimple_asm_input_op (stmt, i); 158 1.1 mrg input_tvec[i] = TREE_VALUE (t); 159 1.1 mrg constraints[i + noutputs] 160 1.1 mrg = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); 161 1.1 mrg } 162 1.1 mrg 163 1.1 mrg /* Determine which regions are reachable from the inputs 164 1.1 mrg to this stmt. */ 165 1.1 mrg reachable_regions reachable_regs (this); 166 1.1 mrg 167 1.1 mrg int num_errors = 0; 168 1.1 mrg 169 1.1 mrg auto_vec<const region *> output_regions (noutputs); 170 1.1 mrg for (unsigned i = 0; i < noutputs; ++i) 171 1.1 mrg { 172 1.1 mrg tree val = output_tvec[i]; 173 1.1 mrg const char *constraint; 174 1.1 mrg bool is_inout; 175 1.1 mrg bool allows_reg; 176 1.1 mrg bool allows_mem; 177 1.1 mrg 178 1.1 mrg const region *dst_reg = get_lvalue (val, ctxt); 179 1.1 mrg output_regions.quick_push (dst_reg); 180 1.1 mrg reachable_regs.add (dst_reg, true); 181 1.1 mrg 182 1.1 mrg /* Try to parse the output constraint. If that fails, there's 183 1.1 mrg no point in going further. */ 184 1.1 mrg constraint = constraints[i]; 185 1.1 mrg if (!parse_output_constraint (&constraint, i, ninputs, noutputs, 186 1.1 mrg &allows_mem, &allows_reg, &is_inout)) 187 1.1 mrg { 188 1.1 mrg if (logger) 189 1.1 mrg logger->log ("error parsing constraint for output %i: %qs", 190 1.1 mrg i, constraint); 191 1.1 mrg num_errors++; 192 1.1 mrg continue; 193 1.1 mrg } 194 1.1 mrg 195 1.1 mrg if (logger) 196 1.1 mrg { 197 1.1 mrg logger->log ("output %i: %qs %qE" 198 1.1 mrg " is_inout: %i allows_reg: %i allows_mem: %i", 199 1.1 mrg i, constraint, val, 200 1.1 mrg (int)is_inout, (int)allows_reg, (int)allows_mem); 201 1.1 mrg logger->start_log_line (); 202 1.1 mrg logger->log_partial (" region: "); 203 1.1 mrg dst_reg->dump_to_pp (logger->get_printer (), true); 204 1.1 mrg logger->end_log_line (); 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg } 208 1.1 mrg 209 1.1 mrg /* Ideally should combine with inout_svals to determine the 210 1.1 mrg "effective inputs" and use this for the asm_output_svalue. */ 211 1.1 mrg 212 1.1 mrg auto_vec<const svalue *> input_svals (ninputs); 213 1.1 mrg for (unsigned i = 0; i < ninputs; i++) 214 1.1 mrg { 215 1.1 mrg tree val = input_tvec[i]; 216 1.1 mrg const char *constraint = constraints[i + noutputs]; 217 1.1 mrg bool allows_reg, allows_mem; 218 1.1 mrg if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0, 219 1.1 mrg constraints.address (), 220 1.1 mrg &allows_mem, &allows_reg)) 221 1.1 mrg { 222 1.1 mrg if (logger) 223 1.1 mrg logger->log ("error parsing constraint for input %i: %qs", 224 1.1 mrg i, constraint); 225 1.1 mrg num_errors++; 226 1.1 mrg continue; 227 1.1 mrg } 228 1.1 mrg 229 1.1 mrg tree src_expr = input_tvec[i]; 230 1.1 mrg const svalue *src_sval = get_rvalue (src_expr, ctxt); 231 1.1 mrg check_for_poison (src_sval, src_expr, ctxt); 232 1.1 mrg input_svals.quick_push (src_sval); 233 1.1 mrg reachable_regs.handle_sval (src_sval); 234 1.1 mrg 235 1.1 mrg if (logger) 236 1.1 mrg { 237 1.1 mrg logger->log ("input %i: %qs %qE" 238 1.1 mrg " allows_reg: %i allows_mem: %i", 239 1.1 mrg i, constraint, val, 240 1.1 mrg (int)allows_reg, (int)allows_mem); 241 1.1 mrg logger->start_log_line (); 242 1.1 mrg logger->log_partial (" sval: "); 243 1.1 mrg src_sval->dump_to_pp (logger->get_printer (), true); 244 1.1 mrg logger->end_log_line (); 245 1.1 mrg } 246 1.1 mrg } 247 1.1 mrg 248 1.1 mrg if (num_errors > 0) 249 1.1 mrg gcc_unreachable (); 250 1.1 mrg 251 1.1 mrg if (logger) 252 1.1 mrg { 253 1.1 mrg logger->log ("reachability: "); 254 1.1 mrg reachable_regs.dump_to_pp (logger->get_printer ()); 255 1.1 mrg logger->end_log_line (); 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg /* Given the regions that were reachable from the inputs we 259 1.1 mrg want to clobber them. 260 1.1 mrg This is similar to region_model::handle_unrecognized_call, 261 1.1 mrg but the unknown call policies seems too aggressive (e.g. purging state 262 1.1 mrg from anything that's ever escaped). Instead, clobber any clusters 263 1.1 mrg that were reachable in *this* asm stmt, rather than those that 264 1.1 mrg escaped, and we don't treat the values as having escaped. 265 1.1 mrg We also assume that asm stmts don't affect sm-state. */ 266 1.1 mrg for (auto iter = reachable_regs.begin_mutable_base_regs (); 267 1.1 mrg iter != reachable_regs.end_mutable_base_regs (); ++iter) 268 1.1 mrg { 269 1.1 mrg const region *base_reg = *iter; 270 1.1 mrg if (base_reg->symbolic_for_unknown_ptr_p () 271 1.1 mrg || !base_reg->tracked_p ()) 272 1.1 mrg continue; 273 1.1 mrg 274 1.1 mrg binding_cluster *cluster = m_store.get_or_create_cluster (base_reg); 275 1.1 mrg cluster->on_asm (stmt, m_mgr->get_store_manager (), 276 1.1 mrg conjured_purge (this, ctxt)); 277 1.1 mrg } 278 1.1 mrg 279 1.1 mrg /* Update the outputs. */ 280 1.1 mrg for (unsigned output_idx = 0; output_idx < noutputs; output_idx++) 281 1.1 mrg { 282 1.1 mrg tree dst_expr = output_tvec[output_idx]; 283 1.1 mrg const region *dst_reg = output_regions[output_idx]; 284 1.1 mrg 285 1.1 mrg const svalue *sval; 286 1.1 mrg if (deterministic_p (stmt) 287 1.1 mrg && input_svals.length () <= asm_output_svalue::MAX_INPUTS) 288 1.1 mrg sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr), 289 1.1 mrg stmt, 290 1.1 mrg output_idx, 291 1.1 mrg input_svals); 292 1.1 mrg else 293 1.1 mrg { 294 1.1 mrg sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr), 295 1.1 mrg stmt, 296 1.1 mrg dst_reg, 297 1.1 mrg conjured_purge (this, 298 1.1 mrg ctxt)); 299 1.1 mrg } 300 1.1 mrg set_value (dst_reg, sval, ctxt); 301 1.1 mrg } 302 1.1 mrg } 303 1.1 mrg 304 1.1 mrg } // namespace ana 305 1.1 mrg 306 1.1 mrg #endif /* #if ENABLE_ANALYZER */ 307