region-model-asm.cc revision 1.1 1 1.1 mrg /* Handling inline asm in the analyzer.
2 1.1 mrg Copyright (C) 2021-2022 Free Software Foundation, Inc.
3 1.1 mrg Contributed by David Malcolm <dmalcolm (at) redhat.com>.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but
13 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
14 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 1.1 mrg General Public License for more details.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License
18 1.1 mrg along with GCC; see the file COPYING3. If not see
19 1.1 mrg <http://www.gnu.org/licenses/>. */
20 1.1 mrg
21 1.1 mrg #include "config.h"
22 1.1 mrg #include "system.h"
23 1.1 mrg #include "coretypes.h"
24 1.1 mrg #include "tree.h"
25 1.1 mrg #include "function.h"
26 1.1 mrg #include "basic-block.h"
27 1.1 mrg #include "gimple.h"
28 1.1 mrg #include "gimple-iterator.h"
29 1.1 mrg #include "diagnostic-core.h"
30 1.1 mrg #include "pretty-print.h"
31 1.1 mrg #include "tristate.h"
32 1.1 mrg #include "selftest.h"
33 1.1 mrg #include "json.h"
34 1.1 mrg #include "analyzer/analyzer.h"
35 1.1 mrg #include "analyzer/analyzer-logging.h"
36 1.1 mrg #include "options.h"
37 1.1 mrg #include "analyzer/call-string.h"
38 1.1 mrg #include "analyzer/program-point.h"
39 1.1 mrg #include "analyzer/store.h"
40 1.1 mrg #include "analyzer/region-model.h"
41 1.1 mrg #include "analyzer/region-model-reachability.h"
42 1.1 mrg #include "stmt.h"
43 1.1 mrg
44 1.1 mrg #if ENABLE_ANALYZER
45 1.1 mrg
46 1.1 mrg namespace ana {
47 1.1 mrg
48 1.1 mrg /* Minimal asm support for the analyzer.
49 1.1 mrg
50 1.1 mrg The objective of this code is to:
51 1.1 mrg - minimize false positives from the analyzer on the Linux kernel
52 1.1 mrg (which makes heavy use of inline asm), whilst
53 1.1 mrg - avoiding having to "teach" the compiler anything about specific strings
54 1.1 mrg in asm statements.
55 1.1 mrg
56 1.1 mrg Specifically, we want to:
57 1.1 mrg
58 1.1 mrg (a) mark asm outputs and certain other regions as having been written to,
59 1.1 mrg to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
60 1.1 mrg
61 1.1 mrg (b) identify some of these stmts as "deterministic" so that we can
62 1.1 mrg write consistent outputs given consistent inputs, so that we can
63 1.1 mrg avoid false positives for paths in which an asm is invoked twice
64 1.1 mrg with the same inputs and is expected to emit the same output.
65 1.1 mrg
66 1.1 mrg This file implements heuristics for achieving the above. */
67 1.1 mrg
68 1.1 mrg /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
69 1.1 mrg
70 1.1 mrg Consider this x86 function taken from the Linux kernel
71 1.1 mrg (arch/x86/include/asm/barrier.h):
72 1.1 mrg
73 1.1 mrg static inline unsigned long array_index_mask_nospec(unsigned long index,
74 1.1 mrg unsigned long size)
75 1.1 mrg {
76 1.1 mrg unsigned long mask;
77 1.1 mrg
78 1.1 mrg asm volatile ("cmp %1,%2; sbb %0,%0;"
79 1.1 mrg :"=r" (mask)
80 1.1 mrg :"g"(size),"r" (index)
81 1.1 mrg :"cc");
82 1.1 mrg return mask;
83 1.1 mrg }
84 1.1 mrg
85 1.1 mrg The above is a mitigation for Spectre-variant-1 attacks, for clamping
86 1.1 mrg an array access to within the range of [0, size) if the CPU speculates
87 1.1 mrg past the array bounds.
88 1.1 mrg
89 1.1 mrg However, it is ultimately used to implement wdev_to_wvif:
90 1.1 mrg
91 1.1 mrg static inline struct wfx_vif *
92 1.1 mrg wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
93 1.1 mrg {
94 1.1 mrg vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
95 1.1 mrg if (!wdev->vif[vif_id]) {
96 1.1 mrg return NULL;
97 1.1 mrg }
98 1.1 mrg return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg which is used by:
102 1.1 mrg
103 1.1 mrg if (wdev_to_wvif(wvif->wdev, 1))
104 1.1 mrg return wdev_to_wvif(wvif->wdev, 1)->vif;
105 1.1 mrg
106 1.1 mrg The code has been written to assume that wdev_to_wvif is deterministic,
107 1.1 mrg and won't change from returning non-NULL at the "if" clause to
108 1.1 mrg returning NULL at the "->vif" dereference.
109 1.1 mrg
110 1.1 mrg By treating the above specific "asm volatile" as deterministic we avoid
111 1.1 mrg a false positive from -Wanalyzer-null-dereference. */
112 1.1 mrg
113 1.1 mrg static bool
114 1.1 mrg deterministic_p (const gasm *asm_stmt)
115 1.1 mrg {
116 1.1 mrg /* Assume something volatile with no inputs is querying
117 1.1 mrg changeable state e.g. rdtsc. */
118 1.1 mrg if (gimple_asm_ninputs (asm_stmt) == 0
119 1.1 mrg && gimple_asm_volatile_p (asm_stmt))
120 1.1 mrg return false;
121 1.1 mrg
122 1.1 mrg /* Otherwise assume it's purely a function of its inputs. */
123 1.1 mrg return true;
124 1.1 mrg }
125 1.1 mrg
126 1.1 mrg /* Update this model for the asm STMT, using CTXT to report any
127 1.1 mrg diagnostics.
128 1.1 mrg
129 1.1 mrg Compare with cfgexpand.cc: expand_asm_stmt. */
130 1.1 mrg
131 1.1 mrg void
132 1.1 mrg region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
133 1.1 mrg {
134 1.1 mrg logger *logger = ctxt ? ctxt->get_logger () : NULL;
135 1.1 mrg LOG_SCOPE (logger);
136 1.1 mrg
137 1.1 mrg const unsigned noutputs = gimple_asm_noutputs (stmt);
138 1.1 mrg const unsigned ninputs = gimple_asm_ninputs (stmt);
139 1.1 mrg
140 1.1 mrg auto_vec<tree> output_tvec;
141 1.1 mrg auto_vec<tree> input_tvec;
142 1.1 mrg auto_vec<const char *> constraints;
143 1.1 mrg
144 1.1 mrg /* Copy the gimple vectors into new vectors that we can manipulate. */
145 1.1 mrg output_tvec.safe_grow (noutputs, true);
146 1.1 mrg input_tvec.safe_grow (ninputs, true);
147 1.1 mrg constraints.safe_grow (noutputs + ninputs, true);
148 1.1 mrg
149 1.1 mrg for (unsigned i = 0; i < noutputs; ++i)
150 1.1 mrg {
151 1.1 mrg tree t = gimple_asm_output_op (stmt, i);
152 1.1 mrg output_tvec[i] = TREE_VALUE (t);
153 1.1 mrg constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
154 1.1 mrg }
155 1.1 mrg for (unsigned i = 0; i < ninputs; i++)
156 1.1 mrg {
157 1.1 mrg tree t = gimple_asm_input_op (stmt, i);
158 1.1 mrg input_tvec[i] = TREE_VALUE (t);
159 1.1 mrg constraints[i + noutputs]
160 1.1 mrg = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
161 1.1 mrg }
162 1.1 mrg
163 1.1 mrg /* Determine which regions are reachable from the inputs
164 1.1 mrg to this stmt. */
165 1.1 mrg reachable_regions reachable_regs (this);
166 1.1 mrg
167 1.1 mrg int num_errors = 0;
168 1.1 mrg
169 1.1 mrg auto_vec<const region *> output_regions (noutputs);
170 1.1 mrg for (unsigned i = 0; i < noutputs; ++i)
171 1.1 mrg {
172 1.1 mrg tree val = output_tvec[i];
173 1.1 mrg const char *constraint;
174 1.1 mrg bool is_inout;
175 1.1 mrg bool allows_reg;
176 1.1 mrg bool allows_mem;
177 1.1 mrg
178 1.1 mrg const region *dst_reg = get_lvalue (val, ctxt);
179 1.1 mrg output_regions.quick_push (dst_reg);
180 1.1 mrg reachable_regs.add (dst_reg, true);
181 1.1 mrg
182 1.1 mrg /* Try to parse the output constraint. If that fails, there's
183 1.1 mrg no point in going further. */
184 1.1 mrg constraint = constraints[i];
185 1.1 mrg if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
186 1.1 mrg &allows_mem, &allows_reg, &is_inout))
187 1.1 mrg {
188 1.1 mrg if (logger)
189 1.1 mrg logger->log ("error parsing constraint for output %i: %qs",
190 1.1 mrg i, constraint);
191 1.1 mrg num_errors++;
192 1.1 mrg continue;
193 1.1 mrg }
194 1.1 mrg
195 1.1 mrg if (logger)
196 1.1 mrg {
197 1.1 mrg logger->log ("output %i: %qs %qE"
198 1.1 mrg " is_inout: %i allows_reg: %i allows_mem: %i",
199 1.1 mrg i, constraint, val,
200 1.1 mrg (int)is_inout, (int)allows_reg, (int)allows_mem);
201 1.1 mrg logger->start_log_line ();
202 1.1 mrg logger->log_partial (" region: ");
203 1.1 mrg dst_reg->dump_to_pp (logger->get_printer (), true);
204 1.1 mrg logger->end_log_line ();
205 1.1 mrg }
206 1.1 mrg
207 1.1 mrg }
208 1.1 mrg
209 1.1 mrg /* Ideally should combine with inout_svals to determine the
210 1.1 mrg "effective inputs" and use this for the asm_output_svalue. */
211 1.1 mrg
212 1.1 mrg auto_vec<const svalue *> input_svals (ninputs);
213 1.1 mrg for (unsigned i = 0; i < ninputs; i++)
214 1.1 mrg {
215 1.1 mrg tree val = input_tvec[i];
216 1.1 mrg const char *constraint = constraints[i + noutputs];
217 1.1 mrg bool allows_reg, allows_mem;
218 1.1 mrg if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
219 1.1 mrg constraints.address (),
220 1.1 mrg &allows_mem, &allows_reg))
221 1.1 mrg {
222 1.1 mrg if (logger)
223 1.1 mrg logger->log ("error parsing constraint for input %i: %qs",
224 1.1 mrg i, constraint);
225 1.1 mrg num_errors++;
226 1.1 mrg continue;
227 1.1 mrg }
228 1.1 mrg
229 1.1 mrg tree src_expr = input_tvec[i];
230 1.1 mrg const svalue *src_sval = get_rvalue (src_expr, ctxt);
231 1.1 mrg check_for_poison (src_sval, src_expr, ctxt);
232 1.1 mrg input_svals.quick_push (src_sval);
233 1.1 mrg reachable_regs.handle_sval (src_sval);
234 1.1 mrg
235 1.1 mrg if (logger)
236 1.1 mrg {
237 1.1 mrg logger->log ("input %i: %qs %qE"
238 1.1 mrg " allows_reg: %i allows_mem: %i",
239 1.1 mrg i, constraint, val,
240 1.1 mrg (int)allows_reg, (int)allows_mem);
241 1.1 mrg logger->start_log_line ();
242 1.1 mrg logger->log_partial (" sval: ");
243 1.1 mrg src_sval->dump_to_pp (logger->get_printer (), true);
244 1.1 mrg logger->end_log_line ();
245 1.1 mrg }
246 1.1 mrg }
247 1.1 mrg
248 1.1 mrg if (num_errors > 0)
249 1.1 mrg gcc_unreachable ();
250 1.1 mrg
251 1.1 mrg if (logger)
252 1.1 mrg {
253 1.1 mrg logger->log ("reachability: ");
254 1.1 mrg reachable_regs.dump_to_pp (logger->get_printer ());
255 1.1 mrg logger->end_log_line ();
256 1.1 mrg }
257 1.1 mrg
258 1.1 mrg /* Given the regions that were reachable from the inputs we
259 1.1 mrg want to clobber them.
260 1.1 mrg This is similar to region_model::handle_unrecognized_call,
261 1.1 mrg but the unknown call policies seems too aggressive (e.g. purging state
262 1.1 mrg from anything that's ever escaped). Instead, clobber any clusters
263 1.1 mrg that were reachable in *this* asm stmt, rather than those that
264 1.1 mrg escaped, and we don't treat the values as having escaped.
265 1.1 mrg We also assume that asm stmts don't affect sm-state. */
266 1.1 mrg for (auto iter = reachable_regs.begin_mutable_base_regs ();
267 1.1 mrg iter != reachable_regs.end_mutable_base_regs (); ++iter)
268 1.1 mrg {
269 1.1 mrg const region *base_reg = *iter;
270 1.1 mrg if (base_reg->symbolic_for_unknown_ptr_p ()
271 1.1 mrg || !base_reg->tracked_p ())
272 1.1 mrg continue;
273 1.1 mrg
274 1.1 mrg binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
275 1.1 mrg cluster->on_asm (stmt, m_mgr->get_store_manager (),
276 1.1 mrg conjured_purge (this, ctxt));
277 1.1 mrg }
278 1.1 mrg
279 1.1 mrg /* Update the outputs. */
280 1.1 mrg for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
281 1.1 mrg {
282 1.1 mrg tree dst_expr = output_tvec[output_idx];
283 1.1 mrg const region *dst_reg = output_regions[output_idx];
284 1.1 mrg
285 1.1 mrg const svalue *sval;
286 1.1 mrg if (deterministic_p (stmt)
287 1.1 mrg && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
288 1.1 mrg sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
289 1.1 mrg stmt,
290 1.1 mrg output_idx,
291 1.1 mrg input_svals);
292 1.1 mrg else
293 1.1 mrg {
294 1.1 mrg sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
295 1.1 mrg stmt,
296 1.1 mrg dst_reg,
297 1.1 mrg conjured_purge (this,
298 1.1 mrg ctxt));
299 1.1 mrg }
300 1.1 mrg set_value (dst_reg, sval, ctxt);
301 1.1 mrg }
302 1.1 mrg }
303 1.1 mrg
304 1.1 mrg } // namespace ana
305 1.1 mrg
306 1.1 mrg #endif /* #if ENABLE_ANALYZER */
307