Home | History | Annotate | Line # | Download | only in analyzer
region-model-asm.cc revision 1.1
      1 /* Handling inline asm in the analyzer.
      2    Copyright (C) 2021-2022 Free Software Foundation, Inc.
      3    Contributed by David Malcolm <dmalcolm (at) redhat.com>.
      4 
      5 This file is part of GCC.
      6 
      7 GCC is free software; you can redistribute it and/or modify it
      8 under the terms of the GNU General Public License as published by
      9 the Free Software Foundation; either version 3, or (at your option)
     10 any later version.
     11 
     12 GCC is distributed in the hope that it will be useful, but
     13 WITHOUT ANY WARRANTY; without even the implied warranty of
     14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 General Public License for more details.
     16 
     17 You should have received a copy of the GNU General Public License
     18 along with GCC; see the file COPYING3.  If not see
     19 <http://www.gnu.org/licenses/>.  */
     20 
     21 #include "config.h"
     22 #include "system.h"
     23 #include "coretypes.h"
     24 #include "tree.h"
     25 #include "function.h"
     26 #include "basic-block.h"
     27 #include "gimple.h"
     28 #include "gimple-iterator.h"
     29 #include "diagnostic-core.h"
     30 #include "pretty-print.h"
     31 #include "tristate.h"
     32 #include "selftest.h"
     33 #include "json.h"
     34 #include "analyzer/analyzer.h"
     35 #include "analyzer/analyzer-logging.h"
     36 #include "options.h"
     37 #include "analyzer/call-string.h"
     38 #include "analyzer/program-point.h"
     39 #include "analyzer/store.h"
     40 #include "analyzer/region-model.h"
     41 #include "analyzer/region-model-reachability.h"
     42 #include "stmt.h"
     43 
     44 #if ENABLE_ANALYZER
     45 
     46 namespace ana {
     47 
     48 /* Minimal asm support for the analyzer.
     49 
     50    The objective of this code is to:
     51    - minimize false positives from the analyzer on the Linux kernel
     52    (which makes heavy use of inline asm), whilst
     53    - avoiding having to "teach" the compiler anything about specific strings
     54    in asm statements.
     55 
     56    Specifically, we want to:
     57 
     58    (a) mark asm outputs and certain other regions as having been written to,
     59        to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
     60 
     61    (b) identify some of these stmts as "deterministic" so that we can
     62        write consistent outputs given consistent inputs, so that we can
     63        avoid false positives for paths in which an asm is invoked twice
     64        with the same inputs and is expected to emit the same output.
     65 
     66    This file implements heuristics for achieving the above.  */
     67 
     68 /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
     69 
     70    Consider this x86 function taken from the Linux kernel
     71    (arch/x86/include/asm/barrier.h):
     72 
     73      static inline unsigned long array_index_mask_nospec(unsigned long index,
     74 							 unsigned long size)
     75      {
     76        unsigned long mask;
     77 
     78        asm volatile ("cmp %1,%2; sbb %0,%0;"
     79 		     :"=r" (mask)
     80 		     :"g"(size),"r" (index)
     81 		     :"cc");
     82        return mask;
     83      }
     84 
     85    The above is a mitigation for Spectre-variant-1 attacks, for clamping
     86    an array access to within the range of [0, size) if the CPU speculates
     87    past the array bounds.
     88 
     89    However, it is ultimately used to implement wdev_to_wvif:
     90 
     91      static inline struct wfx_vif *
     92      wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
     93      {
     94        vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
     95        if (!wdev->vif[vif_id]) {
     96 	 return NULL;
     97        }
     98        return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
     99      }
    100 
    101    which is used by:
    102 
    103      if (wdev_to_wvif(wvif->wdev, 1))
    104        return wdev_to_wvif(wvif->wdev, 1)->vif;
    105 
    106    The code has been written to assume that wdev_to_wvif is deterministic,
    107    and won't change from returning non-NULL at the "if" clause to
    108    returning NULL at the "->vif" dereference.
    109 
    110    By treating the above specific "asm volatile" as deterministic we avoid
    111    a false positive from -Wanalyzer-null-dereference.  */
    112 
    113 static bool
    114 deterministic_p (const gasm *asm_stmt)
    115 {
    116   /* Assume something volatile with no inputs is querying
    117      changeable state e.g. rdtsc.  */
    118   if (gimple_asm_ninputs (asm_stmt) == 0
    119       && gimple_asm_volatile_p (asm_stmt))
    120     return false;
    121 
    122   /* Otherwise assume it's purely a function of its inputs.  */
    123   return true;
    124 }
    125 
    126 /* Update this model for the asm STMT, using CTXT to report any
    127    diagnostics.
    128 
    129    Compare with cfgexpand.cc: expand_asm_stmt.  */
    130 
    131 void
    132 region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
    133 {
    134   logger *logger = ctxt ? ctxt->get_logger () : NULL;
    135   LOG_SCOPE (logger);
    136 
    137   const unsigned noutputs = gimple_asm_noutputs (stmt);
    138   const unsigned ninputs = gimple_asm_ninputs (stmt);
    139 
    140   auto_vec<tree> output_tvec;
    141   auto_vec<tree> input_tvec;
    142   auto_vec<const char *> constraints;
    143 
    144   /* Copy the gimple vectors into new vectors that we can manipulate.  */
    145   output_tvec.safe_grow (noutputs, true);
    146   input_tvec.safe_grow (ninputs, true);
    147   constraints.safe_grow (noutputs + ninputs, true);
    148 
    149   for (unsigned i = 0; i < noutputs; ++i)
    150     {
    151       tree t = gimple_asm_output_op (stmt, i);
    152       output_tvec[i] = TREE_VALUE (t);
    153       constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
    154     }
    155   for (unsigned i = 0; i < ninputs; i++)
    156     {
    157       tree t = gimple_asm_input_op (stmt, i);
    158       input_tvec[i] = TREE_VALUE (t);
    159       constraints[i + noutputs]
    160 	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
    161     }
    162 
    163   /* Determine which regions are reachable from the inputs
    164      to this stmt.  */
    165   reachable_regions reachable_regs (this);
    166 
    167   int num_errors = 0;
    168 
    169   auto_vec<const region *> output_regions (noutputs);
    170   for (unsigned i = 0; i < noutputs; ++i)
    171     {
    172       tree val = output_tvec[i];
    173       const char *constraint;
    174       bool is_inout;
    175       bool allows_reg;
    176       bool allows_mem;
    177 
    178       const region *dst_reg = get_lvalue (val, ctxt);
    179       output_regions.quick_push (dst_reg);
    180       reachable_regs.add (dst_reg, true);
    181 
    182       /* Try to parse the output constraint.  If that fails, there's
    183 	 no point in going further.  */
    184       constraint = constraints[i];
    185       if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
    186 				    &allows_mem, &allows_reg, &is_inout))
    187 	{
    188 	  if (logger)
    189 	    logger->log ("error parsing constraint for output %i: %qs",
    190 			 i, constraint);
    191 	  num_errors++;
    192 	  continue;
    193 	}
    194 
    195       if (logger)
    196 	{
    197 	  logger->log ("output %i: %qs %qE"
    198 		       " is_inout: %i allows_reg: %i allows_mem: %i",
    199 		       i, constraint, val,
    200 		       (int)is_inout, (int)allows_reg, (int)allows_mem);
    201 	  logger->start_log_line ();
    202 	  logger->log_partial ("  region: ");
    203 	  dst_reg->dump_to_pp (logger->get_printer (), true);
    204 	  logger->end_log_line ();
    205 	}
    206 
    207     }
    208 
    209   /* Ideally should combine with inout_svals to determine the
    210      "effective inputs" and use this for the asm_output_svalue.  */
    211 
    212   auto_vec<const svalue *> input_svals (ninputs);
    213   for (unsigned i = 0; i < ninputs; i++)
    214     {
    215       tree val = input_tvec[i];
    216       const char *constraint = constraints[i + noutputs];
    217       bool allows_reg, allows_mem;
    218       if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
    219 				    constraints.address (),
    220 				    &allows_mem, &allows_reg))
    221 	{
    222 	  if (logger)
    223 	    logger->log ("error parsing constraint for input %i: %qs",
    224 			 i, constraint);
    225 	  num_errors++;
    226 	  continue;
    227 	}
    228 
    229       tree src_expr = input_tvec[i];
    230       const svalue *src_sval = get_rvalue (src_expr, ctxt);
    231       check_for_poison (src_sval, src_expr, ctxt);
    232       input_svals.quick_push (src_sval);
    233       reachable_regs.handle_sval (src_sval);
    234 
    235       if (logger)
    236 	{
    237 	  logger->log ("input %i: %qs %qE"
    238 		       " allows_reg: %i allows_mem: %i",
    239 		       i, constraint, val,
    240 		       (int)allows_reg, (int)allows_mem);
    241 	  logger->start_log_line ();
    242 	  logger->log_partial ("  sval: ");
    243 	  src_sval->dump_to_pp (logger->get_printer (), true);
    244 	  logger->end_log_line ();
    245 	}
    246     }
    247 
    248   if (num_errors > 0)
    249     gcc_unreachable ();
    250 
    251   if (logger)
    252     {
    253       logger->log ("reachability: ");
    254       reachable_regs.dump_to_pp (logger->get_printer ());
    255       logger->end_log_line ();
    256     }
    257 
    258   /* Given the regions that were reachable from the inputs we
    259      want to clobber them.
    260      This is similar to region_model::handle_unrecognized_call,
    261      but the unknown call policies seems too aggressive (e.g. purging state
    262      from anything that's ever escaped).  Instead, clobber any clusters
    263      that were reachable in *this* asm stmt, rather than those that
    264      escaped, and we don't treat the values as having escaped.
    265      We also assume that asm stmts don't affect sm-state.  */
    266   for (auto iter = reachable_regs.begin_mutable_base_regs ();
    267        iter != reachable_regs.end_mutable_base_regs (); ++iter)
    268     {
    269       const region *base_reg = *iter;
    270       if (base_reg->symbolic_for_unknown_ptr_p ()
    271 	  || !base_reg->tracked_p ())
    272 	continue;
    273 
    274       binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
    275       cluster->on_asm (stmt, m_mgr->get_store_manager (),
    276 		       conjured_purge (this, ctxt));
    277     }
    278 
    279   /* Update the outputs.  */
    280   for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
    281     {
    282       tree dst_expr = output_tvec[output_idx];
    283       const region *dst_reg = output_regions[output_idx];
    284 
    285       const svalue *sval;
    286       if (deterministic_p (stmt)
    287 	  && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
    288 	sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
    289 						       stmt,
    290 						       output_idx,
    291 						       input_svals);
    292       else
    293 	{
    294 	  sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
    295 						       stmt,
    296 						       dst_reg,
    297 						       conjured_purge (this,
    298 								       ctxt));
    299 	}
    300       set_value (dst_reg, sval, ctxt);
    301     }
    302 }
    303 
    304 } // namespace ana
    305 
    306 #endif /* #if ENABLE_ANALYZER */
    307