Home | History | Annotate | Line # | Download | only in analyzer
      1  1.1  mrg /* Handling inline asm in the analyzer.
      2  1.1  mrg    Copyright (C) 2021-2022 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by David Malcolm <dmalcolm (at) redhat.com>.
      4  1.1  mrg 
      5  1.1  mrg This file is part of GCC.
      6  1.1  mrg 
      7  1.1  mrg GCC is free software; you can redistribute it and/or modify it
      8  1.1  mrg under the terms of the GNU General Public License as published by
      9  1.1  mrg the Free Software Foundation; either version 3, or (at your option)
     10  1.1  mrg any later version.
     11  1.1  mrg 
     12  1.1  mrg GCC is distributed in the hope that it will be useful, but
     13  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of
     14  1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  1.1  mrg General Public License for more details.
     16  1.1  mrg 
     17  1.1  mrg You should have received a copy of the GNU General Public License
     18  1.1  mrg along with GCC; see the file COPYING3.  If not see
     19  1.1  mrg <http://www.gnu.org/licenses/>.  */
     20  1.1  mrg 
     21  1.1  mrg #include "config.h"
     22  1.1  mrg #include "system.h"
     23  1.1  mrg #include "coretypes.h"
     24  1.1  mrg #include "tree.h"
     25  1.1  mrg #include "function.h"
     26  1.1  mrg #include "basic-block.h"
     27  1.1  mrg #include "gimple.h"
     28  1.1  mrg #include "gimple-iterator.h"
     29  1.1  mrg #include "diagnostic-core.h"
     30  1.1  mrg #include "pretty-print.h"
     31  1.1  mrg #include "tristate.h"
     32  1.1  mrg #include "selftest.h"
     33  1.1  mrg #include "json.h"
     34  1.1  mrg #include "analyzer/analyzer.h"
     35  1.1  mrg #include "analyzer/analyzer-logging.h"
     36  1.1  mrg #include "options.h"
     37  1.1  mrg #include "analyzer/call-string.h"
     38  1.1  mrg #include "analyzer/program-point.h"
     39  1.1  mrg #include "analyzer/store.h"
     40  1.1  mrg #include "analyzer/region-model.h"
     41  1.1  mrg #include "analyzer/region-model-reachability.h"
     42  1.1  mrg #include "stmt.h"
     43  1.1  mrg 
     44  1.1  mrg #if ENABLE_ANALYZER
     45  1.1  mrg 
     46  1.1  mrg namespace ana {
     47  1.1  mrg 
     48  1.1  mrg /* Minimal asm support for the analyzer.
     49  1.1  mrg 
     50  1.1  mrg    The objective of this code is to:
     51  1.1  mrg    - minimize false positives from the analyzer on the Linux kernel
     52  1.1  mrg    (which makes heavy use of inline asm), whilst
     53  1.1  mrg    - avoiding having to "teach" the compiler anything about specific strings
     54  1.1  mrg    in asm statements.
     55  1.1  mrg 
     56  1.1  mrg    Specifically, we want to:
     57  1.1  mrg 
     58  1.1  mrg    (a) mark asm outputs and certain other regions as having been written to,
     59  1.1  mrg        to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
     60  1.1  mrg 
     61  1.1  mrg    (b) identify some of these stmts as "deterministic" so that we can
     62  1.1  mrg        write consistent outputs given consistent inputs, so that we can
     63  1.1  mrg        avoid false positives for paths in which an asm is invoked twice
     64  1.1  mrg        with the same inputs and is expected to emit the same output.
     65  1.1  mrg 
     66  1.1  mrg    This file implements heuristics for achieving the above.  */
     67  1.1  mrg 
     68  1.1  mrg /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
     69  1.1  mrg 
     70  1.1  mrg    Consider this x86 function taken from the Linux kernel
     71  1.1  mrg    (arch/x86/include/asm/barrier.h):
     72  1.1  mrg 
     73  1.1  mrg      static inline unsigned long array_index_mask_nospec(unsigned long index,
     74  1.1  mrg 							 unsigned long size)
     75  1.1  mrg      {
     76  1.1  mrg        unsigned long mask;
     77  1.1  mrg 
     78  1.1  mrg        asm volatile ("cmp %1,%2; sbb %0,%0;"
     79  1.1  mrg 		     :"=r" (mask)
     80  1.1  mrg 		     :"g"(size),"r" (index)
     81  1.1  mrg 		     :"cc");
     82  1.1  mrg        return mask;
     83  1.1  mrg      }
     84  1.1  mrg 
     85  1.1  mrg    The above is a mitigation for Spectre-variant-1 attacks, for clamping
     86  1.1  mrg    an array access to within the range of [0, size) if the CPU speculates
     87  1.1  mrg    past the array bounds.
     88  1.1  mrg 
     89  1.1  mrg    However, it is ultimately used to implement wdev_to_wvif:
     90  1.1  mrg 
     91  1.1  mrg      static inline struct wfx_vif *
     92  1.1  mrg      wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
     93  1.1  mrg      {
     94  1.1  mrg        vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
     95  1.1  mrg        if (!wdev->vif[vif_id]) {
     96  1.1  mrg 	 return NULL;
     97  1.1  mrg        }
     98  1.1  mrg        return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
     99  1.1  mrg      }
    100  1.1  mrg 
    101  1.1  mrg    which is used by:
    102  1.1  mrg 
    103  1.1  mrg      if (wdev_to_wvif(wvif->wdev, 1))
    104  1.1  mrg        return wdev_to_wvif(wvif->wdev, 1)->vif;
    105  1.1  mrg 
    106  1.1  mrg    The code has been written to assume that wdev_to_wvif is deterministic,
    107  1.1  mrg    and won't change from returning non-NULL at the "if" clause to
    108  1.1  mrg    returning NULL at the "->vif" dereference.
    109  1.1  mrg 
    110  1.1  mrg    By treating the above specific "asm volatile" as deterministic we avoid
    111  1.1  mrg    a false positive from -Wanalyzer-null-dereference.  */
    112  1.1  mrg 
    113  1.1  mrg static bool
    114  1.1  mrg deterministic_p (const gasm *asm_stmt)
    115  1.1  mrg {
    116  1.1  mrg   /* Assume something volatile with no inputs is querying
    117  1.1  mrg      changeable state e.g. rdtsc.  */
    118  1.1  mrg   if (gimple_asm_ninputs (asm_stmt) == 0
    119  1.1  mrg       && gimple_asm_volatile_p (asm_stmt))
    120  1.1  mrg     return false;
    121  1.1  mrg 
    122  1.1  mrg   /* Otherwise assume it's purely a function of its inputs.  */
    123  1.1  mrg   return true;
    124  1.1  mrg }
    125  1.1  mrg 
    126  1.1  mrg /* Update this model for the asm STMT, using CTXT to report any
    127  1.1  mrg    diagnostics.
    128  1.1  mrg 
    129  1.1  mrg    Compare with cfgexpand.cc: expand_asm_stmt.  */
    130  1.1  mrg 
    131  1.1  mrg void
    132  1.1  mrg region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
    133  1.1  mrg {
    134  1.1  mrg   logger *logger = ctxt ? ctxt->get_logger () : NULL;
    135  1.1  mrg   LOG_SCOPE (logger);
    136  1.1  mrg 
    137  1.1  mrg   const unsigned noutputs = gimple_asm_noutputs (stmt);
    138  1.1  mrg   const unsigned ninputs = gimple_asm_ninputs (stmt);
    139  1.1  mrg 
    140  1.1  mrg   auto_vec<tree> output_tvec;
    141  1.1  mrg   auto_vec<tree> input_tvec;
    142  1.1  mrg   auto_vec<const char *> constraints;
    143  1.1  mrg 
    144  1.1  mrg   /* Copy the gimple vectors into new vectors that we can manipulate.  */
    145  1.1  mrg   output_tvec.safe_grow (noutputs, true);
    146  1.1  mrg   input_tvec.safe_grow (ninputs, true);
    147  1.1  mrg   constraints.safe_grow (noutputs + ninputs, true);
    148  1.1  mrg 
    149  1.1  mrg   for (unsigned i = 0; i < noutputs; ++i)
    150  1.1  mrg     {
    151  1.1  mrg       tree t = gimple_asm_output_op (stmt, i);
    152  1.1  mrg       output_tvec[i] = TREE_VALUE (t);
    153  1.1  mrg       constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
    154  1.1  mrg     }
    155  1.1  mrg   for (unsigned i = 0; i < ninputs; i++)
    156  1.1  mrg     {
    157  1.1  mrg       tree t = gimple_asm_input_op (stmt, i);
    158  1.1  mrg       input_tvec[i] = TREE_VALUE (t);
    159  1.1  mrg       constraints[i + noutputs]
    160  1.1  mrg 	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
    161  1.1  mrg     }
    162  1.1  mrg 
    163  1.1  mrg   /* Determine which regions are reachable from the inputs
    164  1.1  mrg      to this stmt.  */
    165  1.1  mrg   reachable_regions reachable_regs (this);
    166  1.1  mrg 
    167  1.1  mrg   int num_errors = 0;
    168  1.1  mrg 
    169  1.1  mrg   auto_vec<const region *> output_regions (noutputs);
    170  1.1  mrg   for (unsigned i = 0; i < noutputs; ++i)
    171  1.1  mrg     {
    172  1.1  mrg       tree val = output_tvec[i];
    173  1.1  mrg       const char *constraint;
    174  1.1  mrg       bool is_inout;
    175  1.1  mrg       bool allows_reg;
    176  1.1  mrg       bool allows_mem;
    177  1.1  mrg 
    178  1.1  mrg       const region *dst_reg = get_lvalue (val, ctxt);
    179  1.1  mrg       output_regions.quick_push (dst_reg);
    180  1.1  mrg       reachable_regs.add (dst_reg, true);
    181  1.1  mrg 
    182  1.1  mrg       /* Try to parse the output constraint.  If that fails, there's
    183  1.1  mrg 	 no point in going further.  */
    184  1.1  mrg       constraint = constraints[i];
    185  1.1  mrg       if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
    186  1.1  mrg 				    &allows_mem, &allows_reg, &is_inout))
    187  1.1  mrg 	{
    188  1.1  mrg 	  if (logger)
    189  1.1  mrg 	    logger->log ("error parsing constraint for output %i: %qs",
    190  1.1  mrg 			 i, constraint);
    191  1.1  mrg 	  num_errors++;
    192  1.1  mrg 	  continue;
    193  1.1  mrg 	}
    194  1.1  mrg 
    195  1.1  mrg       if (logger)
    196  1.1  mrg 	{
    197  1.1  mrg 	  logger->log ("output %i: %qs %qE"
    198  1.1  mrg 		       " is_inout: %i allows_reg: %i allows_mem: %i",
    199  1.1  mrg 		       i, constraint, val,
    200  1.1  mrg 		       (int)is_inout, (int)allows_reg, (int)allows_mem);
    201  1.1  mrg 	  logger->start_log_line ();
    202  1.1  mrg 	  logger->log_partial ("  region: ");
    203  1.1  mrg 	  dst_reg->dump_to_pp (logger->get_printer (), true);
    204  1.1  mrg 	  logger->end_log_line ();
    205  1.1  mrg 	}
    206  1.1  mrg 
    207  1.1  mrg     }
    208  1.1  mrg 
    209  1.1  mrg   /* Ideally should combine with inout_svals to determine the
    210  1.1  mrg      "effective inputs" and use this for the asm_output_svalue.  */
    211  1.1  mrg 
    212  1.1  mrg   auto_vec<const svalue *> input_svals (ninputs);
    213  1.1  mrg   for (unsigned i = 0; i < ninputs; i++)
    214  1.1  mrg     {
    215  1.1  mrg       tree val = input_tvec[i];
    216  1.1  mrg       const char *constraint = constraints[i + noutputs];
    217  1.1  mrg       bool allows_reg, allows_mem;
    218  1.1  mrg       if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
    219  1.1  mrg 				    constraints.address (),
    220  1.1  mrg 				    &allows_mem, &allows_reg))
    221  1.1  mrg 	{
    222  1.1  mrg 	  if (logger)
    223  1.1  mrg 	    logger->log ("error parsing constraint for input %i: %qs",
    224  1.1  mrg 			 i, constraint);
    225  1.1  mrg 	  num_errors++;
    226  1.1  mrg 	  continue;
    227  1.1  mrg 	}
    228  1.1  mrg 
    229  1.1  mrg       tree src_expr = input_tvec[i];
    230  1.1  mrg       const svalue *src_sval = get_rvalue (src_expr, ctxt);
    231  1.1  mrg       check_for_poison (src_sval, src_expr, ctxt);
    232  1.1  mrg       input_svals.quick_push (src_sval);
    233  1.1  mrg       reachable_regs.handle_sval (src_sval);
    234  1.1  mrg 
    235  1.1  mrg       if (logger)
    236  1.1  mrg 	{
    237  1.1  mrg 	  logger->log ("input %i: %qs %qE"
    238  1.1  mrg 		       " allows_reg: %i allows_mem: %i",
    239  1.1  mrg 		       i, constraint, val,
    240  1.1  mrg 		       (int)allows_reg, (int)allows_mem);
    241  1.1  mrg 	  logger->start_log_line ();
    242  1.1  mrg 	  logger->log_partial ("  sval: ");
    243  1.1  mrg 	  src_sval->dump_to_pp (logger->get_printer (), true);
    244  1.1  mrg 	  logger->end_log_line ();
    245  1.1  mrg 	}
    246  1.1  mrg     }
    247  1.1  mrg 
    248  1.1  mrg   if (num_errors > 0)
    249  1.1  mrg     gcc_unreachable ();
    250  1.1  mrg 
    251  1.1  mrg   if (logger)
    252  1.1  mrg     {
    253  1.1  mrg       logger->log ("reachability: ");
    254  1.1  mrg       reachable_regs.dump_to_pp (logger->get_printer ());
    255  1.1  mrg       logger->end_log_line ();
    256  1.1  mrg     }
    257  1.1  mrg 
    258  1.1  mrg   /* Given the regions that were reachable from the inputs we
    259  1.1  mrg      want to clobber them.
    260  1.1  mrg      This is similar to region_model::handle_unrecognized_call,
    261  1.1  mrg      but the unknown call policies seems too aggressive (e.g. purging state
    262  1.1  mrg      from anything that's ever escaped).  Instead, clobber any clusters
    263  1.1  mrg      that were reachable in *this* asm stmt, rather than those that
    264  1.1  mrg      escaped, and we don't treat the values as having escaped.
    265  1.1  mrg      We also assume that asm stmts don't affect sm-state.  */
    266  1.1  mrg   for (auto iter = reachable_regs.begin_mutable_base_regs ();
    267  1.1  mrg        iter != reachable_regs.end_mutable_base_regs (); ++iter)
    268  1.1  mrg     {
    269  1.1  mrg       const region *base_reg = *iter;
    270  1.1  mrg       if (base_reg->symbolic_for_unknown_ptr_p ()
    271  1.1  mrg 	  || !base_reg->tracked_p ())
    272  1.1  mrg 	continue;
    273  1.1  mrg 
    274  1.1  mrg       binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
    275  1.1  mrg       cluster->on_asm (stmt, m_mgr->get_store_manager (),
    276  1.1  mrg 		       conjured_purge (this, ctxt));
    277  1.1  mrg     }
    278  1.1  mrg 
    279  1.1  mrg   /* Update the outputs.  */
    280  1.1  mrg   for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
    281  1.1  mrg     {
    282  1.1  mrg       tree dst_expr = output_tvec[output_idx];
    283  1.1  mrg       const region *dst_reg = output_regions[output_idx];
    284  1.1  mrg 
    285  1.1  mrg       const svalue *sval;
    286  1.1  mrg       if (deterministic_p (stmt)
    287  1.1  mrg 	  && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
    288  1.1  mrg 	sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
    289  1.1  mrg 						       stmt,
    290  1.1  mrg 						       output_idx,
    291  1.1  mrg 						       input_svals);
    292  1.1  mrg       else
    293  1.1  mrg 	{
    294  1.1  mrg 	  sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
    295  1.1  mrg 						       stmt,
    296  1.1  mrg 						       dst_reg,
    297  1.1  mrg 						       conjured_purge (this,
    298  1.1  mrg 								       ctxt));
    299  1.1  mrg 	}
    300  1.1  mrg       set_value (dst_reg, sval, ctxt);
    301  1.1  mrg     }
    302  1.1  mrg }
    303  1.1  mrg 
    304  1.1  mrg } // namespace ana
    305  1.1  mrg 
    306  1.1  mrg #endif /* #if ENABLE_ANALYZER */
    307