Home | History | Annotate | Line # | Download | only in aarch64
aarch64-speculation.cc revision 1.1
      1 /* Speculation tracking and mitigation (e.g. CVE 2017-5753) for AArch64.
      2    Copyright (C) 2018-2019 Free Software Foundation, Inc.
      3    Contributed by ARM Ltd.
      4 
      5    This file is part of GCC.
      6 
      7    GCC is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    GCC is distributed in the hope that it will be useful, but
     13    WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15    General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with GCC; see the file COPYING3.  If not see
     19    <http://www.gnu.org/licenses/>.  */
     20 
     21 #include "config.h"
     22 #include "system.h"
     23 #include "coretypes.h"
     24 #include "target.h"
     25 #include "rtl.h"
     26 #include "tree-pass.h"
     27 #include "profile-count.h"
     28 #include "backend.h"
     29 #include "cfgbuild.h"
     30 #include "print-rtl.h"
     31 #include "cfgrtl.h"
     32 #include "function.h"
     33 #include "basic-block.h"
     34 #include "memmodel.h"
     35 #include "emit-rtl.h"
     36 #include "insn-attr.h"
     37 #include "df.h"
     38 #include "tm_p.h"
     39 #include "insn-config.h"
     40 #include "recog.h"
     41 
     42 /* This pass scans the RTL just before the final branch
     43    re-organisation pass.  The aim is to identify all places where
     44    there is conditional control flow and to insert code that tracks
     45    any speculative execution of a conditional branch.
     46 
     47    To do this we reserve a call-clobbered register (so that it can be
     48    initialized very early in the function prologue) that can then be
     49    updated each time there is a conditional branch.  At each such
     50    branch we then generate a code sequence that uses conditional
     51    select operations that are not subject to speculation themselves
     52    (we ignore for the moment situations where that might not always be
     53    strictly true).  For example, a branch sequence such as:
     54 
     55 	B.EQ	<dst>
     56 	...
     57    <dst>:
     58 
     59    is transformed to:
     60 
     61 	B.EQ	<dst>
     62 	CSEL	tracker, tracker, XZr, ne
     63 	...
     64    <dst>:
     65 	CSEL	tracker, tracker, XZr, eq
     66 
     67    Since we start with the tracker initialized to all bits one, if at any
     68    time the predicted control flow diverges from the architectural program
     69    behavior, then the tracker will become zero (but not otherwise).
     70 
     71    The tracker value can be used at any time at which a value needs
     72    guarding against incorrect speculation.  This can be done in
     73    several ways, but they all amount to the same thing.  For an
     74    untrusted address, or an untrusted offset to a trusted address, we
     75    can simply mask the address with the tracker with the untrusted
     76    value.  If the CPU is not speculating, or speculating correctly,
     77    then the value will remain unchanged, otherwise it will be clamped
     78    to zero.  For more complex scenarios we can compare the tracker
     79    against zero and use the flags to form a new selection with an
     80    alternate safe value.
     81 
     82    On implementations where the data processing instructions may
     83    themselves produce speculative values, the architecture requires
     84    that a CSDB instruction will resolve such data speculation, so each
     85    time we use the tracker for protecting a vulnerable value we also
     86    emit a CSDB: we do not need to do that each time the tracker itself
     87    is updated.
     88 
     89    At function boundaries, we need to communicate the speculation
     90    tracking state with the caller or the callee.  This is tricky
     91    because there is no register available for such a purpose without
     92    creating a new ABI.  We deal with this by relying on the principle
     93    that in all real programs the stack pointer, SP will never be NULL
     94    at a function boundary; we can thus encode the speculation state in
     95    SP by clearing SP if the speculation tracker itself is NULL.  After
     96    the call we recover the tracking state back from SP into the
      97    tracker register.  The result is that a function call sequence is
     98    transformed to
     99 
    100 	MOV	tmp, SP
    101 	AND	tmp, tmp, tracker
    102 	MOV	SP, tmp
    103 	BL	<callee>
    104 	CMP	SP, #0
    105 	CSETM	tracker, ne
    106 
    107    The additional MOV instructions in the pre-call sequence are needed
    108    because SP cannot be used directly with the AND instruction.
    109 
    110    The code inside a function body uses the post-call sequence in the
    111    prologue to establish the tracker and the pre-call sequence in the
    112    epilogue to re-encode the state for the return.
    113 
    114    The code sequences have the nice property that if called from, or
    115    calling a function that does not track speculation then the stack pointer
    116    will always be non-NULL and hence the tracker will be initialized to all
    117    bits one as we need: we lose the ability to fully track speculation in that
    118    case, but we are still architecturally safe.
    119 
    120    Tracking speculation in this way is quite expensive, both in code
    121    size and execution time.  We employ a number of tricks to try to
    122    limit this:
    123 
    124    1) Simple leaf functions with no conditional branches (or use of
    125    the tracker) do not need to establish a new tracker: they simply
    126    carry the tracking state through SP for the duration of the call.
    127    The same is also true for leaf functions that end in a tail-call.
    128 
    129    2) Back-to-back function calls in a single basic block also do not
    130    need to re-establish the tracker between the calls.  Again, we can
    131    carry the tracking state in SP for this period of time unless the
    132    tracker value is needed at that point in time.
    133 
    134    We run the pass just before the final branch reorganization pass so
    135    that we can handle most of the conditional branch cases using the
    136    standard edge insertion code.  The reorg pass will hopefully clean
    137    things up for afterwards so that the results aren't too
    138    horrible.  */
    139 
/* Generate a code sequence to clobber SP if speculating incorrectly.
   The emitted sequence is:

	MOV	scratch, SP
	AND	scratch, scratch, tracker
	MOV	SP, scratch

   so SP becomes zero exactly when the speculation tracker is zero
   (i.e. when mis-speculation has been detected).  The copies through
   the scratch register are needed because SP cannot be used directly
   as an operand of the AND instruction.  */
static rtx_insn *
aarch64_speculation_clobber_sp ()
{
  rtx sp = gen_rtx_REG (DImode, SP_REGNUM);
  rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM);
  rtx scratch = gen_rtx_REG (DImode, SPECULATION_SCRATCH_REGNUM);

  /* Build the three insns in a detached sequence so the caller can
     emit them wherever they are required.  */
  start_sequence ();
  emit_insn (gen_rtx_SET (scratch, sp));
  emit_insn (gen_anddi3 (scratch, scratch, tracker));
  emit_insn (gen_rtx_SET (sp, scratch));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  return seq;
}
    156 
    157 /* Generate a code sequence to establish the tracker variable from the
    158    contents of SP.  */
    159 static rtx_insn *
    160 aarch64_speculation_establish_tracker ()
    161 {
    162   rtx sp = gen_rtx_REG (DImode, SP_REGNUM);
    163   rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM);
    164   start_sequence ();
    165   rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx);
    166   emit_insn (gen_cstoredi_neg (tracker,
    167 			       gen_rtx_NE (CCmode, cc, const0_rtx), cc));
    168   rtx_insn *seq = get_insns ();
    169   end_sequence ();
    170   return seq;
    171 }
    172 
/* Main speculation tracking pass.  Works in two walks over the CFG:
   the first instruments conditional branches, returns and non-local
   gotos; the second deals with calls and non-local goto targets.  A
   final fixup walk emits the return/tail-call sequences that had to
   be deferred until we knew whether tracking was needed at all.
   Always returns 0 (no TODO flags).  */
unsigned int
aarch64_do_track_speculation ()
{
  basic_block bb;
  bool needs_tracking = false;	/* Have we emitted any tracking code?  */
  bool need_second_pass = false; /* Were any return sequences deferred?  */
  rtx_insn *insn;
  int fixups_pending = 0;	/* Count of deferred return/tail-call fixups;
				   must reach zero in the fixup walk.  */

  /* First walk: look at the final real insn of each block and
     instrument conditional branches; clobber SP before returns and
     non-local gotos.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      insn = BB_END (bb);

      if (dump_file)
	fprintf (dump_file, "Basic block %d:\n", bb->index);

      /* Step back over any trailing notes to find the last real insn
	 in the block.  */
      while (insn != BB_HEAD (bb)
	     && NOTE_P (insn))
	insn = PREV_INSN (insn);

      if (control_flow_insn_p (insn))
	{
	  if (any_condjump_p (insn))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  condjump\n");
		  dump_insn_slim (dump_file, insn);
		}

	      rtx src = SET_SRC (pc_set (insn));

	      /* Check for an inverted jump, where the fall-through edge
		 appears first.  */
	      bool inverted = GET_CODE (XEXP (src, 2)) != PC;
	      /* The other edge must be the PC (we assume that we don't
		 have conditional return instructions).  */
	      gcc_assert (GET_CODE (XEXP (src, 1 + !inverted)) == PC);

	      /* The branch condition must be a CC-register compare
		 against zero, so that we can build its inverse below.  */
	      rtx cond = copy_rtx (XEXP (src, 0));
	      gcc_assert (COMPARISON_P (cond)
			  && REG_P (XEXP (cond, 0))
			  && REGNO (XEXP (cond, 0)) == CC_REGNUM
			  && XEXP (cond, 1) == const0_rtx);
	      enum rtx_code inv_cond_code
		= reversed_comparison_code (cond, insn);
	      /* We should be able to reverse all conditions.  */
	      gcc_assert (inv_cond_code != UNKNOWN);
	      rtx inv_cond = gen_rtx_fmt_ee (inv_cond_code, GET_MODE (cond),
					     copy_rtx (XEXP (cond, 0)),
					     copy_rtx (XEXP (cond, 1)));
	      if (inverted)
		std::swap (cond, inv_cond);

	      /* Update the tracker on both outgoing edges: each edge
		 gets the condition under which it is the correct
		 (architectural) path; the insns are committed later
		 by commit_edge_insertions.  */
	      insert_insn_on_edge (gen_speculation_tracker (cond),
				   BRANCH_EDGE (bb));
	      insert_insn_on_edge (gen_speculation_tracker (inv_cond),
				   FALLTHRU_EDGE (bb));
	      needs_tracking = true;
	    }
	  else if (GET_CODE (PATTERN (insn)) == RETURN)
	    {
	      /* If we already know we'll need a second pass, don't put
		 out the return sequence now, or we might end up with
		 two copies.  Instead, we'll do all return statements
		 during the second pass.  However, if this is the
		 first return insn we've found and we already
		 know that we'll need to emit the code, we can save a
		 second pass by emitting the code now.  */
	      if (needs_tracking && ! need_second_pass)
		{
		  rtx_insn *seq = aarch64_speculation_clobber_sp ();
		  emit_insn_before (seq, insn);
		}
	      else
		{
		  fixups_pending++;
		  need_second_pass = true;
		}
	    }
	  else if (find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	    {
	      /* A non-local goto leaves the function like a call, so
		 encode the tracking state into SP before it.  */
	      rtx_insn *seq = aarch64_speculation_clobber_sp ();
	      emit_insn_before (seq, insn);
	      needs_tracking = true;
	    }
	}
      else
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "  other\n");
	      dump_insn_slim (dump_file, insn);
	    }
	}
    }

  /* Second walk: handle non-local goto targets and calls.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *end = BB_END (bb);
      rtx_insn *call_insn = NULL;	/* Last call whose post-call tracker
					   restore has not yet been emitted.  */

      if (bb->flags & BB_NON_LOCAL_GOTO_TARGET)
	{
	  rtx_insn *label = NULL;
	  /* For non-local goto targets we have to recover the
	     speculation state from SP.  Find the last code label at
	     the head of the block and place the fixup sequence after
	     that.  */
	  for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn))
	    {
	      if (LABEL_P (insn))
		label = insn;
	      /* Never put anything before the basic block note.  */
	      if (NOTE_INSN_BASIC_BLOCK_P (insn))
		label = insn;
	      if (INSN_P (insn))
		break;
	    }

	  gcc_assert (label);
	  emit_insn_after (aarch64_speculation_establish_tracker (), label);
	}

      /* Scan the insns looking for calls.  We need to pass the
	 speculation tracking state encoded in to SP.  After a call we
	 restore the speculation tracking into the tracker register.
	 To avoid unnecessary transfers we look for two or more calls
	 within a single basic block and eliminate, where possible,
	 any redundant operations.  */
      for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
	{
	  /* Recognized insns with the speculation_barrier attribute
	     need the tracker value live in its register.  */
	  if (NONDEBUG_INSN_P (insn)
	      && recog_memoized (insn) >= 0
	      && (get_attr_speculation_barrier (insn)
		  == SPECULATION_BARRIER_TRUE))
	    {
	      if (call_insn)
		{
		  /* This instruction requires the speculation
		     tracking to be in the tracker register.  If there
		     was an earlier call in this block, we need to
		     copy the speculation tracking back there.  */
		  emit_insn_after (aarch64_speculation_establish_tracker (),
				   call_insn);
		  call_insn = NULL;
		}

	      needs_tracking = true;
	    }

	  if (CALL_P (insn))
	    {
	      bool tailcall
		= (SIBLING_CALL_P (insn)
		   || find_reg_note (insn, REG_NORETURN, NULL_RTX));

	      /* Tailcalls are like returns, we can eliminate the
		 transfer between the tracker register and SP if we
		 know that this function does not itself need
		 tracking.  */
	      if (tailcall && (need_second_pass || !needs_tracking))
		{
		  /* Don't clear call_insn if it is set - needs_tracking
		     will be true in that case and so we will end
		     up putting out mitigation sequences.  */
		  fixups_pending++;
		  need_second_pass = true;
		  break;
		}

	      needs_tracking = true;

	      /* We always need a transfer before the first call in a BB.  */
	      if (!call_insn)
		emit_insn_before (aarch64_speculation_clobber_sp (), insn);

	      /* Tail-calls and no-return calls don't need any post-call
		 reestablishment of the tracker.  */
	      if (! tailcall)
		call_insn = insn;
	      else
		call_insn = NULL;
	    }

	  /* Loop exit: END itself has been processed.  */
	  if (insn == end)
	    break;
	}

      /* A call was left without its post-call tracker restore; emit
	 it now, taking care with block-final calls.  */
      if (call_insn)
	{
	  rtx_insn *seq = aarch64_speculation_establish_tracker ();

	  /* Handle debug insns at the end of the BB.  Put the extra
	     insns after them.  This ensures that we have consistent
	     behaviour for the placement of the extra insns between
	     debug and non-debug builds.  */
	  for (insn = call_insn;
	       insn != end && DEBUG_INSN_P (NEXT_INSN (insn));
	       insn = NEXT_INSN (insn))
	    ;

	  if (insn == end)
	    {
	      edge e = find_fallthru_edge (bb->succs);
	      /* We need to be very careful about some calls that
		 appear at the end of a basic block.  If the call
		 involves exceptions, then the compiler may depend on
		 this being the last instruction in the block.  The
		 easiest way to handle this is to commit the new
		 instructions on the fall-through edge and to let
		 commit_edge_insertions clean things up for us.

		 Sometimes, eg with OMP, there may not even be an
		 outgoing edge after the call.  In that case, there's
		 not much we can do, presumably the compiler has
		 decided that the call can never return in this
		 context.  */
	      if (e)
		{
		  /* We need to set the location lists explicitly in
		     this case.  */
		  if (! INSN_P (seq))
		    {
		      start_sequence ();
		      emit_insn (seq);
		      seq = get_insns ();
		      end_sequence ();
		    }

		  for (rtx_insn *list = seq; list; list = NEXT_INSN (list))
		    INSN_LOCATION (list) = INSN_LOCATION (call_insn);

		  insert_insn_on_edge (seq, e);
		}
	    }
	  else
	    emit_insn_after (seq, call_insn);
	}
    }

  if (needs_tracking)
    {
      if (need_second_pass)
	{
	  /* We found a return instruction before we found out whether
	     or not we need to emit the tracking code, but we now
	     know we do.  Run quickly over the basic blocks and
	     fix up the return insns.  */
	  FOR_EACH_BB_FN (bb, cfun)
	    {
	      insn = BB_END (bb);

	      /* Skip trailing notes, as in the first walk.  */
	      while (insn != BB_HEAD (bb)
		     && NOTE_P (insn))
		insn = PREV_INSN (insn);

	      /* Returns and tail/no-return calls both had their
		 clobber sequences deferred above.  */
	      if ((control_flow_insn_p (insn)
		   && GET_CODE (PATTERN (insn)) == RETURN)
		  || (CALL_P (insn)
		      && (SIBLING_CALL_P (insn)
			  || find_reg_note (insn, REG_NORETURN, NULL_RTX))))
		{
		  rtx_insn *seq = aarch64_speculation_clobber_sp ();
		  emit_insn_before (seq, insn);
		  fixups_pending--;
		}
	    }
	  /* Every deferred fixup must have been emitted.  */
	  gcc_assert (fixups_pending == 0);
	}

      /* Set up the initial value of the tracker, using the incoming SP.  */
      insert_insn_on_edge (aarch64_speculation_establish_tracker (),
			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      commit_edge_insertions ();
    }

  return 0;
}
    453 
    454 namespace {
    455 
    456 const pass_data pass_data_aarch64_track_speculation =
    457 {
    458   RTL_PASS,		/* type.  */
    459   "speculation",	/* name.  */
    460   OPTGROUP_NONE,	/* optinfo_flags.  */
    461   TV_MACH_DEP,		/* tv_id.  */
    462   0,			/* properties_required.  */
    463   0,			/* properties_provided.  */
    464   0,			/* properties_destroyed.  */
    465   0,			/* todo_flags_start.  */
    466   0			/* todo_flags_finish.  */
    467 };
    468 
    469 class pass_track_speculation : public rtl_opt_pass
    470 {
    471  public:
    472   pass_track_speculation(gcc::context *ctxt)
    473     : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt)
    474     {}
    475 
    476   /* opt_pass methods:  */
    477   virtual bool gate (function *)
    478     {
    479       return aarch64_track_speculation;
    480     }
    481 
    482   virtual unsigned int execute (function *)
    483     {
    484       return aarch64_do_track_speculation ();
    485     }
    486 }; // class pass_track_speculation.
    487 } // anon namespace.
    488 
    489 /* Create a new pass instance.  */
    490 rtl_opt_pass *
    491 make_pass_track_speculation (gcc::context *ctxt)
    492 {
    493   return new pass_track_speculation (ctxt);
    494 }
    495