Home | History | Annotate | Line # | Download | only in sh
sh-mem.cc revision 1.1.1.2
      1 /* Helper routines for memory move and comparison insns.
      2    Copyright (C) 2013-2015 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify
      7 it under the terms of the GNU General Public License as published by
      8 the Free Software Foundation; either version 3, or (at your option)
      9 any later version.
     10 
     11 GCC is distributed in the hope that it will be useful,
     12 but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 GNU General Public License for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "config.h"
     21 #include "system.h"
     22 #include "coretypes.h"
     23 #include "tm.h"
     24 #include "machmode.h"
     25 #include "rtl.h"
     26 #include "hash-set.h"
     27 #include "vec.h"
     28 #include "double-int.h"
     29 #include "input.h"
     30 #include "alias.h"
     31 #include "symtab.h"
     32 #include "wide-int.h"
     33 #include "inchash.h"
     34 #include "tree.h"
     35 #include "hashtab.h"
     36 #include "hard-reg-set.h"
     37 #include "function.h"
     38 #include "flags.h"
     39 #include "statistics.h"
     40 #include "real.h"
     41 #include "fixed-value.h"
     42 #include "insn-config.h"
     43 #include "expmed.h"
     44 #include "dojump.h"
     45 #include "explow.h"
     46 #include "calls.h"
     47 #include "emit-rtl.h"
     48 #include "varasm.h"
     49 #include "stmt.h"
     50 #include "expr.h"
     51 #include "tm_p.h"
     52 #include "predict.h"
     53 #include "dominance.h"
     54 #include "cfg.h"
     55 #include "cfgrtl.h"
     56 #include "cfganal.h"
     57 #include "lcm.h"
     58 #include "cfgbuild.h"
     59 #include "cfgcleanup.h"
     60 #include "basic-block.h"
     61 
     62 /* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
     63 static void
     64 force_into (rtx value, rtx target)
     65 {
     66   value = force_operand (value, target);
     67   if (! rtx_equal_p (value, target))
     68     emit_insn (gen_move_insn (target, value));
     69 }
     70 
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if the move was expanded here, false to make the
   caller fall back to a generic block move.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only known-constant sizes are handled below.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* One word per iteration: movua does the unaligned load into
	 TEMP, then a plain aligned SImode store to the destination.
	 SRC_ADDR is bumped by hand since the load auto-modifies it.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Mop up the sub-word tail, if any, with move_by_pieces.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  /* Dedicated 12-byte library routine; r4/r5 are its fixed
	     dest/src argument registers.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Pick the odd/even entry point from bit 2 of BYTES; r6
	     carries the doubleword iteration count minus one.  */
	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Size-specialized entry point __movmemSI<bytes>.  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
    212 
/* Branch probabilities, as fractions of REG_BR_PROB_BASE, attached as
   REG_BR_PROB notes to the conditional jumps emitted below.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
    215 
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Strategy: compare a word at a time while both pointers are (or are
   made) 4-byte aligned, then finish byte by byte.  Always returns
   true.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  /* Pointer registers that walk the two strings.  */
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (tmp0, const0_rtx);

  /* If alignment is not known to be >= 4, test both addresses at run
     time and fall back to the byte loop unless both are word-aligned.  */
  if (align < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* addr2 is word-aligned here, OK to load.  Preload the first word
     of string 2 (software pipelining: the next word is loaded at the
     bottom of the loop, before the loop-back branch).  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* addr1 is word-aligned here, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  cmp/str on tmp1 & tmp2 against 0 sets T if
     any byte of either word is zero (a zero byte survives the AND
     only where both have one, but a zero byte in either word zeroes
     that byte of the AND result).  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* addr2 is word-aligned here, OK to load the next word early.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Loop back while the words compared equal.  */
  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ and contain no zero byte.  On
     little-endian, byte-swap both words (the rotl8/rotl16/rotl8
     sequence reverses the four bytes) so the SImode subtraction at
     L_return is ordered by the first differing byte.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A zero byte was seen in the last word pair: back both pointers up
     and rescan that word byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  /* Stop at end of string 2...  */
  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* ... or at the first differing byte.  With delayed branches the
     zero-extension of tmp2 is hoisted here to fill the delay slot.  */
  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  /* The bytes were loaded sign-extended; zero-extend both before the
     subtraction so the result has the conventional strcmp sign.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
    343 
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For a constant length, a counted word loop plus an unrolled byte
   tail is emitted; otherwise a plain counted byte loop.  Always
   returns true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  /* Pointer registers that walk the two strings.  */
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int align = INTVAL (operands[4]);
      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;		/* whole-word iterations */

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* If alignment is not known to be >= 4, test both addresses
	     at run time and take the byte loop unless both are
	     word-aligned.  */
	  if (align < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count. Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  cmp/str against 0 detects a zero byte
	     in the AND of the two words.  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Stop as soon as the words differ.  */
	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dt does it in one insn on SH2+.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;	/* straggler bytes after the words */

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* Length exhausted on a word boundary: the last word
		 pair compared equal, so tmp1 - tmp2 is 0.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      /* Fully unrolled: compare the straggler bytes one by
		 one, exiting at a NUL or a difference.  */
	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Unrolled byte comparison; also reached by falling through
	 from the word loop to rescan the interesting word.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Variable length: result is 0 if the length is 0.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Counted byte loop for the variable-length case.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  /* Stop at end of string 2 ...  */
  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* ... or at the first differing byte.  */
  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* Decrement the byte counter.  */
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  /* Zero-extend the sign-extended bytes so the subtraction has the
     conventional strncmp sign.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
    570 
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans a word at a time (via cmp/str) once the pointer is
   word-aligned, then pinpoints the zero byte within the final word.
   Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  /* CURRENT_ADDR walks the string; START_ADDR remembers where it
     began so the length can be computed at the end.  */
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  /* NOTE(review): this initial -1 appears to be unconditionally
     overwritten by the subtraction at L_return below — confirm it is
     not relied upon before removing.  */
  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If alignment is not known to be >= 4, test the address at run
     time and use the byte loop unless it is word-aligned.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 holds the comparison value (operands[2]) for cmp/str.  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  cmp/str sets T when any byte of tmp1 matches
     the corresponding byte of tmp0.  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* The matching byte is somewhere in the last word: back up and
     re-examine its four bytes individually.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  The word is known to contain the byte,
     so one of these four tests must branch to L_return.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* Fall-through after the fourth test is unreachable.  */
  emit_barrier_after (jump);

  /* start byte loop.  Used when the pointer is not word-aligned.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* CURRENT_ADDR points one past the zero byte, so the length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
    661 
    662 /* Emit code to perform a memset.
    663 
    664    OPERANDS[0] is the destination.
    665    OPERANDS[1] is the size;
    666    OPERANDS[2] is the char to search.
    667    OPERANDS[3] is the alignment.  */
    668 void
    669 sh_expand_setmem (rtx *operands)
    670 {
    671   rtx_code_label *L_loop_byte = gen_label_rtx ();
    672   rtx_code_label *L_loop_word = gen_label_rtx ();
    673   rtx_code_label *L_return = gen_label_rtx ();
    674   rtx jump;
    675   rtx dest = copy_rtx (operands[0]);
    676   rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
    677   rtx val = copy_to_mode_reg (SImode, operands[2]);
    678   int align = INTVAL (operands[3]);
    679   rtx len = copy_to_mode_reg (SImode, operands[1]);
    680 
    681   if (! CONST_INT_P (operands[1]))
    682     return;
    683 
    684   int count = INTVAL (operands[1]);
    685 
    686   if (CONST_INT_P (operands[2])
    687       && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    688     {
    689       rtx lenw = gen_reg_rtx (SImode);
    690 
    691       if (align < 4)
    692 	{
    693 	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
    694 	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    695 	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    696 	}
    697 
    698       /* word count. Do we have iterations ?  */
    699       emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
    700 
    701       dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
    702 
    703       /* start loop.  */
    704       emit_label (L_loop_word);
    705 
    706       if (TARGET_SH2)
    707         emit_insn (gen_dect (lenw, lenw));
    708       else
    709 	{
    710 	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
    711 	  emit_insn (gen_tstsi_t (lenw, lenw));
    712 	}
    713 
    714       emit_move_insn (dest, val);
    715       emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    716 						GET_MODE_SIZE (SImode)));
    717 
    718 
    719       jump = emit_jump_insn (gen_branch_false (L_loop_word));
    720       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    721       count = count % 4;
    722 
    723       dest = adjust_address (dest, QImode, 0);
    724 
    725       val = gen_lowpart (QImode, val);
    726 
    727       while (count--)
    728 	{
    729 	  emit_move_insn (dest, val);
    730 	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    731 						    GET_MODE_SIZE (QImode)));
    732 	}
    733 
    734       jump = emit_jump_insn (gen_jump_compact (L_return));
    735       emit_barrier_after (jump);
    736     }
    737 
    738   dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
    739 
    740   /* start loop.  */
    741   emit_label (L_loop_byte);
    742 
    743   if (TARGET_SH2)
    744     emit_insn (gen_dect (len, len));
    745   else
    746     {
    747       emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
    748       emit_insn (gen_tstsi_t (len, len));
    749     }
    750 
    751   val = gen_lowpart (QImode, val);
    752   emit_move_insn (dest, val);
    753   emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    754                                             GET_MODE_SIZE (QImode)));
    755 
    756   jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    757   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    758 
    759   emit_label (L_return);
    760 }
    761