Home | History | Annotate | Line # | Download | only in sh
sh-mem.cc revision 1.1.1.2.2.1
      1 /* Helper routines for memory move and comparison insns.
      2    Copyright (C) 2013-2016 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify
      7 it under the terms of the GNU General Public License as published by
      8 the Free Software Foundation; either version 3, or (at your option)
      9 any later version.
     10 
     11 GCC is distributed in the hope that it will be useful,
     12 but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 GNU General Public License for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "config.h"
     21 #include "system.h"
     22 #include "coretypes.h"
     23 #include "tm.h"
     24 #include "function.h"
     25 #include "basic-block.h"
     26 #include "rtl.h"
     27 #include "tree.h"
     28 #include "tm_p.h"
     29 #include "emit-rtl.h"
     30 #include "explow.h"
     31 #include "expr.h"
     32 
     33 /* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
     34 static void
     35 force_into (rtx value, rtx target)
     36 {
     37   value = force_operand (value, target);
     38   if (! rtx_equal_p (value, target))
     39     emit_insn (gen_move_insn (target, value));
     40 }
     41 
     42 /* Emit code to perform a block move.  Choose the best method.
     43 
     44    OPERANDS[0] is the destination.
     45    OPERANDS[1] is the source.
     46    OPERANDS[2] is the size.
     47    OPERANDS[3] is the alignment safe to use.  */
     48 bool
     49 expand_block_move (rtx *operands)
     50 {
     51   int align = INTVAL (operands[3]);
     52   int constp = (CONST_INT_P (operands[2]));
     53   int bytes = (constp ? INTVAL (operands[2]) : 0);
     54 
     55   if (! constp)
     56     return false;
     57 
     58   /* If we could use mov.l to move words and dest is word-aligned, we
     59      can use movua.l for loads and still generate a relatively short
     60      and efficient sequence.  */
     61   if (TARGET_SH4A && align < 4
     62       && MEM_ALIGN (operands[0]) >= 32
     63       && can_move_by_pieces (bytes, 32))
     64     {
     65       rtx dest = copy_rtx (operands[0]);
     66       rtx src = copy_rtx (operands[1]);
     67       /* We could use different pseudos for each copied word, but
     68 	 since movua can only load into r0, it's kind of
     69 	 pointless.  */
     70       rtx temp = gen_reg_rtx (SImode);
     71       rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
     72       int copied = 0;
     73 
     74       while (copied + 4 <= bytes)
     75 	{
     76 	  rtx to = adjust_address (dest, SImode, copied);
     77 	  rtx from = adjust_automodify_address (src, BLKmode,
     78 						src_addr, copied);
     79 
     80 	  set_mem_size (from, 4);
     81 	  emit_insn (gen_movua (temp, from));
     82 	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
     83 	  emit_move_insn (to, temp);
     84 	  copied += 4;
     85 	}
     86 
     87       if (copied < bytes)
     88 	move_by_pieces (adjust_address (dest, BLKmode, copied),
     89 			adjust_automodify_address (src, BLKmode,
     90 						   src_addr, copied),
     91 			bytes - copied, align, 0);
     92 
     93       return true;
     94     }
     95 
     96   /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     97      alignment, or if it isn't a multiple of 4 bytes, then fail.  */
     98   if (align < 4 || (bytes % 4 != 0))
     99     return false;
    100 
    101   if (TARGET_HARD_SH4)
    102     {
    103       if (bytes < 12)
    104 	return false;
    105       else if (bytes == 12)
    106 	{
    107 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
    108 	  rtx r4 = gen_rtx_REG (SImode, 4);
    109 	  rtx r5 = gen_rtx_REG (SImode, 5);
    110 
    111 	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
    112 				     SFUNC_STATIC).lab;
    113 	  force_into (XEXP (operands[0], 0), r4);
    114 	  force_into (XEXP (operands[1], 0), r5);
    115 	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
    116 	  return true;
    117 	}
    118       else if (! optimize_size)
    119 	{
    120 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
    121 	  rtx r4 = gen_rtx_REG (SImode, 4);
    122 	  rtx r5 = gen_rtx_REG (SImode, 5);
    123 	  rtx r6 = gen_rtx_REG (SImode, 6);
    124 
    125 	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
    126 						    ? "__movmem_i4_odd"
    127 						    : "__movmem_i4_even",
    128 				     SFUNC_STATIC).lab;
    129 	  force_into (XEXP (operands[0], 0), r4);
    130 	  force_into (XEXP (operands[1], 0), r5);
    131 
    132 	  int dwords = bytes >> 3;
    133 	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
    134 	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
    135 	  return true;
    136 	}
    137       else
    138 	return false;
    139     }
    140   if (bytes < 64)
    141     {
    142       char entry[30];
    143       rtx func_addr_rtx = gen_reg_rtx (Pmode);
    144       rtx r4 = gen_rtx_REG (SImode, 4);
    145       rtx r5 = gen_rtx_REG (SImode, 5);
    146 
    147       sprintf (entry, "__movmemSI%d", bytes);
    148       rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
    149       force_into (XEXP (operands[0], 0), r4);
    150       force_into (XEXP (operands[1], 0), r5);
    151       emit_insn (gen_block_move_real (func_addr_rtx, lab));
    152       return true;
    153     }
    154 
    155   /* This is the same number of bytes as a memcpy call, but to a different
    156      less common function name, so this will occasionally use more space.  */
    157   if (! optimize_size)
    158     {
    159       rtx func_addr_rtx = gen_reg_rtx (Pmode);
    160       int final_switch, while_loop;
    161       rtx r4 = gen_rtx_REG (SImode, 4);
    162       rtx r5 = gen_rtx_REG (SImode, 5);
    163       rtx r6 = gen_rtx_REG (SImode, 6);
    164 
    165       rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
    166       force_into (XEXP (operands[0], 0), r4);
    167       force_into (XEXP (operands[1], 0), r5);
    168 
    169       /* r6 controls the size of the move.  16 is decremented from it
    170 	 for each 64 bytes moved.  Then the negative bit left over is used
    171 	 as an index into a list of move instructions.  e.g., a 72 byte move
    172 	 would be set up with size(r6) = 14, for one iteration through the
    173 	 big while loop, and a switch of -2 for the last part.  */
    174 
    175       final_switch = 16 - ((bytes / 4) % 16);
    176       while_loop = ((bytes / 4) / 16 - 1) * 16;
    177       emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
    178       emit_insn (gen_block_lump_real (func_addr_rtx, lab));
    179       return true;
    180     }
    181 
    182   return false;
    183 }
    184 
    185 static const int prob_unlikely = REG_BR_PROB_BASE / 10;
    186 static const int prob_likely = REG_BR_PROB_BASE / 4;
    187 
    188 /* Emit code to perform a strcmp.
    189 
    190    OPERANDS[0] is the destination.
    191    OPERANDS[1] is the first string.
    192    OPERANDS[2] is the second string.
    193    OPERANDS[3] is the known alignment.  */
    194 bool
    195 sh_expand_cmpstr (rtx *operands)
    196 {
    197   rtx addr1 = operands[1];
    198   rtx addr2 = operands[2];
    199   rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
    200   rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
    201   rtx tmp0 = gen_reg_rtx (SImode);
    202   rtx tmp1 = gen_reg_rtx (SImode);
    203   rtx tmp2 = gen_reg_rtx (SImode);
    204   rtx tmp3 = gen_reg_rtx (SImode);
    205 
    206   rtx jump;
    207   rtx_code_label *L_return = gen_label_rtx ();
    208   rtx_code_label *L_loop_byte = gen_label_rtx ();
    209   rtx_code_label *L_end_loop_byte = gen_label_rtx ();
    210   rtx_code_label *L_loop_long = gen_label_rtx ();
    211   rtx_code_label *L_end_loop_long = gen_label_rtx ();
    212 
    213   const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
    214   const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
    215 
    216   if (addr1_alignment < 4 && addr2_alignment < 4)
    217     {
    218       emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
    219       emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
    220       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    221       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    222     }
    223   else if (addr1_alignment < 4 && addr2_alignment >= 4)
    224     {
    225       emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
    226       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    227       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    228     }
    229   else if (addr1_alignment >= 4 && addr2_alignment < 4)
    230     {
    231       emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
    232       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    233       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    234     }
    235 
    236   addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
    237   addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
    238 
    239   /* tmp2 is aligned, OK to load.  */
    240   emit_move_insn (tmp3, addr2);
    241   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
    242 
    243   /* start long loop.  */
    244   emit_label (L_loop_long);
    245 
    246   emit_move_insn (tmp2, tmp3);
    247 
    248   /* tmp1 is aligned, OK to load.  */
    249   emit_move_insn (tmp1, addr1);
    250   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
    251 
    252   /* Is there a 0 byte ?  */
    253   emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
    254 
    255   emit_insn (gen_cmpstr_t (tmp0, tmp3));
    256   jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
    257   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    258 
    259   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    260 
    261   /* tmp2 is aligned, OK to load.  */
    262   emit_move_insn (tmp3, addr2);
    263   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
    264 
    265   jump = emit_jump_insn (gen_branch_true (L_loop_long));
    266   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    267   /* end loop.  */
    268 
    269   /* Fallthu, substract words.  */
    270   if (TARGET_LITTLE_ENDIAN)
    271     {
    272       rtx low_1 = gen_lowpart (HImode, tmp1);
    273       rtx low_2 = gen_lowpart (HImode, tmp2);
    274 
    275       emit_insn (gen_rotlhi3_8 (low_1, low_1));
    276       emit_insn (gen_rotlhi3_8 (low_2, low_2));
    277       emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
    278       emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
    279       emit_insn (gen_rotlhi3_8 (low_1, low_1));
    280       emit_insn (gen_rotlhi3_8 (low_2, low_2));
    281     }
    282 
    283   jump = emit_jump_insn (gen_jump_compact (L_return));
    284   emit_barrier_after (jump);
    285 
    286   emit_label (L_end_loop_long);
    287 
    288   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
    289   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
    290 
    291   /* start byte loop.  */
    292   addr1 = adjust_address (addr1, QImode, 0);
    293   addr2 = adjust_address (addr2, QImode, 0);
    294 
    295   emit_label (L_loop_byte);
    296 
    297   emit_insn (gen_extendqisi2 (tmp2, addr2));
    298   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
    299 
    300   emit_insn (gen_extendqisi2 (tmp1, addr1));
    301   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
    302 
    303   emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
    304   jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
    305   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    306 
    307   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    308   if (flag_delayed_branch)
    309     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    310   jump = emit_jump_insn (gen_branch_true (L_loop_byte));
    311   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    312   /* end loop.  */
    313 
    314   emit_label (L_end_loop_byte);
    315 
    316   if (! flag_delayed_branch)
    317     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    318   emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
    319 
    320   emit_label (L_return);
    321 
    322   emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
    323 
    324   return true;
    325 }
    326 
    327 /* Emit code to perform a strncmp.
    328 
    329    OPERANDS[0] is the destination.
    330    OPERANDS[1] is the first string.
    331    OPERANDS[2] is the second string.
    332    OPERANDS[3] is the length.
    333    OPERANDS[4] is the known alignment.  */
    334 bool
    335 sh_expand_cmpnstr (rtx *operands)
    336 {
    337   rtx addr1 = operands[1];
    338   rtx addr2 = operands[2];
    339   rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
    340   rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
    341   rtx tmp1 = gen_reg_rtx (SImode);
    342   rtx tmp2 = gen_reg_rtx (SImode);
    343 
    344   rtx jump;
    345   rtx_code_label *L_return = gen_label_rtx ();
    346   rtx_code_label *L_loop_byte = gen_label_rtx ();
    347   rtx_code_label *L_end_loop_byte = gen_label_rtx ();
    348 
    349   rtx len = copy_to_mode_reg (SImode, operands[3]);
    350   int constp = CONST_INT_P (operands[3]);
    351 
    352   const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
    353   const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
    354 
    355   /* Loop on a register count.  */
    356   if (constp)
    357     {
    358       rtx tmp0 = gen_reg_rtx (SImode);
    359       rtx tmp3 = gen_reg_rtx (SImode);
    360       rtx lenw = gen_reg_rtx (SImode);
    361 
    362       rtx_code_label *L_loop_long = gen_label_rtx ();
    363       rtx_code_label *L_end_loop_long = gen_label_rtx ();
    364 
    365       int bytes = INTVAL (operands[3]);
    366       int witers = bytes / 4;
    367 
    368       if (witers > 1)
    369 	{
    370 	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
    371 	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
    372 
    373 	  emit_move_insn (tmp0, const0_rtx);
    374 
    375 	  if (addr1_alignment < 4 && addr2_alignment < 4)
    376 	    {
    377 	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
    378 	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
    379 	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    380 	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    381 	    }
    382 	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    383 	    {
    384 	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
    385 	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    386 	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    387 	    }
    388 	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    389 	    {
    390 	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
    391 	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    392 	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    393 	    }
    394 
    395 	  /* word count. Do we have iterations ?  */
    396 	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
    397 
    398 	  /* start long loop.  */
    399 	  emit_label (L_loop_long);
    400 
    401 	  /* tmp2 is aligned, OK to load.  */
    402 	  emit_move_insn (tmp2, addr2);
    403 	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
    404 						  GET_MODE_SIZE (SImode)));
    405 
    406 	  /* tmp1 is aligned, OK to load.  */
    407 	  emit_move_insn (tmp1, addr1);
    408 	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
    409 						  GET_MODE_SIZE (SImode)));
    410 
    411 	  /* Is there a 0 byte ?  */
    412 	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
    413 
    414 	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
    415 	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
    416 	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    417 
    418 	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    419 	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
    420 	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    421 
    422 	  if (TARGET_SH2)
    423 	    emit_insn (gen_dect (lenw, lenw));
    424 	  else
    425 	    {
    426 	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
    427 	      emit_insn (gen_tstsi_t (lenw, lenw));
    428 	    }
    429 
    430 	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
    431 	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    432 
    433 	  int sbytes = bytes % 4;
    434 
    435 	  /* end loop.  Reached max iterations.  */
    436 	  if (sbytes == 0)
    437 	    {
    438 	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
    439 	      jump = emit_jump_insn (gen_jump_compact (L_return));
    440 	      emit_barrier_after (jump);
    441 	    }
    442 	  else
    443 	    {
    444 	      /* Remaining bytes to check.  */
    445 
    446 	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
    447 	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
    448 
    449 	      while (sbytes--)
    450 		{
    451 		  emit_insn (gen_extendqisi2 (tmp1, addr1));
    452 		  emit_insn (gen_extendqisi2 (tmp2, addr2));
    453 
    454 		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
    455 		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
    456 		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    457 
    458 		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    459 		  if (flag_delayed_branch)
    460 		    emit_insn (gen_zero_extendqisi2 (tmp2,
    461 						     gen_lowpart (QImode,
    462 								  tmp2)));
    463 		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
    464 		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    465 
    466 		  addr1 = adjust_address (addr1, QImode,
    467 					  GET_MODE_SIZE (QImode));
    468 		  addr2 = adjust_address (addr2, QImode,
    469 					  GET_MODE_SIZE (QImode));
    470 		}
    471 
    472 	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
    473 	      emit_barrier_after (jump);
    474 	    }
    475 
    476 	  emit_label (L_end_loop_long);
    477 
    478 	  /* Found last word.  Restart it byte per byte.  */
    479 
    480 	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
    481 						  -GET_MODE_SIZE (SImode)));
    482 	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
    483 						  -GET_MODE_SIZE (SImode)));
    484 
    485 	  /* fall thru.  */
    486 	}
    487 
    488       addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
    489       addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
    490 
    491       while (bytes--)
    492 	{
    493 	  emit_insn (gen_extendqisi2 (tmp1, addr1));
    494 	  emit_insn (gen_extendqisi2 (tmp2, addr2));
    495 
    496 	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
    497 	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
    498 	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    499 
    500 	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    501 	  if (flag_delayed_branch)
    502 	    emit_insn (gen_zero_extendqisi2 (tmp2,
    503 					     gen_lowpart (QImode, tmp2)));
    504 	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
    505 	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    506 
    507 	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
    508 	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
    509 	}
    510 
    511       jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
    512       emit_barrier_after (jump);
    513     }
    514   else
    515     {
    516       emit_insn (gen_cmpeqsi_t (len, const0_rtx));
    517       emit_move_insn (operands[0], const0_rtx);
    518       jump = emit_jump_insn (gen_branch_true (L_return));
    519       add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    520     }
    521 
    522   addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
    523   addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
    524 
    525   emit_label (L_loop_byte);
    526 
    527   emit_insn (gen_extendqisi2 (tmp2, addr2));
    528   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
    529 
    530   emit_insn (gen_extendqisi2 (tmp1, addr1));
    531   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
    532 
    533   emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
    534   jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
    535   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    536 
    537   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    538   if (flag_delayed_branch)
    539     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    540   jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
    541   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    542 
    543   if (TARGET_SH2)
    544     emit_insn (gen_dect (len, len));
    545   else
    546     {
    547       emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
    548       emit_insn (gen_tstsi_t (len, len));
    549     }
    550 
    551   jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    552   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    553   /* end byte loop.  */
    554 
    555   emit_label (L_end_loop_byte);
    556 
    557   if (! flag_delayed_branch)
    558     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    559   emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
    560 
    561   emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
    562 
    563   emit_label (L_return);
    564 
    565   return true;
    566 }
    567 
    568 /* Emit code to perform a strlen.
    569 
    570    OPERANDS[0] is the destination.
    571    OPERANDS[1] is the string.
    572    OPERANDS[2] is the char to search.
    573    OPERANDS[3] is the alignment.  */
    574 bool
    575 sh_expand_strlen (rtx *operands)
    576 {
    577   rtx addr1 = operands[1];
    578   rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
    579   rtx start_addr = gen_reg_rtx (Pmode);
    580   rtx tmp0 = gen_reg_rtx (SImode);
    581   rtx tmp1 = gen_reg_rtx (SImode);
    582   rtx_code_label *L_return = gen_label_rtx ();
    583   rtx_code_label *L_loop_byte = gen_label_rtx ();
    584 
    585   rtx jump;
    586   rtx_code_label *L_loop_long = gen_label_rtx ();
    587   rtx_code_label *L_end_loop_long = gen_label_rtx ();
    588 
    589   int align = INTVAL (operands[3]);
    590 
    591   emit_move_insn (operands[0], GEN_INT (-1));
    592 
    593   /* remember start of string.  */
    594   emit_move_insn (start_addr, current_addr);
    595 
    596   if (align < 4)
    597     {
    598       emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
    599       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    600       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    601     }
    602 
    603   emit_move_insn (tmp0, operands[2]);
    604 
    605   addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
    606 
    607   /* start long loop.  */
    608   emit_label (L_loop_long);
    609 
    610   /* tmp1 is aligned, OK to load.  */
    611   emit_move_insn (tmp1, addr1);
    612   emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
    613 
    614   /* Is there a 0 byte ?  */
    615   emit_insn (gen_cmpstr_t (tmp0, tmp1));
    616 
    617   jump = emit_jump_insn (gen_branch_false (L_loop_long));
    618   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    619   /* end loop.  */
    620 
    621   emit_label (L_end_loop_long);
    622 
    623   emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
    624 
    625   addr1 = adjust_address (addr1, QImode, 0);
    626 
    627   /* unroll remaining bytes.  */
    628   for (int i = 0; i < 4; ++i)
    629     {
    630       emit_insn (gen_extendqisi2 (tmp1, addr1));
    631       emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
    632       emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
    633       jump = emit_jump_insn (gen_branch_true (L_return));
    634       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    635     }
    636 
    637   emit_barrier_after (jump);
    638 
    639   /* start byte loop.  */
    640   emit_label (L_loop_byte);
    641 
    642   emit_insn (gen_extendqisi2 (tmp1, addr1));
    643   emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
    644 
    645   emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
    646   jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    647   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    648 
    649   /* end loop.  */
    650 
    651   emit_label (L_return);
    652 
    653   emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
    654   emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
    655 
    656   return true;
    657 }
    658 
    659 /* Emit code to perform a memset.
    660 
    661    OPERANDS[0] is the destination.
    662    OPERANDS[1] is the size;
    663    OPERANDS[2] is the char to search.
    664    OPERANDS[3] is the alignment.  */
    665 void
    666 sh_expand_setmem (rtx *operands)
    667 {
    668   rtx_code_label *L_loop_byte = gen_label_rtx ();
    669   rtx_code_label *L_loop_word = gen_label_rtx ();
    670   rtx_code_label *L_return = gen_label_rtx ();
    671   rtx jump;
    672   rtx dest = copy_rtx (operands[0]);
    673   rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
    674   rtx val = copy_to_mode_reg (SImode, operands[2]);
    675   int align = INTVAL (operands[3]);
    676   rtx len = copy_to_mode_reg (SImode, operands[1]);
    677 
    678   if (! CONST_INT_P (operands[1]))
    679     return;
    680 
    681   int count = INTVAL (operands[1]);
    682 
    683   if (CONST_INT_P (operands[2])
    684       && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    685     {
    686       rtx lenw = gen_reg_rtx (SImode);
    687 
    688       if (align < 4)
    689 	{
    690 	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
    691 	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    692 	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    693 	}
    694 
    695       /* word count. Do we have iterations ?  */
    696       emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
    697 
    698       dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
    699 
    700       /* start loop.  */
    701       emit_label (L_loop_word);
    702 
    703       if (TARGET_SH2)
    704         emit_insn (gen_dect (lenw, lenw));
    705       else
    706 	{
    707 	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
    708 	  emit_insn (gen_tstsi_t (lenw, lenw));
    709 	}
    710 
    711       emit_move_insn (dest, val);
    712       emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    713 						GET_MODE_SIZE (SImode)));
    714 
    715 
    716       jump = emit_jump_insn (gen_branch_false (L_loop_word));
    717       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    718       count = count % 4;
    719 
    720       dest = adjust_address (dest, QImode, 0);
    721 
    722       val = gen_lowpart (QImode, val);
    723 
    724       while (count--)
    725 	{
    726 	  emit_move_insn (dest, val);
    727 	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    728 						    GET_MODE_SIZE (QImode)));
    729 	}
    730 
    731       jump = emit_jump_insn (gen_jump_compact (L_return));
    732       emit_barrier_after (jump);
    733     }
    734 
    735   dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
    736 
    737   /* start loop.  */
    738   emit_label (L_loop_byte);
    739 
    740   if (TARGET_SH2)
    741     emit_insn (gen_dect (len, len));
    742   else
    743     {
    744       emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
    745       emit_insn (gen_tstsi_t (len, len));
    746     }
    747 
    748   val = gen_lowpart (QImode, val);
    749   emit_move_insn (dest, val);
    750   emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
    751                                             GET_MODE_SIZE (QImode)));
    752 
    753   jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    754   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    755 
    756   emit_label (L_return);
    757 }
    758