Home | History | Annotate | Line # | Download | only in sh
sh-mem.cc revision 1.1.1.7
      1 /* Helper routines for memory move and comparison insns.
      2    Copyright (C) 2013-2020 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify
      7 it under the terms of the GNU General Public License as published by
      8 the Free Software Foundation; either version 3, or (at your option)
      9 any later version.
     10 
     11 GCC is distributed in the hope that it will be useful,
     12 but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 GNU General Public License for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #define IN_TARGET_CODE 1
     21 
     22 #include "config.h"
     23 #include "system.h"
     24 #include "coretypes.h"
     25 #include "tm.h"
     26 #include "function.h"
     27 #include "basic-block.h"
     28 #include "rtl.h"
     29 #include "tree.h"
     30 #include "memmodel.h"
     31 #include "tm_p.h"
     32 #include "emit-rtl.h"
     33 #include "explow.h"
     34 #include "expr.h"
     35 
     36 /* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
     37 static void
     38 force_into (rtx value, rtx target)
     39 {
     40   value = force_operand (value, target);
     41   if (! rtx_equal_p (value, target))
     42     emit_insn (gen_move_insn (target, value));
     43 }
     44 
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if an insn sequence was emitted, false to tell the
   caller to fall back to the generic expander.  Only constant-size
   moves are handled here.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Variable-sized moves are left to the generic code.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* Copy whole words: movua load (unaligned-safe) + aligned store.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Move the remaining (up to 3) tail bytes piecemeal.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, RETURN_BEGIN);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  /* Dedicated 12-byte library helper; takes dst in r4, src in r5.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  /* General helper looping over double-words; r6 carries the
	     iteration count minus one, odd/even entry point selected by
	     whether a trailing word remains.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
						    ? "__movmem_i4_odd"
						    : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Size-specialized helpers __movmemSI4 .. __movmemSI60.  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
    187 
/* Branch probabilities, pre-converted to REG_BR_PROB note values, used
   to annotate the conditional jumps emitted by the expanders below:
   prob_unlikely (10%) for early-exit / mismatch branches, prob_likely
   (25%) for loop-back and common-path branches.
   NOTE(review): "likely" at only 25% of REG_BR_PROB_BASE looks low for
   the name; these appear to be relative weightings inherited from the
   original heuristics -- confirm before retuning.  */
static const int prob_unlikely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
    .to_reg_br_prob_note ();
static const int prob_likely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
    .to_reg_br_prob_note ();
    194 
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Emits a word-at-a-time loop when both addresses are (or test at
   runtime as) 4-byte aligned, with a byte-at-a-time fallback loop.
   Always returns true.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  /* Address pseudos that the loops increment as they consume bytes.  */
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Unless both addresses are known 4-byte aligned, test the low two
     address bits at runtime and branch to the byte loop if set.  */
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      /* Neither known aligned: one test of the OR of both addresses
	 covers both.  */
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  The s2 word is pre-loaded one iteration ahead
     (software pipelining): tmp3 holds the next word, tmp2 the current.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  cmp/str sets T if any byte of tmp3 equals the
     corresponding byte of tmp0; tmp0 is uninitialized here, but ANDing
     the two string words first means a NUL in either still produces a
     zero byte to detect.  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  Pre-load next s2 word before the
     loop-back branch so it can fill the delay slot.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fallthru: the words differ and contain no NUL; byte-swap each word
     so a plain subtraction yields a correctly-signed strcmp result.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      /* swap.b/swap.w sequence reverses the byte order of each word.  */
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A NUL was seen somewhere in the last word: back up and redo that
     word byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches the zero-extend of tmp2 can sit in the delay
     slot; otherwise it is done once after the loop.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  /* Result is the difference of the first differing (unsigned) bytes,
     or of the byte-reversed words from the fast path.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
    333 
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For small constant lengths (< 32) a word loop plus fully unrolled
   tail is emitted; otherwise a generic counted byte loop.  Always
   returns true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  /* Address pseudos that the loops increment as bytes are consumed.  */
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  /* tmp0 = 0 is the comparison value for cmp/str NUL detection.  */
	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment check unless both addresses are known
	     4-byte aligned; misaligned input goes to the byte loop.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count. Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  AND the words so a NUL in either one
	     yields a zero byte for cmp/str against tmp0 (= 0).  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect does dec + T-set in one
	     insn on SH2+.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* Length is a multiple of 4: words compared equal, result
		 is the (equal) word difference, i.e. 0.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  Fully unrolled, one compare
		 per leftover byte.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Unrolled byte compares.  NOTE(review): this unrolls the full
	 BYTES count even after the word loop above already consumed
	 leading words; the emitted NUL/difference checks terminate
	 early, so the extra compares appear to be a deliberate
	 size/simplicity trade-off -- confirm.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Non-constant or large length: strncmp of 0 bytes is 0.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Generic counted byte loop (also the misaligned fallback target).  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches the zero-extend fills the delay slot.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  /* Difference of first differing (unsigned) bytes, 0 if equal.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
    574 
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans a word at a time with cmp/str when the start is 4-byte aligned
   (or made so at runtime check time), then resolves the NUL position
   with up to 4 unrolled byte checks; misaligned input uses a pure byte
   loop.  Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If not known word aligned, check at runtime and fall back to the
     byte loop on a misaligned start.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 holds the byte value cmp/str searches for (operands[2];
     presumably 0 for strlen -- the expander pattern supplies it).  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  cmp/str sets T on any matching byte.  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* Back up to the word containing the match and locate the exact
     byte.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* The fourth byte check cannot fall through (the word is known to
     contain a NUL), so the last jump is unconditional in effect.  */
  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr is one past the NUL; bias start_addr by 1 so the
     subtraction yields the length excluding the terminator.  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
    665 
/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size;
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Only constant sizes are expanded.  For fill values 0 or -1 (whose
   SImode broadcast equals the byte repeated) and count > 8, a word
   loop plus unrolled byte tail is used; otherwise a byte loop.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  /* NOTE(review): this early return happens after the address/val/len
     copies above have already been emitted; presumably the setmem
     pattern's predicate guarantees a constant size so this path is
     unreachable -- confirm against sh.md.  */
  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  /* 0 and -1 are the only values whose 4-byte SImode pattern equals
     the requested byte replicated, so whole-word stores are valid.  */
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      /* Runtime alignment check; misaligned destinations take the
	 byte loop.  */
      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* word count. Do we have iterations ?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop.  */
      emit_label (L_loop_word);

      /* Decrement word counter; dect combines dec + T-set on SH2+.  */
      if (TARGET_SH2)
        emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));


      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      /* Unroll the 0-3 trailing bytes.  */
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Byte loop: generic path and misaligned-destination fallback.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
                                            GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}
    765