Home | History | Annotate | Line # | Download | only in sh
sh-mem.cc revision 1.1.1.2.4.2
      1 /* Helper routines for memory move and comparison insns.
      2    Copyright (C) 2013-2017 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify
      7 it under the terms of the GNU General Public License as published by
      8 the Free Software Foundation; either version 3, or (at your option)
      9 any later version.
     10 
     11 GCC is distributed in the hope that it will be useful,
     12 but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 GNU General Public License for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "config.h"
     21 #include "system.h"
     22 #include "coretypes.h"
     23 #include "tm.h"
     24 #include "function.h"
     25 #include "basic-block.h"
     26 #include "rtl.h"
     27 #include "tree.h"
     28 #include "memmodel.h"
     29 #include "tm_p.h"
     30 #include "emit-rtl.h"
     31 #include "explow.h"
     32 #include "expr.h"
     33 
     34 /* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
     35 static void
     36 force_into (rtx value, rtx target)
     37 {
     38   value = force_operand (value, target);
     39   if (! rtx_equal_p (value, target))
     40     emit_insn (gen_move_insn (target, value));
     41 }
     42 
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if the move was expanded inline, false to let the
   caller fall back to another expansion strategy.  Only constant
   sizes are handled here.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only constant byte counts are expanded inline.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* Copy whole SImode words: movua.l performs the unaligned load
	 into TEMP, the store side is word-aligned per the MEM_ALIGN
	 check above.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Move the remaining tail (at most 3 bytes) by pieces.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  /* The library helpers take the destination address in r4 and
	     the source address in r5.  */
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Pick the odd/even helper depending on whether the size is
	     an odd multiple of 4.  */
	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
						    ? "__movmem_i4_odd"
						    : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  /* r6 holds the number of 8-byte chunks minus one.  */
	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Call the fixed-size helper __movmemSI<bytes> (bytes is a
	 multiple of 4 in 4..60 here).  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
    185 
/* Branch probabilities (in units of REG_BR_PROB_BASE) attached as
   REG_BR_PROB notes to the conditional jumps emitted below.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
    188 
    189 /* Emit code to perform a strcmp.
    190 
    191    OPERANDS[0] is the destination.
    192    OPERANDS[1] is the first string.
    193    OPERANDS[2] is the second string.
    194    OPERANDS[3] is the known alignment.  */
    195 bool
    196 sh_expand_cmpstr (rtx *operands)
    197 {
    198   rtx addr1 = operands[1];
    199   rtx addr2 = operands[2];
    200   rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
    201   rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
    202   rtx tmp0 = gen_reg_rtx (SImode);
    203   rtx tmp1 = gen_reg_rtx (SImode);
    204   rtx tmp2 = gen_reg_rtx (SImode);
    205   rtx tmp3 = gen_reg_rtx (SImode);
    206 
    207   rtx_insn *jump;
    208   rtx_code_label *L_return = gen_label_rtx ();
    209   rtx_code_label *L_loop_byte = gen_label_rtx ();
    210   rtx_code_label *L_end_loop_byte = gen_label_rtx ();
    211   rtx_code_label *L_loop_long = gen_label_rtx ();
    212   rtx_code_label *L_end_loop_long = gen_label_rtx ();
    213 
    214   const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
    215   const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
    216 
    217   if (addr1_alignment < 4 && addr2_alignment < 4)
    218     {
    219       emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
    220       emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
    221       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    222       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    223     }
    224   else if (addr1_alignment < 4 && addr2_alignment >= 4)
    225     {
    226       emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
    227       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    228       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    229     }
    230   else if (addr1_alignment >= 4 && addr2_alignment < 4)
    231     {
    232       emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
    233       jump = emit_jump_insn (gen_branch_false (L_loop_byte));
    234       add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    235     }
    236 
    237   addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
    238   addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
    239 
    240   /* tmp2 is aligned, OK to load.  */
    241   emit_move_insn (tmp3, addr2);
    242   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
    243 
    244   /* start long loop.  */
    245   emit_label (L_loop_long);
    246 
    247   emit_move_insn (tmp2, tmp3);
    248 
    249   /* tmp1 is aligned, OK to load.  */
    250   emit_move_insn (tmp1, addr1);
    251   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
    252 
    253   /* Is there a 0 byte ?  */
    254   emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
    255 
    256   emit_insn (gen_cmpstr_t (tmp0, tmp3));
    257   jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
    258   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    259 
    260   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    261 
    262   /* tmp2 is aligned, OK to load.  */
    263   emit_move_insn (tmp3, addr2);
    264   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
    265 
    266   jump = emit_jump_insn (gen_branch_true (L_loop_long));
    267   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    268   /* end loop.  */
    269 
    270   /* Fallthu, substract words.  */
    271   if (TARGET_LITTLE_ENDIAN)
    272     {
    273       rtx low_1 = gen_lowpart (HImode, tmp1);
    274       rtx low_2 = gen_lowpart (HImode, tmp2);
    275 
    276       emit_insn (gen_rotlhi3_8 (low_1, low_1));
    277       emit_insn (gen_rotlhi3_8 (low_2, low_2));
    278       emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
    279       emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
    280       emit_insn (gen_rotlhi3_8 (low_1, low_1));
    281       emit_insn (gen_rotlhi3_8 (low_2, low_2));
    282     }
    283 
    284   jump = emit_jump_insn (gen_jump_compact (L_return));
    285   emit_barrier_after (jump);
    286 
    287   emit_label (L_end_loop_long);
    288 
    289   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
    290   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
    291 
    292   /* start byte loop.  */
    293   addr1 = adjust_address (addr1, QImode, 0);
    294   addr2 = adjust_address (addr2, QImode, 0);
    295 
    296   emit_label (L_loop_byte);
    297 
    298   emit_insn (gen_extendqisi2 (tmp2, addr2));
    299   emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
    300 
    301   emit_insn (gen_extendqisi2 (tmp1, addr1));
    302   emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
    303 
    304   emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
    305   jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
    306   add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    307 
    308   emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
    309   if (flag_delayed_branch)
    310     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    311   jump = emit_jump_insn (gen_branch_true (L_loop_byte));
    312   add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    313   /* end loop.  */
    314 
    315   emit_label (L_end_loop_byte);
    316 
    317   if (! flag_delayed_branch)
    318     emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
    319   emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
    320 
    321   emit_label (L_return);
    322 
    323   emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
    324 
    325   return true;
    326 }
    327 
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For small constant lengths a word loop plus unrolled byte checks is
   emitted; otherwise a generic counted byte loop.  Always returns
   true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  Small constant lengths (< 32 bytes) get
     the mostly-unrolled expansion below.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int witers = bytes / 4;

      /* A word loop only pays off with at least two full words.  */
      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  /* tmp0 = 0: the cmp/str below then detects a NUL byte.  */
	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment checks; fall back to the byte loop when
	     either address is not word aligned.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count. Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  AND the words so a NUL in either one
	     is caught by the single cmp/str test.  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect (SH2+) does it and sets T
	     on zero in one insn.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* No tail bytes: result is the difference of the last
		 (equal, hence zero) word pair.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  Straight-line unrolled byte
		 compares; note the addresses advance via adjust_address
		 offsets, not via s1_addr/s2_addr updates, since every
		 exit goes directly to L_end_loop_byte.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Fully unrolled byte compare.  After the word-loop fall-through
	 it re-scans the backed-up word; a NUL or mismatch there exits
	 within at most four checks.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Non-constant (or large) length: handle len == 0 at runtime by
	 returning 0 immediately.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Counted byte loop: exits on NUL, mismatch, or len exhausted.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  /* Zero-extend the final bytes so the subtraction yields the usual
     unsigned-char comparison result.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
    568 
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans a word at a time with cmp/str once the pointer is word
   aligned, then pinpoints the terminator byte.  Always returns
   true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  /* NOTE(review): operands[0] is preset to -1 here but is overwritten
     unconditionally at L_return below -- looks redundant; confirm
     before relying on it.  */
  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If the pointer might not be word aligned, scan byte by byte.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 is the cmp/str comparison word; assumes operands[2] is 0
     (or a value already replicated across all four bytes) -- TODO
     confirm against the strlen expander's callers.  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* The matching byte is in the last word loaded; back up and locate
     it.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  One of the four checks must hit (the
     barrier after the loop asserts control cannot fall through).  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr points one past the NUL, so the length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
    659 
/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size;
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Emits a word-store loop for 0/-1 fills of more than 8 bytes,
   otherwise a byte-store loop.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  /* NOTE(review): the address/value/length copies above have already
     been emitted when we bail out here, so they remain in the insn
     stream without any stores.  Presumably the caller only uses this
     expansion for constant lengths -- verify against the setmem
     expander in sh.md.  */
  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  /* Word loop for fills of 0 or -1 longer than 8 bytes: for exactly
     these two values the SImode VAL already has the fill byte
     replicated in all four byte positions.  */
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      /* If the destination might be unaligned, check at runtime and
	 take the byte loop when it is not word aligned.  */
      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* word count. Do we have iterations ?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop.  */
      emit_label (L_loop_word);

      /* Decrement the word counter; dect (SH2+) decrements and sets T
	 on zero in one insn.  */
      if (TARGET_SH2)
        emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));


      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);

      /* Store the remaining (count % 4) tail bytes straight-line.  */
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  /* Byte loop: used for small or non-0/-1 fills, and as the target of
     the runtime alignment check above (with LEN still holding the full
     byte count).  */
  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* start loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
                                            GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}
    759