1 1.1 mrg /* Helper routines for memory move and comparison insns. 2 1.7 mrg Copyright (C) 2013-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This file is part of GCC. 5 1.1 mrg 6 1.1 mrg GCC is free software; you can redistribute it and/or modify 7 1.1 mrg it under the terms of the GNU General Public License as published by 8 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 9 1.1 mrg any later version. 10 1.1 mrg 11 1.1 mrg GCC is distributed in the hope that it will be useful, 12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 1.1 mrg GNU General Public License for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU General Public License 17 1.1 mrg along with GCC; see the file COPYING3. If not see 18 1.1 mrg <http://www.gnu.org/licenses/>. */ 19 1.1 mrg 20 1.4 mrg #define IN_TARGET_CODE 1 21 1.4 mrg 22 1.1 mrg #include "config.h" 23 1.1 mrg #include "system.h" 24 1.1 mrg #include "coretypes.h" 25 1.1 mrg #include "tm.h" 26 1.3 mrg #include "function.h" 27 1.3 mrg #include "basic-block.h" 28 1.1 mrg #include "rtl.h" 29 1.1 mrg #include "tree.h" 30 1.3 mrg #include "memmodel.h" 31 1.3 mrg #include "tm_p.h" 32 1.3 mrg #include "emit-rtl.h" 33 1.1 mrg #include "explow.h" 34 1.1 mrg #include "expr.h" 35 1.1 mrg 36 1.1 mrg /* Like force_operand, but guarantees that VALUE ends up in TARGET. */ 37 1.1 mrg static void 38 1.1 mrg force_into (rtx value, rtx target) 39 1.1 mrg { 40 1.1 mrg value = force_operand (value, target); 41 1.1 mrg if (! rtx_equal_p (value, target)) 42 1.1 mrg emit_insn (gen_move_insn (target, value)); 43 1.1 mrg } 44 1.1 mrg 45 1.1 mrg /* Emit code to perform a block move. Choose the best method. 46 1.1 mrg 47 1.1 mrg OPERANDS[0] is the destination. 48 1.1 mrg OPERANDS[1] is the source. 49 1.1 mrg OPERANDS[2] is the size. 50 1.1 mrg OPERANDS[3] is the alignment safe to use. 
 */
/* Returns true when a sequence was emitted, false to tell the caller to
   fall back on a generic block-move expansion (e.g. a memcpy call).  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only constant-size moves are expanded here.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* Copy whole SImode words: unaligned load via movua, aligned
	 store through the (word-aligned) destination.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Tail of fewer than 4 bytes, if any.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, RETURN_BEGIN);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  /* Dedicated 12-byte library helper; arguments are passed in
	     r4 (dest) and r5 (src).  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* The odd/even helpers differ on whether the total byte count
	     has a leftover 4-byte word (bytes & 4).  */
	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  /* r6 carries the double-word count minus one.  */
	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Size-specialized helpers __movmemSI4 .. __movmemSI60.  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}

/* Branch-probability notes attached to the emitted conditional jumps
   below.  "unlikely" is 10% of REG_BR_PROB_BASE, "likely" is 25%.  */
static const int prob_unlikely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
    .to_reg_br_prob_note ();
static const int prob_likely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
    .to_reg_br_prob_note ();

/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.
 */
/* Always returns true (the expansion never falls back).  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* If either address is not known to be 4-byte aligned, emit a runtime
     alignment test and branch to the byte loop when the low two bits of
     any not-known-aligned address are set.  */
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop: compare one SImode word per iteration.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  Preload the next word of s2 before
     the loop-back branch (fills the delay slot usefully).  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ and contain no 0 byte; subtract them.
     On little-endian, byte-swap both words first so the subtraction
     compares the textually-first differing byte.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      /* swap.b/swap.w sequence: rotate low halves by 8, whole words by
	 16, low halves by 8 again == full 32-bit byte swap.  */
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A word containing a 0 byte was found: back up both pointers and
     re-examine that word byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  /* The bytes were loaded sign-extended; zero-extend them before the
     final subtraction so the result has the strcmp sign convention.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.
 */
  /* Small constant length: word loop with the remainder unrolled.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      /* Number of whole-word iterations.  */
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment check for any address not known to be
	     4-byte aligned; fall back to the byte loop if unaligned.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; SH2+ has a dt insn for this.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  /* Bytes left over after the whole words.  */
	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check, fully unrolled.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      /* Byte-wise compare of the (remaining) constant length, unrolled.
	 NOTE: when witers > 1 this re-checks all BYTES bytes, with the
	 pointers possibly rewound to the word that held the 0 byte.  */
      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Variable (or large) length: result is 0 when the count is 0.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Counted byte loop, driven by LEN.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  /* Zero-extend the sign-extended bytes before subtracting so the result
     follows the strncmp sign convention.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.
 */
/* Always returns true (the expansion never falls back).  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* Not known 4-byte aligned: test at runtime and fall back to the
     byte loop if the low address bits are set.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 holds the char to search (OPERANDS[2]) for cmp/str.  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop: scan one SImode word per iteration.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  cmp/str sets T when any byte matches.  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* Back up to the word that contained the match and re-scan it.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes: at most 4 checks, each exiting on the
     terminating 0 byte.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* The last unrolled branch is unconditional in effect: one of the four
     bytes must be the 0 found by cmp/str.  */
  emit_barrier_after (jump);

  /* start byte loop (unaligned path).  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr is one past the 0 byte; length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size;
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.
 */
/* Only expands constant-size memsets; returns silently (emitting only the
   setup moves) for a non-constant size, leaving the caller's fallback to
   handle it.  NOTE(review): val/len pseudos are created before the
   CONST_INT_P early return — harmless but slightly wasteful.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  /* Word-at-a-time store only for fill values whose SImode broadcast is
     trivial (0 or -1) and counts big enough to amortize the setup.  */
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      /* Runtime alignment check when not known 4-byte aligned.  */
      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* word count.  Do we have iterations ?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop: one SImode store per iteration.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));


      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);

      /* Remaining bytes after the whole words, stored unrolled.  */
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* start loop: byte-wise fallback path, driven by LEN.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}