1 1.1 mrg /* Auxiliary functions for expand cpymem, setmem, cmpmem, load_multiple 2 1.1 mrg and store_multiple pattern of Andes NDS32 cpu for GNU compiler 3 1.1 mrg Copyright (C) 2012-2022 Free Software Foundation, Inc. 4 1.1 mrg Contributed by Andes Technology Corporation. 5 1.1 mrg 6 1.1 mrg This file is part of GCC. 7 1.1 mrg 8 1.1 mrg GCC is free software; you can redistribute it and/or modify it 9 1.1 mrg under the terms of the GNU General Public License as published 10 1.1 mrg by the Free Software Foundation; either version 3, or (at your 11 1.1 mrg option) any later version. 12 1.1 mrg 13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 14 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16 1.1 mrg License for more details. 17 1.1 mrg 18 1.1 mrg You should have received a copy of the GNU General Public License 19 1.1 mrg along with GCC; see the file COPYING3. If not see 20 1.1 mrg <http://www.gnu.org/licenses/>. */ 21 1.1 mrg 22 1.1 mrg /* ------------------------------------------------------------------------ */ 23 1.1 mrg 24 1.1 mrg #define IN_TARGET_CODE 1 25 1.1 mrg 26 1.1 mrg #include "config.h" 27 1.1 mrg #include "system.h" 28 1.1 mrg #include "coretypes.h" 29 1.1 mrg #include "backend.h" 30 1.1 mrg #include "target.h" 31 1.1 mrg #include "rtl.h" 32 1.1 mrg #include "memmodel.h" 33 1.1 mrg #include "emit-rtl.h" 34 1.1 mrg #include "explow.h" 35 1.1 mrg #include "tree.h" 36 1.1 mrg #include "expr.h" 37 1.1 mrg #include "optabs.h" 38 1.1 mrg #include "nds32-protos.h" 39 1.1 mrg 40 1.1 mrg /* ------------------------------------------------------------------------ */ 41 1.1 mrg 42 1.1 mrg /* Auxiliary static function definitions. */ 43 1.1 mrg 44 1.1 mrg static void 45 1.1 mrg nds32_emit_load_store (rtx reg, rtx mem, 46 1.1 mrg enum machine_mode mode, 47 1.1 mrg int offset, bool load_p) 48 1.1 mrg { 49 1.1 mrg rtx new_mem; 50 1.1 mrg new_mem = adjust_address (mem, mode, offset); 51 1.1 mrg if (load_p) 52 1.1 mrg emit_move_insn (reg, new_mem); 53 1.1 mrg else 54 1.1 mrg emit_move_insn (new_mem, reg); 55 1.1 mrg } 56 1.1 mrg 57 1.1 mrg static void 58 1.1 mrg nds32_emit_post_inc_load_store (rtx reg, rtx base_reg, 59 1.1 mrg enum machine_mode mode, 60 1.1 mrg bool load_p) 61 1.1 mrg { 62 1.1 mrg gcc_assert (GET_MODE (reg) == mode); 63 1.1 mrg gcc_assert (GET_MODE (base_reg) == Pmode); 64 1.1 mrg 65 1.1 mrg /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it may 66 1.1 mrg not recognize by gcc, so let gcc combine it at auto_inc_dec pass. */ 67 1.1 mrg if (load_p) 68 1.1 mrg emit_move_insn (reg, 69 1.1 mrg gen_rtx_MEM (mode, 70 1.1 mrg base_reg)); 71 1.1 mrg else 72 1.1 mrg emit_move_insn (gen_rtx_MEM (mode, 73 1.1 mrg base_reg), 74 1.1 mrg reg); 75 1.1 mrg 76 1.1 mrg emit_move_insn (base_reg, 77 1.1 mrg plus_constant(Pmode, base_reg, GET_MODE_SIZE (mode))); 78 1.1 mrg } 79 1.1 mrg 80 1.1 mrg static void 81 1.1 mrg nds32_emit_mem_move (rtx src, rtx dst, 82 1.1 mrg enum machine_mode mode, 83 1.1 mrg int addr_offset) 84 1.1 mrg { 85 1.1 mrg gcc_assert (MEM_P (src) && MEM_P (dst)); 86 1.1 mrg rtx tmp_reg = gen_reg_rtx (mode); 87 1.1 mrg nds32_emit_load_store (tmp_reg, src, mode, 88 1.1 mrg addr_offset, /* load_p */ true); 89 1.1 mrg nds32_emit_load_store (tmp_reg, dst, mode, 90 1.1 mrg addr_offset, /* load_p */ false); 91 1.1 mrg } 92 1.1 mrg 93 1.1 mrg static void 94 1.1 mrg nds32_emit_mem_move_block (int base_regno, int count, 95 1.1 mrg rtx *dst_base_reg, rtx *dst_mem, 96 1.1 mrg rtx *src_base_reg, rtx *src_mem, 97 1.1 mrg bool update_base_reg_p) 98 1.1 mrg { 99 1.1 mrg rtx new_base_reg; 100 1.1 mrg 101 1.1 mrg emit_insn (nds32_expand_load_multiple (base_regno, count, 102 1.1 mrg *src_base_reg, *src_mem, 103 1.1 mrg update_base_reg_p, &new_base_reg)); 104 1.1 mrg if (update_base_reg_p) 105 1.1 mrg { 106 1.1 mrg *src_base_reg = new_base_reg; 107 1.1 mrg *src_mem = gen_rtx_MEM (SImode, *src_base_reg); 108 1.1 mrg } 109 1.1 mrg 110 1.1 mrg emit_insn (nds32_expand_store_multiple (base_regno, count, 111 1.1 mrg *dst_base_reg, *dst_mem, 112 1.1 mrg update_base_reg_p, &new_base_reg)); 113 1.1 mrg 114 1.1 mrg if (update_base_reg_p) 115 1.1 mrg { 116 1.1 mrg *dst_base_reg = new_base_reg; 117 1.1 mrg *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg); 118 1.1 mrg } 119 1.1 mrg } 120 1.1 mrg 121 1.1 mrg /* ------------------------------------------------------------------------ */ 122 1.1 mrg 123 1.1 mrg /* Auxiliary function for expand cpymem pattern. */ 124 1.1 mrg 125 1.1 mrg static bool 126 1.1 mrg nds32_expand_cpymemsi_loop_unknown_size (rtx dstmem, rtx srcmem, 127 1.1 mrg rtx size, 128 1.1 mrg rtx alignment) 129 1.1 mrg { 130 1.1 mrg /* Emit loop version of cpymem. 131 1.1 mrg 132 1.1 mrg andi $size_least_3_bit, $size, #~7 133 1.1 mrg add $dst_end, $dst, $size 134 1.1 mrg move $dst_itr, $dst 135 1.1 mrg move $src_itr, $src 136 1.1 mrg beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. 137 1.1 mrg add $double_word_end, $dst, $size_least_3_bit 138 1.1 mrg 139 1.1 mrg .Ldouble_word_mode_loop: 140 1.1 mrg lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr 141 1.1 mrg smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr 142 1.1 mrg ! move will delete after register allocation 143 1.1 mrg move $src_itr, $src_itr' 144 1.1 mrg move $dst_itr, $dst_itr' 145 1.1 mrg ! Not readch upper bound. Loop. 146 1.1 mrg bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop 147 1.1 mrg 148 1.1 mrg .Lbyte_mode_entry: 149 1.1 mrg beq $dst_itr, $dst_end, .Lend_label 150 1.1 mrg .Lbyte_mode_loop: 151 1.1 mrg lbi.bi $tmp, [$src_itr], #1 152 1.1 mrg sbi.bi $tmp, [$dst_itr], #1 153 1.1 mrg ! Not readch upper bound. Loop. 154 1.1 mrg bne $dst_itr, $dst_end, .Lbyte_mode_loop 155 1.1 mrg .Lend_label: 156 1.1 mrg */ 157 1.1 mrg rtx dst_base_reg, src_base_reg; 158 1.1 mrg rtx dst_itr, src_itr; 159 1.1 mrg rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; 160 1.1 mrg rtx dst_end; 161 1.1 mrg rtx size_least_3_bit; 162 1.1 mrg rtx double_word_end; 163 1.1 mrg rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label; 164 1.1 mrg rtx tmp; 165 1.1 mrg rtx mask_least_3_bit; 166 1.1 mrg int start_regno; 167 1.1 mrg bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; 168 1.1 mrg 169 1.1 mrg if (TARGET_ISA_V3M && !align_to_4_bytes) 170 1.1 mrg return 0; 171 1.1 mrg 172 1.1 mrg if (TARGET_REDUCED_REGS) 173 1.1 mrg start_regno = 2; 174 1.1 mrg else 175 1.1 mrg start_regno = 16; 176 1.1 mrg 177 1.1 mrg dst_itr = gen_reg_rtx (Pmode); 178 1.1 mrg src_itr = gen_reg_rtx (Pmode); 179 1.1 mrg dst_end = gen_reg_rtx (Pmode); 180 1.1 mrg tmp = gen_reg_rtx (QImode); 181 1.1 mrg mask_least_3_bit = GEN_INT (~7); 182 1.1 mrg 183 1.1 mrg double_word_mode_loop = gen_label_rtx (); 184 1.1 mrg byte_mode_entry = gen_label_rtx (); 185 1.1 mrg byte_mode_loop = gen_label_rtx (); 186 1.1 mrg end_label = gen_label_rtx (); 187 1.1 mrg 188 1.1 mrg dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); 189 1.1 mrg src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); 190 1.1 mrg /* andi $size_least_3_bit, $size, #~7 */ 191 1.1 mrg size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit, 192 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 193 1.1 mrg /* add $dst_end, $dst, $size */ 194 1.1 mrg dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, 195 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 196 1.1 mrg 197 1.1 mrg /* move $dst_itr, $dst 198 1.1 mrg move $src_itr, $src */ 199 1.1 mrg emit_move_insn (dst_itr, dst_base_reg); 200 1.1 mrg emit_move_insn (src_itr, src_base_reg); 201 1.1 mrg 202 1.1 mrg /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */ 203 1.1 mrg emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL, 204 1.1 mrg SImode, 1, byte_mode_entry); 205 1.1 mrg /* add $double_word_end, $dst, $size_least_3_bit */ 206 1.1 mrg double_word_end = expand_binop (Pmode, add_optab, 207 1.1 mrg dst_base_reg, size_least_3_bit, 208 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 209 1.1 mrg 210 1.1 mrg /* .Ldouble_word_mode_loop: */ 211 1.1 mrg emit_label (double_word_mode_loop); 212 1.1 mrg /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr 213 1.1 mrg smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ 214 1.1 mrg src_itr_m = src_itr; 215 1.1 mrg dst_itr_m = dst_itr; 216 1.1 mrg srcmem_m = srcmem; 217 1.1 mrg dstmem_m = dstmem; 218 1.1 mrg nds32_emit_mem_move_block (start_regno, 2, 219 1.1 mrg &dst_itr_m, &dstmem_m, 220 1.1 mrg &src_itr_m, &srcmem_m, 221 1.1 mrg true); 222 1.1 mrg /* move $src_itr, $src_itr' 223 1.1 mrg move $dst_itr, $dst_itr' */ 224 1.1 mrg emit_move_insn (dst_itr, dst_itr_m); 225 1.1 mrg emit_move_insn (src_itr, src_itr_m); 226 1.1 mrg 227 1.1 mrg /* ! Not readch upper bound. Loop. 228 1.1 mrg bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ 229 1.1 mrg emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL, 230 1.1 mrg Pmode, 1, double_word_mode_loop); 231 1.1 mrg /* .Lbyte_mode_entry: */ 232 1.1 mrg emit_label (byte_mode_entry); 233 1.1 mrg 234 1.1 mrg /* beq $dst_itr, $dst_end, .Lend_label */ 235 1.1 mrg emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL, 236 1.1 mrg Pmode, 1, end_label); 237 1.1 mrg /* .Lbyte_mode_loop: */ 238 1.1 mrg emit_label (byte_mode_loop); 239 1.1 mrg 240 1.1 mrg /* lbi.bi $tmp, [$src_itr], #1 */ 241 1.1 mrg nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); 242 1.1 mrg 243 1.1 mrg /* sbi.bi $tmp, [$dst_itr], #1 */ 244 1.1 mrg nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); 245 1.1 mrg /* ! Not readch upper bound. Loop. 246 1.1 mrg bne $dst_itr, $dst_end, .Lbyte_mode_loop */ 247 1.1 mrg emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, 248 1.1 mrg SImode, 1, byte_mode_loop); 249 1.1 mrg 250 1.1 mrg /* .Lend_label: */ 251 1.1 mrg emit_label (end_label); 252 1.1 mrg 253 1.1 mrg return true; 254 1.1 mrg } 255 1.1 mrg 256 1.1 mrg static bool 257 1.1 mrg nds32_expand_cpymemsi_loop_known_size (rtx dstmem, rtx srcmem, 258 1.1 mrg rtx size, rtx alignment) 259 1.1 mrg { 260 1.1 mrg rtx dst_base_reg, src_base_reg; 261 1.1 mrg rtx dst_itr, src_itr; 262 1.1 mrg rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; 263 1.1 mrg rtx dst_end; 264 1.1 mrg rtx double_word_mode_loop, byte_mode_loop; 265 1.1 mrg rtx tmp; 266 1.1 mrg int start_regno; 267 1.1 mrg bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; 268 1.1 mrg unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); 269 1.1 mrg 270 1.1 mrg if (TARGET_ISA_V3M && !align_to_4_bytes) 271 1.1 mrg return 0; 272 1.1 mrg 273 1.1 mrg if (TARGET_REDUCED_REGS) 274 1.1 mrg start_regno = 2; 275 1.1 mrg else 276 1.1 mrg start_regno = 16; 277 1.1 mrg 278 1.1 mrg dst_itr = gen_reg_rtx (Pmode); 279 1.1 mrg src_itr = gen_reg_rtx (Pmode); 280 1.1 mrg dst_end = gen_reg_rtx (Pmode); 281 1.1 mrg tmp = gen_reg_rtx (QImode); 282 1.1 mrg 283 1.1 mrg double_word_mode_loop = gen_label_rtx (); 284 1.1 mrg byte_mode_loop = gen_label_rtx (); 285 1.1 mrg 286 1.1 mrg dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); 287 1.1 mrg src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); 288 1.1 mrg 289 1.1 mrg if (total_bytes < 8) 290 1.1 mrg { 291 1.1 mrg /* Emit total_bytes less than 8 loop version of cpymem. 292 1.1 mrg add $dst_end, $dst, $size 293 1.1 mrg move $dst_itr, $dst 294 1.1 mrg .Lbyte_mode_loop: 295 1.1 mrg lbi.bi $tmp, [$src_itr], #1 296 1.1 mrg sbi.bi $tmp, [$dst_itr], #1 297 1.1 mrg ! Not readch upper bound. Loop. 298 1.1 mrg bne $dst_itr, $dst_end, .Lbyte_mode_loop */ 299 1.1 mrg 300 1.1 mrg /* add $dst_end, $dst, $size */ 301 1.1 mrg dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, 302 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 303 1.1 mrg /* move $dst_itr, $dst 304 1.1 mrg move $src_itr, $src */ 305 1.1 mrg emit_move_insn (dst_itr, dst_base_reg); 306 1.1 mrg emit_move_insn (src_itr, src_base_reg); 307 1.1 mrg 308 1.1 mrg /* .Lbyte_mode_loop: */ 309 1.1 mrg emit_label (byte_mode_loop); 310 1.1 mrg 311 1.1 mrg /* lbi.bi $tmp, [$src_itr], #1 */ 312 1.1 mrg nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); 313 1.1 mrg 314 1.1 mrg /* sbi.bi $tmp, [$dst_itr], #1 */ 315 1.1 mrg nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); 316 1.1 mrg /* ! Not readch upper bound. Loop. 317 1.1 mrg bne $dst_itr, $dst_end, .Lbyte_mode_loop */ 318 1.1 mrg emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, 319 1.1 mrg SImode, 1, byte_mode_loop); 320 1.1 mrg return true; 321 1.1 mrg } 322 1.1 mrg else if (total_bytes % 8 == 0) 323 1.1 mrg { 324 1.1 mrg /* Emit multiple of 8 loop version of cpymem. 325 1.1 mrg 326 1.1 mrg add $dst_end, $dst, $size 327 1.1 mrg move $dst_itr, $dst 328 1.1 mrg move $src_itr, $src 329 1.1 mrg 330 1.1 mrg .Ldouble_word_mode_loop: 331 1.1 mrg lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr 332 1.1 mrg smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr 333 1.1 mrg ! move will delete after register allocation 334 1.1 mrg move $src_itr, $src_itr' 335 1.1 mrg move $dst_itr, $dst_itr' 336 1.1 mrg ! Not readch upper bound. Loop. 337 1.1 mrg bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ 338 1.1 mrg 339 1.1 mrg /* add $dst_end, $dst, $size */ 340 1.1 mrg dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, 341 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 342 1.1 mrg 343 1.1 mrg /* move $dst_itr, $dst 344 1.1 mrg move $src_itr, $src */ 345 1.1 mrg emit_move_insn (dst_itr, dst_base_reg); 346 1.1 mrg emit_move_insn (src_itr, src_base_reg); 347 1.1 mrg 348 1.1 mrg /* .Ldouble_word_mode_loop: */ 349 1.1 mrg emit_label (double_word_mode_loop); 350 1.1 mrg /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr 351 1.1 mrg smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ 352 1.1 mrg src_itr_m = src_itr; 353 1.1 mrg dst_itr_m = dst_itr; 354 1.1 mrg srcmem_m = srcmem; 355 1.1 mrg dstmem_m = dstmem; 356 1.1 mrg nds32_emit_mem_move_block (start_regno, 2, 357 1.1 mrg &dst_itr_m, &dstmem_m, 358 1.1 mrg &src_itr_m, &srcmem_m, 359 1.1 mrg true); 360 1.1 mrg /* move $src_itr, $src_itr' 361 1.1 mrg move $dst_itr, $dst_itr' */ 362 1.1 mrg emit_move_insn (dst_itr, dst_itr_m); 363 1.1 mrg emit_move_insn (src_itr, src_itr_m); 364 1.1 mrg 365 1.1 mrg /* ! Not readch upper bound. Loop. 366 1.1 mrg bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ 367 1.1 mrg emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, 368 1.1 mrg Pmode, 1, double_word_mode_loop); 369 1.1 mrg } 370 1.1 mrg else 371 1.1 mrg { 372 1.1 mrg /* Handle size greater than 8, and not a multiple of 8. */ 373 1.1 mrg return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, 374 1.1 mrg size, alignment); 375 1.1 mrg } 376 1.1 mrg 377 1.1 mrg return true; 378 1.1 mrg } 379 1.1 mrg 380 1.1 mrg static bool 381 1.1 mrg nds32_expand_cpymemsi_loop (rtx dstmem, rtx srcmem, 382 1.1 mrg rtx size, rtx alignment) 383 1.1 mrg { 384 1.1 mrg if (CONST_INT_P (size)) 385 1.1 mrg return nds32_expand_cpymemsi_loop_known_size (dstmem, srcmem, 386 1.1 mrg size, alignment); 387 1.1 mrg else 388 1.1 mrg return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, 389 1.1 mrg size, alignment); 390 1.1 mrg } 391 1.1 mrg 392 1.1 mrg static bool 393 1.1 mrg nds32_expand_cpymemsi_unroll (rtx dstmem, rtx srcmem, 394 1.1 mrg rtx total_bytes, rtx alignment) 395 1.1 mrg { 396 1.1 mrg rtx dst_base_reg, src_base_reg; 397 1.1 mrg rtx tmp_reg; 398 1.1 mrg int maximum_bytes; 399 1.1 mrg int maximum_bytes_per_inst; 400 1.1 mrg int maximum_regs; 401 1.1 mrg int start_regno; 402 1.1 mrg int i, inst_num; 403 1.1 mrg HOST_WIDE_INT remain_bytes, remain_words; 404 1.1 mrg bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; 405 1.1 mrg bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0; 406 1.1 mrg 407 1.1 mrg /* Because reduced-set regsiters has few registers 408 1.1 mrg (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' 409 1.1 mrg cannot be used for register allocation), 410 1.1 mrg using 8 registers (32 bytes) for moving memory block 411 1.1 mrg may easily consume all of them. 412 1.1 mrg It makes register allocation/spilling hard to work. 413 1.1 mrg So we only allow maximum=4 registers (16 bytes) for 414 1.1 mrg moving memory block under reduced-set registers. */ 415 1.1 mrg if (TARGET_REDUCED_REGS) 416 1.1 mrg { 417 1.1 mrg maximum_regs = 4; 418 1.1 mrg maximum_bytes = 64; 419 1.1 mrg start_regno = 2; 420 1.1 mrg } 421 1.1 mrg else 422 1.1 mrg { 423 1.1 mrg /* $r25 is $tp so we use up to 8 registers. */ 424 1.1 mrg maximum_regs = 8; 425 1.1 mrg maximum_bytes = 160; 426 1.1 mrg start_regno = 16; 427 1.1 mrg } 428 1.1 mrg maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD; 429 1.1 mrg 430 1.1 mrg /* 1. Total_bytes is integer for sure. 431 1.1 mrg 2. Alignment is integer for sure. 432 1.1 mrg 3. Maximum 4 or 10 registers and up to 4 instructions, 433 1.1 mrg 4 * 4 * 4 = 64 bytes, 8 * 4 * 10 = 160 bytes. 434 1.1 mrg 4. The dstmem cannot be volatile memory access. 435 1.1 mrg 5. The srcmem cannot be volatile memory access. 436 1.1 mrg 6. Known shared alignment not align to 4 byte in v3m since lmw/smw *NOT* 437 1.1 mrg support unalign access with v3m configure. */ 438 1.1 mrg if (GET_CODE (total_bytes) != CONST_INT 439 1.1 mrg || GET_CODE (alignment) != CONST_INT 440 1.1 mrg || INTVAL (total_bytes) > maximum_bytes 441 1.1 mrg || MEM_VOLATILE_P (dstmem) 442 1.1 mrg || MEM_VOLATILE_P (srcmem) 443 1.1 mrg || (TARGET_ISA_V3M && !align_to_4_bytes)) 444 1.1 mrg return false; 445 1.1 mrg 446 1.1 mrg dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); 447 1.1 mrg src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); 448 1.1 mrg remain_bytes = INTVAL (total_bytes); 449 1.1 mrg 450 1.1 mrg /* Do not update base address for last lmw/smw pair. */ 451 1.1 mrg inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1)) 452 1.1 mrg / maximum_bytes_per_inst) - 1; 453 1.1 mrg 454 1.1 mrg for (i = 0; i < inst_num; i++) 455 1.1 mrg { 456 1.1 mrg nds32_emit_mem_move_block (start_regno, maximum_regs, 457 1.1 mrg &dst_base_reg, &dstmem, 458 1.1 mrg &src_base_reg, &srcmem, 459 1.1 mrg true); 460 1.1 mrg } 461 1.1 mrg remain_bytes -= maximum_bytes_per_inst * inst_num; 462 1.1 mrg 463 1.1 mrg remain_words = remain_bytes / UNITS_PER_WORD; 464 1.1 mrg remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); 465 1.1 mrg 466 1.1 mrg if (remain_words != 0) 467 1.1 mrg { 468 1.1 mrg if (remain_bytes != 0) 469 1.1 mrg nds32_emit_mem_move_block (start_regno, remain_words, 470 1.1 mrg &dst_base_reg, &dstmem, 471 1.1 mrg &src_base_reg, &srcmem, 472 1.1 mrg true); 473 1.1 mrg else 474 1.1 mrg { 475 1.1 mrg /* Do not update address if no further byte to move. */ 476 1.1 mrg if (remain_words == 1) 477 1.1 mrg { 478 1.1 mrg /* emit move instruction if align to 4 byte and only 1 479 1.1 mrg word to move. */ 480 1.1 mrg if (align_to_4_bytes) 481 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, SImode, 0); 482 1.1 mrg else 483 1.1 mrg { 484 1.1 mrg tmp_reg = gen_reg_rtx (SImode); 485 1.1 mrg emit_insn ( 486 1.1 mrg gen_unaligned_load_w (tmp_reg, 487 1.1 mrg gen_rtx_MEM (SImode, src_base_reg))); 488 1.1 mrg emit_insn ( 489 1.1 mrg gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg), 490 1.1 mrg tmp_reg)); 491 1.1 mrg } 492 1.1 mrg } 493 1.1 mrg else 494 1.1 mrg nds32_emit_mem_move_block (start_regno, remain_words, 495 1.1 mrg &dst_base_reg, &dstmem, 496 1.1 mrg &src_base_reg, &srcmem, 497 1.1 mrg false); 498 1.1 mrg } 499 1.1 mrg } 500 1.1 mrg 501 1.1 mrg switch (remain_bytes) 502 1.1 mrg { 503 1.1 mrg case 3: 504 1.1 mrg case 2: 505 1.1 mrg { 506 1.1 mrg if (align_to_2_bytes) 507 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, HImode, 0); 508 1.1 mrg else 509 1.1 mrg { 510 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, QImode, 0); 511 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, QImode, 1); 512 1.1 mrg } 513 1.1 mrg 514 1.1 mrg if (remain_bytes == 3) 515 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, QImode, 2); 516 1.1 mrg break; 517 1.1 mrg } 518 1.1 mrg case 1: 519 1.1 mrg nds32_emit_mem_move (srcmem, dstmem, QImode, 0); 520 1.1 mrg break; 521 1.1 mrg case 0: 522 1.1 mrg break; 523 1.1 mrg default: 524 1.1 mrg gcc_unreachable (); 525 1.1 mrg } 526 1.1 mrg 527 1.1 mrg /* Successfully create patterns, return true. */ 528 1.1 mrg return true; 529 1.1 mrg } 530 1.1 mrg 531 1.1 mrg /* Function to move block memory content by 532 1.1 mrg using load_multiple and store_multiple. 533 1.1 mrg This is auxiliary extern function to help create rtx template. 534 1.1 mrg Check nds32-multiple.md file for the patterns. */ 535 1.1 mrg bool 536 1.1 mrg nds32_expand_cpymemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) 537 1.1 mrg { 538 1.1 mrg if (nds32_expand_cpymemsi_unroll (dstmem, srcmem, total_bytes, alignment)) 539 1.1 mrg return true; 540 1.1 mrg 541 1.1 mrg if (!optimize_size && optimize > 2) 542 1.1 mrg return nds32_expand_cpymemsi_loop (dstmem, srcmem, total_bytes, alignment); 543 1.1 mrg 544 1.1 mrg return false; 545 1.1 mrg } 546 1.1 mrg 547 1.1 mrg /* ------------------------------------------------------------------------ */ 548 1.1 mrg 549 1.1 mrg /* Auxiliary function for expand setmem pattern. */ 550 1.1 mrg 551 1.1 mrg static rtx 552 1.1 mrg nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) 553 1.1 mrg { 554 1.1 mrg gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); 555 1.1 mrg 556 1.1 mrg if (CONST_INT_P (value)) 557 1.1 mrg { 558 1.1 mrg unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode); 559 1.1 mrg rtx new_val = gen_int_mode (val | (val << 8) 560 1.1 mrg | (val << 16) | (val << 24), SImode); 561 1.1 mrg /* Just calculate at here if it's constant value. */ 562 1.1 mrg emit_move_insn (value4word, new_val); 563 1.1 mrg } 564 1.1 mrg else 565 1.1 mrg { 566 1.1 mrg if (NDS32_EXT_DSP_P ()) 567 1.1 mrg { 568 1.1 mrg /* ! prepare word 569 1.1 mrg insb $tmp, $value, 1 ! $tmp <- 0x0000abab 570 1.1 mrg pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ 571 1.1 mrg rtx tmp = gen_reg_rtx (SImode); 572 1.1 mrg 573 1.1 mrg convert_move (tmp, value, true); 574 1.1 mrg 575 1.1 mrg emit_insn ( 576 1.1 mrg gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); 577 1.1 mrg 578 1.1 mrg emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); 579 1.1 mrg } 580 1.1 mrg else 581 1.1 mrg { 582 1.1 mrg /* ! prepare word 583 1.1 mrg andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab 584 1.1 mrg slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 585 1.1 mrg or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab 586 1.1 mrg slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 587 1.1 mrg or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ 588 1.1 mrg 589 1.1 mrg rtx tmp1, tmp2, tmp3, tmp4; 590 1.1 mrg tmp1 = expand_binop (SImode, and_optab, value, 591 1.1 mrg gen_int_mode (0xff, SImode), 592 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 593 1.1 mrg tmp2 = expand_binop (SImode, ashl_optab, tmp1, 594 1.1 mrg gen_int_mode (8, SImode), 595 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 596 1.1 mrg tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, 597 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 598 1.1 mrg tmp4 = expand_binop (SImode, ashl_optab, tmp3, 599 1.1 mrg gen_int_mode (16, SImode), 600 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 601 1.1 mrg 602 1.1 mrg emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); 603 1.1 mrg } 604 1.1 mrg } 605 1.1 mrg 606 1.1 mrg return value4word; 607 1.1 mrg } 608 1.1 mrg 609 1.1 mrg static rtx 610 1.1 mrg nds32_gen_dup_4_byte_to_word_value (rtx value) 611 1.1 mrg { 612 1.1 mrg rtx value4word = gen_reg_rtx (SImode); 613 1.1 mrg nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); 614 1.1 mrg 615 1.1 mrg return value4word; 616 1.1 mrg } 617 1.1 mrg 618 1.1 mrg static rtx 619 1.1 mrg nds32_gen_dup_8_byte_to_double_word_value (rtx value) 620 1.1 mrg { 621 1.1 mrg rtx value4doubleword = gen_reg_rtx (DImode); 622 1.1 mrg 623 1.1 mrg nds32_gen_dup_4_byte_to_word_value_aux ( 624 1.1 mrg value, nds32_di_low_part_subreg(value4doubleword)); 625 1.1 mrg 626 1.1 mrg emit_move_insn (nds32_di_high_part_subreg(value4doubleword), 627 1.1 mrg nds32_di_low_part_subreg(value4doubleword)); 628 1.1 mrg return value4doubleword; 629 1.1 mrg } 630 1.1 mrg 631 1.1 mrg 632 1.1 mrg static rtx 633 1.1 mrg emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) 634 1.1 mrg { 635 1.1 mrg rtx word_mode_label = gen_label_rtx (); 636 1.1 mrg rtx word_mode_end_label = gen_label_rtx (); 637 1.1 mrg rtx byte_mode_size = gen_reg_rtx (SImode); 638 1.1 mrg rtx byte_mode_size_tmp = gen_reg_rtx (SImode); 639 1.1 mrg rtx word_mode_end = gen_reg_rtx (SImode); 640 1.1 mrg rtx size_for_word = gen_reg_rtx (SImode); 641 1.1 mrg 642 1.1 mrg /* and $size_for_word, $size, #~0x7 */ 643 1.1 mrg size_for_word = expand_binop (SImode, and_optab, size, 644 1.1 mrg gen_int_mode (~0x7, SImode), 645 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 646 1.1 mrg 647 1.1 mrg emit_move_insn (byte_mode_size, size); 648 1.1 mrg 649 1.1 mrg /* beqz $size_for_word, .Lbyte_mode_entry */ 650 1.1 mrg emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL, 651 1.1 mrg SImode, 1, word_mode_end_label); 652 1.1 mrg /* add $word_mode_end, $dst, $size_for_word */ 653 1.1 mrg word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, 654 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 655 1.1 mrg 656 1.1 mrg /* andi $byte_mode_size, $size, 0x7 */ 657 1.1 mrg byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), 658 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 659 1.1 mrg 660 1.1 mrg emit_move_insn (byte_mode_size, byte_mode_size_tmp); 661 1.1 mrg 662 1.1 mrg /* .Lword_mode: */ 663 1.1 mrg emit_label (word_mode_label); 664 1.1 mrg /* ! word-mode set loop 665 1.1 mrg smw.bim $value4word, [$dst_itr], $value4word, 0 666 1.1 mrg bne $word_mode_end, $dst_itr, .Lword_mode */ 667 1.1 mrg emit_insn (gen_unaligned_store_update_base_dw (itr, 668 1.1 mrg itr, 669 1.1 mrg value)); 670 1.1 mrg emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, 671 1.1 mrg Pmode, 1, word_mode_label); 672 1.1 mrg 673 1.1 mrg emit_label (word_mode_end_label); 674 1.1 mrg 675 1.1 mrg return byte_mode_size; 676 1.1 mrg } 677 1.1 mrg 678 1.1 mrg static rtx 679 1.1 mrg emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) 680 1.1 mrg { 681 1.1 mrg rtx end = gen_reg_rtx (Pmode); 682 1.1 mrg rtx byte_mode_label = gen_label_rtx (); 683 1.1 mrg rtx end_label = gen_label_rtx (); 684 1.1 mrg 685 1.1 mrg value = force_reg (QImode, value); 686 1.1 mrg 687 1.1 mrg if (need_end) 688 1.1 mrg end = expand_binop (Pmode, add_optab, itr, size, 689 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 690 1.1 mrg /* beqz $byte_mode_size, .Lend 691 1.1 mrg add $byte_mode_end, $dst_itr, $byte_mode_size */ 692 1.1 mrg emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL, 693 1.1 mrg SImode, 1, end_label); 694 1.1 mrg 695 1.1 mrg if (!need_end) 696 1.1 mrg end = expand_binop (Pmode, add_optab, itr, size, 697 1.1 mrg NULL_RTX, 0, OPTAB_WIDEN); 698 1.1 mrg 699 1.1 mrg /* .Lbyte_mode: */ 700 1.1 mrg emit_label (byte_mode_label); 701 1.1 mrg 702 1.1 mrg /* ! byte-mode set loop 703 1.1 mrg sbi.bi $value, [$dst_itr] ,1 704 1.1 mrg bne $byte_mode_end, $dst_itr, .Lbyte_mode */ 705 1.1 mrg nds32_emit_post_inc_load_store (value, itr, QImode, false); 706 1.1 mrg 707 1.1 mrg emit_cmp_and_jump_insns (end, itr, NE, NULL, 708 1.1 mrg Pmode, 1, byte_mode_label); 709 1.1 mrg /* .Lend: */ 710 1.1 mrg emit_label (end_label); 711 1.1 mrg 712 1.1 mrg if (need_end) 713 1.1 mrg return end; 714 1.1 mrg else 715 1.1 mrg return NULL_RTX; 716 1.1 mrg } 717 1.1 mrg 718 1.1 mrg static bool 719 1.1 mrg nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) 720 1.1 mrg { 721 1.1 mrg rtx value4doubleword; 722 1.1 mrg rtx value4byte; 723 1.1 mrg rtx dst; 724 1.1 mrg rtx byte_mode_size; 725 1.1 mrg 726 1.1 mrg /* Emit loop version of setmem. 727 1.1 mrg memset: 728 1.1 mrg ! prepare word 729 1.1 mrg andi $tmp1, $val, 0xff ! $tmp1 <- 0x000000ab 730 1.1 mrg slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 731 1.1 mrg or $tmp3, $val, $tmp2 ! $tmp3 <- 0x0000abab 732 1.1 mrg slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 733 1.1 mrg or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab 734 1.1 mrg 735 1.1 mrg and $size_for_word, $size, #-4 736 1.1 mrg beqz $size_for_word, .Lword_mode_end 737 1.1 mrg 738 1.1 mrg add $word_mode_end, $dst, $size_for_word 739 1.1 mrg andi $byte_mode_size, $size, 3 740 1.1 mrg 741 1.1 mrg .Lword_mode: 742 1.1 mrg ! word-mode set loop 743 1.1 mrg smw.bim $value4word, [$dst], $value4word, 0 744 1.1 mrg bne $word_mode_end, $dst, .Lword_mode 745 1.1 mrg 746 1.1 mrg .Lword_mode_end: 747 1.1 mrg beqz $byte_mode_size, .Lend 748 1.1 mrg add $byte_mode_end, $dst, $byte_mode_size 749 1.1 mrg 750 1.1 mrg .Lbyte_mode: 751 1.1 mrg ! byte-mode set loop 752 1.1 mrg sbi.bi $value4word, [$dst] ,1 753 1.1 mrg bne $byte_mode_end, $dst, .Lbyte_mode 754 1.1 mrg .Lend: */ 755 1.1 mrg 756 1.1 mrg dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); 757 1.1 mrg 758 1.1 mrg /* ! prepare word 759 1.1 mrg andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab 760 1.1 mrg slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 761 1.1 mrg or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab 762 1.1 mrg slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 763 1.1 mrg or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ 764 1.1 mrg value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); 765 1.1 mrg 766 1.1 mrg /* and $size_for_word, $size, #-4 767 1.1 mrg beqz $size_for_word, .Lword_mode_end 768 1.1 mrg 769 1.1 mrg add $word_mode_end, $dst, $size_for_word 770 1.1 mrg andi $byte_mode_size, $size, 3 771 1.1 mrg 772 1.1 mrg .Lword_mode: 773 1.1 mrg ! word-mode set loop 774 1.1 mrg smw.bim $value4word, [$dst], $value4word, 0 775 1.1 mrg bne $word_mode_end, $dst, .Lword_mode 776 1.1 mrg .Lword_mode_end: */ 777 1.1 mrg byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); 778 1.1 mrg 779 1.1 mrg /* beqz $byte_mode_size, .Lend 780 1.1 mrg add $byte_mode_end, $dst, $byte_mode_size 781 1.1 mrg 782 1.1 mrg .Lbyte_mode: 783 1.1 mrg ! byte-mode set loop 784 1.1 mrg sbi.bi $value, [$dst] ,1 785 1.1 mrg bne $byte_mode_end, $dst, .Lbyte_mode 786 1.1 mrg .Lend: */ 787 1.1 mrg 788 1.1 mrg value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, 789 1.1 mrg subreg_lowpart_offset (QImode, DImode)); 790 1.1 mrg 791 1.1 mrg emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); 792 1.1 mrg 793 1.1 mrg return true; 794 1.1 mrg } 795 1.1 mrg 796 1.1 mrg static bool 797 1.1 mrg nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) 798 1.1 mrg { 799 1.1 mrg rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); 800 1.1 mrg rtx need_align_bytes = gen_reg_rtx (SImode); 801 1.1 mrg rtx last_2_bit = gen_reg_rtx (SImode); 802 1.1 mrg rtx byte_loop_base = gen_reg_rtx (SImode); 803 1.1 mrg rtx byte_loop_size = gen_reg_rtx (SImode); 804 1.1 mrg rtx remain_size = gen_reg_rtx (SImode); 805 1.1 mrg rtx new_base_reg; 806 1.1 mrg rtx value4byte, value4doubleword; 807 1.1 mrg rtx byte_mode_size; 808 1.1 mrg rtx last_byte_loop_label = gen_label_rtx (); 809 1.1 mrg 810 1.1 mrg size = force_reg (SImode, size); 811 1.1 mrg 812 1.1 mrg value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); 813 1.1 mrg value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, 814 1.1 mrg subreg_lowpart_offset (QImode, DImode)); 815 1.1 mrg 816 1.1 mrg emit_move_insn (byte_loop_size, size); 817 1.1 mrg emit_move_insn (byte_loop_base, base_reg); 818 1.1 mrg 819 1.1 mrg /* Jump to last byte loop if size is less than 16. */ 820 1.1 mrg emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL, 821 1.1 mrg SImode, 1, last_byte_loop_label); 822 1.1 mrg 823 1.1 mrg /* Make sure align to 4 byte first since v3m can't unalign access. */ 824 1.1 mrg emit_insn (gen_andsi3 (last_2_bit, 825 1.1 mrg base_reg, 826 1.1 mrg gen_int_mode (0x3, SImode))); 827 1.1 mrg 828 1.1 mrg emit_insn (gen_subsi3 (need_align_bytes, 829 1.1 mrg gen_int_mode (4, SImode), 830 1.1 mrg last_2_bit)); 831 1.1 mrg 832 1.1 mrg /* Align to 4 byte. */ 833 1.1 mrg new_base_reg = emit_setmem_byte_loop (base_reg, 834 1.1 mrg need_align_bytes, 835 1.1 mrg value4byte, 836 1.1 mrg true); 837 1.1 mrg 838 1.1 mrg /* Calculate remain size. */ 839 1.1 mrg emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); 840 1.1 mrg 841 1.1 mrg /* Set memory word by word. */ 842 1.1 mrg byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, 843 1.1 mrg remain_size, 844 1.1 mrg value4doubleword); 845 1.1 mrg 846 1.1 mrg emit_move_insn (byte_loop_base, new_base_reg); 847 1.1 mrg emit_move_insn (byte_loop_size, byte_mode_size); 848 1.1 mrg 849 1.1 mrg emit_label (last_byte_loop_label); 850 1.1 mrg 851 1.1 mrg /* And set memory for remain bytes. */ 852 1.1 mrg emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false); 853 1.1 mrg return true; 854 1.1 mrg } 855 1.1 mrg 856 1.1 mrg static bool 857 1.1 mrg nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value, 858 1.1 mrg rtx align ATTRIBUTE_UNUSED, 859 1.1 mrg rtx expected_align ATTRIBUTE_UNUSED, 860 1.1 mrg rtx expected_size ATTRIBUTE_UNUSED) 861 1.1 mrg { 862 1.1 mrg unsigned maximum_regs, maximum_bytes, start_regno, regno; 863 1.1 mrg rtx value4word; 864 1.1 mrg rtx dst_base_reg, new_base_reg; 865 1.1 mrg unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw; 866 1.1 mrg unsigned HOST_WIDE_INT real_size; 867 1.1 mrg 868 1.1 mrg if (TARGET_REDUCED_REGS) 869 1.1 mrg { 870 1.1 mrg maximum_regs = 4; 871 1.1 mrg maximum_bytes = 64; 872 1.1 mrg start_regno = 2; 873 1.1 mrg } 874 1.1 mrg else 875 1.1 mrg { 876 1.1 mrg maximum_regs = 8; 877 1.1 mrg maximum_bytes = 128; 878 1.1 mrg start_regno = 16; 879 1.1 mrg } 880 1.1 mrg 881 1.1 mrg real_size = UINTVAL (size) & GET_MODE_MASK(SImode); 882 1.1 mrg 883 1.1 mrg if (!(CONST_INT_P (size) && real_size <= maximum_bytes)) 884 1.1 mrg return false; 885 1.1 mrg 886 1.1 mrg remain_bytes = real_size; 887 1.1 mrg 888 1.1 mrg gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); 889 1.1 mrg 890 1.1 mrg value4word = nds32_gen_dup_4_byte_to_word_value (value); 891 1.1 mrg 892 1.1 mrg prepare_regs = remain_bytes / UNITS_PER_WORD; 893 1.1 mrg 894 1.1 mrg dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); 895 1.1 mrg 896 1.1 mrg if (prepare_regs > maximum_regs) 897 1.1 mrg prepare_regs = maximum_regs; 898 1.1 mrg 899 1.1 mrg fill_per_smw = prepare_regs * UNITS_PER_WORD; 900 1.1 mrg 901 1.1 mrg regno = start_regno; 902 1.1 mrg switch (prepare_regs) 903 1.1 mrg { 904 1.1 mrg case 2: 905 1.1 mrg default: 906 1.1 mrg { 907 1.1 mrg rtx reg0 = gen_rtx_REG (SImode, regno); 908 1.1 mrg rtx reg1 = gen_rtx_REG (SImode, regno+1); 909 1.1 mrg unsigned last_regno = start_regno + prepare_regs - 1; 910 1.1 mrg 911 1.1 mrg emit_move_insn (reg0, value4word); 912 1.1 mrg emit_move_insn (reg1, value4word); 913 1.1 mrg rtx regd = gen_rtx_REG (DImode, regno); 914 1.1 mrg regno += 2; 915 1.1 mrg 916 1.1 mrg /* Try to utilize movd44! */ 917 1.1 mrg while (regno <= last_regno) 918 1.1 mrg { 919 1.1 mrg if ((regno + 1) <=last_regno) 920 1.1 mrg { 921 1.1 mrg rtx reg = gen_rtx_REG (DImode, regno); 922 1.1 mrg emit_move_insn (reg, regd); 923 1.1 mrg regno += 2; 924 1.1 mrg } 925 1.1 mrg else 926 1.1 mrg { 927 1.1 mrg rtx reg = gen_rtx_REG (SImode, regno); 928 1.1 mrg emit_move_insn (reg, reg0); 929 1.1 mrg regno += 1; 930 1.1 mrg } 931 1.1 mrg } 932 1.1 mrg break; 933 1.1 mrg } 934 1.1 mrg case 1: 935 1.1 mrg { 936 1.1 mrg rtx reg = gen_rtx_REG (SImode, regno++); 937 1.1 mrg emit_move_insn (reg, value4word); 938 1.1 mrg } 939 1.1 mrg break; 940 1.1 mrg case 0: 941 1.1 mrg break; 942 1.1 mrg } 943 1.1 mrg 944 1.1 mrg if (fill_per_smw) 945 1.1 mrg for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw) 946 1.1 mrg { 947 1.1 mrg emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs, 948 1.1 mrg dst_base_reg, dstmem, 949 1.1 mrg true, &new_base_reg)); 950 1.1 mrg dst_base_reg = new_base_reg; 951 1.1 mrg dstmem = gen_rtx_MEM (SImode, dst_base_reg); 952 1.1 mrg } 953 1.1 mrg 954 1.1 mrg remain_words = remain_bytes / UNITS_PER_WORD; 955 1.1 mrg 956 1.1 mrg if (remain_words) 957 1.1 mrg { 958 1.1 mrg emit_insn (nds32_expand_store_multiple (start_regno, remain_words, 959 1.1 mrg dst_base_reg, dstmem, 960 1.1 mrg true, &new_base_reg)); 961 1.1 mrg dst_base_reg = new_base_reg; 962 1.1 mrg dstmem = gen_rtx_MEM (SImode, dst_base_reg); 963 1.1 mrg } 964 1.1 mrg 965 1.1 mrg remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); 966 1.1 mrg 967 1.1 mrg if (remain_bytes) 968 1.1 mrg { 969 1.1 mrg value = simplify_gen_subreg (QImode, value4word, SImode, 970 1.1 mrg subreg_lowpart_offset(QImode, SImode)); 971 1.1 mrg int offset = 0; 972 1.1 mrg for (;remain_bytes;--remain_bytes, ++offset) 973 1.1 mrg { 974 1.1 mrg nds32_emit_load_store (value, dstmem, QImode, offset, false); 975 1.1 mrg } 976 1.1 mrg } 977 1.1 mrg 978 1.1 mrg return true; 979 1.1 mrg } 980 1.1 mrg 981 1.1 mrg bool 982 1.1 mrg nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align, 983 1.1 mrg rtx expected_align, 984 1.1 mrg rtx expected_size) 985 1.1 mrg { 986 1.1 mrg bool align_to_4_bytes = (INTVAL (align) & 3) == 0; 987 1.1 mrg 988 1.1 mrg /* Only expand at O3 */ 989 1.1 mrg if (optimize_size || optimize < 3) 990 1.1 mrg return false; 991 1.1 mrg 992 1.1 mrg if (TARGET_ISA_V3M && !align_to_4_bytes) 993 1.1 mrg return nds32_expand_setmem_loop_v3m (dstmem, size, value); 994 1.1 mrg 995 1.1 mrg if (nds32_expand_setmem_unroll (dstmem, size, value, 996 1.1 mrg align, expected_align, expected_size)) 997 1.1 mrg return true; 998 1.1 mrg 999 1.1 mrg return nds32_expand_setmem_loop (dstmem, size, value); 1000 1.1 mrg } 1001 1.1 mrg 1002 1.1 mrg /* ------------------------------------------------------------------------ */ 1003 1.1 mrg 1004 1.1 mrg /* Auxiliary function for expand strlen pattern. */ 1005 1.1 mrg 1006 1.1 mrg bool 1007 1.1 mrg nds32_expand_strlen (rtx result, rtx str, 1008 1.1 mrg rtx target_char, rtx align ATTRIBUTE_UNUSED) 1009 1.1 mrg { 1010 1.1 mrg rtx base_reg, backup_base_reg; 1011 1.1 mrg rtx ffb_result; 1012 1.1 mrg rtx target_char_ptr, length; 1013 1.1 mrg rtx loop_label, tmp; 1014 1.1 mrg 1015 1.1 mrg if (optimize_size || optimize < 3) 1016 1.1 mrg return false; 1017 1.1 mrg 1018 1.1 mrg gcc_assert (MEM_P (str)); 1019 1.1 mrg gcc_assert (CONST_INT_P (target_char) || REG_P (target_char)); 1020 1.1 mrg 1021 1.1 mrg base_reg = copy_to_mode_reg (SImode, XEXP (str, 0)); 1022 1.1 mrg loop_label = gen_label_rtx (); 1023 1.1 mrg 1024 1.1 mrg ffb_result = gen_reg_rtx (Pmode); 1025 1.1 mrg tmp = gen_reg_rtx (SImode); 1026 1.1 mrg backup_base_reg = gen_reg_rtx (SImode); 1027 1.1 mrg 1028 1.1 mrg /* Emit loop version of strlen. 1029 1.1 mrg move $backup_base, $base 1030 1.1 mrg .Lloop: 1031 1.1 mrg lmw.bim $tmp, [$base], $tmp, 0 1032 1.1 mrg ffb $ffb_result, $tmp, $target_char ! is there $target_char? 1033 1.1 mrg beqz $ffb_result, .Lloop 1034 1.1 mrg add $last_char_ptr, $base, $ffb_result 1035 1.1 mrg sub $length, $last_char_ptr, $backup_base */ 1036 1.1 mrg 1037 1.1 mrg /* move $backup_base, $base */ 1038 1.1 mrg emit_move_insn (backup_base_reg, base_reg); 1039 1.1 mrg 1040 1.1 mrg /* .Lloop: */ 1041 1.1 mrg emit_label (loop_label); 1042 1.1 mrg /* lmw.bim $tmp, [$base], $tmp, 0 */ 1043 1.1 mrg emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg)); 1044 1.1 mrg 1045 1.1 mrg /* ffb $ffb_result, $tmp, $target_char ! is there $target_char? */ 1046 1.1 mrg emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char)); 1047 1.1 mrg 1048 1.1 mrg /* beqz $ffb_result, .Lloop */ 1049 1.1 mrg emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL, 1050 1.1 mrg SImode, 1, loop_label); 1051 1.1 mrg 1052 1.1 mrg /* add $target_char_ptr, $base, $ffb_result */ 1053 1.1 mrg target_char_ptr = expand_binop (Pmode, add_optab, base_reg, 1054 1.1 mrg ffb_result, NULL_RTX, 0, OPTAB_WIDEN); 1055 1.1 mrg 1056 1.1 mrg /* sub $length, $target_char_ptr, $backup_base */ 1057 1.1 mrg length = expand_binop (Pmode, sub_optab, target_char_ptr, 1058 1.1 mrg backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN); 1059 1.1 mrg 1060 1.1 mrg emit_move_insn (result, length); 1061 1.1 mrg 1062 1.1 mrg return true; 1063 1.1 mrg } 1064 1.1 mrg 1065 1.1 mrg /* ------------------------------------------------------------------------ */ 1066 1.1 mrg 1067 1.1 mrg /* Functions to expand load_multiple and store_multiple. 1068 1.1 mrg They are auxiliary extern functions to help create rtx template. 1069 1.1 mrg Check nds32-multiple.md file for the patterns. */ 1070 1.1 mrg rtx 1071 1.1 mrg nds32_expand_load_multiple (int base_regno, int count, 1072 1.1 mrg rtx base_addr, rtx basemem, 1073 1.1 mrg bool update_base_reg_p, 1074 1.1 mrg rtx *update_base_reg) 1075 1.1 mrg { 1076 1.1 mrg int par_index; 1077 1.1 mrg int offset; 1078 1.1 mrg int start_idx; 1079 1.1 mrg rtx result; 1080 1.1 mrg rtx new_addr, mem, reg; 1081 1.1 mrg 1082 1.1 mrg /* Generate a unaligned load to prevent load instruction pull out from 1083 1.1 mrg parallel, and then it will generate lwi, and lose unaligned acces */ 1084 1.1 mrg if (count == 1) 1085 1.1 mrg { 1086 1.1 mrg reg = gen_rtx_REG (SImode, base_regno); 1087 1.1 mrg if (update_base_reg_p) 1088 1.1 mrg { 1089 1.1 mrg *update_base_reg = gen_reg_rtx (SImode); 1090 1.1 mrg return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr); 1091 1.1 mrg } 1092 1.1 mrg else 1093 1.1 mrg return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr)); 1094 1.1 mrg } 1095 1.1 mrg 1096 1.1 mrg /* Create the pattern that is presented in nds32-multiple.md. */ 1097 1.1 mrg if (update_base_reg_p) 1098 1.1 mrg { 1099 1.1 mrg result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); 1100 1.1 mrg start_idx = 1; 1101 1.1 mrg } 1102 1.1 mrg else 1103 1.1 mrg { 1104 1.1 mrg result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); 1105 1.1 mrg start_idx = 0; 1106 1.1 mrg } 1107 1.1 mrg 1108 1.1 mrg if (update_base_reg_p) 1109 1.1 mrg { 1110 1.1 mrg offset = count * 4; 1111 1.1 mrg new_addr = plus_constant (Pmode, base_addr, offset); 1112 1.1 mrg *update_base_reg = gen_reg_rtx (SImode); 1113 1.1 mrg 1114 1.1 mrg XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); 1115 1.1 mrg } 1116 1.1 mrg 1117 1.1 mrg for (par_index = 0; par_index < count; par_index++) 1118 1.1 mrg { 1119 1.1 mrg offset = par_index * 4; 1120 1.1 mrg /* 4-byte for loading data to each register. */ 1121 1.1 mrg new_addr = plus_constant (Pmode, base_addr, offset); 1122 1.1 mrg mem = adjust_automodify_address_nv (basemem, SImode, 1123 1.1 mrg new_addr, offset); 1124 1.1 mrg reg = gen_rtx_REG (SImode, base_regno + par_index); 1125 1.1 mrg 1126 1.1 mrg XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem); 1127 1.1 mrg } 1128 1.1 mrg 1129 1.1 mrg return result; 1130 1.1 mrg } 1131 1.1 mrg 1132 1.1 mrg rtx 1133 1.1 mrg nds32_expand_store_multiple (int base_regno, int count, 1134 1.1 mrg rtx base_addr, rtx basemem, 1135 1.1 mrg bool update_base_reg_p, 1136 1.1 mrg rtx *update_base_reg) 1137 1.1 mrg { 1138 1.1 mrg int par_index; 1139 1.1 mrg int offset; 1140 1.1 mrg int start_idx; 1141 1.1 mrg rtx result; 1142 1.1 mrg rtx new_addr, mem, reg; 1143 1.1 mrg 1144 1.1 mrg if (count == 1) 1145 1.1 mrg { 1146 1.1 mrg reg = gen_rtx_REG (SImode, base_regno); 1147 1.1 mrg if (update_base_reg_p) 1148 1.1 mrg { 1149 1.1 mrg *update_base_reg = gen_reg_rtx (SImode); 1150 1.1 mrg return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg); 1151 1.1 mrg } 1152 1.1 mrg else 1153 1.1 mrg return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg); 1154 1.1 mrg } 1155 1.1 mrg 1156 1.1 mrg /* Create the pattern that is presented in nds32-multiple.md. */ 1157 1.1 mrg 1158 1.1 mrg if (update_base_reg_p) 1159 1.1 mrg { 1160 1.1 mrg result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); 1161 1.1 mrg start_idx = 1; 1162 1.1 mrg } 1163 1.1 mrg else 1164 1.1 mrg { 1165 1.1 mrg result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); 1166 1.1 mrg start_idx = 0; 1167 1.1 mrg } 1168 1.1 mrg 1169 1.1 mrg if (update_base_reg_p) 1170 1.1 mrg { 1171 1.1 mrg offset = count * 4; 1172 1.1 mrg new_addr = plus_constant (Pmode, base_addr, offset); 1173 1.1 mrg *update_base_reg = gen_reg_rtx (SImode); 1174 1.1 mrg 1175 1.1 mrg XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); 1176 1.1 mrg } 1177 1.1 mrg 1178 1.1 mrg for (par_index = 0; par_index < count; par_index++) 1179 1.1 mrg { 1180 1.1 mrg offset = par_index * 4; 1181 1.1 mrg /* 4-byte for storing data to memory. */ 1182 1.1 mrg new_addr = plus_constant (Pmode, base_addr, offset); 1183 1.1 mrg mem = adjust_automodify_address_nv (basemem, SImode, 1184 1.1 mrg new_addr, offset); 1185 1.1 mrg reg = gen_rtx_REG (SImode, base_regno + par_index); 1186 1.1 mrg 1187 1.1 mrg XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg); 1188 1.1 mrg } 1189 1.1 mrg 1190 1.1 mrg return result; 1191 1.1 mrg } 1192 1.1 mrg 1193 1.1 mrg /* ------------------------------------------------------------------------ */ 1194