sh-mem.cc revision 1.1.1.2 1 /* Helper routines for memory move and comparison insns.
2 Copyright (C) 2013-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "machmode.h"
25 #include "rtl.h"
26 #include "hash-set.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "hashtab.h"
36 #include "hard-reg-set.h"
37 #include "function.h"
38 #include "flags.h"
39 #include "statistics.h"
40 #include "real.h"
41 #include "fixed-value.h"
42 #include "insn-config.h"
43 #include "expmed.h"
44 #include "dojump.h"
45 #include "explow.h"
46 #include "calls.h"
47 #include "emit-rtl.h"
48 #include "varasm.h"
49 #include "stmt.h"
50 #include "expr.h"
51 #include "tm_p.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61
62 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
63 static void
64 force_into (rtx value, rtx target)
65 {
66 value = force_operand (value, target);
67 if (! rtx_equal_p (value, target))
68 emit_insn (gen_move_insn (target, value));
69 }
70
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if code was emitted, false if the caller must fall back
   to a generic block move.  Only constant sizes are handled.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* Copy one SImode word per iteration: unaligned load via movua
	 into TEMP, then an aligned store to the destination.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Move the remaining tail (fewer than 4 bytes), if any.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  /* Call the specialized 12-byte library routine; it takes the
	     destination in r4 and the source in r5.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Pick the odd/even entry point depending on whether an odd
	     number of words remains after the double-word copies.  */
	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  /* r6 carries the double-word count minus one.  */
	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Small fixed-size copy: call the size-specific __movmemSI<N>
	 entry, destination in r4, source in r5.  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
212
/* Branch probabilities attached (via REG_BR_PROB notes) to the
   conditional jumps emitted by the expanders below, expressed as
   fractions of REG_BR_PROB_BASE.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
215
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination (the SImode comparison result).
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Compares word-at-a-time while both pointers are 4-byte aligned, then
   falls back to a byte loop.  Always returns true.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  /* tmp0 stays zero; it is the reference operand for cmpstr_t below.  */
  emit_move_insn (tmp0, const0_rtx);

  /* If alignment isn't known to be >= 4, test the low two bits of both
     addresses at run time and take the byte loop when either pointer is
     not word-aligned.  */
  if (align < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* Prime the loop: load the first word of s2.  tmp2 is aligned, OK to
     load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  AND the two words first so cmpstr_t checks a
     byte that is zero in both.  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* Next word of s2, loaded before the loop-back branch (delay-slot
     friendly ordering).  tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fallthru: the words differ; subtract them.  On little-endian,
     reverse the byte order of both words first (swap the low byte pair,
     rotate halves, swap the low byte pair again = full byte swap) so the
     subtraction reflects string (memory) order.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A zero byte was seen in the last word: back both pointers up and
     re-scan that word byte per byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches the zero-extension can sit in the delay slot;
     otherwise it is done once after the loop.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  /* The bytes were loaded sign-extended; zero-extend before the final
     subtraction so the result compares unsigned char values.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
343
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination (the SImode comparison result).
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For a constant length, compares word-at-a-time then unrolls the
   remaining bytes; otherwise loops byte per byte on a register count.
   Always returns true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int align = INTVAL (operands[4]);
      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      /* Only use the word loop when there is more than one full word to
	 compare.  */
      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  /* tmp0 stays zero; reference operand for cmpstr_t.  */
	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment check: if either pointer isn't 4-byte
	     aligned, use the byte loop.  */
	  if (align < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  AND the words so cmpstr_t sees a byte
	     that is zero in both.  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect sets the T bit directly on
	     SH2+, otherwise decrement and test separately.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* Length is a multiple of 4: the words compared equal, so
		 the result is their (zero) difference.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      /* Unroll the (at most 3) trailing byte comparisons.  */
	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Fully unrolled byte comparison for the whole (small constant)
	 length, or the word-loop tail re-scan.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Non-constant length: result is 0 if the count is already 0.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Byte loop on the register count LEN.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* Decrement the remaining count; dect sets T directly on SH2+.  */
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  /* Bytes were loaded sign-extended; zero-extend so the subtraction
     compares unsigned char values.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
570
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination (the SImode length result).
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans word-at-a-time when the string is 4-byte aligned, using
   cmpstr_t to test a word for a zero byte, then resolves the final
   word byte per byte.  Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* Runtime alignment check: take the byte loop if the pointer isn't
     4-byte aligned.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* Reference value for cmpstr_t (the searched char, normally 0).  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* Back up to re-scan the word that contained the zero byte.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  The zero byte is guaranteed within these
     4 bytes, so the branch to L_return is always eventually taken.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* start byte loop.  Used for unaligned strings.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* Length = current_addr - (start_addr + 1): current_addr has been
     advanced one past the NUL byte.  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
661
662 /* Emit code to perform a memset.
663
664 OPERANDS[0] is the destination.
665 OPERANDS[1] is the size;
666 OPERANDS[2] is the char to search.
667 OPERANDS[3] is the alignment. */
668 void
669 sh_expand_setmem (rtx *operands)
670 {
671 rtx_code_label *L_loop_byte = gen_label_rtx ();
672 rtx_code_label *L_loop_word = gen_label_rtx ();
673 rtx_code_label *L_return = gen_label_rtx ();
674 rtx jump;
675 rtx dest = copy_rtx (operands[0]);
676 rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
677 rtx val = copy_to_mode_reg (SImode, operands[2]);
678 int align = INTVAL (operands[3]);
679 rtx len = copy_to_mode_reg (SImode, operands[1]);
680
681 if (! CONST_INT_P (operands[1]))
682 return;
683
684 int count = INTVAL (operands[1]);
685
686 if (CONST_INT_P (operands[2])
687 && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
688 {
689 rtx lenw = gen_reg_rtx (SImode);
690
691 if (align < 4)
692 {
693 emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
694 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
695 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
696 }
697
698 /* word count. Do we have iterations ? */
699 emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
700
701 dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
702
703 /* start loop. */
704 emit_label (L_loop_word);
705
706 if (TARGET_SH2)
707 emit_insn (gen_dect (lenw, lenw));
708 else
709 {
710 emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
711 emit_insn (gen_tstsi_t (lenw, lenw));
712 }
713
714 emit_move_insn (dest, val);
715 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
716 GET_MODE_SIZE (SImode)));
717
718
719 jump = emit_jump_insn (gen_branch_false (L_loop_word));
720 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
721 count = count % 4;
722
723 dest = adjust_address (dest, QImode, 0);
724
725 val = gen_lowpart (QImode, val);
726
727 while (count--)
728 {
729 emit_move_insn (dest, val);
730 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
731 GET_MODE_SIZE (QImode)));
732 }
733
734 jump = emit_jump_insn (gen_jump_compact (L_return));
735 emit_barrier_after (jump);
736 }
737
738 dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
739
740 /* start loop. */
741 emit_label (L_loop_byte);
742
743 if (TARGET_SH2)
744 emit_insn (gen_dect (len, len));
745 else
746 {
747 emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
748 emit_insn (gen_tstsi_t (len, len));
749 }
750
751 val = gen_lowpart (QImode, val);
752 emit_move_insn (dest, val);
753 emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
754 GET_MODE_SIZE (QImode)));
755
756 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
757 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
758
759 emit_label (L_return);
760 }
761