sh-mem.cc revision 1.1 1 1.1 mrg /* Helper routines for memory move and comparison insns.
2 1.1 mrg Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This file is part of GCC.
5 1.1 mrg
6 1.1 mrg GCC is free software; you can redistribute it and/or modify
7 1.1 mrg it under the terms of the GNU General Public License as published by
8 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
9 1.1 mrg any later version.
10 1.1 mrg
11 1.1 mrg GCC is distributed in the hope that it will be useful,
12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 1.1 mrg GNU General Public License for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with GCC; see the file COPYING3. If not see
18 1.1 mrg <http://www.gnu.org/licenses/>. */
19 1.1 mrg
20 1.1 mrg #include "config.h"
21 1.1 mrg #include "system.h"
22 1.1 mrg #include "coretypes.h"
23 1.1 mrg #include "tm.h"
24 1.1 mrg #include "machmode.h"
25 1.1 mrg #include "rtl.h"
26 1.1 mrg #include "hash-set.h"
27 1.1 mrg #include "vec.h"
28 1.1 mrg #include "double-int.h"
29 1.1 mrg #include "input.h"
30 1.1 mrg #include "alias.h"
31 1.1 mrg #include "symtab.h"
32 1.1 mrg #include "wide-int.h"
33 1.1 mrg #include "inchash.h"
34 1.1 mrg #include "tree.h"
35 1.1 mrg #include "hashtab.h"
36 1.1 mrg #include "hard-reg-set.h"
37 1.1 mrg #include "function.h"
38 1.1 mrg #include "flags.h"
39 1.1 mrg #include "statistics.h"
40 1.1 mrg #include "real.h"
41 1.1 mrg #include "fixed-value.h"
42 1.1 mrg #include "insn-config.h"
43 1.1 mrg #include "expmed.h"
44 1.1 mrg #include "dojump.h"
45 1.1 mrg #include "explow.h"
46 1.1 mrg #include "calls.h"
47 1.1 mrg #include "emit-rtl.h"
48 1.1 mrg #include "varasm.h"
49 1.1 mrg #include "stmt.h"
50 1.1 mrg #include "expr.h"
51 1.1 mrg #include "tm_p.h"
52 1.1 mrg #include "predict.h"
53 1.1 mrg #include "dominance.h"
54 1.1 mrg #include "cfg.h"
55 1.1 mrg #include "cfgrtl.h"
56 1.1 mrg #include "cfganal.h"
57 1.1 mrg #include "lcm.h"
58 1.1 mrg #include "cfgbuild.h"
59 1.1 mrg #include "cfgcleanup.h"
60 1.1 mrg #include "basic-block.h"
61 1.1 mrg
62 1.1 mrg /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
63 1.1 mrg static void
64 1.1 mrg force_into (rtx value, rtx target)
65 1.1 mrg {
66 1.1 mrg value = force_operand (value, target);
67 1.1 mrg if (! rtx_equal_p (value, target))
68 1.1 mrg emit_insn (gen_move_insn (target, value));
69 1.1 mrg }
70 1.1 mrg
71 1.1 mrg /* Emit code to perform a block move. Choose the best method.
72 1.1 mrg
73 1.1 mrg OPERANDS[0] is the destination.
74 1.1 mrg OPERANDS[1] is the source.
75 1.1 mrg OPERANDS[2] is the size.
76 1.1 mrg OPERANDS[3] is the alignment safe to use. */
77 1.1 mrg bool
78 1.1 mrg expand_block_move (rtx *operands)
79 1.1 mrg {
80 1.1 mrg int align = INTVAL (operands[3]);
81 1.1 mrg int constp = (CONST_INT_P (operands[2]));
82 1.1 mrg int bytes = (constp ? INTVAL (operands[2]) : 0);
83 1.1 mrg
84 1.1 mrg if (! constp)
85 1.1 mrg return false;
86 1.1 mrg
87 1.1 mrg /* If we could use mov.l to move words and dest is word-aligned, we
88 1.1 mrg can use movua.l for loads and still generate a relatively short
89 1.1 mrg and efficient sequence. */
90 1.1 mrg if (TARGET_SH4A && align < 4
91 1.1 mrg && MEM_ALIGN (operands[0]) >= 32
92 1.1 mrg && can_move_by_pieces (bytes, 32))
93 1.1 mrg {
94 1.1 mrg rtx dest = copy_rtx (operands[0]);
95 1.1 mrg rtx src = copy_rtx (operands[1]);
96 1.1 mrg /* We could use different pseudos for each copied word, but
97 1.1 mrg since movua can only load into r0, it's kind of
98 1.1 mrg pointless. */
99 1.1 mrg rtx temp = gen_reg_rtx (SImode);
100 1.1 mrg rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
101 1.1 mrg int copied = 0;
102 1.1 mrg
103 1.1 mrg while (copied + 4 <= bytes)
104 1.1 mrg {
105 1.1 mrg rtx to = adjust_address (dest, SImode, copied);
106 1.1 mrg rtx from = adjust_automodify_address (src, BLKmode,
107 1.1 mrg src_addr, copied);
108 1.1 mrg
109 1.1 mrg set_mem_size (from, 4);
110 1.1 mrg emit_insn (gen_movua (temp, from));
111 1.1 mrg emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
112 1.1 mrg emit_move_insn (to, temp);
113 1.1 mrg copied += 4;
114 1.1 mrg }
115 1.1 mrg
116 1.1 mrg if (copied < bytes)
117 1.1 mrg move_by_pieces (adjust_address (dest, BLKmode, copied),
118 1.1 mrg adjust_automodify_address (src, BLKmode,
119 1.1 mrg src_addr, copied),
120 1.1 mrg bytes - copied, align, 0);
121 1.1 mrg
122 1.1 mrg return true;
123 1.1 mrg }
124 1.1 mrg
125 1.1 mrg /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
126 1.1 mrg alignment, or if it isn't a multiple of 4 bytes, then fail. */
127 1.1 mrg if (align < 4 || (bytes % 4 != 0))
128 1.1 mrg return false;
129 1.1 mrg
130 1.1 mrg if (TARGET_HARD_SH4)
131 1.1 mrg {
132 1.1 mrg if (bytes < 12)
133 1.1 mrg return false;
134 1.1 mrg else if (bytes == 12)
135 1.1 mrg {
136 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
137 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
138 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
139 1.1 mrg
140 1.1 mrg function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
141 1.1 mrg force_into (XEXP (operands[0], 0), r4);
142 1.1 mrg force_into (XEXP (operands[1], 0), r5);
143 1.1 mrg emit_insn (gen_block_move_real_i4 (func_addr_rtx));
144 1.1 mrg return true;
145 1.1 mrg }
146 1.1 mrg else if (! optimize_size)
147 1.1 mrg {
148 1.1 mrg const char *entry_name;
149 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
150 1.1 mrg int dwords;
151 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
152 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
153 1.1 mrg rtx r6 = gen_rtx_REG (SImode, 6);
154 1.1 mrg
155 1.1 mrg entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
156 1.1 mrg function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
157 1.1 mrg force_into (XEXP (operands[0], 0), r4);
158 1.1 mrg force_into (XEXP (operands[1], 0), r5);
159 1.1 mrg
160 1.1 mrg dwords = bytes >> 3;
161 1.1 mrg emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
162 1.1 mrg emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
163 1.1 mrg return true;
164 1.1 mrg }
165 1.1 mrg else
166 1.1 mrg return false;
167 1.1 mrg }
168 1.1 mrg if (bytes < 64)
169 1.1 mrg {
170 1.1 mrg char entry[30];
171 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
172 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
173 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
174 1.1 mrg
175 1.1 mrg sprintf (entry, "__movmemSI%d", bytes);
176 1.1 mrg function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
177 1.1 mrg force_into (XEXP (operands[0], 0), r4);
178 1.1 mrg force_into (XEXP (operands[1], 0), r5);
179 1.1 mrg emit_insn (gen_block_move_real (func_addr_rtx));
180 1.1 mrg return true;
181 1.1 mrg }
182 1.1 mrg
183 1.1 mrg /* This is the same number of bytes as a memcpy call, but to a different
184 1.1 mrg less common function name, so this will occasionally use more space. */
185 1.1 mrg if (! optimize_size)
186 1.1 mrg {
187 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
188 1.1 mrg int final_switch, while_loop;
189 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
190 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
191 1.1 mrg rtx r6 = gen_rtx_REG (SImode, 6);
192 1.1 mrg
193 1.1 mrg function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
194 1.1 mrg force_into (XEXP (operands[0], 0), r4);
195 1.1 mrg force_into (XEXP (operands[1], 0), r5);
196 1.1 mrg
197 1.1 mrg /* r6 controls the size of the move. 16 is decremented from it
198 1.1 mrg for each 64 bytes moved. Then the negative bit left over is used
199 1.1 mrg as an index into a list of move instructions. e.g., a 72 byte move
200 1.1 mrg would be set up with size(r6) = 14, for one iteration through the
201 1.1 mrg big while loop, and a switch of -2 for the last part. */
202 1.1 mrg
203 1.1 mrg final_switch = 16 - ((bytes / 4) % 16);
204 1.1 mrg while_loop = ((bytes / 4) / 16 - 1) * 16;
205 1.1 mrg emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
206 1.1 mrg emit_insn (gen_block_lump_real (func_addr_rtx));
207 1.1 mrg return true;
208 1.1 mrg }
209 1.1 mrg
210 1.1 mrg return false;
211 1.1 mrg }
212 1.1 mrg
213 1.1 mrg static const int prob_unlikely = REG_BR_PROB_BASE / 10;
214 1.1 mrg static const int prob_likely = REG_BR_PROB_BASE / 4;
215 1.1 mrg
216 1.1 mrg /* Emit code to perform a strcmp.
217 1.1 mrg
218 1.1 mrg OPERANDS[0] is the destination.
219 1.1 mrg OPERANDS[1] is the first string.
220 1.1 mrg OPERANDS[2] is the second string.
221 1.1 mrg OPERANDS[3] is the known alignment. */
222 1.1 mrg bool
223 1.1 mrg sh_expand_cmpstr (rtx *operands)
224 1.1 mrg {
225 1.1 mrg rtx addr1 = operands[1];
226 1.1 mrg rtx addr2 = operands[2];
227 1.1 mrg rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
228 1.1 mrg rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
229 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
230 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
231 1.1 mrg rtx tmp2 = gen_reg_rtx (SImode);
232 1.1 mrg rtx tmp3 = gen_reg_rtx (SImode);
233 1.1 mrg
234 1.1 mrg rtx jump;
235 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
236 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
237 1.1 mrg rtx_code_label *L_end_loop_byte = gen_label_rtx ();
238 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
239 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
240 1.1 mrg
241 1.1 mrg int align = INTVAL (operands[3]);
242 1.1 mrg
243 1.1 mrg emit_move_insn (tmp0, const0_rtx);
244 1.1 mrg
245 1.1 mrg if (align < 4)
246 1.1 mrg {
247 1.1 mrg emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
248 1.1 mrg emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
249 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
250 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
251 1.1 mrg }
252 1.1 mrg
253 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
254 1.1 mrg addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
255 1.1 mrg
256 1.1 mrg /* tmp2 is aligned, OK to load. */
257 1.1 mrg emit_move_insn (tmp3, addr2);
258 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
259 1.1 mrg
260 1.1 mrg /* start long loop. */
261 1.1 mrg emit_label (L_loop_long);
262 1.1 mrg
263 1.1 mrg emit_move_insn (tmp2, tmp3);
264 1.1 mrg
265 1.1 mrg /* tmp1 is aligned, OK to load. */
266 1.1 mrg emit_move_insn (tmp1, addr1);
267 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
268 1.1 mrg
269 1.1 mrg /* Is there a 0 byte ? */
270 1.1 mrg emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
271 1.1 mrg
272 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp3));
273 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
274 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
275 1.1 mrg
276 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
277 1.1 mrg
278 1.1 mrg /* tmp2 is aligned, OK to load. */
279 1.1 mrg emit_move_insn (tmp3, addr2);
280 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
281 1.1 mrg
282 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_loop_long));
283 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
284 1.1 mrg /* end loop. */
285 1.1 mrg
286 1.1 mrg /* Fallthu, substract words. */
287 1.1 mrg if (TARGET_LITTLE_ENDIAN)
288 1.1 mrg {
289 1.1 mrg rtx low_1 = gen_lowpart (HImode, tmp1);
290 1.1 mrg rtx low_2 = gen_lowpart (HImode, tmp2);
291 1.1 mrg
292 1.1 mrg emit_insn (gen_rotlhi3_8 (low_1, low_1));
293 1.1 mrg emit_insn (gen_rotlhi3_8 (low_2, low_2));
294 1.1 mrg emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
295 1.1 mrg emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
296 1.1 mrg emit_insn (gen_rotlhi3_8 (low_1, low_1));
297 1.1 mrg emit_insn (gen_rotlhi3_8 (low_2, low_2));
298 1.1 mrg }
299 1.1 mrg
300 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
301 1.1 mrg emit_barrier_after (jump);
302 1.1 mrg
303 1.1 mrg emit_label (L_end_loop_long);
304 1.1 mrg
305 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
306 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
307 1.1 mrg
308 1.1 mrg /* start byte loop. */
309 1.1 mrg addr1 = adjust_address (addr1, QImode, 0);
310 1.1 mrg addr2 = adjust_address (addr2, QImode, 0);
311 1.1 mrg
312 1.1 mrg emit_label (L_loop_byte);
313 1.1 mrg
314 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
315 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
316 1.1 mrg
317 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
318 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
319 1.1 mrg
320 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
321 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
322 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
323 1.1 mrg
324 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
325 1.1 mrg if (flag_delayed_branch)
326 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
327 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_loop_byte));
328 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
329 1.1 mrg /* end loop. */
330 1.1 mrg
331 1.1 mrg emit_label (L_end_loop_byte);
332 1.1 mrg
333 1.1 mrg if (! flag_delayed_branch)
334 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
335 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
336 1.1 mrg
337 1.1 mrg emit_label (L_return);
338 1.1 mrg
339 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
340 1.1 mrg
341 1.1 mrg return true;
342 1.1 mrg }
343 1.1 mrg
344 1.1 mrg /* Emit code to perform a strncmp.
345 1.1 mrg
346 1.1 mrg OPERANDS[0] is the destination.
347 1.1 mrg OPERANDS[1] is the first string.
348 1.1 mrg OPERANDS[2] is the second string.
349 1.1 mrg OPERANDS[3] is the length.
350 1.1 mrg OPERANDS[4] is the known alignment. */
351 1.1 mrg bool
352 1.1 mrg sh_expand_cmpnstr (rtx *operands)
353 1.1 mrg {
354 1.1 mrg rtx addr1 = operands[1];
355 1.1 mrg rtx addr2 = operands[2];
356 1.1 mrg rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
357 1.1 mrg rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
358 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
359 1.1 mrg rtx tmp2 = gen_reg_rtx (SImode);
360 1.1 mrg
361 1.1 mrg rtx jump;
362 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
363 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
364 1.1 mrg rtx_code_label *L_end_loop_byte = gen_label_rtx ();
365 1.1 mrg
366 1.1 mrg rtx len = force_reg (SImode, operands[3]);
367 1.1 mrg int constp = CONST_INT_P (operands[3]);
368 1.1 mrg
369 1.1 mrg /* Loop on a register count. */
370 1.1 mrg if (constp)
371 1.1 mrg {
372 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
373 1.1 mrg rtx tmp3 = gen_reg_rtx (SImode);
374 1.1 mrg rtx lenw = gen_reg_rtx (SImode);
375 1.1 mrg
376 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
377 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
378 1.1 mrg
379 1.1 mrg int align = INTVAL (operands[4]);
380 1.1 mrg int bytes = INTVAL (operands[3]);
381 1.1 mrg int witers = bytes / 4;
382 1.1 mrg
383 1.1 mrg if (witers > 1)
384 1.1 mrg {
385 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
386 1.1 mrg addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
387 1.1 mrg
388 1.1 mrg emit_move_insn (tmp0, const0_rtx);
389 1.1 mrg
390 1.1 mrg if (align < 4)
391 1.1 mrg {
392 1.1 mrg emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
393 1.1 mrg emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
394 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
395 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
396 1.1 mrg }
397 1.1 mrg
398 1.1 mrg /* word count. Do we have iterations ? */
399 1.1 mrg emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
400 1.1 mrg
401 1.1 mrg /* start long loop. */
402 1.1 mrg emit_label (L_loop_long);
403 1.1 mrg
404 1.1 mrg /* tmp2 is aligned, OK to load. */
405 1.1 mrg emit_move_insn (tmp2, addr2);
406 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
407 1.1 mrg GET_MODE_SIZE (SImode)));
408 1.1 mrg
409 1.1 mrg /* tmp1 is aligned, OK to load. */
410 1.1 mrg emit_move_insn (tmp1, addr1);
411 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
412 1.1 mrg GET_MODE_SIZE (SImode)));
413 1.1 mrg
414 1.1 mrg /* Is there a 0 byte ? */
415 1.1 mrg emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
416 1.1 mrg
417 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp3));
418 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
419 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
420 1.1 mrg
421 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
422 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
423 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
424 1.1 mrg
425 1.1 mrg if (TARGET_SH2)
426 1.1 mrg emit_insn (gen_dect (lenw, lenw));
427 1.1 mrg else
428 1.1 mrg {
429 1.1 mrg emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
430 1.1 mrg emit_insn (gen_tstsi_t (lenw, lenw));
431 1.1 mrg }
432 1.1 mrg
433 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_long));
434 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
435 1.1 mrg
436 1.1 mrg int sbytes = bytes % 4;
437 1.1 mrg
438 1.1 mrg /* end loop. Reached max iterations. */
439 1.1 mrg if (sbytes == 0)
440 1.1 mrg {
441 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
442 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
443 1.1 mrg emit_barrier_after (jump);
444 1.1 mrg }
445 1.1 mrg else
446 1.1 mrg {
447 1.1 mrg /* Remaining bytes to check. */
448 1.1 mrg
449 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
450 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
451 1.1 mrg
452 1.1 mrg while (sbytes--)
453 1.1 mrg {
454 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
455 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
456 1.1 mrg
457 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
458 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
459 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
460 1.1 mrg
461 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
462 1.1 mrg if (flag_delayed_branch)
463 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2,
464 1.1 mrg gen_lowpart (QImode,
465 1.1 mrg tmp2)));
466 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
467 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
468 1.1 mrg
469 1.1 mrg addr1 = adjust_address (addr1, QImode,
470 1.1 mrg GET_MODE_SIZE (QImode));
471 1.1 mrg addr2 = adjust_address (addr2, QImode,
472 1.1 mrg GET_MODE_SIZE (QImode));
473 1.1 mrg }
474 1.1 mrg
475 1.1 mrg jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
476 1.1 mrg emit_barrier_after (jump);
477 1.1 mrg }
478 1.1 mrg
479 1.1 mrg emit_label (L_end_loop_long);
480 1.1 mrg
481 1.1 mrg /* Found last word. Restart it byte per byte. */
482 1.1 mrg
483 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
484 1.1 mrg -GET_MODE_SIZE (SImode)));
485 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
486 1.1 mrg -GET_MODE_SIZE (SImode)));
487 1.1 mrg
488 1.1 mrg /* fall thru. */
489 1.1 mrg }
490 1.1 mrg
491 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
492 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
493 1.1 mrg
494 1.1 mrg while (bytes--)
495 1.1 mrg {
496 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
497 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
498 1.1 mrg
499 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
500 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
501 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
502 1.1 mrg
503 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
504 1.1 mrg if (flag_delayed_branch)
505 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2,
506 1.1 mrg gen_lowpart (QImode, tmp2)));
507 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
508 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
509 1.1 mrg
510 1.1 mrg addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
511 1.1 mrg addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
512 1.1 mrg }
513 1.1 mrg
514 1.1 mrg jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
515 1.1 mrg emit_barrier_after (jump);
516 1.1 mrg }
517 1.1 mrg else
518 1.1 mrg {
519 1.1 mrg emit_insn (gen_cmpeqsi_t (len, const0_rtx));
520 1.1 mrg emit_move_insn (operands[0], const0_rtx);
521 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_return));
522 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
523 1.1 mrg }
524 1.1 mrg
525 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
526 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
527 1.1 mrg
528 1.1 mrg emit_label (L_loop_byte);
529 1.1 mrg
530 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
531 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
532 1.1 mrg
533 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
534 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
535 1.1 mrg
536 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
537 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
538 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
539 1.1 mrg
540 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
541 1.1 mrg if (flag_delayed_branch)
542 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
543 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
544 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
545 1.1 mrg
546 1.1 mrg if (TARGET_SH2)
547 1.1 mrg emit_insn (gen_dect (len, len));
548 1.1 mrg else
549 1.1 mrg {
550 1.1 mrg emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
551 1.1 mrg emit_insn (gen_tstsi_t (len, len));
552 1.1 mrg }
553 1.1 mrg
554 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
555 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
556 1.1 mrg /* end byte loop. */
557 1.1 mrg
558 1.1 mrg emit_label (L_end_loop_byte);
559 1.1 mrg
560 1.1 mrg if (! flag_delayed_branch)
561 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
562 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
563 1.1 mrg
564 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
565 1.1 mrg
566 1.1 mrg emit_label (L_return);
567 1.1 mrg
568 1.1 mrg return true;
569 1.1 mrg }
570 1.1 mrg
571 1.1 mrg /* Emit code to perform a strlen.
572 1.1 mrg
573 1.1 mrg OPERANDS[0] is the destination.
574 1.1 mrg OPERANDS[1] is the string.
575 1.1 mrg OPERANDS[2] is the char to search.
576 1.1 mrg OPERANDS[3] is the alignment. */
577 1.1 mrg bool
578 1.1 mrg sh_expand_strlen (rtx *operands)
579 1.1 mrg {
580 1.1 mrg rtx addr1 = operands[1];
581 1.1 mrg rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
582 1.1 mrg rtx start_addr = gen_reg_rtx (Pmode);
583 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
584 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
585 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
586 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
587 1.1 mrg
588 1.1 mrg rtx jump;
589 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
590 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
591 1.1 mrg
592 1.1 mrg int align = INTVAL (operands[3]);
593 1.1 mrg
594 1.1 mrg emit_move_insn (operands[0], GEN_INT (-1));
595 1.1 mrg
596 1.1 mrg /* remember start of string. */
597 1.1 mrg emit_move_insn (start_addr, current_addr);
598 1.1 mrg
599 1.1 mrg if (align < 4)
600 1.1 mrg {
601 1.1 mrg emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
602 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
603 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
604 1.1 mrg }
605 1.1 mrg
606 1.1 mrg emit_move_insn (tmp0, operands[2]);
607 1.1 mrg
608 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
609 1.1 mrg
610 1.1 mrg /* start long loop. */
611 1.1 mrg emit_label (L_loop_long);
612 1.1 mrg
613 1.1 mrg /* tmp1 is aligned, OK to load. */
614 1.1 mrg emit_move_insn (tmp1, addr1);
615 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
616 1.1 mrg
617 1.1 mrg /* Is there a 0 byte ? */
618 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp1));
619 1.1 mrg
620 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_long));
621 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
622 1.1 mrg /* end loop. */
623 1.1 mrg
624 1.1 mrg emit_label (L_end_loop_long);
625 1.1 mrg
626 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
627 1.1 mrg
628 1.1 mrg addr1 = adjust_address (addr1, QImode, 0);
629 1.1 mrg
630 1.1 mrg /* unroll remaining bytes. */
631 1.1 mrg for (int i = 0; i < 4; ++i)
632 1.1 mrg {
633 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
634 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
635 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
636 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_return));
637 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
638 1.1 mrg }
639 1.1 mrg
640 1.1 mrg emit_barrier_after (jump);
641 1.1 mrg
642 1.1 mrg /* start byte loop. */
643 1.1 mrg emit_label (L_loop_byte);
644 1.1 mrg
645 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
646 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
647 1.1 mrg
648 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
649 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
650 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
651 1.1 mrg
652 1.1 mrg /* end loop. */
653 1.1 mrg
654 1.1 mrg emit_label (L_return);
655 1.1 mrg
656 1.1 mrg emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
657 1.1 mrg emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
658 1.1 mrg
659 1.1 mrg return true;
660 1.1 mrg }
661 1.1 mrg
662 1.1 mrg /* Emit code to perform a memset.
663 1.1 mrg
664 1.1 mrg OPERANDS[0] is the destination.
665 1.1 mrg OPERANDS[1] is the size;
666 1.1 mrg OPERANDS[2] is the char to search.
667 1.1 mrg OPERANDS[3] is the alignment. */
668 1.1 mrg void
669 1.1 mrg sh_expand_setmem (rtx *operands)
670 1.1 mrg {
671 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
672 1.1 mrg rtx_code_label *L_loop_word = gen_label_rtx ();
673 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
674 1.1 mrg rtx jump;
675 1.1 mrg rtx dest = copy_rtx (operands[0]);
676 1.1 mrg rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
677 1.1 mrg rtx val = force_reg (SImode, operands[2]);
678 1.1 mrg int align = INTVAL (operands[3]);
679 1.1 mrg rtx len = force_reg (SImode, operands[1]);
680 1.1 mrg
681 1.1 mrg if (! CONST_INT_P (operands[1]))
682 1.1 mrg return;
683 1.1 mrg
684 1.1 mrg int count = INTVAL (operands[1]);
685 1.1 mrg
686 1.1 mrg if (CONST_INT_P (operands[2])
687 1.1 mrg && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
688 1.1 mrg {
689 1.1 mrg rtx lenw = gen_reg_rtx (SImode);
690 1.1 mrg
691 1.1 mrg if (align < 4)
692 1.1 mrg {
693 1.1 mrg emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
694 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
695 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
696 1.1 mrg }
697 1.1 mrg
698 1.1 mrg /* word count. Do we have iterations ? */
699 1.1 mrg emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
700 1.1 mrg
701 1.1 mrg dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
702 1.1 mrg
703 1.1 mrg /* start loop. */
704 1.1 mrg emit_label (L_loop_word);
705 1.1 mrg
706 1.1 mrg if (TARGET_SH2)
707 1.1 mrg emit_insn (gen_dect (lenw, lenw));
708 1.1 mrg else
709 1.1 mrg {
710 1.1 mrg emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
711 1.1 mrg emit_insn (gen_tstsi_t (lenw, lenw));
712 1.1 mrg }
713 1.1 mrg
714 1.1 mrg emit_move_insn (dest, val);
715 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
716 1.1 mrg GET_MODE_SIZE (SImode)));
717 1.1 mrg
718 1.1 mrg
719 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_word));
720 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
721 1.1 mrg count = count % 4;
722 1.1 mrg
723 1.1 mrg dest = adjust_address (dest, QImode, 0);
724 1.1 mrg
725 1.1 mrg val = gen_lowpart (QImode, val);
726 1.1 mrg
727 1.1 mrg while (count--)
728 1.1 mrg {
729 1.1 mrg emit_move_insn (dest, val);
730 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
731 1.1 mrg GET_MODE_SIZE (QImode)));
732 1.1 mrg }
733 1.1 mrg
734 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
735 1.1 mrg emit_barrier_after (jump);
736 1.1 mrg }
737 1.1 mrg
738 1.1 mrg dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
739 1.1 mrg
740 1.1 mrg /* start loop. */
741 1.1 mrg emit_label (L_loop_byte);
742 1.1 mrg
743 1.1 mrg if (TARGET_SH2)
744 1.1 mrg emit_insn (gen_dect (len, len));
745 1.1 mrg else
746 1.1 mrg {
747 1.1 mrg emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
748 1.1 mrg emit_insn (gen_tstsi_t (len, len));
749 1.1 mrg }
750 1.1 mrg
751 1.1 mrg val = gen_lowpart (QImode, val);
752 1.1 mrg emit_move_insn (dest, val);
753 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
754 1.1 mrg GET_MODE_SIZE (QImode)));
755 1.1 mrg
756 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
757 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
758 1.1 mrg
759 1.1 mrg emit_label (L_return);
760 1.1 mrg }
761