/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19 1.1 mrg
20 1.1 mrg #include "config.h"
21 1.1 mrg #include "system.h"
22 1.1 mrg #include "coretypes.h"
23 1.1 mrg #include "tm.h"
24 1.1.1.3 mrg #include "function.h"
25 1.1.1.3 mrg #include "basic-block.h"
26 1.1 mrg #include "rtl.h"
27 1.1 mrg #include "tree.h"
28 1.1.1.3 mrg #include "tm_p.h"
29 1.1 mrg #include "emit-rtl.h"
30 1.1.1.3 mrg #include "explow.h"
31 1.1 mrg #include "expr.h"
32 1.1 mrg
33 1.1 mrg /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
34 1.1 mrg static void
35 1.1 mrg force_into (rtx value, rtx target)
36 1.1 mrg {
37 1.1 mrg value = force_operand (value, target);
38 1.1 mrg if (! rtx_equal_p (value, target))
39 1.1 mrg emit_insn (gen_move_insn (target, value));
40 1.1 mrg }
41 1.1 mrg
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if RTL for the move was emitted, false to let the
   caller fall back to a generic expansion.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only known-constant sizes are handled here.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* One SImode word per iteration: movua handles the unaligned
	 load, the word-aligned store is a plain move.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Tail of fewer than 4 bytes, if any.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  /* Dedicated 12-byte library routine: dst in r4, src in r5.  */
	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Choose the odd/even entry depending on whether a single
	     word remains after the double-word loop (bytes & 4).  */
	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  /* r6 carries the double-word iteration count minus one.  */
	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      /* Small fixed-size copies use the size-specific entry points
	 __movmemSI4 .. __movmemSI60 (bytes is a multiple of 4 here).  */
      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
184 1.1 mrg
/* Branch probabilities attached (via REG_BR_PROB notes) to the jumps
   emitted by the expanders below.  NOTE(review): "likely" is only 25%
   of REG_BR_PROB_BASE, which looks low for that name -- presumably
   historical tuning; confirm against sh pipeline data before changing.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
187 1.1 mrg
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Strategy: compare a word at a time while both addresses are
   word-aligned, using cmp/str to detect an embedded zero byte;
   otherwise (or for the final word) fall back to a byte loop.
   Always returns true.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  /* Compile-time known alignment of each string, in bytes.  */
  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* For any address not known to be word-aligned, emit a runtime test
     of its low two bits and branch to the byte loop when nonzero.  */
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      /* Both unknown: test the OR of the two addresses in one insn.  */
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* addr2 is word-aligned here, OK to load a full word.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_byte);
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* addr1 is word-aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  cmp/str on the AND of both words sets T if
     any byte position is zero in both strings.  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* Load the next word of s2 ahead of the loop branch; addr2 is
     word-aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the two words differ.  On little-endian, reverse the
     byte order of each word (the rotate sequence below is a full
     four-byte reversal) so the word subtraction at L_return has the
     sign of the first differing byte.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A zero byte was seen in the last word pair: back up both pointers
     and re-scan that word byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches, the zero-extension of tmp2 can sit in the
     branch shadow here; otherwise it is done after the loop.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  /* Zero-extend both final bytes so the subtraction below yields the
     unsigned byte difference.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
326 1.1 mrg
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For a constant length, compare word-at-a-time (with a cmp/str zero
   test) for the full words and unroll the leftover bytes; otherwise
   run a counted byte loop.  Always returns true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  /* Runtime length in a register; constp tells whether it is also a
     compile-time constant.  */
  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  /* Compile-time known alignment of each string, in bytes.  */
  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      /* Word loop only pays off for at least two full words.  */
      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment checks for any address not known to be
	     word-aligned; misaligned cases take the byte loop.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  cmp/str against the AND of both
	     words detects a zero byte common to both.  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect sets T on zero (SH2+),
	     otherwise use add -1 plus an explicit test.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* Length was an exact word multiple: the difference of
		 the last (equal or differing) words is the result.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      /* Fully unrolled tail of 1..3 byte compares.  */
	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* witers <= 1: unroll the whole (short) constant-length compare
	 byte by byte.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Non-constant length: a zero length compares equal.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Counted byte loop: compare one byte per iteration until a zero
     byte, a mismatch, or the count runs out.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  /* Zero-extend the final bytes so the subtraction yields the
     unsigned byte difference.  */
  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
567 1.1 mrg
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans a word at a time with cmp/str once the address is word-aligned,
   then locates the zero byte within the final word.  Always returns
   true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  /* NOTE(review): this initial -1 store appears to be unconditionally
     overwritten by the final subtraction below -- looks vestigial;
     confirm before removing.  */
  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If the address may be misaligned, test its low two bits at runtime
     and take the byte loop when they are nonzero.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 holds the byte value cmp/str compares against.  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* The terminator is somewhere in the last word loaded; back up and
     examine its bytes individually.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* start byte loop (used when the start address was misaligned).  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr points one past the terminator, so the length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
658 1.1 mrg
659 1.1 mrg /* Emit code to perform a memset.
660 1.1 mrg
661 1.1 mrg OPERANDS[0] is the destination.
662 1.1 mrg OPERANDS[1] is the size;
663 1.1 mrg OPERANDS[2] is the char to search.
664 1.1 mrg OPERANDS[3] is the alignment. */
665 1.1 mrg void
666 1.1 mrg sh_expand_setmem (rtx *operands)
667 1.1 mrg {
668 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
669 1.1 mrg rtx_code_label *L_loop_word = gen_label_rtx ();
670 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
671 1.1 mrg rtx jump;
672 1.1 mrg rtx dest = copy_rtx (operands[0]);
673 1.1 mrg rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
674 1.1.1.2 mrg rtx val = copy_to_mode_reg (SImode, operands[2]);
675 1.1 mrg int align = INTVAL (operands[3]);
676 1.1.1.2 mrg rtx len = copy_to_mode_reg (SImode, operands[1]);
677 1.1 mrg
678 1.1 mrg if (! CONST_INT_P (operands[1]))
679 1.1 mrg return;
680 1.1 mrg
681 1.1 mrg int count = INTVAL (operands[1]);
682 1.1 mrg
683 1.1 mrg if (CONST_INT_P (operands[2])
684 1.1 mrg && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
685 1.1 mrg {
686 1.1 mrg rtx lenw = gen_reg_rtx (SImode);
687 1.1 mrg
688 1.1 mrg if (align < 4)
689 1.1 mrg {
690 1.1 mrg emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
691 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
692 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
693 1.1 mrg }
694 1.1 mrg
695 1.1 mrg /* word count. Do we have iterations ? */
696 1.1 mrg emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
697 1.1 mrg
698 1.1 mrg dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
699 1.1 mrg
700 1.1 mrg /* start loop. */
701 1.1 mrg emit_label (L_loop_word);
702 1.1 mrg
703 1.1 mrg if (TARGET_SH2)
704 1.1 mrg emit_insn (gen_dect (lenw, lenw));
705 1.1 mrg else
706 1.1 mrg {
707 1.1 mrg emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
708 1.1 mrg emit_insn (gen_tstsi_t (lenw, lenw));
709 1.1 mrg }
710 1.1 mrg
711 1.1 mrg emit_move_insn (dest, val);
712 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
713 1.1 mrg GET_MODE_SIZE (SImode)));
714 1.1 mrg
715 1.1 mrg
716 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_word));
717 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
718 1.1 mrg count = count % 4;
719 1.1 mrg
720 1.1 mrg dest = adjust_address (dest, QImode, 0);
721 1.1 mrg
722 1.1 mrg val = gen_lowpart (QImode, val);
723 1.1 mrg
724 1.1 mrg while (count--)
725 1.1 mrg {
726 1.1 mrg emit_move_insn (dest, val);
727 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
728 1.1 mrg GET_MODE_SIZE (QImode)));
729 1.1 mrg }
730 1.1 mrg
731 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
732 1.1 mrg emit_barrier_after (jump);
733 1.1 mrg }
734 1.1 mrg
735 1.1 mrg dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
736 1.1 mrg
737 1.1 mrg /* start loop. */
738 1.1 mrg emit_label (L_loop_byte);
739 1.1 mrg
740 1.1 mrg if (TARGET_SH2)
741 1.1 mrg emit_insn (gen_dect (len, len));
742 1.1 mrg else
743 1.1 mrg {
744 1.1 mrg emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
745 1.1 mrg emit_insn (gen_tstsi_t (len, len));
746 1.1 mrg }
747 1.1 mrg
748 1.1 mrg val = gen_lowpart (QImode, val);
749 1.1 mrg emit_move_insn (dest, val);
750 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
751 1.1 mrg GET_MODE_SIZE (QImode)));
752 1.1 mrg
753 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
754 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
755 1.1 mrg
756 1.1 mrg emit_label (L_return);
757 1.1 mrg }
758