sh-mem.cc revision 1.1.1.8 1 1.1 mrg /* Helper routines for memory move and comparison insns.
2 1.1.1.8 mrg Copyright (C) 2013-2022 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This file is part of GCC.
5 1.1 mrg
6 1.1 mrg GCC is free software; you can redistribute it and/or modify
7 1.1 mrg it under the terms of the GNU General Public License as published by
8 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
9 1.1 mrg any later version.
10 1.1 mrg
11 1.1 mrg GCC is distributed in the hope that it will be useful,
12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 1.1 mrg GNU General Public License for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with GCC; see the file COPYING3. If not see
18 1.1 mrg <http://www.gnu.org/licenses/>. */
19 1.1 mrg
20 1.1.1.5 mrg #define IN_TARGET_CODE 1
21 1.1.1.5 mrg
22 1.1 mrg #include "config.h"
23 1.1 mrg #include "system.h"
24 1.1 mrg #include "coretypes.h"
25 1.1 mrg #include "tm.h"
26 1.1.1.3 mrg #include "function.h"
27 1.1.1.3 mrg #include "basic-block.h"
28 1.1 mrg #include "rtl.h"
29 1.1 mrg #include "tree.h"
30 1.1.1.4 mrg #include "memmodel.h"
31 1.1.1.3 mrg #include "tm_p.h"
32 1.1 mrg #include "emit-rtl.h"
33 1.1.1.3 mrg #include "explow.h"
34 1.1 mrg #include "expr.h"
35 1.1 mrg
36 1.1 mrg /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
37 1.1 mrg static void
38 1.1 mrg force_into (rtx value, rtx target)
39 1.1 mrg {
40 1.1 mrg value = force_operand (value, target);
41 1.1 mrg if (! rtx_equal_p (value, target))
42 1.1 mrg emit_insn (gen_move_insn (target, value));
43 1.1 mrg }
44 1.1 mrg
45 1.1 mrg /* Emit code to perform a block move. Choose the best method.
46 1.1 mrg
47 1.1 mrg OPERANDS[0] is the destination.
48 1.1 mrg OPERANDS[1] is the source.
49 1.1 mrg OPERANDS[2] is the size.
50 1.1 mrg OPERANDS[3] is the alignment safe to use. */
51 1.1 mrg bool
52 1.1 mrg expand_block_move (rtx *operands)
53 1.1 mrg {
54 1.1 mrg int align = INTVAL (operands[3]);
55 1.1 mrg int constp = (CONST_INT_P (operands[2]));
56 1.1 mrg int bytes = (constp ? INTVAL (operands[2]) : 0);
57 1.1 mrg
58 1.1 mrg if (! constp)
59 1.1 mrg return false;
60 1.1 mrg
61 1.1 mrg /* If we could use mov.l to move words and dest is word-aligned, we
62 1.1 mrg can use movua.l for loads and still generate a relatively short
63 1.1 mrg and efficient sequence. */
64 1.1 mrg if (TARGET_SH4A && align < 4
65 1.1 mrg && MEM_ALIGN (operands[0]) >= 32
66 1.1 mrg && can_move_by_pieces (bytes, 32))
67 1.1 mrg {
68 1.1 mrg rtx dest = copy_rtx (operands[0]);
69 1.1 mrg rtx src = copy_rtx (operands[1]);
70 1.1 mrg /* We could use different pseudos for each copied word, but
71 1.1 mrg since movua can only load into r0, it's kind of
72 1.1 mrg pointless. */
73 1.1 mrg rtx temp = gen_reg_rtx (SImode);
74 1.1 mrg rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
75 1.1 mrg int copied = 0;
76 1.1 mrg
77 1.1 mrg while (copied + 4 <= bytes)
78 1.1 mrg {
79 1.1 mrg rtx to = adjust_address (dest, SImode, copied);
80 1.1 mrg rtx from = adjust_automodify_address (src, BLKmode,
81 1.1 mrg src_addr, copied);
82 1.1 mrg
83 1.1 mrg set_mem_size (from, 4);
84 1.1 mrg emit_insn (gen_movua (temp, from));
85 1.1 mrg emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
86 1.1 mrg emit_move_insn (to, temp);
87 1.1 mrg copied += 4;
88 1.1 mrg }
89 1.1 mrg
90 1.1 mrg if (copied < bytes)
91 1.1 mrg move_by_pieces (adjust_address (dest, BLKmode, copied),
92 1.1 mrg adjust_automodify_address (src, BLKmode,
93 1.1 mrg src_addr, copied),
94 1.1.1.6 mrg bytes - copied, align, RETURN_BEGIN);
95 1.1 mrg
96 1.1 mrg return true;
97 1.1 mrg }
98 1.1 mrg
99 1.1 mrg /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
100 1.1 mrg alignment, or if it isn't a multiple of 4 bytes, then fail. */
101 1.1 mrg if (align < 4 || (bytes % 4 != 0))
102 1.1 mrg return false;
103 1.1 mrg
104 1.1 mrg if (TARGET_HARD_SH4)
105 1.1 mrg {
106 1.1 mrg if (bytes < 12)
107 1.1 mrg return false;
108 1.1 mrg else if (bytes == 12)
109 1.1 mrg {
110 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
111 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
112 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
113 1.1 mrg
114 1.1.1.3 mrg rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
115 1.1.1.3 mrg SFUNC_STATIC).lab;
116 1.1 mrg force_into (XEXP (operands[0], 0), r4);
117 1.1 mrg force_into (XEXP (operands[1], 0), r5);
118 1.1.1.3 mrg emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
119 1.1 mrg return true;
120 1.1 mrg }
121 1.1 mrg else if (! optimize_size)
122 1.1 mrg {
123 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
124 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
125 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
126 1.1 mrg rtx r6 = gen_rtx_REG (SImode, 6);
127 1.1 mrg
128 1.1.1.3 mrg rtx lab = function_symbol (func_addr_rtx, bytes & 4
129 1.1.1.3 mrg ? "__movmem_i4_odd"
130 1.1.1.3 mrg : "__movmem_i4_even",
131 1.1.1.3 mrg SFUNC_STATIC).lab;
132 1.1 mrg force_into (XEXP (operands[0], 0), r4);
133 1.1 mrg force_into (XEXP (operands[1], 0), r5);
134 1.1 mrg
135 1.1.1.3 mrg int dwords = bytes >> 3;
136 1.1 mrg emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
137 1.1.1.3 mrg emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
138 1.1 mrg return true;
139 1.1 mrg }
140 1.1 mrg else
141 1.1 mrg return false;
142 1.1 mrg }
143 1.1 mrg if (bytes < 64)
144 1.1 mrg {
145 1.1 mrg char entry[30];
146 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
147 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
148 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
149 1.1 mrg
150 1.1 mrg sprintf (entry, "__movmemSI%d", bytes);
151 1.1.1.3 mrg rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
152 1.1 mrg force_into (XEXP (operands[0], 0), r4);
153 1.1 mrg force_into (XEXP (operands[1], 0), r5);
154 1.1.1.3 mrg emit_insn (gen_block_move_real (func_addr_rtx, lab));
155 1.1 mrg return true;
156 1.1 mrg }
157 1.1 mrg
158 1.1 mrg /* This is the same number of bytes as a memcpy call, but to a different
159 1.1 mrg less common function name, so this will occasionally use more space. */
160 1.1 mrg if (! optimize_size)
161 1.1 mrg {
162 1.1 mrg rtx func_addr_rtx = gen_reg_rtx (Pmode);
163 1.1 mrg int final_switch, while_loop;
164 1.1 mrg rtx r4 = gen_rtx_REG (SImode, 4);
165 1.1 mrg rtx r5 = gen_rtx_REG (SImode, 5);
166 1.1 mrg rtx r6 = gen_rtx_REG (SImode, 6);
167 1.1 mrg
168 1.1.1.3 mrg rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
169 1.1 mrg force_into (XEXP (operands[0], 0), r4);
170 1.1 mrg force_into (XEXP (operands[1], 0), r5);
171 1.1 mrg
172 1.1 mrg /* r6 controls the size of the move. 16 is decremented from it
173 1.1 mrg for each 64 bytes moved. Then the negative bit left over is used
174 1.1 mrg as an index into a list of move instructions. e.g., a 72 byte move
175 1.1 mrg would be set up with size(r6) = 14, for one iteration through the
176 1.1 mrg big while loop, and a switch of -2 for the last part. */
177 1.1 mrg
178 1.1 mrg final_switch = 16 - ((bytes / 4) % 16);
179 1.1 mrg while_loop = ((bytes / 4) / 16 - 1) * 16;
180 1.1 mrg emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
181 1.1.1.3 mrg emit_insn (gen_block_lump_real (func_addr_rtx, lab));
182 1.1 mrg return true;
183 1.1 mrg }
184 1.1 mrg
185 1.1 mrg return false;
186 1.1 mrg }
187 1.1 mrg
188 1.1.1.5 mrg static const int prob_unlikely
189 1.1.1.5 mrg = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
190 1.1.1.5 mrg .to_reg_br_prob_note ();
191 1.1.1.5 mrg static const int prob_likely
192 1.1.1.5 mrg = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
193 1.1.1.5 mrg .to_reg_br_prob_note ();
194 1.1 mrg
195 1.1 mrg /* Emit code to perform a strcmp.
196 1.1 mrg
197 1.1 mrg OPERANDS[0] is the destination.
198 1.1 mrg OPERANDS[1] is the first string.
199 1.1 mrg OPERANDS[2] is the second string.
200 1.1 mrg OPERANDS[3] is the known alignment. */
201 1.1 mrg bool
202 1.1 mrg sh_expand_cmpstr (rtx *operands)
203 1.1 mrg {
204 1.1 mrg rtx addr1 = operands[1];
205 1.1 mrg rtx addr2 = operands[2];
206 1.1 mrg rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
207 1.1 mrg rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
208 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
209 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
210 1.1 mrg rtx tmp2 = gen_reg_rtx (SImode);
211 1.1 mrg rtx tmp3 = gen_reg_rtx (SImode);
212 1.1 mrg
213 1.1.1.4 mrg rtx_insn *jump;
214 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
215 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
216 1.1 mrg rtx_code_label *L_end_loop_byte = gen_label_rtx ();
217 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
218 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
219 1.1 mrg
220 1.1.1.3 mrg const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
221 1.1.1.3 mrg const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
222 1.1 mrg
223 1.1.1.3 mrg if (addr1_alignment < 4 && addr2_alignment < 4)
224 1.1 mrg {
225 1.1 mrg emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
226 1.1 mrg emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
227 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
228 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
229 1.1 mrg }
230 1.1.1.3 mrg else if (addr1_alignment < 4 && addr2_alignment >= 4)
231 1.1.1.3 mrg {
232 1.1.1.3 mrg emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
233 1.1.1.3 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
234 1.1.1.3 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
235 1.1.1.3 mrg }
236 1.1.1.3 mrg else if (addr1_alignment >= 4 && addr2_alignment < 4)
237 1.1.1.3 mrg {
238 1.1.1.3 mrg emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
239 1.1.1.3 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
240 1.1.1.3 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
241 1.1.1.3 mrg }
242 1.1 mrg
243 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
244 1.1 mrg addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
245 1.1 mrg
246 1.1 mrg /* tmp2 is aligned, OK to load. */
247 1.1 mrg emit_move_insn (tmp3, addr2);
248 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
249 1.1 mrg
250 1.1 mrg /* start long loop. */
251 1.1 mrg emit_label (L_loop_long);
252 1.1 mrg
253 1.1 mrg emit_move_insn (tmp2, tmp3);
254 1.1 mrg
255 1.1 mrg /* tmp1 is aligned, OK to load. */
256 1.1 mrg emit_move_insn (tmp1, addr1);
257 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
258 1.1 mrg
259 1.1 mrg /* Is there a 0 byte ? */
260 1.1 mrg emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
261 1.1 mrg
262 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp3));
263 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
264 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
265 1.1 mrg
266 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
267 1.1 mrg
268 1.1 mrg /* tmp2 is aligned, OK to load. */
269 1.1 mrg emit_move_insn (tmp3, addr2);
270 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
271 1.1 mrg
272 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_loop_long));
273 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
274 1.1 mrg /* end loop. */
275 1.1 mrg
276 1.1 mrg /* Fallthu, substract words. */
277 1.1 mrg if (TARGET_LITTLE_ENDIAN)
278 1.1 mrg {
279 1.1 mrg rtx low_1 = gen_lowpart (HImode, tmp1);
280 1.1 mrg rtx low_2 = gen_lowpart (HImode, tmp2);
281 1.1 mrg
282 1.1 mrg emit_insn (gen_rotlhi3_8 (low_1, low_1));
283 1.1 mrg emit_insn (gen_rotlhi3_8 (low_2, low_2));
284 1.1 mrg emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
285 1.1 mrg emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
286 1.1 mrg emit_insn (gen_rotlhi3_8 (low_1, low_1));
287 1.1 mrg emit_insn (gen_rotlhi3_8 (low_2, low_2));
288 1.1 mrg }
289 1.1 mrg
290 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
291 1.1 mrg emit_barrier_after (jump);
292 1.1 mrg
293 1.1 mrg emit_label (L_end_loop_long);
294 1.1 mrg
295 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
296 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
297 1.1 mrg
298 1.1 mrg /* start byte loop. */
299 1.1 mrg addr1 = adjust_address (addr1, QImode, 0);
300 1.1 mrg addr2 = adjust_address (addr2, QImode, 0);
301 1.1 mrg
302 1.1 mrg emit_label (L_loop_byte);
303 1.1 mrg
304 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
305 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
306 1.1 mrg
307 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
308 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
309 1.1 mrg
310 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
311 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
312 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
313 1.1 mrg
314 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
315 1.1 mrg if (flag_delayed_branch)
316 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
317 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_loop_byte));
318 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
319 1.1 mrg /* end loop. */
320 1.1 mrg
321 1.1 mrg emit_label (L_end_loop_byte);
322 1.1 mrg
323 1.1 mrg if (! flag_delayed_branch)
324 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
325 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
326 1.1 mrg
327 1.1 mrg emit_label (L_return);
328 1.1 mrg
329 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
330 1.1 mrg
331 1.1 mrg return true;
332 1.1 mrg }
333 1.1 mrg
334 1.1 mrg /* Emit code to perform a strncmp.
335 1.1 mrg
336 1.1 mrg OPERANDS[0] is the destination.
337 1.1 mrg OPERANDS[1] is the first string.
338 1.1 mrg OPERANDS[2] is the second string.
339 1.1 mrg OPERANDS[3] is the length.
340 1.1 mrg OPERANDS[4] is the known alignment. */
341 1.1 mrg bool
342 1.1 mrg sh_expand_cmpnstr (rtx *operands)
343 1.1 mrg {
344 1.1 mrg rtx addr1 = operands[1];
345 1.1 mrg rtx addr2 = operands[2];
346 1.1 mrg rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
347 1.1 mrg rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
348 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
349 1.1 mrg rtx tmp2 = gen_reg_rtx (SImode);
350 1.1 mrg
351 1.1.1.4 mrg rtx_insn *jump;
352 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
353 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
354 1.1 mrg rtx_code_label *L_end_loop_byte = gen_label_rtx ();
355 1.1 mrg
356 1.1.1.2 mrg rtx len = copy_to_mode_reg (SImode, operands[3]);
357 1.1 mrg int constp = CONST_INT_P (operands[3]);
358 1.1.1.4 mrg HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;
359 1.1 mrg
360 1.1.1.3 mrg const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
361 1.1.1.3 mrg const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
362 1.1.1.3 mrg
363 1.1 mrg /* Loop on a register count. */
364 1.1.1.4 mrg if (constp && bytes >= 0 && bytes < 32)
365 1.1 mrg {
366 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
367 1.1 mrg rtx tmp3 = gen_reg_rtx (SImode);
368 1.1 mrg rtx lenw = gen_reg_rtx (SImode);
369 1.1 mrg
370 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
371 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
372 1.1 mrg
373 1.1 mrg int witers = bytes / 4;
374 1.1 mrg
375 1.1 mrg if (witers > 1)
376 1.1 mrg {
377 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
378 1.1 mrg addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
379 1.1 mrg
380 1.1 mrg emit_move_insn (tmp0, const0_rtx);
381 1.1 mrg
382 1.1.1.3 mrg if (addr1_alignment < 4 && addr2_alignment < 4)
383 1.1 mrg {
384 1.1 mrg emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
385 1.1 mrg emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
386 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
387 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
388 1.1 mrg }
389 1.1.1.3 mrg else if (addr1_alignment < 4 && addr2_alignment >= 4)
390 1.1.1.3 mrg {
391 1.1.1.3 mrg emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
392 1.1.1.3 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
393 1.1.1.3 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
394 1.1.1.3 mrg }
395 1.1.1.3 mrg else if (addr1_alignment >= 4 && addr2_alignment < 4)
396 1.1.1.3 mrg {
397 1.1.1.3 mrg emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
398 1.1.1.3 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
399 1.1.1.3 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
400 1.1.1.3 mrg }
401 1.1 mrg
402 1.1 mrg /* word count. Do we have iterations ? */
403 1.1 mrg emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
404 1.1 mrg
405 1.1 mrg /* start long loop. */
406 1.1 mrg emit_label (L_loop_long);
407 1.1 mrg
408 1.1 mrg /* tmp2 is aligned, OK to load. */
409 1.1 mrg emit_move_insn (tmp2, addr2);
410 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
411 1.1 mrg GET_MODE_SIZE (SImode)));
412 1.1 mrg
413 1.1 mrg /* tmp1 is aligned, OK to load. */
414 1.1 mrg emit_move_insn (tmp1, addr1);
415 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
416 1.1 mrg GET_MODE_SIZE (SImode)));
417 1.1 mrg
418 1.1 mrg /* Is there a 0 byte ? */
419 1.1 mrg emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
420 1.1 mrg
421 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp3));
422 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
423 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
424 1.1 mrg
425 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
426 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
427 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
428 1.1 mrg
429 1.1 mrg if (TARGET_SH2)
430 1.1 mrg emit_insn (gen_dect (lenw, lenw));
431 1.1 mrg else
432 1.1 mrg {
433 1.1 mrg emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
434 1.1 mrg emit_insn (gen_tstsi_t (lenw, lenw));
435 1.1 mrg }
436 1.1 mrg
437 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_long));
438 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
439 1.1 mrg
440 1.1 mrg int sbytes = bytes % 4;
441 1.1 mrg
442 1.1 mrg /* end loop. Reached max iterations. */
443 1.1 mrg if (sbytes == 0)
444 1.1 mrg {
445 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
446 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
447 1.1 mrg emit_barrier_after (jump);
448 1.1 mrg }
449 1.1 mrg else
450 1.1 mrg {
451 1.1 mrg /* Remaining bytes to check. */
452 1.1 mrg
453 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
454 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
455 1.1 mrg
456 1.1 mrg while (sbytes--)
457 1.1 mrg {
458 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
459 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
460 1.1 mrg
461 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
462 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
463 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
464 1.1 mrg
465 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
466 1.1 mrg if (flag_delayed_branch)
467 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2,
468 1.1 mrg gen_lowpart (QImode,
469 1.1 mrg tmp2)));
470 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
471 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
472 1.1 mrg
473 1.1 mrg addr1 = adjust_address (addr1, QImode,
474 1.1 mrg GET_MODE_SIZE (QImode));
475 1.1 mrg addr2 = adjust_address (addr2, QImode,
476 1.1 mrg GET_MODE_SIZE (QImode));
477 1.1 mrg }
478 1.1 mrg
479 1.1 mrg jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
480 1.1 mrg emit_barrier_after (jump);
481 1.1 mrg }
482 1.1 mrg
483 1.1 mrg emit_label (L_end_loop_long);
484 1.1 mrg
485 1.1 mrg /* Found last word. Restart it byte per byte. */
486 1.1 mrg
487 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
488 1.1 mrg -GET_MODE_SIZE (SImode)));
489 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
490 1.1 mrg -GET_MODE_SIZE (SImode)));
491 1.1 mrg
492 1.1 mrg /* fall thru. */
493 1.1 mrg }
494 1.1 mrg
495 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
496 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
497 1.1 mrg
498 1.1 mrg while (bytes--)
499 1.1 mrg {
500 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
501 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
502 1.1 mrg
503 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
504 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
505 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
506 1.1 mrg
507 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
508 1.1 mrg if (flag_delayed_branch)
509 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2,
510 1.1 mrg gen_lowpart (QImode, tmp2)));
511 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
512 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
513 1.1 mrg
514 1.1 mrg addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
515 1.1 mrg addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
516 1.1 mrg }
517 1.1 mrg
518 1.1 mrg jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
519 1.1 mrg emit_barrier_after (jump);
520 1.1 mrg }
521 1.1 mrg else
522 1.1 mrg {
523 1.1 mrg emit_insn (gen_cmpeqsi_t (len, const0_rtx));
524 1.1 mrg emit_move_insn (operands[0], const0_rtx);
525 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_return));
526 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
527 1.1 mrg }
528 1.1 mrg
529 1.1 mrg addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
530 1.1 mrg addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
531 1.1 mrg
532 1.1 mrg emit_label (L_loop_byte);
533 1.1 mrg
534 1.1 mrg emit_insn (gen_extendqisi2 (tmp2, addr2));
535 1.1 mrg emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
536 1.1 mrg
537 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
538 1.1 mrg emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
539 1.1 mrg
540 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
541 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
542 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
543 1.1 mrg
544 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
545 1.1 mrg if (flag_delayed_branch)
546 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
547 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
548 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
549 1.1 mrg
550 1.1 mrg if (TARGET_SH2)
551 1.1 mrg emit_insn (gen_dect (len, len));
552 1.1 mrg else
553 1.1 mrg {
554 1.1 mrg emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
555 1.1 mrg emit_insn (gen_tstsi_t (len, len));
556 1.1 mrg }
557 1.1 mrg
558 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
559 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
560 1.1 mrg /* end byte loop. */
561 1.1 mrg
562 1.1 mrg emit_label (L_end_loop_byte);
563 1.1 mrg
564 1.1 mrg if (! flag_delayed_branch)
565 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
566 1.1 mrg emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
567 1.1 mrg
568 1.1 mrg emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
569 1.1 mrg
570 1.1 mrg emit_label (L_return);
571 1.1 mrg
572 1.1 mrg return true;
573 1.1 mrg }
574 1.1 mrg
575 1.1 mrg /* Emit code to perform a strlen.
576 1.1 mrg
577 1.1 mrg OPERANDS[0] is the destination.
578 1.1 mrg OPERANDS[1] is the string.
579 1.1 mrg OPERANDS[2] is the char to search.
580 1.1 mrg OPERANDS[3] is the alignment. */
581 1.1 mrg bool
582 1.1 mrg sh_expand_strlen (rtx *operands)
583 1.1 mrg {
584 1.1 mrg rtx addr1 = operands[1];
585 1.1 mrg rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
586 1.1 mrg rtx start_addr = gen_reg_rtx (Pmode);
587 1.1 mrg rtx tmp0 = gen_reg_rtx (SImode);
588 1.1 mrg rtx tmp1 = gen_reg_rtx (SImode);
589 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
590 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
591 1.1 mrg
592 1.1.1.4 mrg rtx_insn *jump;
593 1.1 mrg rtx_code_label *L_loop_long = gen_label_rtx ();
594 1.1 mrg rtx_code_label *L_end_loop_long = gen_label_rtx ();
595 1.1 mrg
596 1.1 mrg int align = INTVAL (operands[3]);
597 1.1 mrg
598 1.1 mrg emit_move_insn (operands[0], GEN_INT (-1));
599 1.1 mrg
600 1.1 mrg /* remember start of string. */
601 1.1 mrg emit_move_insn (start_addr, current_addr);
602 1.1 mrg
603 1.1 mrg if (align < 4)
604 1.1 mrg {
605 1.1 mrg emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
606 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
607 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
608 1.1 mrg }
609 1.1 mrg
610 1.1 mrg emit_move_insn (tmp0, operands[2]);
611 1.1 mrg
612 1.1 mrg addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
613 1.1 mrg
614 1.1 mrg /* start long loop. */
615 1.1 mrg emit_label (L_loop_long);
616 1.1 mrg
617 1.1 mrg /* tmp1 is aligned, OK to load. */
618 1.1 mrg emit_move_insn (tmp1, addr1);
619 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
620 1.1 mrg
621 1.1 mrg /* Is there a 0 byte ? */
622 1.1 mrg emit_insn (gen_cmpstr_t (tmp0, tmp1));
623 1.1 mrg
624 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_long));
625 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
626 1.1 mrg /* end loop. */
627 1.1 mrg
628 1.1 mrg emit_label (L_end_loop_long);
629 1.1 mrg
630 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
631 1.1 mrg
632 1.1 mrg addr1 = adjust_address (addr1, QImode, 0);
633 1.1 mrg
634 1.1 mrg /* unroll remaining bytes. */
635 1.1 mrg for (int i = 0; i < 4; ++i)
636 1.1 mrg {
637 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
638 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
639 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
640 1.1 mrg jump = emit_jump_insn (gen_branch_true (L_return));
641 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
642 1.1 mrg }
643 1.1 mrg
644 1.1 mrg emit_barrier_after (jump);
645 1.1 mrg
646 1.1 mrg /* start byte loop. */
647 1.1 mrg emit_label (L_loop_byte);
648 1.1 mrg
649 1.1 mrg emit_insn (gen_extendqisi2 (tmp1, addr1));
650 1.1 mrg emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
651 1.1 mrg
652 1.1 mrg emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
653 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
654 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
655 1.1 mrg
656 1.1 mrg /* end loop. */
657 1.1 mrg
658 1.1 mrg emit_label (L_return);
659 1.1 mrg
660 1.1 mrg emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
661 1.1 mrg emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
662 1.1 mrg
663 1.1 mrg return true;
664 1.1 mrg }
665 1.1 mrg
666 1.1 mrg /* Emit code to perform a memset.
667 1.1 mrg
668 1.1 mrg OPERANDS[0] is the destination.
669 1.1 mrg OPERANDS[1] is the size;
670 1.1 mrg OPERANDS[2] is the char to search.
671 1.1 mrg OPERANDS[3] is the alignment. */
672 1.1 mrg void
673 1.1 mrg sh_expand_setmem (rtx *operands)
674 1.1 mrg {
675 1.1 mrg rtx_code_label *L_loop_byte = gen_label_rtx ();
676 1.1 mrg rtx_code_label *L_loop_word = gen_label_rtx ();
677 1.1 mrg rtx_code_label *L_return = gen_label_rtx ();
678 1.1.1.4 mrg rtx_insn *jump;
679 1.1 mrg rtx dest = copy_rtx (operands[0]);
680 1.1 mrg rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
681 1.1.1.2 mrg rtx val = copy_to_mode_reg (SImode, operands[2]);
682 1.1 mrg int align = INTVAL (operands[3]);
683 1.1.1.2 mrg rtx len = copy_to_mode_reg (SImode, operands[1]);
684 1.1 mrg
685 1.1 mrg if (! CONST_INT_P (operands[1]))
686 1.1 mrg return;
687 1.1 mrg
688 1.1 mrg int count = INTVAL (operands[1]);
689 1.1 mrg
690 1.1 mrg if (CONST_INT_P (operands[2])
691 1.1 mrg && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
692 1.1 mrg {
693 1.1 mrg rtx lenw = gen_reg_rtx (SImode);
694 1.1 mrg
695 1.1 mrg if (align < 4)
696 1.1 mrg {
697 1.1 mrg emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
698 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
699 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
700 1.1 mrg }
701 1.1 mrg
702 1.1 mrg /* word count. Do we have iterations ? */
703 1.1 mrg emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
704 1.1 mrg
705 1.1 mrg dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
706 1.1 mrg
707 1.1 mrg /* start loop. */
708 1.1 mrg emit_label (L_loop_word);
709 1.1 mrg
710 1.1 mrg if (TARGET_SH2)
711 1.1 mrg emit_insn (gen_dect (lenw, lenw));
712 1.1 mrg else
713 1.1 mrg {
714 1.1 mrg emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
715 1.1 mrg emit_insn (gen_tstsi_t (lenw, lenw));
716 1.1 mrg }
717 1.1 mrg
718 1.1 mrg emit_move_insn (dest, val);
719 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
720 1.1 mrg GET_MODE_SIZE (SImode)));
721 1.1 mrg
722 1.1 mrg
723 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_word));
724 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
725 1.1 mrg count = count % 4;
726 1.1 mrg
727 1.1 mrg dest = adjust_address (dest, QImode, 0);
728 1.1 mrg
729 1.1 mrg val = gen_lowpart (QImode, val);
730 1.1 mrg
731 1.1 mrg while (count--)
732 1.1 mrg {
733 1.1 mrg emit_move_insn (dest, val);
734 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
735 1.1 mrg GET_MODE_SIZE (QImode)));
736 1.1 mrg }
737 1.1 mrg
738 1.1 mrg jump = emit_jump_insn (gen_jump_compact (L_return));
739 1.1 mrg emit_barrier_after (jump);
740 1.1 mrg }
741 1.1 mrg
742 1.1 mrg dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
743 1.1 mrg
744 1.1 mrg /* start loop. */
745 1.1 mrg emit_label (L_loop_byte);
746 1.1 mrg
747 1.1 mrg if (TARGET_SH2)
748 1.1 mrg emit_insn (gen_dect (len, len));
749 1.1 mrg else
750 1.1 mrg {
751 1.1 mrg emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
752 1.1 mrg emit_insn (gen_tstsi_t (len, len));
753 1.1 mrg }
754 1.1 mrg
755 1.1 mrg val = gen_lowpart (QImode, val);
756 1.1 mrg emit_move_insn (dest, val);
757 1.1 mrg emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
758 1.1 mrg GET_MODE_SIZE (QImode)));
759 1.1 mrg
760 1.1 mrg jump = emit_jump_insn (gen_branch_false (L_loop_byte));
761 1.1 mrg add_int_reg_note (jump, REG_BR_PROB, prob_likely);
762 1.1 mrg
763 1.1 mrg emit_label (L_return);
764 1.1 mrg }
765