sh-mem.cc revision 1.1.1.4 1 1.1 mrg /* Helper routines for memory move and comparison insns.
2 1.1.1.4 mrg Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This file is part of GCC.
5 1.1 mrg
6 1.1 mrg GCC is free software; you can redistribute it and/or modify
7 1.1 mrg it under the terms of the GNU General Public License as published by
8 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
9 1.1 mrg any later version.
10 1.1 mrg
11 1.1 mrg GCC is distributed in the hope that it will be useful,
12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 1.1 mrg GNU General Public License for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with GCC; see the file COPYING3. If not see
18 1.1 mrg <http://www.gnu.org/licenses/>. */
19 1.1 mrg
20 1.1 mrg #include "config.h"
21 1.1 mrg #include "system.h"
22 1.1 mrg #include "coretypes.h"
23 1.1 mrg #include "tm.h"
24 1.1.1.3 mrg #include "function.h"
25 1.1.1.3 mrg #include "basic-block.h"
26 1.1 mrg #include "rtl.h"
27 1.1 mrg #include "tree.h"
28 1.1.1.4 mrg #include "memmodel.h"
29 1.1.1.3 mrg #include "tm_p.h"
30 1.1 mrg #include "emit-rtl.h"
31 1.1.1.3 mrg #include "explow.h"
32 1.1 mrg #include "expr.h"
33 1.1 mrg
34 1.1 mrg /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
35 1.1 mrg static void
36 1.1 mrg force_into (rtx value, rtx target)
37 1.1 mrg {
38 1.1 mrg value = force_operand (value, target);
39 1.1 mrg if (! rtx_equal_p (value, target))
40 1.1 mrg emit_insn (gen_move_insn (target, value));
41 1.1 mrg }
42 1.1 mrg
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if the move was expanded here; false tells the caller
   to fall back to its generic expansion.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only constant-size moves are handled below.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* One SImode word per iteration: movua does the (possibly
	 unaligned) load, a plain move does the aligned store.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Move any remaining tail (fewer than 4 bytes) piecewise.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  /* Use the dedicated 12-byte support routine.  The sfunc
	     calling convention takes dest in r4 and src in r5.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  /* Larger moves: pick the odd/even entry based on bit 2 of
	     the size; r6 carries the double-word count minus one for
	     the library routine's loop.  */
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      /* Small move: call the size-specialized entry __movmemSI<bytes>.  */
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
185 1.1 mrg
/* Probability values attached via REG_BR_PROB notes to the branches
   emitted by the expanders below.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
188 1.1 mrg
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Compares word-at-a-time while both pointers are 4-byte aligned,
   falling back to a byte loop otherwise.  Always returns true.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* If either pointer might be unaligned, test the low address bits at
     run time and take the byte loop when they are nonzero.  Pointers
     known (via MEM_ALIGN) to be word-aligned need no test.  */
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  AND the two words so that a zero byte in
     either one shows up in the cmpstr test below.  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  Pre-load the next word of s2 before
     the loop branch, to fill the delay.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ.  On little-endian targets,
     byte-swap both words (rotate low halfword by 8, whole word by 16,
     low halfword by 8 again) so that subtracting them compares bytes
     in string/memory order.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A word containing a zero byte was found; back up and re-examine
     it byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches, do the zero-extension before the branch so
     it can fill the delay slot; otherwise it is done after the loop.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  /* Result is the difference of the last (zero-extended) byte pair,
     or of the byte-swapped differing words.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
327 1.1 mrg
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For small constant lengths (< 32) the tail comparison is fully
   unrolled; otherwise a counted byte loop is emitted.  Always returns
   true.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  Small constant lengths are expanded
     with an unrolled byte tail instead of the generic byte loop.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      /* Number of full words to compare.  */
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* Runtime alignment check: branch to the byte loop if either
	     pointer that is not known aligned has low bits set.  */
	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  AND the words so a zero byte in either
	     is caught by the cmpstr test.  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter and loop while nonzero.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  /* Bytes left over after the full words.  */
	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* Length is an exact word multiple: the last word pair
		 decides the result.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      /* Unrolled comparison of the trailing 1-3 bytes.  */
	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Unrolled byte comparison; reached either directly (witers <= 1)
	 or after backing up over the word that ended the long loop.
	 Extra iterations beyond the real tail terminate early via the
	 NUL/mismatch branches above.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Non-constant (or large) length: strncmp of length 0 is 0;
	 otherwise fall into the counted byte loop below.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches the zero-extension fills the delay slot.  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* Decrement the remaining length and loop while nonzero.  */
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  /* Result is the difference of the zero-extended final byte pair.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
568 1.1 mrg
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans word-at-a-time once the pointer is aligned, then unrolls the
   last word byte by byte.  Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If the pointer is not known to be word-aligned, test it at run
     time and use the byte loop when it is misaligned.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* tmp0 holds the char to search (normally zero) for cmpstr.  */
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* The word just loaded contains the search char; back up and find
     the exact byte.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* One of the four branches above must be taken, so the fall-through
     path is unreachable.  */
  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* current_addr now points one past the NUL, so the length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
659 1.1 mrg
/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size;
   OPERANDS[2] is the fill value (byte).
   OPERANDS[3] is the alignment.

   Only expands when the size is a CONST_INT.  For a 0 or -1 fill value
   and more than 8 bytes, a word loop is used (every byte of the SImode
   fill word equals the byte value); otherwise a simple byte loop.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  /* NOTE(review): dest_addr/val/len copies are emitted before this
     check, so a non-constant size returns after emitting moves but no
     stores — presumably the caller only invokes this for constant
     sizes; verify against the setmem expander in sh.md.  */
  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  /* Word loop is only valid for 0 / -1 fill, where the SImode value
     already has the byte replicated in every lane.  */
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      /* If the destination may be misaligned, test at run time and
	 fall back to the byte loop.  */
      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* word count.  Do we have iterations ?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* start loop.  */
      emit_label (L_loop_word);

      /* Decrement the word counter; T is set when it reaches zero.  */
      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));


      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      /* Remaining tail bytes after the full words.  */
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      /* Unrolled byte stores for the tail (at most 3).  */
      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* start loop.  Byte loop: store one byte per iteration, len times.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}
759