/* Subroutines for insn-output.cc for SPARC.
   Copyright (C) 1987-2022 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann (at) cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
/* Processor costs.  Each field is the cost, in COSTS_N_INSNS units,
   of the corresponding instruction class for the modeled processor.  */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs leon5_costs = {
274 COSTS_N_INSNS (1), /* int load */
275 COSTS_N_INSNS (1), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (17), /* fdivs */
284 COSTS_N_INSNS (18), /* fdivd */
285 COSTS_N_INSNS (25), /* fsqrts */
286 COSTS_N_INSNS (26), /* fsqrtd */
287 COSTS_N_INSNS (4), /* imul */
288 COSTS_N_INSNS (4), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (35), /* idiv */
291 COSTS_N_INSNS (35), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs sparclet_costs = {
299 COSTS_N_INSNS (3), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (1), /* int zeroed load */
302 COSTS_N_INSNS (1), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (1), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (1), /* fmov, fmovr */
307 COSTS_N_INSNS (1), /* fmul */
308 COSTS_N_INSNS (1), /* fdivs */
309 COSTS_N_INSNS (1), /* fdivd */
310 COSTS_N_INSNS (1), /* fsqrts */
311 COSTS_N_INSNS (1), /* fsqrtd */
312 COSTS_N_INSNS (5), /* imul */
313 COSTS_N_INSNS (5), /* imulX */
314 0, /* imul bit factor */
315 COSTS_N_INSNS (5), /* idiv */
316 COSTS_N_INSNS (5), /* idivX */
317 COSTS_N_INSNS (1), /* movcc/movr */
318 0, /* shift penalty */
319 3 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (2), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (1), /* fcmp */
331 COSTS_N_INSNS (2), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (13), /* fdivs */
334 COSTS_N_INSNS (23), /* fdivd */
335 COSTS_N_INSNS (13), /* fsqrts */
336 COSTS_N_INSNS (23), /* fsqrtd */
337 COSTS_N_INSNS (4), /* imul */
338 COSTS_N_INSNS (4), /* imulX */
339 2, /* imul bit factor */
340 COSTS_N_INSNS (37), /* idiv */
341 COSTS_N_INSNS (68), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 2, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs ultrasparc3_costs = {
349 COSTS_N_INSNS (2), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (2), /* float load */
353 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (4), /* fadd, fsub */
355 COSTS_N_INSNS (5), /* fcmp */
356 COSTS_N_INSNS (3), /* fmov, fmovr */
357 COSTS_N_INSNS (4), /* fmul */
358 COSTS_N_INSNS (17), /* fdivs */
359 COSTS_N_INSNS (20), /* fdivd */
360 COSTS_N_INSNS (20), /* fsqrts */
361 COSTS_N_INSNS (29), /* fsqrtd */
362 COSTS_N_INSNS (6), /* imul */
363 COSTS_N_INSNS (6), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (40), /* idiv */
366 COSTS_N_INSNS (71), /* idivX */
367 COSTS_N_INSNS (2), /* movcc/movr */
368 0, /* shift penalty */
369 2 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (9), /* float load */
378 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (8), /* fadd, fsub */
380 COSTS_N_INSNS (26), /* fcmp */
381 COSTS_N_INSNS (8), /* fmov, fmovr */
382 COSTS_N_INSNS (29), /* fmul */
383 COSTS_N_INSNS (54), /* fdivs */
384 COSTS_N_INSNS (83), /* fdivd */
385 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
386 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
387 COSTS_N_INSNS (11), /* imul */
388 COSTS_N_INSNS (11), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (72), /* idiv */
391 COSTS_N_INSNS (72), /* idivX */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 4 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara2_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (6), /* fadd, fsub */
405 COSTS_N_INSNS (6), /* fcmp */
406 COSTS_N_INSNS (6), /* fmov, fmovr */
407 COSTS_N_INSNS (6), /* fmul */
408 COSTS_N_INSNS (19), /* fdivs */
409 COSTS_N_INSNS (33), /* fdivd */
410 COSTS_N_INSNS (19), /* fsqrts */
411 COSTS_N_INSNS (33), /* fsqrtd */
412 COSTS_N_INSNS (5), /* imul */
413 COSTS_N_INSNS (5), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
416 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara3_costs = {
424 COSTS_N_INSNS (3), /* int load */
425 COSTS_N_INSNS (3), /* int signed load */
426 COSTS_N_INSNS (3), /* int zeroed load */
427 COSTS_N_INSNS (3), /* float load */
428 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (9), /* fadd, fsub */
430 COSTS_N_INSNS (9), /* fcmp */
431 COSTS_N_INSNS (9), /* fmov, fmovr */
432 COSTS_N_INSNS (9), /* fmul */
433 COSTS_N_INSNS (23), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (23), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (9), /* imul */
438 COSTS_N_INSNS (9), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
441 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 5 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara4_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 2 /* branch cost */
470 };
471
472 static const
473 struct processor_costs niagara7_costs = {
474 COSTS_N_INSNS (5), /* int load */
475 COSTS_N_INSNS (5), /* int signed load */
476 COSTS_N_INSNS (5), /* int zeroed load */
477 COSTS_N_INSNS (5), /* float load */
478 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (11), /* fadd, fsub */
480 COSTS_N_INSNS (11), /* fcmp */
481 COSTS_N_INSNS (11), /* fmov, fmovr */
482 COSTS_N_INSNS (11), /* fmul */
483 COSTS_N_INSNS (24), /* fdivs */
484 COSTS_N_INSNS (37), /* fdivd */
485 COSTS_N_INSNS (24), /* fsqrts */
486 COSTS_N_INSNS (37), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (12), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
491 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const
498 struct processor_costs m8_costs = {
499 COSTS_N_INSNS (3), /* int load */
500 COSTS_N_INSNS (3), /* int signed load */
501 COSTS_N_INSNS (3), /* int zeroed load */
502 COSTS_N_INSNS (3), /* float load */
503 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
504 COSTS_N_INSNS (9), /* fadd, fsub */
505 COSTS_N_INSNS (9), /* fcmp */
506 COSTS_N_INSNS (9), /* fmov, fmovr */
507 COSTS_N_INSNS (9), /* fmul */
508 COSTS_N_INSNS (26), /* fdivs */
509 COSTS_N_INSNS (30), /* fdivd */
510 COSTS_N_INSNS (33), /* fsqrts */
511 COSTS_N_INSNS (41), /* fsqrtd */
512 COSTS_N_INSNS (12), /* imul */
513 COSTS_N_INSNS (10), /* imulX */
514 0, /* imul bit factor */
515 COSTS_N_INSNS (57), /* udiv/sdiv */
516 COSTS_N_INSNS (30), /* udivx/sdivx */
517 COSTS_N_INSNS (1), /* movcc/movr */
518 0, /* shift penalty */
519 1 /* branch cost */
520 };
521
522 static const struct processor_costs *sparc_costs = &cypress_costs;
523
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
534
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
552
553 struct GTY(()) machine_function
554 {
555 /* Size of the frame of the function. */
556 HOST_WIDE_INT frame_size;
557
558 /* Size of the frame of the function minus the register window save area
559 and the outgoing argument area. */
560 HOST_WIDE_INT apparent_frame_size;
561
562 /* Register we pretend the frame pointer is allocated to. Normally, this
563 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
564 record "offset" separately as it may be too big for (reg + disp). */
565 rtx frame_base_reg;
566 HOST_WIDE_INT frame_base_offset;
567
568 /* Number of global or FP registers to be saved (as 4-byte quantities). */
569 int n_global_fp_regs;
570
571 /* True if the current function is leaf and uses only leaf regs,
572 so that the SPARC leaf function optimization can be applied.
573 Private version of crtl->uses_only_leaf_regs, see
574 sparc_expand_prologue for the rationale. */
575 int leaf_function_p;
576
577 /* True if the prologue saves local or in registers. */
578 bool save_local_in_regs_p;
579
580 /* True if the data calculated by sparc_expand_prologue are valid. */
581 bool prologue_data_valid_p;
582 };
583
/* Shorthand accessors for the current function's machine-dependent data.  */
#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
595
596 static void sparc_option_override (void);
597 static void sparc_init_modes (void);
598 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
599 const_tree, bool, bool, int *, int *);
600
601 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
602 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
603 static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);
604
605 static void sparc_emit_set_const32 (rtx, rtx);
606 static void sparc_emit_set_const64 (rtx, rtx);
607 static void sparc_output_addr_vec (rtx);
608 static void sparc_output_addr_diff_vec (rtx);
609 static void sparc_output_deferred_case_vectors (void);
610 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
611 static bool sparc_legitimate_constant_p (machine_mode, rtx);
612 static rtx sparc_builtin_saveregs (void);
613 static int epilogue_renumber (rtx *, int);
614 static bool sparc_assemble_integer (rtx, unsigned int, int);
615 static int set_extends (rtx_insn *);
616 static void sparc_asm_function_prologue (FILE *);
617 static void sparc_asm_function_epilogue (FILE *);
618 #ifdef TARGET_SOLARIS
619 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
620 tree) ATTRIBUTE_UNUSED;
621 #endif
622 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
623 static int sparc_issue_rate (void);
624 static void sparc_sched_init (FILE *, int, int);
625 static int sparc_use_sched_lookahead (void);
626
627 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
628 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
629 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
630 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
631 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
632
633 static bool sparc_function_ok_for_sibcall (tree, tree);
634 static void sparc_init_libfuncs (void);
635 static void sparc_init_builtins (void);
636 static void sparc_fpu_init_builtins (void);
637 static void sparc_vis_init_builtins (void);
638 static tree sparc_builtin_decl (unsigned, bool);
639 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
640 static tree sparc_fold_builtin (tree, int, tree *, bool);
641 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
642 HOST_WIDE_INT, tree);
643 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
644 HOST_WIDE_INT, const_tree);
645 static struct machine_function * sparc_init_machine_status (void);
646 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
647 static rtx sparc_tls_get_addr (void);
648 static rtx sparc_tls_got (void);
649 static int sparc_register_move_cost (machine_mode,
650 reg_class_t, reg_class_t);
651 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
652 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
653 int *, const_tree, int);
654 static bool sparc_strict_argument_naming (cumulative_args_t);
655 static void sparc_va_start (tree, rtx);
656 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
657 static bool sparc_vector_mode_supported_p (machine_mode);
658 static bool sparc_tls_referenced_p (rtx);
659 static rtx sparc_legitimize_tls_address (rtx);
660 static rtx sparc_legitimize_pic_address (rtx, rtx);
661 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
662 static rtx sparc_delegitimize_address (rtx);
663 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
664 static bool sparc_pass_by_reference (cumulative_args_t,
665 const function_arg_info &);
666 static void sparc_function_arg_advance (cumulative_args_t,
667 const function_arg_info &);
668 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
669 static rtx sparc_function_incoming_arg (cumulative_args_t,
670 const function_arg_info &);
671 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
672 static unsigned int sparc_function_arg_boundary (machine_mode,
673 const_tree);
674 static int sparc_arg_partial_bytes (cumulative_args_t,
675 const function_arg_info &);
676 static bool sparc_return_in_memory (const_tree, const_tree);
677 static rtx sparc_struct_value_rtx (tree, int);
678 static rtx sparc_function_value (const_tree, const_tree, bool);
679 static rtx sparc_libcall_value (machine_mode, const_rtx);
680 static bool sparc_function_value_regno_p (const unsigned int);
681 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
682 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
683 static void sparc_file_end (void);
684 static bool sparc_frame_pointer_required (void);
685 static bool sparc_can_eliminate (const int, const int);
686 static void sparc_conditional_register_usage (void);
687 static bool sparc_use_pseudo_pic_reg (void);
688 static void sparc_init_pic_reg (void);
689 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
690 static const char *sparc_mangle_type (const_tree);
691 #endif
692 static void sparc_trampoline_init (rtx, tree, rtx);
693 static machine_mode sparc_preferred_simd_mode (scalar_mode);
694 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
695 static bool sparc_lra_p (void);
696 static bool sparc_print_operand_punct_valid_p (unsigned char);
697 static void sparc_print_operand (FILE *, rtx, int);
698 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
699 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
700 machine_mode,
701 secondary_reload_info *);
702 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
703 reg_class_t);
704 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
705 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
706 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
707 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
708 static unsigned int sparc_min_arithmetic_precision (void);
709 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
710 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
711 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
712 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
713 reg_class_t);
714 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
715 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
716 const vec_perm_indices &);
717 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
718 static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
719
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
      do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#endif

/* One flag per global register %g1..%g7 (plus a spare slot), set once the
   register has been printed as clobbered; see its uses later in the file.
   NOTE(review): exact per-slot meaning not visible in this chunk — confirm
   against the rest of sparc.cc.  */
char sparc_hard_reg_printed[8];
/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
928 #undef TARGET_PRINT_OPERAND
929 #define TARGET_PRINT_OPERAND sparc_print_operand
930 #undef TARGET_PRINT_OPERAND_ADDRESS
931 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
932
933 /* The value stored by LDSTUB. */
934 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
935 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
936
937 #undef TARGET_CSTORE_MODE
938 #define TARGET_CSTORE_MODE sparc_cstore_mode
939
940 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
941 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
942
943 #undef TARGET_FIXED_CONDITION_CODE_REGS
944 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
945
946 #undef TARGET_MIN_ARITHMETIC_PRECISION
947 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
948
949 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
950 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
951
952 #undef TARGET_HARD_REGNO_NREGS
953 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
954 #undef TARGET_HARD_REGNO_MODE_OK
955 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
956
957 #undef TARGET_MODES_TIEABLE_P
958 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
959
960 #undef TARGET_CAN_CHANGE_MODE_CLASS
961 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
962
963 #undef TARGET_CONSTANT_ALIGNMENT
964 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
965
966 #undef TARGET_VECTORIZE_VEC_PERM_CONST
967 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
968
969 #undef TARGET_CAN_FOLLOW_JUMP
970 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
971
972 #undef TARGET_ZERO_CALL_USED_REGS
973 #define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs
974
#ifdef SPARC_GCOV_TYPE_SIZE
/* Implement TARGET_GCOV_TYPE_SIZE: return the subtarget-specified size of
   the gcov counter type (presumably in bits — see target.def; TODO confirm
   against the subtarget that defines SPARC_GCOV_TYPE_SIZE).  */
static HOST_WIDE_INT
sparc_gcov_type_size (void)
{
  return SPARC_GCOV_TYPE_SIZE;
}

#undef TARGET_GCOV_TYPE_SIZE
#define TARGET_GCOV_TYPE_SIZE sparc_gcov_type_size
#endif
985
/* The target hook vector, filled in from the TARGET_* macros above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
987
988 /* Return the memory reference contained in X if any, zero otherwise. */
989
990 static rtx
991 mem_ref (rtx x)
992 {
993 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
994 x = XEXP (x, 0);
995
996 if (MEM_P (x))
997 return x;
998
999 return NULL_RTX;
1000 }
1001
1002 /* True if any of INSN's source register(s) is REG. */
1003
1004 static bool
1005 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
1006 {
1007 extract_insn (insn);
1008 return ((REG_P (recog_data.operand[1])
1009 && REGNO (recog_data.operand[1]) == reg)
1010 || (recog_data.n_operands == 3
1011 && REG_P (recog_data.operand[2])
1012 && REGNO (recog_data.operand[2]) == reg));
1013 }
1014
1015 /* True if INSN is a floating-point division or square-root. */
1016
1017 static bool
1018 div_sqrt_insn_p (rtx_insn *insn)
1019 {
1020 if (GET_CODE (PATTERN (insn)) != SET)
1021 return false;
1022
1023 switch (get_attr_type (insn))
1024 {
1025 case TYPE_FPDIVS:
1026 case TYPE_FPSQRTS:
1027 case TYPE_FPDIVD:
1028 case TYPE_FPSQRTD:
1029 return true;
1030 default:
1031 return false;
1032 }
1033 }
1034
1035 /* True if INSN is a floating-point instruction. */
1036
1037 static bool
1038 fpop_insn_p (rtx_insn *insn)
1039 {
1040 if (GET_CODE (PATTERN (insn)) != SET)
1041 return false;
1042
1043 switch (get_attr_type (insn))
1044 {
1045 case TYPE_FPMOVE:
1046 case TYPE_FPCMOVE:
1047 case TYPE_FP:
1048 case TYPE_FPCMP:
1049 case TYPE_FPMUL:
1050 case TYPE_FPDIVS:
1051 case TYPE_FPSQRTS:
1052 case TYPE_FPDIVD:
1053 case TYPE_FPSQRTD:
1054 return true;
1055 default:
1056 return false;
1057 }
1058 }
1059
1060 /* True if INSN is an atomic instruction. */
1061
1062 static bool
1063 atomic_insn_for_leon3_p (rtx_insn *insn)
1064 {
1065 switch (INSN_CODE (insn))
1066 {
1067 case CODE_FOR_swapsi:
1068 case CODE_FOR_ldstub:
1069 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1070 return true;
1071 default:
1072 return false;
1073 }
1074 }
1075
1076 /* True if INSN is a store instruction. */
1077
1078 static bool
1079 store_insn_p (rtx_insn *insn)
1080 {
1081 if (GET_CODE (PATTERN (insn)) != SET)
1082 return false;
1083
1084 switch (get_attr_type (insn))
1085 {
1086 case TYPE_STORE:
1087 case TYPE_FPSTORE:
1088 return true;
1089 default:
1090 return false;
1091 }
1092 }
1093
1094 /* True if INSN is a load instruction. */
1095
1096 static bool
1097 load_insn_p (rtx_insn *insn)
1098 {
1099 if (GET_CODE (PATTERN (insn)) != SET)
1100 return false;
1101
1102 switch (get_attr_type (insn))
1103 {
1104 case TYPE_LOAD:
1105 case TYPE_SLOAD:
1106 case TYPE_FPLOAD:
1107 return true;
1108 default:
1109 return false;
1110 }
1111 }
1112
/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

/* Return the next active insn after INSN, skipping insns whose pattern is
   an UNSPEC_VOLATILE or an ASM_INPUT, as well as inline asm statements with
   an empty template (none of which emit code).  */

rtx_insn *
next_active_non_empty_insn (rtx_insn *insn)
{
  insn = next_active_insn (insn);

  while (insn
	 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	     || GET_CODE (PATTERN (insn)) == ASM_INPUT
	     || (USEFUL_INSN_P (insn)
		 && (asm_noperands (PATTERN (insn)) >= 0)
		 && !strcmp (decode_asm_operands (PATTERN (insn),
						  NULL, NULL, NULL,
						  NULL, NULL), ""))))
    insn = next_active_insn (insn);

  return insn;
}
1142
/* Main function of the errata workaround pass: scan the (essentially) final
   insn stream and insert NOPs (and, in one case, an extra NOP at a branch
   target) wherever a sequence problematic for one of the enabled -mfix-*
   errata is detected.  Returns 0 (no TODO flags).  */
static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;
  /* True until the first useful (non-USE/CLOBBER) insn has been seen.  */
  bool find_first_useful = true;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Do not begin function with atomic instruction.  */
      if (sparc_fix_ut700
	  && find_first_useful
	  && USEFUL_INSN_P (insn))
	{
	  find_first_useful = false;
	  if (atomic_insn_for_leon3_p (insn))
	    emit_insn_before (gen_nop (), insn);
	}

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && load_insn_p (insn))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  /* Scan a 4-insn window after the first div/sqrt.  */
	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && store_insn_p (insn))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if (store_insn_p (after))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_non_empty_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && store_insn_p (after))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			ld [address], %fx+1
			FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		        ld [address], %fx+1
		        FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		    ld [address], %fx+1
		    fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses uses LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
1588
namespace {

/* Static metadata for the errata workaround pass.  */
const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Machine-specific RTL pass applying the errata workarounds; see the
   comment above USEFUL_INSN_P for why it runs as late as possible.  */
class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if at least one errata workaround is enabled.  */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
	     || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
	     || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace
1627
/* Entry point for the pass manager to create the errata workaround pass.  */

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
1633
1634 /* Helpers for TARGET_DEBUG_OPTIONS. */
1635 static void
1636 dump_target_flag_bits (const int flags)
1637 {
1638 if (flags & MASK_64BIT)
1639 fprintf (stderr, "64BIT ");
1640 if (flags & MASK_APP_REGS)
1641 fprintf (stderr, "APP_REGS ");
1642 if (flags & MASK_FASTER_STRUCTS)
1643 fprintf (stderr, "FASTER_STRUCTS ");
1644 if (flags & MASK_FLAT)
1645 fprintf (stderr, "FLAT ");
1646 if (flags & MASK_FMAF)
1647 fprintf (stderr, "FMAF ");
1648 if (flags & MASK_FSMULD)
1649 fprintf (stderr, "FSMULD ");
1650 if (flags & MASK_FPU)
1651 fprintf (stderr, "FPU ");
1652 if (flags & MASK_HARD_QUAD)
1653 fprintf (stderr, "HARD_QUAD ");
1654 if (flags & MASK_POPC)
1655 fprintf (stderr, "POPC ");
1656 if (flags & MASK_PTR64)
1657 fprintf (stderr, "PTR64 ");
1658 if (flags & MASK_STACK_BIAS)
1659 fprintf (stderr, "STACK_BIAS ");
1660 if (flags & MASK_UNALIGNED_DOUBLES)
1661 fprintf (stderr, "UNALIGNED_DOUBLES ");
1662 if (flags & MASK_V8PLUS)
1663 fprintf (stderr, "V8PLUS ");
1664 if (flags & MASK_VIS)
1665 fprintf (stderr, "VIS ");
1666 if (flags & MASK_VIS2)
1667 fprintf (stderr, "VIS2 ");
1668 if (flags & MASK_VIS3)
1669 fprintf (stderr, "VIS3 ");
1670 if (flags & MASK_VIS4)
1671 fprintf (stderr, "VIS4 ");
1672 if (flags & MASK_VIS4B)
1673 fprintf (stderr, "VIS4B ");
1674 if (flags & MASK_CBCOND)
1675 fprintf (stderr, "CBCOND ");
1676 if (flags & MASK_DEPRECATED_V8_INSNS)
1677 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1678 if (flags & MASK_LEON)
1679 fprintf (stderr, "LEON ");
1680 if (flags & MASK_LEON3)
1681 fprintf (stderr, "LEON3 ");
1682 if (flags & MASK_SPARCLET)
1683 fprintf (stderr, "SPARCLET ");
1684 if (flags & MASK_SPARCLITE)
1685 fprintf (stderr, "SPARCLITE ");
1686 if (flags & MASK_V8)
1687 fprintf (stderr, "V8 ");
1688 if (flags & MASK_V9)
1689 fprintf (stderr, "V9 ");
1690 }
1691
1692 static void
1693 dump_target_flags (const char *prefix, const int flags)
1694 {
1695 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1696 dump_target_flag_bits (flags);
1697 fprintf(stderr, "]\n");
1698 }
1699
1700 /* Validate and override various options, and do some machine dependent
1701 initialization. */
1702
1703 static void
1704 sparc_option_override (void)
1705 {
1706 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1707 static struct cpu_default {
1708 const int cpu;
1709 const enum sparc_processor_type processor;
1710 } const cpu_default[] = {
1711 /* There must be one entry here for each TARGET_CPU value. */
1712 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1713 { TARGET_CPU_v8, PROCESSOR_V8 },
1714 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1715 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1716 { TARGET_CPU_leon, PROCESSOR_LEON },
1717 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1718 { TARGET_CPU_leon5, PROCESSOR_LEON5 },
1719 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1720 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1721 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1722 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1723 { TARGET_CPU_v9, PROCESSOR_V9 },
1724 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1725 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1726 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1727 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1728 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1729 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1730 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1731 { TARGET_CPU_m8, PROCESSOR_M8 },
1732 { -1, PROCESSOR_V7 }
1733 };
1734 const struct cpu_default *def;
1735 /* Table of values for -m{cpu,tune}=. This must match the order of
1736 the enum processor_type in sparc-opts.h. */
1737 static struct cpu_table {
1738 const char *const name;
1739 const int disable;
1740 const int enable;
1741 } const cpu_table[] = {
1742 { "v7", MASK_ISA, 0 },
1743 { "cypress", MASK_ISA, 0 },
1744 { "v8", MASK_ISA, MASK_V8 },
1745 /* TI TMS390Z55 supersparc */
1746 { "supersparc", MASK_ISA, MASK_V8 },
1747 { "hypersparc", MASK_ISA, MASK_V8 },
1748 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1749 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1750 { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
1751 { "leon3v7", MASK_ISA, MASK_LEON3 },
1752 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1753 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1754 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1755 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1756 { "f934", MASK_ISA, MASK_SPARCLITE },
1757 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1758 { "sparclet", MASK_ISA, MASK_SPARCLET },
1759 /* TEMIC sparclet */
1760 { "tsc701", MASK_ISA, MASK_SPARCLET },
1761 { "v9", MASK_ISA, MASK_V9 },
1762 /* UltraSPARC I, II, IIi */
1763 { "ultrasparc", MASK_ISA,
1764 /* Although insns using %y are deprecated, it is a clear win. */
1765 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1766 /* UltraSPARC III */
1767 /* ??? Check if %y issue still holds true. */
1768 { "ultrasparc3", MASK_ISA,
1769 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1770 /* UltraSPARC T1 */
1771 { "niagara", MASK_ISA,
1772 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1773 /* UltraSPARC T2 */
1774 { "niagara2", MASK_ISA,
1775 MASK_V9|MASK_POPC|MASK_VIS2 },
1776 /* UltraSPARC T3 */
1777 { "niagara3", MASK_ISA,
1778 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1779 /* UltraSPARC T4 */
1780 { "niagara4", MASK_ISA,
1781 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1782 /* UltraSPARC M7 */
1783 { "niagara7", MASK_ISA,
1784 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1785 /* UltraSPARC M8 */
1786 { "m8", MASK_ISA,
1787 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1788 };
1789 const struct cpu_table *cpu;
1790 unsigned int i;
1791
1792 if (sparc_debug_string != NULL)
1793 {
1794 const char *q;
1795 char *p;
1796
1797 p = ASTRDUP (sparc_debug_string);
1798 while ((q = strtok (p, ",")) != NULL)
1799 {
1800 bool invert;
1801 int mask;
1802
1803 p = NULL;
1804 if (*q == '!')
1805 {
1806 invert = true;
1807 q++;
1808 }
1809 else
1810 invert = false;
1811
1812 if (! strcmp (q, "all"))
1813 mask = MASK_DEBUG_ALL;
1814 else if (! strcmp (q, "options"))
1815 mask = MASK_DEBUG_OPTIONS;
1816 else
1817 error ("unknown %<-mdebug-%s%> switch", q);
1818
1819 if (invert)
1820 sparc_debug &= ~mask;
1821 else
1822 sparc_debug |= mask;
1823 }
1824 }
1825
1826 /* Enable the FsMULd instruction by default if not explicitly specified by
1827 the user. It may be later disabled by the CPU (explicitly or not). */
1828 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1829 target_flags |= MASK_FSMULD;
1830
1831 if (TARGET_DEBUG_OPTIONS)
1832 {
1833 dump_target_flags("Initial target_flags", target_flags);
1834 dump_target_flags("target_flags_explicit", target_flags_explicit);
1835 }
1836
1837 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1838 SUBTARGET_OVERRIDE_OPTIONS;
1839 #endif
1840
1841 #ifndef SPARC_BI_ARCH
1842 /* Check for unsupported architecture size. */
1843 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1844 error ("%s is not supported by this configuration",
1845 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1846 #endif
1847
1848 /* We force all 64bit archs to use 128 bit long double */
1849 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1850 {
1851 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1852 target_flags |= MASK_LONG_DOUBLE_128;
1853 }
1854
1855 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1856 for (i = 8; i < 16; i++)
1857 if (!call_used_regs [i])
1858 {
1859 error ("%<-fcall-saved-REG%> is not supported for out registers");
1860 call_used_regs [i] = 1;
1861 }
1862
1863 /* Set the default CPU if no -mcpu option was specified. */
1864 if (!OPTION_SET_P (sparc_cpu_and_features))
1865 {
1866 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1867 if (def->cpu == TARGET_CPU_DEFAULT)
1868 break;
1869 gcc_assert (def->cpu != -1);
1870 sparc_cpu_and_features = def->processor;
1871 }
1872
1873 /* Set the default CPU if no -mtune option was specified. */
1874 if (!OPTION_SET_P (sparc_cpu))
1875 sparc_cpu = sparc_cpu_and_features;
1876
1877 cpu = &cpu_table[(int) sparc_cpu_and_features];
1878
1879 if (TARGET_DEBUG_OPTIONS)
1880 {
1881 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1882 dump_target_flags ("cpu->disable", cpu->disable);
1883 dump_target_flags ("cpu->enable", cpu->enable);
1884 }
1885
1886 target_flags &= ~cpu->disable;
1887 target_flags |= (cpu->enable
1888 #ifndef HAVE_AS_FMAF_HPC_VIS3
1889 & ~(MASK_FMAF | MASK_VIS3)
1890 #endif
1891 #ifndef HAVE_AS_SPARC4
1892 & ~MASK_CBCOND
1893 #endif
1894 #ifndef HAVE_AS_SPARC5_VIS4
1895 & ~(MASK_VIS4 | MASK_SUBXC)
1896 #endif
1897 #ifndef HAVE_AS_SPARC6
1898 & ~(MASK_VIS4B)
1899 #endif
1900 #ifndef HAVE_AS_LEON
1901 & ~(MASK_LEON | MASK_LEON3)
1902 #endif
1903 & ~(target_flags_explicit & MASK_FEATURES)
1904 );
1905
1906 /* FsMULd is a V8 instruction. */
1907 if (!TARGET_V8 && !TARGET_V9)
1908 target_flags &= ~MASK_FSMULD;
1909
1910 /* -mvis2 implies -mvis. */
1911 if (TARGET_VIS2)
1912 target_flags |= MASK_VIS;
1913
1914 /* -mvis3 implies -mvis2 and -mvis. */
1915 if (TARGET_VIS3)
1916 target_flags |= MASK_VIS2 | MASK_VIS;
1917
1918 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1919 if (TARGET_VIS4)
1920 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1921
1922 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1923 if (TARGET_VIS4B)
1924 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1925
1926 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1927 FPU is disabled. */
1928 if (!TARGET_FPU)
1929 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1930 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1931
1932 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1933 are available; -m64 also implies v9. */
1934 if (TARGET_VIS || TARGET_ARCH64)
1935 {
1936 target_flags |= MASK_V9;
1937 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1938 }
1939
1940 /* -mvis also implies -mv8plus on 32-bit. */
1941 if (TARGET_VIS && !TARGET_ARCH64)
1942 target_flags |= MASK_V8PLUS;
1943
1944 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1945 if (TARGET_V9 && TARGET_ARCH32)
1946 target_flags |= MASK_DEPRECATED_V8_INSNS;
1947
1948 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1949 if (!TARGET_V9 || TARGET_ARCH64)
1950 target_flags &= ~MASK_V8PLUS;
1951
1952 /* Don't use stack biasing in 32-bit mode. */
1953 if (TARGET_ARCH32)
1954 target_flags &= ~MASK_STACK_BIAS;
1955
1956 /* Use LRA instead of reload, unless otherwise instructed. */
1957 if (!(target_flags_explicit & MASK_LRA))
1958 target_flags |= MASK_LRA;
1959
1960 /* Enable applicable errata workarounds for LEON3FT. */
1961 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1962 {
1963 sparc_fix_b2bst = 1;
1964 sparc_fix_lost_divsqrt = 1;
1965 }
1966
1967 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1968 if (sparc_fix_ut699)
1969 target_flags &= ~MASK_FSMULD;
1970
1971 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1972 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1973 target_flags |= MASK_LONG_DOUBLE_128;
1974 #endif
1975
1976 if (TARGET_DEBUG_OPTIONS)
1977 dump_target_flags ("Final target_flags", target_flags);
1978
1979 /* Set the code model if no -mcmodel option was specified. */
1980 if (OPTION_SET_P (sparc_code_model))
1981 {
1982 if (TARGET_ARCH32)
1983 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1984 }
1985 else
1986 {
1987 if (TARGET_ARCH32)
1988 sparc_code_model = CM_32;
1989 else
1990 sparc_code_model = SPARC_DEFAULT_CMODEL;
1991 }
1992
1993 /* Set the memory model if no -mmemory-model option was specified. */
1994 if (!OPTION_SET_P (sparc_memory_model))
1995 {
1996 /* Choose the memory model for the operating system. */
1997 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1998 if (os_default != SMM_DEFAULT)
1999 sparc_memory_model = os_default;
2000 /* Choose the most relaxed model for the processor. */
2001 else if (TARGET_V9)
2002 sparc_memory_model = SMM_RMO;
2003 else if (TARGET_LEON3)
2004 sparc_memory_model = SMM_TSO;
2005 else if (TARGET_LEON)
2006 sparc_memory_model = SMM_SC;
2007 else if (TARGET_V8)
2008 sparc_memory_model = SMM_PSO;
2009 else
2010 sparc_memory_model = SMM_SC;
2011 }
2012
2013 /* Supply a default value for align_functions. */
2014 if (flag_align_functions && !str_align_functions)
2015 {
2016 if (sparc_cpu == PROCESSOR_ULTRASPARC
2017 || sparc_cpu == PROCESSOR_ULTRASPARC3
2018 || sparc_cpu == PROCESSOR_NIAGARA
2019 || sparc_cpu == PROCESSOR_NIAGARA2
2020 || sparc_cpu == PROCESSOR_NIAGARA3
2021 || sparc_cpu == PROCESSOR_NIAGARA4)
2022 str_align_functions = "32";
2023 else if (sparc_cpu == PROCESSOR_NIAGARA7
2024 || sparc_cpu == PROCESSOR_M8)
2025 str_align_functions = "64";
2026 }
2027
2028 /* Validate PCC_STRUCT_RETURN. */
2029 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2030 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2031
2032 /* Only use .uaxword when compiling for a 64-bit target. */
2033 if (!TARGET_ARCH64)
2034 targetm.asm_out.unaligned_op.di = NULL;
2035
2036 /* Set the processor costs. */
2037 switch (sparc_cpu)
2038 {
2039 case PROCESSOR_V7:
2040 case PROCESSOR_CYPRESS:
2041 sparc_costs = &cypress_costs;
2042 break;
2043 case PROCESSOR_V8:
2044 case PROCESSOR_SPARCLITE:
2045 case PROCESSOR_SUPERSPARC:
2046 sparc_costs = &supersparc_costs;
2047 break;
2048 case PROCESSOR_F930:
2049 case PROCESSOR_F934:
2050 case PROCESSOR_HYPERSPARC:
2051 case PROCESSOR_SPARCLITE86X:
2052 sparc_costs = &hypersparc_costs;
2053 break;
2054 case PROCESSOR_LEON:
2055 sparc_costs = &leon_costs;
2056 break;
2057 case PROCESSOR_LEON3:
2058 case PROCESSOR_LEON3V7:
2059 sparc_costs = &leon3_costs;
2060 break;
2061 case PROCESSOR_LEON5:
2062 sparc_costs = &leon5_costs;
2063 break;
2064 case PROCESSOR_SPARCLET:
2065 case PROCESSOR_TSC701:
2066 sparc_costs = &sparclet_costs;
2067 break;
2068 case PROCESSOR_V9:
2069 case PROCESSOR_ULTRASPARC:
2070 sparc_costs = &ultrasparc_costs;
2071 break;
2072 case PROCESSOR_ULTRASPARC3:
2073 sparc_costs = &ultrasparc3_costs;
2074 break;
2075 case PROCESSOR_NIAGARA:
2076 sparc_costs = &niagara_costs;
2077 break;
2078 case PROCESSOR_NIAGARA2:
2079 sparc_costs = &niagara2_costs;
2080 break;
2081 case PROCESSOR_NIAGARA3:
2082 sparc_costs = &niagara3_costs;
2083 break;
2084 case PROCESSOR_NIAGARA4:
2085 sparc_costs = &niagara4_costs;
2086 break;
2087 case PROCESSOR_NIAGARA7:
2088 sparc_costs = &niagara7_costs;
2089 break;
2090 case PROCESSOR_M8:
2091 sparc_costs = &m8_costs;
2092 break;
2093 case PROCESSOR_NATIVE:
2094 gcc_unreachable ();
2095 };
2096
2097 /* param_simultaneous_prefetches is the number of prefetches that
2098 can run at the same time. More important, it is the threshold
2099 defining when additional prefetches will be dropped by the
2100 hardware.
2101
2102 The UltraSPARC-III features a documented prefetch queue with a
2103 size of 8. Additional prefetches issued in the cpu are
2104 dropped.
2105
2106 Niagara processors are different. In these processors prefetches
2107 are handled much like regular loads. The L1 miss buffer is 32
2108 entries, but prefetches start getting affected when 30 entries
2109 become occupied. That occupation could be a mix of regular loads
2110 and prefetches though. And that buffer is shared by all threads.
2111 Once the threshold is reached, if the core is running a single
2112 thread the prefetch will retry. If more than one thread is
2113 running, the prefetch will be dropped.
2114
2115 All this makes it very difficult to determine how many
2116 simultaneous prefetches can be issued simultaneously, even in a
2117 single-threaded program. Experimental results show that setting
2118 this parameter to 32 works well when the number of threads is not
2119 high. */
2120 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2121 param_simultaneous_prefetches,
2122 ((sparc_cpu == PROCESSOR_ULTRASPARC
2123 || sparc_cpu == PROCESSOR_NIAGARA
2124 || sparc_cpu == PROCESSOR_NIAGARA2
2125 || sparc_cpu == PROCESSOR_NIAGARA3
2126 || sparc_cpu == PROCESSOR_NIAGARA4)
2127 ? 2
2128 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2129 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2130 || sparc_cpu == PROCESSOR_M8)
2131 ? 32 : 3))));
2132
2133 /* param_l1_cache_line_size is the size of the L1 cache line, in
2134 bytes.
2135
2136 The Oracle SPARC Architecture (previously the UltraSPARC
2137 Architecture) specification states that when a PREFETCH[A]
2138 instruction is executed an implementation-specific amount of data
2139 is prefetched, and that it is at least 64 bytes long (aligned to
2140 at least 64 bytes).
2141
2142 However, this is not correct. The M7 (and implementations prior
2143 to that) does not guarantee a 64B prefetch into a cache if the
2144 line size is smaller. A single cache line is all that is ever
2145 prefetched. So for the M7, where the L1D$ has 32B lines and the
2146 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2147 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2148 is a read_n prefetch, which is the only type which allocates to
2149 the L1.) */
2150 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2151 param_l1_cache_line_size,
2152 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2153
  /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
     Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature a L1D$ of 16KB.  */
2157 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2158 param_l1_cache_size,
2159 ((sparc_cpu == PROCESSOR_ULTRASPARC
2160 || sparc_cpu == PROCESSOR_ULTRASPARC3
2161 || sparc_cpu == PROCESSOR_NIAGARA
2162 || sparc_cpu == PROCESSOR_NIAGARA2
2163 || sparc_cpu == PROCESSOR_NIAGARA3
2164 || sparc_cpu == PROCESSOR_NIAGARA4
2165 || sparc_cpu == PROCESSOR_NIAGARA7
2166 || sparc_cpu == PROCESSOR_M8)
2167 ? 16 : 64));
2168
  /* param_l2_cache_size is the size of the L2 in kilobytes.  Note
     that 512 is the default in params.def.  */
2171 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2172 param_l2_cache_size,
2173 ((sparc_cpu == PROCESSOR_NIAGARA4
2174 || sparc_cpu == PROCESSOR_M8)
2175 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2176 ? 256 : 512)));
2177
2178
2179 /* Disable save slot sharing for call-clobbered registers by default.
2180 The IRA sharing algorithm works on single registers only and this
2181 pessimizes for double floating-point registers. */
2182 if (!OPTION_SET_P (flag_ira_share_save_slots))
2183 flag_ira_share_save_slots = 0;
2184
2185 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2186 redundant 32-to-64-bit extensions. */
2187 if (!OPTION_SET_P (flag_ree) && TARGET_ARCH32)
2188 flag_ree = 0;
2189
2190 /* Do various machine dependent initializations. */
2191 sparc_init_modes ();
2192
2193 /* Set up function hooks. */
2194 init_machine_status = sparc_init_machine_status;
2195 }
2196
2197 /* Miscellaneous utilities. */
2199
2200 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2201 or branch on register contents instructions. */
2202
2203 int
2204 v9_regcmp_p (enum rtx_code code)
2205 {
2206 return (code == EQ || code == NE || code == GE || code == LT
2207 || code == LE || code == GT);
2208 }
2209
2210 /* Nonzero if OP is a floating point constant which can
2211 be loaded into an integer register using a single
2212 sethi instruction. */
2213
2214 int
2215 fp_sethi_p (rtx op)
2216 {
2217 if (GET_CODE (op) == CONST_DOUBLE)
2218 {
2219 long i;
2220
2221 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2222 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2223 }
2224
2225 return 0;
2226 }
2227
2228 /* Nonzero if OP is a floating point constant which can
2229 be loaded into an integer register using a single
2230 mov instruction. */
2231
2232 int
2233 fp_mov_p (rtx op)
2234 {
2235 if (GET_CODE (op) == CONST_DOUBLE)
2236 {
2237 long i;
2238
2239 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2240 return SPARC_SIMM13_P (i);
2241 }
2242
2243 return 0;
2244 }
2245
2246 /* Nonzero if OP is a floating point constant which can
2247 be loaded into an integer register using a high/losum
2248 instruction sequence. */
2249
2250 int
2251 fp_high_losum_p (rtx op)
2252 {
2253 /* The constraints calling this should only be in
2254 SFmode move insns, so any constant which cannot
2255 be moved using a single insn will do. */
2256 if (GET_CODE (op) == CONST_DOUBLE)
2257 {
2258 long i;
2259
2260 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2261 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2262 }
2263
2264 return 0;
2265 }
2266
2267 /* Return true if the address of LABEL can be loaded by means of the
2268 mov{si,di}_pic_label_ref patterns in PIC mode. */
2269
2270 static bool
2271 can_use_mov_pic_label_ref (rtx label)
2272 {
2273 /* VxWorks does not impose a fixed gap between segments; the run-time
2274 gap can be different from the object-file gap. We therefore can't
2275 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2276 are absolutely sure that X is in the same segment as the GOT.
2277 Unfortunately, the flexibility of linker scripts means that we
2278 can't be sure of that in general, so assume that GOT-relative
2279 accesses are never valid on VxWorks. */
2280 if (TARGET_VXWORKS_RTP)
2281 return false;
2282
2283 /* Similarly, if the label is non-local, it might end up being placed
2284 in a different section than the current one; now mov_pic_label_ref
2285 requires the label and the code to be in the same section. */
2286 if (LABEL_REF_NONLOCAL_P (label))
2287 return false;
2288
2289 /* Finally, if we are reordering basic blocks and partition into hot
2290 and cold sections, this might happen for any label. */
2291 if (flag_reorder_blocks_and_partition)
2292 return false;
2293
2294 return true;
2295 }
2296
2297 /* Expand a move instruction. Return true if all work is done. */
2298
bool
sparc_expand_move (machine_mode mode, rtx *operands)
{
  /* Expand a move of MODE from OPERANDS[1] to OPERANDS[0].  Return true
     if all necessary insns have been emitted here, false if the caller
     should still emit the generic move pattern (possibly with the
     operands rewritten in place below).  */

  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      /* During reload we must not create new pseudos, so only
	 legitimize the operands when we are still allowed to.  */
      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fix up TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fix up PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if ((GET_CODE (operands[1]) == LABEL_REF
	   && can_use_mov_pic_label_ref (operands[1]))
	  || (GET_CODE (operands[1]) == CONST
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload, reuse the destination as the scratch register
	     since we cannot create a new pseudo.  */
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers if needed.  */
	      || (mode == DFmode && !can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* At this point the source is a constant that needs a multi-insn
     sequence; dispatch on the mode to the appropriate builder.  */
  switch (mode)
    {
    case E_QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case E_HImode:
    case E_SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case E_DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case E_TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
2438
2439 /* Load OP1, a 32-bit constant, into OP0, a register.
2440 We know it can't be done in one insn when we get
2441 here, the move expander guarantees this. */
2442
2443 static void
2444 sparc_emit_set_const32 (rtx op0, rtx op1)
2445 {
2446 machine_mode mode = GET_MODE (op0);
2447 rtx temp = op0;
2448
2449 if (can_create_pseudo_p ())
2450 temp = gen_reg_rtx (mode);
2451
2452 if (GET_CODE (op1) == CONST_INT)
2453 {
2454 gcc_assert (!small_int_operand (op1, mode)
2455 && !const_high_operand (op1, mode));
2456
2457 /* Emit them as real moves instead of a HIGH/LO_SUM,
2458 this way CSE can see everything and reuse intermediate
2459 values if it wants. */
2460 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2461 & ~(HOST_WIDE_INT) 0x3ff)));
2462
2463 emit_insn (gen_rtx_SET (op0,
2464 gen_rtx_IOR (mode, temp,
2465 GEN_INT (INTVAL (op1) & 0x3ff))));
2466 }
2467 else
2468 {
2469 /* A symbol, emit in the traditional way. */
2470 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2471 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2472 }
2473 }
2474
2475 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2476 If TEMP is nonzero, we are forbidden to use any other scratch
2477 registers. Otherwise, we are allowed to generate them as needed.
2478
2479 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2480 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2481
void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx cst, temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* Deal with too large offsets: split SYMBOL+CST into two register
     loads and an add when CST does not fit in 32 bits.  */
  if (GET_CODE (op1) == CONST
      && GET_CODE (XEXP (op1, 0)) == PLUS
      && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
      && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
    {
      gcc_assert (!temp);
      temp1 = gen_reg_rtx (DImode);
      temp2 = gen_reg_rtx (DImode);
      sparc_emit_set_const64 (temp2, cst);
      sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
				       NULL_RTX);
      emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
      return;
    }

  /* A TImode TEMP names a pair of DImode regs; work with the first one
     now and keep the pair in TI_TEMP in case it aliases OP0 below.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code model support.  */
  switch (sparc_code_model)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
2691
2692 /* These avoid problems when cross compiling. If we do not
2693 go through all this hair then the optimizer will see
2694 invalid REG_EQUAL notes or in some cases none at all. */
2695 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2696 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2697 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2698 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2699
/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits and match
   a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  /* Clear the low 10 bits (the %lo part) so the SET has a fully
     specified value and matches a plain movdi instead of a HIGH.  */
  return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}
2710
/* Build (set DEST (const_int VAL)); callers use this when VAL is small
   enough for a single move insn.  */
static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (dest, GEN_INT (val));
}
2716
/* Build the DImode expression (ior SRC (const_int VAL)).  */
static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}
2722
/* Build the DImode expression (xor SRC (const_int VAL)).  */
static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}
2728
2729 /* Worker routines for 64-bit constant formation on arch64.
2730 One of the key things to be doing in these emissions is
2731 to create as many temp REGs as possible. This makes it
2732 possible for half-built constants to be used later when
2733 such values are similar to something required later on.
2734 Without doing this, the optimizer cannot see such
2735 opportunities. */
2736
2737 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2738 unsigned HOST_WIDE_INT, int);
2739
2740 static void
2741 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2742 unsigned HOST_WIDE_INT low_bits, int is_neg)
2743 {
2744 unsigned HOST_WIDE_INT high_bits;
2745
2746 if (is_neg)
2747 high_bits = (~low_bits) & 0xffffffff;
2748 else
2749 high_bits = low_bits;
2750
2751 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2752 if (!is_neg)
2753 {
2754 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2755 }
2756 else
2757 {
2758 /* If we are XOR'ing with -1, then we should emit a one's complement
2759 instead. This way the combiner will notice logical operations
2760 such as ANDN later on and substitute. */
2761 if ((low_bits & 0x3ff) == 0x3ff)
2762 {
2763 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2764 }
2765 else
2766 {
2767 emit_insn (gen_rtx_SET (op0,
2768 gen_safe_XOR64 (temp,
2769 (-(HOST_WIDE_INT)0x400
2770 | (low_bits & 0x3ff)))));
2771 }
2772 }
2773 }
2774
2775 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2776 unsigned HOST_WIDE_INT, int);
2777
2778 static void
2779 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2780 unsigned HOST_WIDE_INT high_bits,
2781 unsigned HOST_WIDE_INT low_immediate,
2782 int shift_count)
2783 {
2784 rtx temp2 = op0;
2785
2786 if ((high_bits & 0xfffffc00) != 0)
2787 {
2788 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2789 if ((high_bits & ~0xfffffc00) != 0)
2790 emit_insn (gen_rtx_SET (op0,
2791 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2792 else
2793 temp2 = temp;
2794 }
2795 else
2796 {
2797 emit_insn (gen_safe_SET64 (temp, high_bits));
2798 temp2 = temp;
2799 }
2800
2801 /* Now shift it up into place. */
2802 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2803 GEN_INT (shift_count))));
2804
2805 /* If there is a low immediate part piece, finish up by
2806 putting that in as well. */
2807 if (low_immediate != 0)
2808 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2809 }
2810
2811 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2812 unsigned HOST_WIDE_INT);
2813
2814 /* Full 64-bit constant decomposition. Even though this is the
2815 'worst' case, we still optimize a few things away. */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  /* Build HIGH_BITS:LOW_BITS into OP0.  The upper half is constructed
     in SUB_TEMP, shifted into position, then combined with the lower
     half.  TEMP is the only extra scratch usable during reload.  */
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* Materialize the upper 32 bits in SUB_TEMP.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      /* Shift the upper half into place, build the lower half with a
	 sethi/or pair, and add the two together.  */
      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
						     GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No pseudos available: feed the low 32 bits in as three chunks
	 of 12, 12 and 8 bits, interleaved with left shifts.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Zero chunk: fold its shift into the next one.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
2903
2904 /* Analyze a 64-bit constant for certain properties. */
2905 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2906 unsigned HOST_WIDE_INT,
2907 int *, int *, int *);
2908
2909 static void
2910 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2911 unsigned HOST_WIDE_INT low_bits,
2912 int *hbsp, int *lbsp, int *abbasp)
2913 {
2914 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2915 int i;
2916
2917 lowest_bit_set = highest_bit_set = -1;
2918 i = 0;
2919 do
2920 {
2921 if ((lowest_bit_set == -1)
2922 && ((low_bits >> i) & 1))
2923 lowest_bit_set = i;
2924 if ((highest_bit_set == -1)
2925 && ((high_bits >> (32 - i - 1)) & 1))
2926 highest_bit_set = (64 - i - 1);
2927 }
2928 while (++i < 32
2929 && ((highest_bit_set == -1)
2930 || (lowest_bit_set == -1)));
2931 if (i == 32)
2932 {
2933 i = 0;
2934 do
2935 {
2936 if ((lowest_bit_set == -1)
2937 && ((high_bits >> i) & 1))
2938 lowest_bit_set = i + 32;
2939 if ((highest_bit_set == -1)
2940 && ((low_bits >> (32 - i - 1)) & 1))
2941 highest_bit_set = 32 - i - 1;
2942 }
2943 while (++i < 32
2944 && ((highest_bit_set == -1)
2945 || (lowest_bit_set == -1)));
2946 }
2947 /* If there are no bits set this should have gone out
2948 as one instruction! */
2949 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2950 all_bits_between_are_set = 1;
2951 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2952 {
2953 if (i < 32)
2954 {
2955 if ((low_bits & (1 << i)) != 0)
2956 continue;
2957 }
2958 else
2959 {
2960 if ((high_bits & (1 << (i - 32))) != 0)
2961 continue;
2962 }
2963 all_bits_between_are_set = 0;
2964 break;
2965 }
2966 *hbsp = highest_bit_set;
2967 *lbsp = lowest_bit_set;
2968 *abbasp = all_bits_between_are_set;
2969 }
2970
2971 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2972
2973 static int
2974 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2975 unsigned HOST_WIDE_INT low_bits)
2976 {
2977 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2978
2979 if (high_bits == 0
2980 || high_bits == 0xffffffff)
2981 return 1;
2982
2983 analyze_64bit_constant (high_bits, low_bits,
2984 &highest_bit_set, &lowest_bit_set,
2985 &all_bits_between_are_set);
2986
2987 if ((highest_bit_set == 63
2988 || lowest_bit_set == 0)
2989 && all_bits_between_are_set != 0)
2990 return 1;
2991
2992 if ((highest_bit_set - lowest_bit_set) < 21)
2993 return 1;
2994
2995 return 0;
2996 }
2997
2998 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2999 unsigned HOST_WIDE_INT,
3000 int, int);
3001
3002 static unsigned HOST_WIDE_INT
3003 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
3004 unsigned HOST_WIDE_INT low_bits,
3005 int lowest_bit_set, int shift)
3006 {
3007 HOST_WIDE_INT hi, lo;
3008
3009 if (lowest_bit_set < 32)
3010 {
3011 lo = (low_bits >> lowest_bit_set) << shift;
3012 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3013 }
3014 else
3015 {
3016 lo = 0;
3017 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3018 }
3019 gcc_assert (! (hi & lo));
3020 return (hi | lo);
3021 }
3022
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  OP0 is the destination register (or SUBREG
   of one) and OP1 the constant.  Strategies are tried from cheapest
   (2 insns) to the full decomposition fallback.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* When no new pseudos may be created, use OP0 itself as scratch.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  /* Split the constant into two 32-bit halves.  */
  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits   bits 0  --> 31
     high_bits  bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Run of ones starting at bit 0: negative shift encodes srlx.  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
						     GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
						       GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
     (NOTE(review): the test below admits spans of at most 21 bits,
     slightly narrower than this comment suggests — confirm.)
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *    xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The negated value sign-extends from 32 bits, so it can be
	     built directly without recursion.  */
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *    sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *    or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
3244
3245 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3246
3247 static bool
3248 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3249 {
3250 *p1 = SPARC_ICC_REG;
3251 *p2 = SPARC_FCC_REG;
3252 return true;
3253 }
3254
/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  Arithmetic is performed
   in at least 32-bit precision.  */

static unsigned int
sparc_min_arithmetic_precision (void)
{
  return 32U;
}
3262
3263 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3264 return the mode to be used for the comparison. For floating-point,
3265 CCFP[E]mode is used. CCNZmode should be used when the first operand
3266 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3267 processing is needed. */
3268
3269 machine_mode
3270 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3271 {
3272 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3273 {
3274 switch (op)
3275 {
3276 case EQ:
3277 case NE:
3278 case UNORDERED:
3279 case ORDERED:
3280 case UNLT:
3281 case UNLE:
3282 case UNGT:
3283 case UNGE:
3284 case UNEQ:
3285 return CCFPmode;
3286
3287 case LT:
3288 case LE:
3289 case GT:
3290 case GE:
3291 case LTGT:
3292 return CCFPEmode;
3293
3294 default:
3295 gcc_unreachable ();
3296 }
3297 }
3298 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3299 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3300 && y == const0_rtx)
3301 {
3302 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3303 return CCXNZmode;
3304 else
3305 return CCNZmode;
3306 }
3307 else
3308 {
3309 /* This is for the cmp<mode>_sne pattern. */
3310 if (GET_CODE (x) == NOT && y == constm1_rtx)
3311 {
3312 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3313 return CCXCmode;
3314 else
3315 return CCCmode;
3316 }
3317
3318 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3319 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3320 {
3321 if (GET_CODE (y) == UNSPEC
3322 && (XINT (y, 1) == UNSPEC_ADDV
3323 || XINT (y, 1) == UNSPEC_SUBV
3324 || XINT (y, 1) == UNSPEC_NEGV))
3325 return CCVmode;
3326 else
3327 return CCCmode;
3328 }
3329
3330 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3331 return CCXmode;
3332 else
3333 return CCmode;
3334 }
3335 }
3336
/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  machine_mode mode;
  rtx cc_reg;

  /* X may already be a CC register from an earlier comparison; reuse it.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y: reuse the same %fccN for a repeated
	 comparison so the duplicate compare can be deleted later.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
3403
3404
3405 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3406
3407 rtx
3408 gen_compare_reg (rtx cmp)
3409 {
3410 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3411 }
3412
/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

   (set (reg:SI 110)
	(gt (reg:CCX 100 %icc)
	    (const_int 0)))
   into
   (set (reg:SI 110)
	(gt:DI (reg:CCX 100 %icc)
	       (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   Returns 1 if an insn sequence was emitted, 0 if the caller must fall
   back to another strategy.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  /* DImode data or result needs 64-bit registers; punt on 32-bit.  */
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns (conditional move on register value,
     only valid against zero and for a subset of comparison codes).  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (dest,
				  gen_rtx_IF_THEN_ELSE (DImode,
					   gen_rtx_fmt_ee (compare_code, DImode,
							   op0, const0_rtx),
					   const1_rtx,
					   dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Clear DEST, then conditionally overwrite it with 1.  */
      emit_insn (gen_rtx_SET (dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  /* movrCC compares a DImode register; widen first.  */
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				       gen_rtx_fmt_ee (compare_code, DImode,
						       temp, const0_rtx),
				       const1_rtx,
				       dest)));
      return 1;
    }
  else
    {
      /* Generic path: compare into a CC reg, then conditional move.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      emit_insn (gen_rtx_SET (dest, const0_rtx));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				       gen_rtx_fmt_ee (compare_code,
						       GET_MODE (x), x, y),
				       const1_rtx, dest)));
      return 1;
    }
}
3503
3504
/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  OPERANDS[0] is the
   destination, OPERANDS[1] the comparison rtx, OPERANDS[2]/[3] the
   compared values.  Return true if a branch-free (or v9 conditional
   move) sequence was emitted, false to make the caller use branches.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem, x, y;
  enum rtx_code code;
  machine_mode mode;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];
  mode = GET_MODE (x);

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
    {
      /* Reduce "x op y" to "(x ^ y) op 0".  */
      if (y != const0_rtx)
	x = force_reg (mode, gen_rtx_XOR (mode, x, y));

      rtx pat = gen_rtx_SET (operands[0],
			     gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					     x, const0_rtx));

      /* If we can use addx/subx or addxc, add a clobber for CC.  */
      if (mode == SImode || (code == NE && TARGET_VIS3))
	{
	  rtx clobber
	    = gen_rtx_CLOBBER (VOIDmode,
			       gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
					    SPARC_ICC_REG));
	  pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
	}

      emit_insn (pat);
      return true;
    }

  /* We can do LTU in DImode using the addxc instruction with VIS3.  */
  if (TARGET_ARCH64
      && mode == DImode
      && !((code == LTU || code == GTU) && TARGET_VIS3)
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	  && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU || code == GEU)
    {
      emit_insn (gen_rtx_SET (operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences has been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
3597
3598 /* Emit a conditional jump insn for the v9 architecture using comparison code
3599 CODE and jump target LABEL.
3600 This function exists to take advantage of the v9 brxx insns. */
3601
3602 static void
3603 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3604 {
3605 emit_jump_insn (gen_rtx_SET (pc_rtx,
3606 gen_rtx_IF_THEN_ELSE (VOIDmode,
3607 gen_rtx_fmt_ee (code, GET_MODE (op0),
3608 op0, const0_rtx),
3609 gen_rtx_LABEL_REF (VOIDmode, label),
3610 pc_rtx)));
3611 }
3612
3613 /* Emit a conditional jump insn for the UA2011 architecture using
3614 comparison code CODE and jump target LABEL. This function exists
3615 to take advantage of the UA2011 Compare and Branch insns. */
3616
3617 static void
3618 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3619 {
3620 rtx if_then_else;
3621
3622 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3623 gen_rtx_fmt_ee(code, GET_MODE(op0),
3624 op0, op1),
3625 gen_rtx_LABEL_REF (VOIDmode, label),
3626 pc_rtx);
3627
3628 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3629 }
3630
/* Emit a conditional branch: OPERANDS[0] is the comparison rtx,
   OPERANDS[1] and OPERANDS[2] are the compared values, and OPERANDS[3]
   is the target label.  Picks the cheapest available branch form.  */

void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* A DImode register compared against zero can use the v9 brxx insns.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Fall back to an explicit compare into the CC reg plus a branch.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}
3674
3675
3676 /* Generate a DFmode part of a hard TFmode register.
3677 REG is the TFmode hard register, LOW is 1 for the
3678 low 64bit of the register and 0 otherwise.
3679 */
3680 rtx
3681 gen_df_reg (rtx reg, int low)
3682 {
3683 int regno = REGNO (reg);
3684
3685 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3686 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3687 return gen_rtx_REG (DFmode, regno);
3688 }
3689
/* Generate a call to FUNC_NAME with NARGS operands.  Operand 0 is the return
   value.  Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  /* Lower each TFmode operand to the address it is passed by.  */
  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With a buggy _Qp_* library, the return value (operand 0)
	     must always live in a fresh stack slot.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      /* Constants go to the constant pool; pass its address.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result comes back through the by-reference slot in
	 arg[0], so the call itself returns nothing (VOIDmode).  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      gcc_assert (nargs == 2);

      /* Non-TFmode result is returned by value.  */
      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]),
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3780
3781 /* Expand soft-float TFmode calls to sparc abi routines. */
3782
3783 static void
3784 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3785 {
3786 const char *func;
3787
3788 switch (code)
3789 {
3790 case PLUS:
3791 func = "_Qp_add";
3792 break;
3793 case MINUS:
3794 func = "_Qp_sub";
3795 break;
3796 case MULT:
3797 func = "_Qp_mul";
3798 break;
3799 case DIV:
3800 func = "_Qp_div";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805
3806 emit_soft_tfmode_libcall (func, 3, operands);
3807 }
3808
3809 static void
3810 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3811 {
3812 const char *func;
3813
3814 gcc_assert (code == SQRT);
3815 func = "_Qp_sqrt";
3816
3817 emit_soft_tfmode_libcall (func, 2, operands);
3818 }
3819
/* Expand a soft-float TFmode conversion: select the _Qp_* library
   routine from the conversion CODE and the non-TFmode operand's mode,
   then emit the libcall.  */

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case FLOAT_EXTEND:
      /* Widening: source mode picks the routine.  */
      switch (GET_MODE (operands[1]))
	{
	case E_SFmode:
	  func = "_Qp_stoq";
	  break;
	case E_DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      /* Narrowing: destination mode picks the routine.  */
      switch (GET_MODE (operands[0]))
	{
	case E_SFmode:
	  func = "_Qp_qtos";
	  break;
	case E_DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_itoq";
	  /* The 64-bit ABI passes the int argument widened to DImode.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_uitoq";
	  /* Same widening as above, but zero-extended for unsigned.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoi";
	  break;
	case E_DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoui";
	  break;
	case E_DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
3921
3922 /* Expand a hard-float tfmode operation. All arguments must be in
3923 registers. */
3924
3925 static void
3926 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3927 {
3928 rtx op, dest;
3929
3930 if (GET_RTX_CLASS (code) == RTX_UNARY)
3931 {
3932 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3933 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3934 }
3935 else
3936 {
3937 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3938 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3939 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3940 operands[1], operands[2]);
3941 }
3942
3943 if (register_operand (operands[0], VOIDmode))
3944 dest = operands[0];
3945 else
3946 dest = gen_reg_rtx (GET_MODE (operands[0]));
3947
3948 emit_insn (gen_rtx_SET (dest, op));
3949
3950 if (dest != operands[0])
3951 emit_move_insn (operands[0], dest);
3952 }
3953
3954 void
3955 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3956 {
3957 if (TARGET_HARD_QUAD)
3958 emit_hard_tfmode_operation (code, operands);
3959 else
3960 emit_soft_tfmode_binop (code, operands);
3961 }
3962
3963 void
3964 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3965 {
3966 if (TARGET_HARD_QUAD)
3967 emit_hard_tfmode_operation (code, operands);
3968 else
3969 emit_soft_tfmode_unop (code, operands);
3970 }
3971
3972 void
3973 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3974 {
3975 if (TARGET_HARD_QUAD)
3976 emit_hard_tfmode_operation (code, operands);
3977 else
3978 emit_soft_tfmode_cvt (code, operands);
3979 }
3980
3981 /* Return nonzero if a branch/jump/call instruction will be emitting
3983 nop into its delay slot. */
3984
3985 int
3986 empty_delay_slot (rtx_insn *insn)
3987 {
3988 rtx seq;
3989
3990 /* If no previous instruction (should not happen), return true. */
3991 if (PREV_INSN (insn) == NULL)
3992 return 1;
3993
3994 seq = NEXT_INSN (PREV_INSN (insn));
3995 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3996 return 0;
3997
3998 return 1;
3999 }
4000
4001 /* Return nonzero if we should emit a nop after a cbcond instruction.
4002 The cbcond instruction does not have a delay slot, however there is
4003 a severe performance penalty if a control transfer appears right
4004 after a cbcond. Therefore we emit a nop when we detect this
4005 situation. */
4006
4007 int
4008 emit_cbcond_nop (rtx_insn *insn)
4009 {
4010 rtx next = next_active_insn (insn);
4011
4012 if (!next)
4013 return 1;
4014
4015 if (NONJUMP_INSN_P (next)
4016 && GET_CODE (PATTERN (next)) == SEQUENCE)
4017 next = XVECEXP (PATTERN (next), 0, 0);
4018 else if (CALL_P (next)
4019 && GET_CODE (PATTERN (next)) == PARALLEL)
4020 {
4021 rtx delay = XVECEXP (PATTERN (next), 0, 1);
4022
4023 if (GET_CODE (delay) == RETURN)
4024 {
4025 /* It's a sibling call. Do not emit the nop if we're going
4026 to emit something other than the jump itself as the first
4027 instruction of the sibcall sequence. */
4028 if (sparc_leaf_function_p || TARGET_FLAT)
4029 return 0;
4030 }
4031 }
4032
4033 if (NONJUMP_INSN_P (next))
4034 return 0;
4035
4036 return 1;
4037 }
4038
/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
   instruction.  RETURN_P is true if the v9 variant 'return' is to be
   considered in the test too.

   TRIAL must be a SET whose destination is a REG appropriate for the
   'restore' instruction or, if RETURN_P is true, for the 'return'
   instruction.  */

static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: src << 1 is src + src.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
4126
/* Return nonzero if TRIAL can go into the function return's delay slot.  */

int
eligible_for_return_delay (rtx_insn *trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  /* Insns explicitly marked as unsuitable for a branch delay slot.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      /* Only the V9 'return' instruction can cope with a PARALLEL here;
	 the V8 'restore' cannot.  */
      if (! TARGET_V9)
	return 0;
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  /* Registers 8..23 are the %o and %l registers, which are
	     remapped by the register window switch.  */
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
4192
/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */

int
eligible_for_sibcall_delay (rtx_insn *trial)
{
  rtx pat;

  /* Insns explicitly marked as unsuitable for a branch delay slot.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination must
     be an integer register outside the %o/%l window range (8..23).  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7 (register 15), it can't go in, because sibcall
     will clobber it in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}
4239
4240 /* Determine if it's legal to put X into the constant pool. This
4242 is not possible if X contains the address of a symbol that is
4243 not constant (TLS) or not known at final link time (PIC). */
4244
4245 static bool
4246 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4247 {
4248 switch (GET_CODE (x))
4249 {
4250 case CONST_INT:
4251 case CONST_WIDE_INT:
4252 case CONST_DOUBLE:
4253 case CONST_VECTOR:
4254 /* Accept all non-symbolic constants. */
4255 return false;
4256
4257 case LABEL_REF:
4258 /* Labels are OK iff we are non-PIC. */
4259 return flag_pic != 0;
4260
4261 case SYMBOL_REF:
4262 /* 'Naked' TLS symbol references are never OK,
4263 non-TLS symbols are OK iff we are non-PIC. */
4264 if (SYMBOL_REF_TLS_MODEL (x))
4265 return true;
4266 else
4267 return flag_pic != 0;
4268
4269 case CONST:
4270 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4271 case PLUS:
4272 case MINUS:
4273 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4274 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4275 case UNSPEC:
4276 return true;
4277 default:
4278 gcc_unreachable ();
4279 }
4280 }
4281
/* Global Offset Table support.  */
/* SYMBOL_REF for _GLOBAL_OFFSET_TABLE_, created lazily by sparc_got.  */
static GTY(()) rtx got_symbol_rtx = NULL_RTX;
/* Register used to hold the GOT base address, set by load_got_register.  */
static GTY(()) rtx got_register_rtx = NULL_RTX;
/* SYMBOL_REF for the get-PC helper routine used to compute the GOT base.  */
static GTY(()) rtx got_helper_rtx = NULL_RTX;

/* Set when output code actually references the GOT helper.  */
static GTY(()) bool got_helper_needed = false;
4289
4290 /* Return the SYMBOL_REF for the Global Offset Table. */
4291
4292 static rtx
4293 sparc_got (void)
4294 {
4295 if (!got_symbol_rtx)
4296 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4297
4298 return got_symbol_rtx;
4299 }
4300
/* Output the load_pcrel_sym pattern.  Operand 0 is the destination
   register, operand 1 the PC-relative symbol and operand 2 the routine
   called to obtain the PC value.  */

const char *
output_load_pcrel_sym (rtx *operands)
{
  if (flag_delayed_branch)
    {
      /* The trailing add executes in the delay slot of the call; the
	 -4/+4 biases compensate for the position of each instruction
	 relative to the PC captured by the call.  */
      output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
      output_asm_insn ("call\t%a2", operands);
      output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
    }
  else
    {
      /* Without delayed branches, do both halves of the addition before
	 the call and fill the delay slot with a nop.  */
      output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
      output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
      output_asm_insn ("call\t%a2", operands);
      output_asm_insn (" nop", NULL);
    }

  /* Record that the GOT helper was actually used so that it gets emitted.  */
  if (operands[2] == got_helper_rtx)
    got_helper_needed = true;

  return "";
}
4325
/* Nonzero when the assembler supports hidden visibility (HAVE_GAS_HIDDEN);
   selects the naming scheme for the get-PC helper in load_got_register.  */
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif
4331
/* Emit code to load the GOT register.  */

void
load_got_register (void)
{
  rtx insn;

  if (TARGET_VXWORKS_RTP)
    {
      /* VxWorks RTP has its own dedicated GOT load pattern.  */
      if (!got_register_rtx)
	got_register_rtx = pic_offset_table_rtx;

      insn = gen_vxworks_load_got ();
    }
  else
    {
      if (!got_register_rtx)
	got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.  */
      if (!got_helper_rtx)
	{
	  char name[32];

	  /* Skip the leading '%' as that cannot be used in a symbol name.  */
	  if (USE_HIDDEN_LINKONCE)
	    sprintf (name, "__sparc_get_pc_thunk.%s",
		     reg_names[REGNO (got_register_rtx)] + 1);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
					 REGNO (got_register_rtx));

	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
      const int orig_flag_pic = flag_pic;
      flag_pic = 0;
      insn = gen_load_pcrel_sym (Pmode,
				 got_register_rtx,
				 sparc_got (),
				 got_helper_rtx,
				 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM));
      flag_pic = orig_flag_pic;
    }

  emit_insn (insn);
}
4381
/* Ensure that we are not using patterns that are not OK with PIC.
   I is the index of the operand to check in recog_data.  Always
   returns 1; the check itself is done by assertion.  */

int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      /* With -fpic, a naked SYMBOL_REF operand is invalid; the only
	 symbolic CONST allowed is the (GOT - something) difference
	 form used by the PIC patterns.  */
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* fallthrough */
    case 2:
    default:
      return 1;
    }
}
4404
4405 /* Return true if X is an address which needs a temporary register when
4406 reloaded while generating PIC code. */
4407
4408 int
4409 pic_address_needs_scratch (rtx x)
4410 {
4411 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4412 if (GET_CODE (x) == CONST
4413 && GET_CODE (XEXP (x, 0)) == PLUS
4414 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4415 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4416 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4417 return 1;
4418
4419 return 0;
4420 }
4421
4422 /* Determine if a given RTX is a valid constant. We already know this
4423 satisfies CONSTANT_P. */
4424
4425 static bool
4426 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4427 {
4428 switch (GET_CODE (x))
4429 {
4430 case CONST:
4431 case SYMBOL_REF:
4432 if (sparc_tls_referenced_p (x))
4433 return false;
4434 break;
4435
4436 case CONST_DOUBLE:
4437 /* Floating point constants are generally not ok.
4438 The only exception is 0.0 and all-ones in VIS. */
4439 if (TARGET_VIS
4440 && SCALAR_FLOAT_MODE_P (mode)
4441 && (const_zero_operand (x, mode)
4442 || const_all_ones_operand (x, mode)))
4443 return true;
4444
4445 return false;
4446
4447 case CONST_VECTOR:
4448 /* Vector constants are generally not ok.
4449 The only exception is 0 or -1 in VIS. */
4450 if (TARGET_VIS
4451 && (const_zero_operand (x, mode)
4452 || const_all_ones_operand (x, mode)))
4453 return true;
4454
4455 return false;
4456
4457 default:
4458 break;
4459 }
4460
4461 return true;
4462 }
4463
4464 /* Determine if a given RTX is a valid constant address. */
4465
4466 bool
4467 constant_address_p (rtx x)
4468 {
4469 switch (GET_CODE (x))
4470 {
4471 case LABEL_REF:
4472 case CONST_INT:
4473 case HIGH:
4474 return true;
4475
4476 case CONST:
4477 if (flag_pic && pic_address_needs_scratch (x))
4478 return false;
4479 return sparc_legitimate_constant_p (Pmode, x);
4480
4481 case SYMBOL_REF:
4482 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4483
4484 default:
4485 return false;
4486 }
4487 }
4488
4489 /* Nonzero if the constant value X is a legitimate general operand
4490 when generating PIC code. It is given that flag_pic is on and
4491 that X satisfies CONSTANT_P. */
4492
4493 bool
4494 legitimate_pic_operand_p (rtx x)
4495 {
4496 if (pic_address_needs_scratch (x))
4497 return false;
4498 if (sparc_tls_referenced_p (x))
4499 return false;
4500 return true;
4501 }
4502
4503 /* Return true if X is a representation of the PIC register. */
4504
4505 static bool
4506 sparc_pic_register_p (rtx x)
4507 {
4508 if (!REG_P (x) || !pic_offset_table_rtx)
4509 return false;
4510
4511 if (x == pic_offset_table_rtx)
4512 return true;
4513
4514 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4515 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4516 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4517 return true;
4518
4519 return false;
4520 }
4521
/* True if X is a CONST_INT usable as a 13-bit signed displacement for a
   MODE access; the upper bound is shrunk so the last byte of a multi-word
   access still lies within range.  */
#define RTX_OK_FOR_OFFSET_P(X, MODE)			 \
  (CONST_INT_P (X)					 \
   && INTVAL (X) >= -0x1000				 \
   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))

/* Like RTX_OK_FOR_OFFSET_P but with a tighter 0xc00 upper bound, used for
   the %lo()+offset (OLO10) addressing form.  */
#define RTX_OK_FOR_OLO10_P(X, MODE)			 \
  (CONST_INT_P (X)					 \
   && INTVAL (X) >= -0x1000				 \
   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4531
/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.

   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
   ordinarily.  This changes a bit when generating PIC.  */

static bool
sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  /* RS1/RS2 are the base/index registers, IMM1 the immediate part.  */
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first. */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* Either PIC-reg + constant (with -fpic only) or REG + simm13.  */
      if ((flag_pic == 1
	   && sparc_pic_register_p (rs1)
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address. */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  /* LO_SUM (reg, imm) + small offset: fold into reg + imm form.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;

      /* During reload, accept the HIGH+LO_SUM construct generated by
	 sparc_legitimize_reload_address.  */
      if (reload_in_progress
	  && GET_CODE (rs1) == HIGH
	  && XEXP (rs1, 0) == imm1)
	return 1;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* From here on, validate that RS1 (and RS2 if present) are registers
     usable as base/index, honoring STRICT.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: accept pseudos and the soft frame pointer too.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
4674
/* Return the SYMBOL_REF for the tls_get_addr function.  */

/* Cached SYMBOL_REF for __tls_get_addr, created lazily.  */
static GTY(()) rtx sparc_tls_symbol = NULL_RTX;

static rtx
sparc_tls_get_addr (void)
{
  if (!sparc_tls_symbol)
    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");

  return sparc_tls_symbol;
}
4687
/* Return the Global Offset Table to be used in TLS mode.  */

static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  */
  if (flag_pic)
    {
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return got_register_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}
4711
4712 /* Return true if X contains a thread-local symbol. */
4713
4714 static bool
4715 sparc_tls_referenced_p (rtx x)
4716 {
4717 if (!TARGET_HAVE_TLS)
4718 return false;
4719
4720 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4721 x = XEXP (XEXP (x, 0), 0);
4722
4723 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4724 return true;
4725
4726 /* That's all we handle in sparc_legitimize_tls_address for now. */
4727 return false;
4728 }
4729
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got;
  rtx_insn *insn;

  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    /* Although the various sethi/or sequences generate SImode values, many of
       them can be transformed by the linker when relaxing and, if relaxing to
       local-exec, will become a sethi/xor pair, which is signed and therefore
       a full DImode value in 64-bit mode.  Thus we must use Pmode, lest these
       values be spilled onto the stack in 64-bit mode.  */
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Call __tls_get_addr with a GOT entry for the symbol; %o0
	   (register 8) carries the argument and the result.  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tgd_hi22 (Pmode, temp1, addr));
	emit_insn (gen_tgd_lo10 (Pmode, temp2, temp1, addr));
	emit_insn (gen_tgd_add (Pmode, o0, got, temp2, addr));
	insn = emit_call_insn (gen_tgd_call (Pmode, o0, sparc_tls_get_addr (),
					     addr, const1_rtx));
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* One __tls_get_addr call fetches the module base; the symbol's
	   offset is then added with a tldo sequence.  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tldm_hi22 (Pmode, temp1));
	emit_insn (gen_tldm_lo10 (Pmode, temp2, temp1));
	emit_insn (gen_tldm_add (Pmode, o0, got, temp2));
	insn = emit_call_insn (gen_tldm_call (Pmode, o0, sparc_tls_get_addr (),
					      const1_rtx));
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
	   share the LD_BASE result with other LD model accesses.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	emit_insn (gen_tldo_hix22 (Pmode, temp1, addr));
	emit_insn (gen_tldo_lox10 (Pmode, temp2, temp1, addr));
	emit_insn (gen_tldo_add (Pmode, ret, temp3, temp2, addr));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Load the symbol's TP-relative offset from the GOT and add it
	   to register 7 (%g7, the thread pointer in the SPARC TLS ABI).  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	emit_insn (gen_tie_hi22 (Pmode, temp1, addr));
	emit_insn (gen_tie_lo10 (Pmode, temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	else
	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	if (TARGET_SUN_TLS)
	  {
	    ret = gen_reg_rtx (Pmode);
	    emit_insn (gen_tie_add (Pmode, ret, gen_rtx_REG (Pmode, 7),
				    temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* The offset is a link-time constant: materialize it with
	   sethi/xor and add the thread pointer.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	emit_insn (gen_tle_hix22 (Pmode, temp1, addr));
	emit_insn (gen_tle_lox10 (Pmode, temp2, temp1, addr));
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      /* Legitimize the symbolic part and re-add the constant offset.  */
      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return ret;
}
4850
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      bool gotdata_op = false;
      rtx pic_ref, address;
      rtx_insn *insn;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }

	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  /* GOT-data form: load via the gotdata patterns (-fPIC).  */
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* Plain GOT load: MEM[pic_reg + address].  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already in PIC-reg + offset form: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
	return orig;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both halves of the PLUS, then recombine.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
4965
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */

static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx orig_x = x;

  /* Force MULT and nested PLUS subexpressions into registers so the
     result has the REG+REG shape the hardware supports.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* Handle TLS and PIC references, then fall back to forcing constant
     parts or whole symbolic addresses into registers.  */
  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
5015
/* Delegitimize an address that was legitimized by the above function,
   recovering the original symbol/label from PIC and TLS wrappers.  */

static rtx
sparc_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  /* Strip a LO_SUM down to its constant part.  */
  if (GET_CODE (x) == LO_SUM)
    x = XEXP (x, 1);

  /* Unwrap the UNSPECs used by the PIC and local-exec TLS patterns.  */
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_MOVE_PIC:
      case UNSPEC_TLSLE:
	x = XVECEXP (x, 0, 0);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      case UNSPEC_MOVE_GOTDATA:
	/* The original symbol is operand 2 of the gotdata UNSPEC.  */
	x = XVECEXP (x, 0, 2);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      default:
	break;
      }

  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
  if (GET_CODE (x) == MINUS
      && (XEXP (x, 0) == got_register_rtx
	  || sparc_pic_register_p (XEXP (x, 0))))
    {
      rtx y = XEXP (x, 1);

      if (GET_CODE (y) == LO_SUM)
	y = XEXP (y, 1);

      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
	{
	  x = XVECEXP (y, 0, 0);
	  gcc_assert (GET_CODE (x) == LABEL_REF
		      || (GET_CODE (x) == CONST
			  && GET_CODE (XEXP (x, 0)) == PLUS
			  && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
			  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
	}
    }

  return x;
}
5065
/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.

   For SPARC, we wish to handle addresses by splitting them into
   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
   This cuts the number of extra insns by one.

   Do nothing when generating PIC code and the address is a symbolic
   operand or requires a scratch register.  */

rtx
sparc_legitimize_reload_address (rtx x, machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_code_model <= CM_MEDLOW
      && !(flag_pic
	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      /* Reload the HIGH part into a base register; the LO_SUM stays in
	 the memory reference.  */
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  *win = 0;
  return x;
}
5114
5115 /* Return true if ADDR (a legitimate address expression)
5116 has an effect that depends on the machine mode it is used for.
5117
5118 In PIC mode,
5119
5120 (mem:HI [%l7+a])
5121
5122 is not equivalent to
5123
5124 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5125
5126 because [%l7+a+1] is interpreted as the address of (a+1). */
5127
5128
5129 static bool
5130 sparc_mode_dependent_address_p (const_rtx addr,
5131 addr_space_t as ATTRIBUTE_UNUSED)
5132 {
5133 if (GET_CODE (addr) == PLUS
5134 && sparc_pic_register_p (XEXP (addr, 0))
5135 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5136 return true;
5137
5138 return false;
5139 }
5140
/* Emit a call instruction with the pattern given by PAT.  ADDR is the
   address of the call target.  */

void
sparc_emit_call_insn (rtx pat, rtx addr)
{
  rtx_insn *insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     Record its use when the target is (or may be) a non-local symbol
     that goes through the PLT.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
      crtl->uses_pic_offset_table = 1;
    }
}
5163
/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  */

int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer accesses: account for the stack bias
	     before testing the offset's alignment.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
5244
5245
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  /* Integer classes by size: <4, 4, 8, 16 and 32 bytes (see
     sparc_init_modes).  */
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  /* Floating-point (and vector) classes: 4, 8, 16 and 32 bytes.  */
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  /* Condition-code classes: integer CC and floating-point CC.  */
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
5299
/* Value is 1 if register/mode pair is acceptable on sparc.

   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).

   ??? Note that, despite the settings, non-double-aligned parameter
   registers can hold double-word quantities in 32-bit mode.  */

/* This points to either the 32-bit or the 64-bit version.  */
static const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* Integer registers 0-31 (%g, %o, %l and %i groups).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
5337
static const int hard_64bit_mode_classes[] = {
  /* Integer registers 0-31 (%g, %o, %l and %i groups); every register can
     hold a double-word in 64-bit mode.  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
5362
/* For each machine mode, the sparc_mode_class bit it maps to; filled in
   by sparc_init_modes.  */
static int sparc_mode_class [NUM_MACHINE_MODES];

/* Map from hard register number to register class, read through
   REGNO_REG_CLASS; filled in by sparc_init_modes.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5366
5367 static void
5368 sparc_init_modes (void)
5369 {
5370 int i;
5371
5372 for (i = 0; i < NUM_MACHINE_MODES; i++)
5373 {
5374 machine_mode m = (machine_mode) i;
5375 unsigned int size = GET_MODE_SIZE (m);
5376
5377 switch (GET_MODE_CLASS (m))
5378 {
5379 case MODE_INT:
5380 case MODE_PARTIAL_INT:
5381 case MODE_COMPLEX_INT:
5382 if (size < 4)
5383 sparc_mode_class[i] = 1 << (int) H_MODE;
5384 else if (size == 4)
5385 sparc_mode_class[i] = 1 << (int) S_MODE;
5386 else if (size == 8)
5387 sparc_mode_class[i] = 1 << (int) D_MODE;
5388 else if (size == 16)
5389 sparc_mode_class[i] = 1 << (int) T_MODE;
5390 else if (size == 32)
5391 sparc_mode_class[i] = 1 << (int) O_MODE;
5392 else
5393 sparc_mode_class[i] = 0;
5394 break;
5395 case MODE_VECTOR_INT:
5396 if (size == 4)
5397 sparc_mode_class[i] = 1 << (int) SF_MODE;
5398 else if (size == 8)
5399 sparc_mode_class[i] = 1 << (int) DF_MODE;
5400 else
5401 sparc_mode_class[i] = 0;
5402 break;
5403 case MODE_FLOAT:
5404 case MODE_COMPLEX_FLOAT:
5405 if (size == 4)
5406 sparc_mode_class[i] = 1 << (int) SF_MODE;
5407 else if (size == 8)
5408 sparc_mode_class[i] = 1 << (int) DF_MODE;
5409 else if (size == 16)
5410 sparc_mode_class[i] = 1 << (int) TF_MODE;
5411 else if (size == 32)
5412 sparc_mode_class[i] = 1 << (int) OF_MODE;
5413 else
5414 sparc_mode_class[i] = 0;
5415 break;
5416 case MODE_CC:
5417 if (m == CCFPmode || m == CCFPEmode)
5418 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5419 else
5420 sparc_mode_class[i] = 1 << (int) CC_MODE;
5421 break;
5422 default:
5423 sparc_mode_class[i] = 0;
5424 break;
5425 }
5426 }
5427
5428 if (TARGET_ARCH64)
5429 hard_regno_mode_classes = hard_64bit_mode_classes;
5430 else
5431 hard_regno_mode_classes = hard_32bit_mode_classes;
5432
5433 /* Initialize the array used by REGNO_REG_CLASS. */
5434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5435 {
5436 if (i < 16 && TARGET_V8PLUS)
5437 sparc_regno_reg_class[i] = I64_REGS;
5438 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5439 sparc_regno_reg_class[i] = GENERAL_REGS;
5440 else if (i < 64)
5441 sparc_regno_reg_class[i] = FP_REGS;
5442 else if (i < 96)
5443 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5444 else if (i < 100)
5445 sparc_regno_reg_class[i] = FPCC_REGS;
5446 else
5447 sparc_regno_reg_class[i] = NO_REGS;
5448 }
5449 }
5450
5451 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5453
5454 static inline bool
5455 save_global_or_fp_reg_p (unsigned int regno,
5456 int leaf_function ATTRIBUTE_UNUSED)
5457 {
5458 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5459 }
5460
5461 /* Return whether the return address register (%i7) is needed. */
5462
5463 static inline bool
5464 return_addr_reg_needed_p (int leaf_function)
5465 {
5466 /* If it is live, for example because of __builtin_return_address (0). */
5467 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5468 return true;
5469
5470 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5471 if (!leaf_function
5472 /* Loading the GOT register clobbers %o7. */
5473 || crtl->uses_pic_offset_table
5474 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5475 return true;
5476
5477 return false;
5478 }
5479
5480 /* Return whether REGNO, a local or in register, must be saved/restored. */
5481
5482 static bool
5483 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5484 {
5485 /* General case: call-saved registers live at some point. */
5486 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5487 return true;
5488
5489 /* Frame pointer register (%fp) if needed. */
5490 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5491 return true;
5492
5493 /* Return address register (%i7) if needed. */
5494 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5495 return true;
5496
5497 /* GOT register (%l7) if needed. */
5498 if (got_register_rtx && regno == REGNO (got_register_rtx))
5499 return true;
5500
5501 /* If the function accesses prior frames, the frame pointer and the return
5502 address of the previous frame must be saved on the stack. */
5503 if (crtl->accesses_prior_frames
5504 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5505 return true;
5506
5507 return false;
5508 }
5509
/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by sparc_expand_prologue.  SIZE is the
   size of the local variable area and LEAF_FUNCTION is non-zero for a leaf
   function.  Return the total frame size and record it, along with related
   data, in the sparc_* globals used by the prologue/epilogue expanders.  */

static HOST_WIDE_INT
sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
{
  HOST_WIDE_INT frame_size, apparent_frame_size;
  int args_size, n_global_fp_regs = 0;
  bool save_local_in_regs_p = false;
  unsigned int i;

  /* If the function allocates dynamic stack space, the dynamic offset is
     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
  if (leaf_function && !cfun->calls_alloca)
    args_size = 0;
  else
    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);

  /* Calculate space needed for global registers.  */
  if (TARGET_ARCH64)
    {
      /* Each live register needs an 8-byte (two-word) slot.  */
      for (i = 0; i < 8; i++)
	if (save_global_or_fp_reg_p (i, 0))
	  n_global_fp_regs += 2;
    }
  else
    {
      /* Registers are saved in even/odd pairs to keep the save area
	 double-word aligned.  */
      for (i = 0; i < 8; i += 2)
	if (save_global_or_fp_reg_p (i, 0)
	    || save_global_or_fp_reg_p (i + 1, 0))
	  n_global_fp_regs += 2;
    }

  /* In the flat window model, find out which local and in registers need to
     be saved.  We don't reserve space in the current frame for them as they
     will be spilled into the register window save area of the caller's frame.
     However, as soon as we use this register window save area, we must create
     that of the current frame to make it the live one.  */
  if (TARGET_FLAT)
    for (i = 16; i < 32; i++)
      if (save_local_or_in_reg_p (i, leaf_function))
	{
	  save_local_in_regs_p = true;
	  break;
	}

  /* Calculate space needed for FP registers.  */
  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
      n_global_fp_regs += 2;

  if (size == 0
      && n_global_fp_regs == 0
      && args_size == 0
      && !save_local_in_regs_p)
    frame_size = apparent_frame_size = 0;
  else
    {
      /* Start from the apparent frame size.  */
      apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;

      /* We need to add the size of the outgoing argument area.  */
      frame_size = apparent_frame_size + ROUND_UP (args_size, 8);

      /* And that of the register window save area.  */
      frame_size += FIRST_PARM_OFFSET (cfun->decl);

      /* Finally, bump to the appropriate alignment.  */
      frame_size = SPARC_STACK_ALIGN (frame_size);
    }

  /* Set up values for use in prologue and epilogue.  */
  sparc_frame_size = frame_size;
  sparc_apparent_frame_size = apparent_frame_size;
  sparc_n_global_fp_regs = n_global_fp_regs;
  sparc_save_local_in_regs_p = save_local_in_regs_p;

  return frame_size;
}
5589
5590 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5591
5592 int
5593 sparc_initial_elimination_offset (int to)
5594 {
5595 int offset;
5596
5597 if (to == STACK_POINTER_REGNUM)
5598 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5599 else
5600 offset = 0;
5601
5602 offset += SPARC_STACK_BIAS;
5603 return offset;
5604 }
5605
5606 /* Output any necessary .register pseudo-ops. */
5607
5608 void
5609 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5610 {
5611 int i;
5612
5613 if (TARGET_ARCH32)
5614 return;
5615
5616 /* Check if %g[2367] were used without
5617 .register being printed for them already. */
5618 for (i = 2; i < 8; i++)
5619 {
5620 if (df_regs_ever_live_p (i)
5621 && ! sparc_hard_reg_printed [i])
5622 {
5623 sparc_hard_reg_printed [i] = 1;
5624 /* %g7 is used as TLS base register, use #ignore
5625 for it instead of #scratch. */
5626 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5627 i == 7 ? "ignore" : "scratch");
5628 }
5629 if (i == 3) i = 5;
5630 }
5631 }
5632
/* Interval between consecutive stack-probe addresses, derived from the
   target-independent STACK_CHECK_PROBE_INTERVAL_EXP setting.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* The probes use immediate displacements (see output_probe_stack_range),
   so the interval must stay within their reach — presumably the 13-bit
   signed immediate field; confirm against the SPARC ISA.  */
#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
5638
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   Note that we don't use the REG+REG addressing mode for the probes because
   of the stack bias in 64-bit mode.  And it doesn't really buy us anything
   so the advantages of having a single code win here.  */

static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 is used as the scratch address register throughout.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      /* A single probe at SP - (FIRST + SIZE) suffices.  */
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 9 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* First probe at FIRST + PROBE_INTERVAL below SP.  */
      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* %g4 holds the loop bound (LAST_ADDR below).  */
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (Pmode, g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
5739
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  Emit the probe loop as text and return the empty
   string so the caller's insn template outputs nothing further.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("add\t%0, %1, %0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Probe at TEST_ADDR and branch.  */
  if (TARGET_ARCH64)
    fputs ("\tbne,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  /* The store is emitted after the branch — presumably it fills the
     branch delay slot (hence the leading space in the template); the
     stack bias is folded into the displacement.  */
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  return "";
}
5776
/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
   is the action to be performed if SAVE_P returns true and ACTION_FALSE
   the action to be performed if it returns false.  Return the new offset.  */

/* Predicate deciding whether a given register must be saved/restored;
   takes the register number and the leaf-function flag.  */
typedef bool (*sorr_pred_t) (unsigned int, int);
/* Actions performed per register slot (see emit_save_or_restore_regs).  */
typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;

static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem;
  rtx_insn *insn;

  /* In 64-bit mode, integer registers (regnos below 32) are handled one
     at a time with 8-byte slots.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      /* Deferred offset of the frame pointer slot, -1 if none.  */
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      /* Now restore the frame pointer, if its slot was seen above.  */
      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      /* Otherwise process registers in even/odd pairs, using a double-word
	 move when both members of a pair need saving/restoring and a
	 single-word move when only one does.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i + 1;
	      /* The odd register lives in the upper half of the slot.  */
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Describe a DImode save as two SImode stores for the
		     unwind info, since the hardware slot is word-based.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Bump and round down to double word
	     in case we already bumped by 4.  */
	  offset = ROUND_DOWN (offset + 8, 8);
	}
    }

  return offset;
}
5898
5899 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5900
5901 static rtx
5902 emit_adjust_base_to_offset (rtx base, int offset)
5903 {
5904 /* ??? This might be optimized a little as %g1 might already have a
5905 value close enough that a single add insn will do. */
5906 /* ??? Although, all of this is probably only a temporary fix because
5907 if %g1 can hold a function result, then sparc_expand_epilogue will
5908 lose (the result will be clobbered). */
5909 rtx new_base = gen_rtx_REG (Pmode, 1);
5910 emit_move_insn (new_base, GEN_INT (offset));
5911 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5912 return new_base;
5913 }
5914
5915 /* Emit code to save/restore call-saved global and FP registers. */
5916
5917 static void
5918 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5919 {
5920 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5921 {
5922 base = emit_adjust_base_to_offset (base, offset);
5923 offset = 0;
5924 }
5925
5926 offset
5927 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5928 save_global_or_fp_reg_p, action, SORR_NONE);
5929 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5930 save_global_or_fp_reg_p, action, SORR_NONE);
5931 }
5932
5933 /* Emit code to save/restore call-saved local and in registers. */
5934
5935 static void
5936 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5937 {
5938 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5939 {
5940 base = emit_adjust_base_to_offset (base, offset);
5941 offset = 0;
5942 }
5943
5944 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5945 save_local_or_in_reg_p, action, SORR_ADVANCE);
5946 }
5947
/* Emit a window_save insn, annotated with the CFI notes that describe its
   effect on the unwind state.  Return the insn.  */

static rtx_insn *
emit_window_save (rtx increment)
{
  rtx_insn *insn = emit_insn (gen_window_save (increment));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
			     gen_rtx_REG (Pmode,
					  INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.  */
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
		plus_constant (Pmode, hard_frame_pointer_rtx,
			       INCOMING_FRAME_SP_OFFSET));

  return insn;
}
5972
5973 /* Generate an increment for the stack pointer. */
5974
5975 static rtx
5976 gen_stack_pointer_inc (rtx increment)
5977 {
5978 return gen_rtx_SET (stack_pointer_rtx,
5979 gen_rtx_PLUS (Pmode,
5980 stack_pointer_rtx,
5981 increment));
5982 }
5983
/* Expand the function prologue.  The prologue is responsible for reserving
   storage for the frame, saving the call-saved registers and loading the
   GOT register if needed.  */

void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* A leaf function without alloca only needs to probe beyond
	     the statically protected area.  */
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* Leaf function: decrement %sp directly, no register window.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  /* Split the decrement in two immediate-sized steps.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* The amount does not fit an immediate: go through %g1.  */
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure no memory access is done before the frame is established.  */
      emit_insn (gen_frame_blockage ());
    }
  else
    {
      /* Non-leaf function: allocate the frame via a window save.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Likewise.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  /* Record the base register and offset the epilogue and the register
     save/restore code will use to address the frame.  */
  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
6111
/* Likewise for the flat (single register window) model.  The prologue is
   responsible for reserving storage for the frame, saving the call-saved
   registers — including, explicitly, the local/in registers and the
   incoming return address — and loading the GOT register if needed.  */

void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* A leaf function without alloca only needs to probe beyond
	     the statically protected area.  */
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  /* Split the decrement in two immediate-sized steps.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* The amount does not fit an immediate: go through %g1.  */
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure no memory access is done before the frame is established.  */
      emit_insn (gen_frame_blockage ());

      if (frame_pointer_needed)
	{
	  /* Set up %fp = %sp + size, with the matching CFA note.  */
	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  /* Save the incoming return address (%o7) into %i7 by hand,
	     since there is no register window to do it for us.  */
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  /* Record the base register and offset the epilogue and the register
     save/restore code will use to address the frame.  */
  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
6229
6230 /* This function generates the assembly code for function entry, which boils
6231 down to emitting the necessary .register directives. */
6232
6233 static void
6234 sparc_asm_function_prologue (FILE *file)
6235 {
6236 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6237 if (!TARGET_FLAT)
6238 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6239
6240 sparc_output_scratch_registers (file);
6241 }
6242
6243 /* Expand the function epilogue, either normal or part of a sibcall.
6244 We emit all the instructions except the return or the call. */
6245
6246 void
6247 sparc_expand_epilogue (bool for_eh)
6248 {
6249 HOST_WIDE_INT size = sparc_frame_size;
6250
6251 if (cfun->calls_alloca)
6252 emit_insn (gen_frame_blockage ());
6253
6254 if (sparc_n_global_fp_regs > 0)
6255 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6256 sparc_frame_base_offset
6257 - sparc_apparent_frame_size,
6258 SORR_RESTORE);
6259
6260 if (size == 0 || for_eh)
6261 ; /* do nothing. */
6262 else if (sparc_leaf_function_p)
6263 {
6264 /* Ensure no memory access is done after the frame is destroyed. */
6265 emit_insn (gen_frame_blockage ());
6266
6267 if (size <= 4096)
6268 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6269 else if (size <= 8192)
6270 {
6271 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6272 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6273 }
6274 else
6275 {
6276 rtx reg = gen_rtx_REG (Pmode, 1);
6277 emit_move_insn (reg, GEN_INT (size));
6278 emit_insn (gen_stack_pointer_inc (reg));
6279 }
6280 }
6281 }
6282
6283 /* Expand the function epilogue, either normal or part of a sibcall.
6284 We emit all the instructions except the return or the call. */
6285
6286 void
6287 sparc_flat_expand_epilogue (bool for_eh)
6288 {
6289 HOST_WIDE_INT size = sparc_frame_size;
6290
6291 if (sparc_n_global_fp_regs > 0)
6292 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6293 sparc_frame_base_offset
6294 - sparc_apparent_frame_size,
6295 SORR_RESTORE);
6296
6297 /* If we have a frame pointer, we'll need both to restore it before the
6298 frame is destroyed and use its current value in destroying the frame.
6299 Since we don't have an atomic way to do that in the flat window model,
6300 we save the current value into a temporary register (%g1). */
6301 if (frame_pointer_needed && !for_eh)
6302 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6303
6304 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6305 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6306 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6307
6308 if (sparc_save_local_in_regs_p)
6309 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6310 sparc_frame_base_offset,
6311 SORR_RESTORE);
6312
6313 if (size == 0 || for_eh)
6314 ; /* do nothing. */
6315 else if (frame_pointer_needed)
6316 {
6317 /* Ensure no memory access is done after the frame is destroyed. */
6318 emit_insn (gen_frame_blockage ());
6319
6320 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6321 }
6322 else
6323 {
6324 /* Likewise. */
6325 emit_insn (gen_frame_blockage ());
6326
6327 if (size <= 4096)
6328 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6329 else if (size <= 8192)
6330 {
6331 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6332 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6333 }
6334 else
6335 {
6336 rtx reg = gen_rtx_REG (Pmode, 1);
6337 emit_move_insn (reg, GEN_INT (size));
6338 emit_insn (gen_stack_pointer_inc (reg));
6339 }
6340 }
6341 }
6342
6343 /* Return true if it is appropriate to emit `return' instructions in the
6344 body of a function. */
6345
6346 bool
6347 sparc_can_use_return_insn_p (void)
6348 {
6349 return sparc_prologue_data_valid_p
6350 && sparc_n_global_fp_regs == 0
6351 && TARGET_FLAT
6352 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6353 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6354 }
6355
6356 /* This function generates the assembly code for function exit. */
6357
6358 static void
6359 sparc_asm_function_epilogue (FILE *file)
6360 {
6361 /* If the last two instructions of a function are "call foo; dslot;"
6362 the return address might point to the first instruction in the next
6363 function and we have to output a dummy nop for the sake of sane
6364 backtraces in such cases. This is pointless for sibling calls since
6365 the return address is explicitly adjusted. */
6366
6367 rtx_insn *insn = get_last_insn ();
6368
6369 rtx last_real_insn = prev_real_insn (insn);
6370 if (last_real_insn
6371 && NONJUMP_INSN_P (last_real_insn)
6372 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6373 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6374
6375 if (last_real_insn
6376 && CALL_P (last_real_insn)
6377 && !SIBLING_CALL_P (last_real_insn))
6378 fputs("\tnop\n", file);
6379
6380 sparc_output_deferred_case_vectors ();
6381 }
6382
6383 /* Output a 'restore' instruction. */
6384
6385 static void
6386 output_restore (rtx pat)
6387 {
6388 rtx operands[3];
6389
6390 if (! pat)
6391 {
6392 fputs ("\t restore\n", asm_out_file);
6393 return;
6394 }
6395
6396 gcc_assert (GET_CODE (pat) == SET);
6397
6398 operands[0] = SET_DEST (pat);
6399 pat = SET_SRC (pat);
6400
6401 switch (GET_CODE (pat))
6402 {
6403 case PLUS:
6404 operands[1] = XEXP (pat, 0);
6405 operands[2] = XEXP (pat, 1);
6406 output_asm_insn (" restore %r1, %2, %Y0", operands);
6407 break;
6408 case LO_SUM:
6409 operands[1] = XEXP (pat, 0);
6410 operands[2] = XEXP (pat, 1);
6411 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6412 break;
6413 case ASHIFT:
6414 operands[1] = XEXP (pat, 0);
6415 gcc_assert (XEXP (pat, 1) == const1_rtx);
6416 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6417 break;
6418 default:
6419 operands[1] = pat;
6420 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6421 break;
6422 }
6423 }
6424
/* Output a return.

   INSN is the return insn being processed.  Return either an assembly
   template for final to output, or "" when the assembly has already
   been emitted here directly.  */

const char *
output_return (rtx_insn *insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* The stack adjustment goes in the delay slot; %g1 presumably
	     holds the EH stack adjustment -- see the eh_return
	     machinery.  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  /* No delay slots: adjust the stack before jumping and pad the
	     jump's delay slot with a nop.  */
	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* If the delay-slot insn can be renumbered to refer to the
	     output registers (epilogue_renumber with its second argument
	     nonzero appears to only test feasibility; with zero it
	     performs the renumbering -- confirm against its definition),
	     emit a V9 'return' and keep the renumbered insn in its delay
	     slot.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      output_asm_insn ("jmp\t%%i7+%)", NULL);

	      /* We're going to output the insn in the delay slot manually.
		 Make sure to output its source location first.  */
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	      final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	      INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	      /* Fold the pending insn into the 'restore'.  */
	      output_restore (pat);
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
6518
/* Output a sibling call.

   INSN is the sibcall insn and CALL_OPERAND the call target.  Return ""
   since all assembly is emitted here directly.  */

const char *
output_sibcall (rtx_insn *insn, rtx call_operand)
{
  rtx operands[1];

  /* The sequences below rely on delay slots being available.  */
  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).  */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* We're going to output the insn in the delay slot manually.
	     Make sure to output its source location first.  */
	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	  final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	  INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	  /* Fold the pending insn into the 'restore'.  */
	  output_restore (pat);
	}
      else
	/* Empty delay slot: emit a plain 'restore'.  */
	output_restore (NULL_RTX);
    }

  return "";
}
6580
6581 /* Functions for handling argument passing.
6583
6584 For 32-bit, the first 6 args are normally in registers and the rest are
6585 pushed. Any arg that starts within the first 6 words is at least
6586 partially passed in a register unless its data type forbids.
6587
6588 For 64-bit, the argument registers are laid out as an array of 16 elements
6589 and arguments are added sequentially. The first 6 int args and up to the
6590 first 16 fp args (depending on size) are passed in regs.
6591
6592 Slot Stack Integral Float Float in structure Double Long Double
6593 ---- ----- -------- ----- ------------------ ------ -----------
6594 15 [SP+248] %f31 %f30,%f31 %d30
6595 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6596 13 [SP+232] %f27 %f26,%f27 %d26
6597 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6598 11 [SP+216] %f23 %f22,%f23 %d22
6599 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6600 9 [SP+200] %f19 %f18,%f19 %d18
6601 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6602 7 [SP+184] %f15 %f14,%f15 %d14
6603 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6604 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6605 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6606 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6607 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6608 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6609 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6610
6611 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6612
6613 Integral arguments are always passed as 64-bit quantities appropriately
6614 extended.
6615
6616 Passing of floating point values is handled as follows.
6617 If a prototype is in scope:
6618 If the value is in a named argument (i.e. not a stdarg function or a
6619 value not part of the `...') then the value is passed in the appropriate
6620 fp reg.
6621 If the value is part of the `...' and is passed in one of the first 6
6622 slots then the value is passed in the appropriate int reg.
6623 If the value is part of the `...' and is not passed in one of the first 6
6624 slots then the value is passed in memory.
6625 If a prototype is not in scope:
6626 If the value is one of the first 6 arguments the value is passed in the
6627 appropriate integer reg and the appropriate fp reg.
6628 If the value is not one of the first 6 arguments the value is passed in
6629 the appropriate fp reg and in memory.
6630
6631
6632 Summary of the calling conventions implemented by GCC on the SPARC:
6633
6634 32-bit ABI:
6635 size argument return value
6636
6637 small integer <4 int. reg. int. reg.
6638 word 4 int. reg. int. reg.
6639 double word 8 int. reg. int. reg.
6640
6641 _Complex small integer <8 int. reg. int. reg.
6642 _Complex word 8 int. reg. int. reg.
6643 _Complex double word 16 memory int. reg.
6644
6645 vector integer <=8 int. reg. FP reg.
6646 vector integer >8 memory memory
6647
6648 float 4 int. reg. FP reg.
6649 double 8 int. reg. FP reg.
6650 long double 16 memory memory
6651
6652 _Complex float 8 memory FP reg.
6653 _Complex double 16 memory FP reg.
6654 _Complex long double 32 memory FP reg.
6655
6656 vector float any memory memory
6657
6658 aggregate any memory memory
6659
6660
6661
6662 64-bit ABI:
6663 size argument return value
6664
6665 small integer <8 int. reg. int. reg.
6666 word 8 int. reg. int. reg.
6667 double word 16 int. reg. int. reg.
6668
6669 _Complex small integer <16 int. reg. int. reg.
6670 _Complex word 16 int. reg. int. reg.
6671 _Complex double word 32 memory int. reg.
6672
6673 vector integer <=16 FP reg. FP reg.
6674 vector integer 16<s<=32 memory FP reg.
6675 vector integer >32 memory memory
6676
6677 float 4 FP reg. FP reg.
6678 double 8 FP reg. FP reg.
6679 long double 16 FP reg. FP reg.
6680
6681 _Complex float 8 FP reg. FP reg.
6682 _Complex double 16 FP reg. FP reg.
6683 _Complex long double 32 memory FP reg.
6684
6685 vector float <=16 FP reg. FP reg.
6686 vector float 16<s<=32 memory FP reg.
6687 vector float >32 memory memory
6688
6689 aggregate <=16 reg. reg.
6690 aggregate 16<s<=32 memory reg.
6691 aggregate >32 memory memory
6692
6693
6694
6695 Note #1: complex floating-point types follow the extended SPARC ABIs as
6696 implemented by the Sun compiler.
6697
6698 Note #2: integer vector types follow the scalar floating-point types
6699 conventions to match what is implemented by the Sun VIS SDK.
6700
6701 Note #3: floating-point vector types follow the aggregate types
6702 conventions. */
6703
6704
6705 /* Maximum number of int regs for args. */
6706 #define SPARC_INT_ARG_MAX 6
6707 /* Maximum number of fp regs for args. */
6708 #define SPARC_FP_ARG_MAX 16
6709 /* Number of words (partially) occupied for a given size in units. */
6710 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6711
6712 /* Handle the INIT_CUMULATIVE_ARGS macro.
6713 Initialize a variable CUM of type CUMULATIVE_ARGS
6714 for a call to a function whose data type is FNTYPE.
6715 For a library call, FNTYPE is 0. */
6716
6717 void
6718 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6719 {
6720 cum->words = 0;
6721 cum->prototype_p = fntype && prototype_p (fntype);
6722 cum->libcall_p = !fntype;
6723 }
6724
6725 /* Handle promotion of pointer and integer arguments. */
6726
6727 static machine_mode
6728 sparc_promote_function_mode (const_tree type, machine_mode mode,
6729 int *punsignedp, const_tree, int)
6730 {
6731 if (type && POINTER_TYPE_P (type))
6732 {
6733 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6734 return Pmode;
6735 }
6736
6737 /* Integral arguments are passed as full words, as per the ABI. */
6738 if (GET_MODE_CLASS (mode) == MODE_INT
6739 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6740 return word_mode;
6741
6742 return mode;
6743 }
6744
6745 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6746
6747 static bool
6748 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6749 {
6750 return TARGET_ARCH64 ? true : false;
6751 }
6752
/* Handle the TARGET_PASS_BY_REFERENCE target hook.
   Specify whether to pass the argument by reference.
   ARG.type may be null (presumably for libcalls -- see the similar
   convention in function_arg_slotno), hence the `type &&' guards.  */

static bool
sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  tree type = arg.type;
  machine_mode mode = arg.mode;
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are passed by reference.
       All other base types are passed in registers.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are passed by reference.  Pass complex integers
       in registers up to 8 bytes.  More generally, enforce the 2-word
       cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that integer
       vectors are passed like floats of the same size, that is in
       registers up to 8 bytes.  Pass all vector floats by reference
       like structure and unions.  */
    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	    || mode == SCmode
	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 8
	    || (type
		&& VECTOR_TYPE_P (type)
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 16 bytes are passed in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that
       complex floats are passed in registers up to 16 bytes.  Pass
       all complex integers in registers up to 16 bytes.  More generally,
       enforce the 2-word cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that integer
       vectors are passed like floats of the same size, that is in
       registers (up to 16 bytes).  Pass all vector floats like structure
       and unions.  */
    return ((type
	     && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
	    /* Catch CTImode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 16);
}
6802
/* Traverse the record TYPE recursively and call FUNC on its fields.
   NAMED is true if this is for a named parameter.  DATA is passed
   to FUNC for each field.  OFFSET is the starting position and
   PACKED is true if we are inside a packed record.

   FUNC is invoked as FUNC (field, bitpos, fp, data) for every FIELD_DECL
   of nonzero size, where FP says whether the field is eligible for FP
   registers.  Nested records are flattened by recursion.  */

template <typename T, void Func (const_tree, int, bool, T*)>
static void
traverse_record_type (const_tree type, bool named, T *data,
		      int offset = 0, bool packed = false)
{
  /* The ABI obviously doesn't specify how packed structures are passed.
     These are passed in integer regs if possible, otherwise memory.  */
  if (!packed)
    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed = true;
	  break;
	}

  /* Walk the real fields, but skip those with no size or a zero size.
     ??? Fields with variable offset are handled as having zero offset.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      {
	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
	  continue;

	int bitpos = offset;
	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
	  bitpos += int_bit_position (field);

	tree field_type = TREE_TYPE (field);
	if (TREE_CODE (field_type) == RECORD_TYPE)
	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
					 packed);
	else
	  {
	    /* A field is FP-eligible only when it has a float or vector
	       type and belongs to a named, non-packed argument with the
	       FPU available.  */
	    const bool fp_type
	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
		  data);
	  }
      }
}
6848
/* Handle recursive register classifying for structure layout.
   Cookie handed by function_arg_slotno to traverse_record_type with the
   classify_registers callback.  */

typedef struct
{
  bool fp_regs;		    /* true if field eligible to FP registers.  */
  bool fp_regs_in_first_word;  /* true if such field in first word.  */
} classify_data_t;
6856
6857 /* A subroutine of function_arg_slotno. Classify the field. */
6858
6859 inline void
6860 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6861 {
6862 if (fp)
6863 {
6864 data->fp_regs = true;
6865 if (bitpos < BITS_PER_WORD)
6866 data->fp_regs_in_first_word = true;
6867 }
6868 }
6869
/* Compute the slot number to pass an argument in.
   Return the slot number or -1 if passing on the stack.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
   *PREGNO records the register number to use if scalar type.
   *PPADDING records the amount of padding needed in words.  */

static int
function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
		     const_tree type, bool named, bool incoming,
		     int *pregno, int *ppadding)
{
  const int regbase
    = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
  int slotno = cum->words, regno;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  /* Silence warnings in the callers.  */
  *pregno = -1;
  *ppadding = -1;

  /* Addressable types must live in memory.  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* In 64-bit mode, objects requiring 16-byte alignment get it.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    {
      /* Skip the odd slot and report one word of padding.  */
      slotno++;
      *ppadding = 1;
    }
  else
    *ppadding = 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && VECTOR_TYPE_P (type))
    {
      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	{
	  /* The SPARC port defines no floating-point vector modes.  */
	  gcc_assert (mode == BLKmode);
	}
      else
	{
	  /* Integer vector types should either have a vector
	     mode or an integral mode, because we are guaranteed
	     by pass_by_reference that their size is not greater
	     than 16 bytes and TImode is 16-byte wide.  */
	  gcc_assert (mode != BLKmode);

	  /* Integer vectors are handled like floats as per
	     the Sun VIS SDK.  */
	  mclass = MODE_FLOAT;
	}
    }

  switch (mclass)
    {
    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_INT:
      if (TARGET_ARCH64 && TARGET_FPU && named)
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	  /* Arguments filling only one single FP register are
	     right-justified in the outer double FP register.  */
	  if (GET_MODE_SIZE (mode) <= 4)
	    regno++;
	  break;
	}
      /* fallthrough */

    case MODE_INT:
    case MODE_COMPLEX_INT:
      /* If all arg slots are filled, then must pass on stack.  */
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;

      regno = regbase + slotno;
      break;

    case MODE_RANDOM:
      /* MODE is VOIDmode when generating the actual call.  */
      if (mode == VOIDmode)
	return -1;

      if (TARGET_64BIT && TARGET_FPU && named
	  && type
	  && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  if (TREE_CODE (type) == RECORD_TYPE)
	    {
	      /* Walk the record to see whether any field is eligible
		 for FP registers.  */
	      classify_data_t data = { false, false };
	      traverse_record_type<classify_data_t, classify_registers>
		(type, named, &data);

	      if (data.fp_regs)
		{
		  /* If all FP slots are filled except for the last one and
		     there is no FP field in the first word, then must pass
		     on stack.  */
		  if (slotno >= SPARC_FP_ARG_MAX - 1
		      && !data.fp_regs_in_first_word)
		    return -1;
		}
	      else
		{
		  /* If all int slots are filled, then must pass on stack.  */
		  if (slotno >= SPARC_INT_ARG_MAX)
		    return -1;
		}

	      /* PREGNO isn't set since both int and FP regs can be used.  */
	      return slotno;
	    }

	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	}
      else
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;

	  regno = regbase + slotno;
	}
      break;

    default :
      gcc_unreachable ();
    }

  *pregno = regno;
  return slotno;
}
7023
/* Handle recursive register counting/assigning for structure layout.
   Cookie shared by the count_registers and assign_registers callbacks
   used with traverse_record_type.  */

typedef struct
{
  int slotno;		/* slot number of the argument.  */
  int regbase;		/* regno of the base register.  */
  int intoffset;	/* offset of the first pending integer field,
			   or -1 when no integer run is pending.  */
  int nregs;		/* number of words passed in registers.  */
  bool stack;		/* true if part of the argument is on the stack.  */
  rtx ret;		/* return expression being built.  */
} assign_data_t;
7035
7036 /* A subroutine of function_arg_record_value. Compute the number of integer
7037 registers to be assigned between PARMS->intoffset and BITPOS. Return
7038 true if at least one integer register is assigned or false otherwise. */
7039
7040 static bool
7041 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7042 {
7043 if (data->intoffset < 0)
7044 return false;
7045
7046 const int intoffset = data->intoffset;
7047 data->intoffset = -1;
7048
7049 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7050 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7051 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7052 int nregs = (endbit - startbit) / BITS_PER_WORD;
7053
7054 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7055 {
7056 nregs = SPARC_INT_ARG_MAX - this_slotno;
7057
7058 /* We need to pass this field (partly) on the stack. */
7059 data->stack = 1;
7060 }
7061
7062 if (nregs <= 0)
7063 return false;
7064
7065 *pnregs = nregs;
7066 return true;
7067 }
7068
/* A subroutine of function_arg_record_value.  Compute the number and the mode
   of the FP registers to be assigned for FIELD.  Return true if at least one
   FP register is assigned or false otherwise.  */

static bool
compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
		   int *pnregs, machine_mode *pmode)
{
  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  machine_mode mode = DECL_MODE (field);
  int nregs, nslots;

  /* Slots are counted as words while regs are counted as having the size of
     the (inner) mode.  */
  if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
    {
      /* BLKmode vector: one register per element, in the element mode.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
    }
  else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
    {
      /* Complex value: one register per part, in the component mode.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = 2;
    }
  else
    nregs = 1;

  nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));

  if (nslots > SPARC_FP_ARG_MAX - this_slotno)
    {
      /* Clamp to the remaining FP argument slots.  */
      nslots = SPARC_FP_ARG_MAX - this_slotno;
      nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);

      /* We need to pass this field (partly) on the stack.  */
      data->stack = 1;

      if (nregs <= 0)
	return false;
    }

  *pnregs = nregs;
  *pmode = mode;
  return true;
}
7114
7115 /* A subroutine of function_arg_record_value. Count the number of registers
7116 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7117
7118 inline void
7119 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7120 {
7121 if (fp)
7122 {
7123 int nregs;
7124 machine_mode mode;
7125
7126 if (compute_int_layout (bitpos, data, &nregs))
7127 data->nregs += nregs;
7128
7129 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7130 data->nregs += nregs;
7131 }
7132 else
7133 {
7134 if (data->intoffset < 0)
7135 data->intoffset = bitpos;
7136 }
7137 }
7138
/* A subroutine of function_arg_record_value.  Assign the bits of the
   structure between PARMS->intoffset and BITPOS to integer registers.  */

static void
assign_int_registers (int bitpos, assign_data_t *data)
{
  int intoffset = data->intoffset;
  machine_mode mode;
  int nregs;

  if (!compute_int_layout (bitpos, data, &nregs))
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */
  if (intoffset % BITS_PER_WORD != 0)
    mode = smallest_int_mode_for_size (BITS_PER_WORD
				       - intoffset % BITS_PER_WORD);
  else
    mode = word_mode;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  unsigned int regno = data->regbase + this_slotno;
  intoffset /= BITS_PER_UNIT;

  do
    {
      /* Record a (reg, byte-offset) pair in the expression being built;
	 the data->stack offset presumably leaves room for a leading
	 stack marker -- see function_arg_record_value.  */
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
      data->nregs += 1;
      /* Only the first register may be partial; the rest are full words.  */
      mode = word_mode;
      regno += 1;
      /* Round the byte offset up to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
    }
  while (--nregs > 0);
}
7178
/* A subroutine of function_arg_record_value.  Assign FIELD at position
   BITPOS to FP registers.  */

static void
assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
{
  int nregs;
  machine_mode mode;

  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
    return;

  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
  /* A value of at most 4 bytes in the second half of a word goes into
     the odd (upper) register of the pair.  */
  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
    regno++;
  int pos = bitpos / BITS_PER_UNIT;

  do
    {
      /* Record a (reg, byte-offset) pair in the expression being built;
	 the data->stack offset presumably leaves room for a leading
	 stack marker -- see function_arg_record_value.  */
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
      data->nregs += 1;
      /* Advance by the number of 4-byte FP registers the mode spans.  */
      regno += GET_MODE_SIZE (mode) / 4;
      pos += GET_MODE_SIZE (mode);
    }
  while (--nregs > 0);
}
7208
7209 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7210 the structure between PARMS->intoffset and BITPOS to registers. */
7211
7212 inline void
7213 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7214 {
7215 if (fp)
7216 {
7217 assign_int_registers (bitpos, data);
7218
7219 assign_fp_registers (field, bitpos, data);
7220 }
7221 else
7222 {
7223 if (data->intoffset < 0)
7224 data->intoffset = bitpos;
7225 }
7226 }
7227
/* Used by function_arg and function_value to implement the complex
   conventions of the 64-bit ABI for passing and returning structures.
   Return an expression valid as a return value for the FUNCTION_ARG
   and TARGET_FUNCTION_VALUE.

   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   NAMED is true if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   REGBASE is the regno of the base register for the parameter array.  */

static rtx
function_arg_record_value (const_tree type, machine_mode mode,
			   int slotno, bool named, int regbase)
{
  const int size = int_size_in_bytes (type);
  assign_data_t data;
  int nregs;

  data.slotno = slotno;
  data.regbase = regbase;

  /* First pass: count how many registers we need.  */
  data.nregs = 0;
  data.intoffset = 0;
  data.stack = false;
  traverse_record_type<assign_data_t, count_registers> (type, named, &data);

  /* Take into account pending integer fields.  */
  if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
    data.nregs += nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  nregs = data.nregs;

  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (size <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}

      /* ??? C++ has structures with no fields, and yet a size.  Give up
	 for now and pass everything back in integer registers.  */
      nregs = CEIL_NWORDS (size);
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }

  gcc_assert (nregs > 0);

  /* data.stack adds one extra element for the stack marker below.  */
  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (data.stack)
    XVECEXP (data.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Second pass: assign the registers.  */
  data.nregs = 0;
  data.intoffset = 0;
  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);

  /* Assign pending integer fields.  */
  assign_int_registers (size * BITS_PER_UNIT, &data);

  /* Both passes must agree on the register count.  */
  gcc_assert (data.nregs == nregs);

  return data.ret;
}
7311
7312 /* Used by function_arg and function_value to implement the conventions
7313 of the 64-bit ABI for passing and returning unions.
7314 Return an expression valid as a return value for the FUNCTION_ARG
7315 and TARGET_FUNCTION_VALUE.
7316
7317 SIZE is the size in bytes of the union.
7318 MODE is the argument's machine mode.
7319 SLOTNO is the index number of the argument's slot in the parameter array.
7320 REGNO is the hard register the union will be passed in. */
7321
7322 static rtx
7323 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7324 {
7325 unsigned int nwords;
7326
7327 /* See comment in function_arg_record_value for empty structures. */
7328 if (size <= 0)
7329 return gen_rtx_REG (mode, regno);
7330
7331 if (slotno == SPARC_INT_ARG_MAX - 1)
7332 nwords = 1;
7333 else
7334 nwords = CEIL_NWORDS (size);
7335
7336 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7337
7338 /* Unions are passed left-justified. */
7339 for (unsigned int i = 0; i < nwords; i++)
7340 XVECEXP (regs, 0, i)
7341 = gen_rtx_EXPR_LIST (VOIDmode,
7342 gen_rtx_REG (word_mode, regno + i),
7343 GEN_INT (UNITS_PER_WORD * i));
7344
7345 return regs;
7346 }
7347
7348 /* Used by function_arg and function_value to implement the conventions
7349 of the 64-bit ABI for passing and returning BLKmode vectors.
7350 Return an expression valid as a return value for the FUNCTION_ARG
7351 and TARGET_FUNCTION_VALUE.
7352
7353 SIZE is the size in bytes of the vector.
7354 SLOTNO is the index number of the argument's slot in the parameter array.
7355 NAMED is true if this argument is a named parameter
7356 (otherwise it is an extra parameter matching an ellipsis).
7357 REGNO is the hard register the vector will be passed in. */
7358
7359 static rtx
7360 function_arg_vector_value (int size, int slotno, bool named, int regno)
7361 {
7362 const int mult = (named ? 2 : 1);
7363 unsigned int nwords;
7364
7365 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7366 nwords = 1;
7367 else
7368 nwords = CEIL_NWORDS (size);
7369
7370 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7371
7372 if (size < UNITS_PER_WORD)
7373 XVECEXP (regs, 0, 0)
7374 = gen_rtx_EXPR_LIST (VOIDmode,
7375 gen_rtx_REG (SImode, regno),
7376 const0_rtx);
7377 else
7378 for (unsigned int i = 0; i < nwords; i++)
7379 XVECEXP (regs, 0, i)
7380 = gen_rtx_EXPR_LIST (VOIDmode,
7381 gen_rtx_REG (word_mode, regno + i * mult),
7382 GEN_INT (i * UNITS_PER_WORD));
7383
7384 return regs;
7385 }
7386
7387 /* Determine where to put an argument to a function.
7388 Value is zero to push the argument on the stack,
7389 or a hard register in which to store the argument.
7390
7391 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7392 the preceding args and about the function being called.
7393 ARG is a description of the argument.
7394 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7395 TARGET_FUNCTION_INCOMING_ARG. */
7396
7397 static rtx
7398 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7399 bool incoming)
7400 {
7401 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7402 const int regbase
7403 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7404 int slotno, regno, padding;
7405 tree type = arg.type;
7406 machine_mode mode = arg.mode;
7407 enum mode_class mclass = GET_MODE_CLASS (mode);
7408 bool named = arg.named;
7409
7410 slotno
7411 = function_arg_slotno (cum, mode, type, named, incoming, ®no, &padding);
7412 if (slotno == -1)
7413 return 0;
7414
7415 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7416 if (type && VECTOR_INTEGER_TYPE_P (type))
7417 mclass = MODE_FLOAT;
7418
7419 if (TARGET_ARCH32)
7420 return gen_rtx_REG (mode, regno);
7421
7422 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7423 and are promoted to registers if possible. */
7424 if (type && TREE_CODE (type) == RECORD_TYPE)
7425 {
7426 const int size = int_size_in_bytes (type);
7427 gcc_assert (size <= 16);
7428
7429 return function_arg_record_value (type, mode, slotno, named, regbase);
7430 }
7431
7432 /* Unions up to 16 bytes in size are passed in integer registers. */
7433 else if (type && TREE_CODE (type) == UNION_TYPE)
7434 {
7435 const int size = int_size_in_bytes (type);
7436 gcc_assert (size <= 16);
7437
7438 return function_arg_union_value (size, mode, slotno, regno);
7439 }
7440
7441 /* Floating-point vectors up to 16 bytes are passed in registers. */
7442 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7443 {
7444 const int size = int_size_in_bytes (type);
7445 gcc_assert (size <= 16);
7446
7447 return function_arg_vector_value (size, slotno, named, regno);
7448 }
7449
7450 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7451 but also have the slot allocated for them.
7452 If no prototype is in scope fp values in register slots get passed
7453 in two places, either fp regs and int regs or fp regs and memory. */
7454 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7455 && SPARC_FP_REG_P (regno))
7456 {
7457 rtx reg = gen_rtx_REG (mode, regno);
7458 if (cum->prototype_p || cum->libcall_p)
7459 return reg;
7460 else
7461 {
7462 rtx v0, v1;
7463
7464 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7465 {
7466 int intreg;
7467
7468 /* On incoming, we don't need to know that the value
7469 is passed in %f0 and %i0, and it confuses other parts
7470 causing needless spillage even on the simplest cases. */
7471 if (incoming)
7472 return reg;
7473
7474 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7475 + (regno - SPARC_FP_ARG_FIRST) / 2);
7476
7477 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7478 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7479 const0_rtx);
7480 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7481 }
7482 else
7483 {
7484 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7485 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7486 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7487 }
7488 }
7489 }
7490
7491 /* All other aggregate types are passed in an integer register in a mode
7492 corresponding to the size of the type. */
7493 else if (type && AGGREGATE_TYPE_P (type))
7494 {
7495 const int size = int_size_in_bytes (type);
7496 gcc_assert (size <= 16);
7497
7498 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7499 }
7500
7501 return gen_rtx_REG (mode, regno);
7502 }
7503
7504 /* Handle the TARGET_FUNCTION_ARG target hook. */
7505
7506 static rtx
7507 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7508 {
7509 return sparc_function_arg_1 (cum, arg, false);
7510 }
7511
7512 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7513
7514 static rtx
7515 sparc_function_incoming_arg (cumulative_args_t cum,
7516 const function_arg_info &arg)
7517 {
7518 return sparc_function_arg_1 (cum, arg, true);
7519 }
7520
7521 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7522
7523 static unsigned int
7524 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7525 {
7526 return ((TARGET_ARCH64
7527 && (GET_MODE_ALIGNMENT (mode) == 128
7528 || (type && TYPE_ALIGN (type) == 128)))
7529 ? 128
7530 : PARM_BOUNDARY);
7531 }
7532
7533 /* For an arg passed partly in registers and partly in memory,
7534 this is the number of bytes of registers used.
7535 For args passed entirely in registers or entirely in memory, zero.
7536
7537 Any arg that starts in the first 6 regs but won't entirely fit in them
7538 needs partial registers on v8. On v9, structures with integer
7539 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7540 values that begin in the last fp reg [where "last fp reg" varies with the
7541 mode] will be split between that reg and memory. */
7542
7543 static int
7544 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7545 {
7546 int slotno, regno, padding;
7547
7548 /* We pass false for incoming here, it doesn't matter. */
7549 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7550 arg.named, false, ®no, &padding);
7551
7552 if (slotno == -1)
7553 return 0;
7554
7555 if (TARGET_ARCH32)
7556 {
7557 /* We are guaranteed by pass_by_reference that the size of the
7558 argument is not greater than 8 bytes, so we only need to return
7559 one word if the argument is partially passed in registers. */
7560 const int size = GET_MODE_SIZE (arg.mode);
7561
7562 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7563 return UNITS_PER_WORD;
7564 }
7565 else
7566 {
7567 /* We are guaranteed by pass_by_reference that the size of the
7568 argument is not greater than 16 bytes, so we only need to return
7569 one word if the argument is partially passed in registers. */
7570 if (arg.aggregate_type_p ())
7571 {
7572 const int size = int_size_in_bytes (arg.type);
7573
7574 if (size > UNITS_PER_WORD
7575 && (slotno == SPARC_INT_ARG_MAX - 1
7576 || slotno == SPARC_FP_ARG_MAX - 1))
7577 return UNITS_PER_WORD;
7578 }
7579 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7580 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7581 || (arg.type && VECTOR_TYPE_P (arg.type)))
7582 && !(TARGET_FPU && arg.named)))
7583 {
7584 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7585 ? int_size_in_bytes (arg.type)
7586 : GET_MODE_SIZE (arg.mode);
7587
7588 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7589 return UNITS_PER_WORD;
7590 }
7591 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7592 || (arg.type && VECTOR_TYPE_P (arg.type)))
7593 {
7594 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7595 ? int_size_in_bytes (arg.type)
7596 : GET_MODE_SIZE (arg.mode);
7597
7598 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7599 return UNITS_PER_WORD;
7600 }
7601 }
7602
7603 return 0;
7604 }
7605
7606 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7607 Update the data in CUM to advance over argument ARG. */
7608
7609 static void
7610 sparc_function_arg_advance (cumulative_args_t cum_v,
7611 const function_arg_info &arg)
7612 {
7613 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7614 tree type = arg.type;
7615 machine_mode mode = arg.mode;
7616 int regno, padding;
7617
7618 /* We pass false for incoming here, it doesn't matter. */
7619 function_arg_slotno (cum, mode, type, arg.named, false, ®no, &padding);
7620
7621 /* If argument requires leading padding, add it. */
7622 cum->words += padding;
7623
7624 if (TARGET_ARCH32)
7625 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7626 else
7627 {
7628 /* For types that can have BLKmode, get the size from the type. */
7629 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7630 {
7631 const int size = int_size_in_bytes (type);
7632
7633 /* See comment in function_arg_record_value for empty structures. */
7634 if (size <= 0)
7635 cum->words++;
7636 else
7637 cum->words += CEIL_NWORDS (size);
7638 }
7639 else
7640 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7641 }
7642 }
7643
7644 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7645 are always stored left shifted in their argument slot. */
7646
7647 static pad_direction
7648 sparc_function_arg_padding (machine_mode mode, const_tree type)
7649 {
7650 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7651 return PAD_UPWARD;
7652
7653 /* Fall back to the default. */
7654 return default_function_arg_padding (mode, type);
7655 }
7656
7657 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7658 Specify whether to return the return value in memory. */
7659
7660 static bool
7661 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7662 {
7663 if (TARGET_ARCH32)
7664 /* Original SPARC 32-bit ABI says that structures and unions, and
7665 quad-precision floats are returned in memory. But note that the
7666 first part is implemented through -fpcc-struct-return being the
7667 default, so here we only implement -freg-struct-return instead.
7668 All other base types are returned in registers.
7669
7670 Extended ABI (as implemented by the Sun compiler) says that
7671 all complex floats are returned in registers (8 FP registers
7672 at most for '_Complex long double'). Return all complex integers
7673 in registers (4 at most for '_Complex long long').
7674
7675 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7676 integers are returned like floats of the same size, that is in
7677 registers up to 8 bytes and in memory otherwise. Return all
7678 vector floats in memory like structure and unions; note that
7679 they always have BLKmode like the latter. */
7680 return (TYPE_MODE (type) == BLKmode
7681 || TYPE_MODE (type) == TFmode
7682 || (TREE_CODE (type) == VECTOR_TYPE
7683 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7684 else
7685 /* Original SPARC 64-bit ABI says that structures and unions
7686 smaller than 32 bytes are returned in registers, as well as
7687 all other base types.
7688
7689 Extended ABI (as implemented by the Sun compiler) says that all
7690 complex floats are returned in registers (8 FP registers at most
7691 for '_Complex long double'). Return all complex integers in
7692 registers (4 at most for '_Complex TItype').
7693
7694 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7695 integers are returned like floats of the same size, that is in
7696 registers. Return all vector floats like structure and unions;
7697 note that they always have BLKmode like the latter. */
7698 return (TYPE_MODE (type) == BLKmode
7699 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7700 }
7701
/* Handle the TARGET_STRUCT_VALUE target hook.
   Return where to find the structure return value address.  */

static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  /* The 64-bit ABI passes the structure address like a normal argument,
     so no special location is needed.  */
  if (TARGET_ARCH64)
    return NULL_RTX;
  else
    {
      rtx mem;

      /* The address lives at a fixed offset in the caller's frame; seen
	 from the callee that is off the frame pointer, from the caller
	 off the stack pointer.  */
      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is optional
	     as to whether the return object is really provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx_code_label *endlab = gen_label_rtx ();

	  /* Calculate the return object size.  Only the low 12 bits of
	     the size are encoded in the unimp instruction.  */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust.  */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  /* The check failed: undo the pre-adjustment of the return
	     address and redirect the return object to the temporary.  */
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}
7764
/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
   For v9, function return values are subject to the same rules as arguments,
   except that up to 32 bytes may be returned in registers.  */

static rtx
sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  const int regbase
    = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Integer vectors are handled like floats as per the Sun VIS SDK.
     Note that integer vectors larger than 16 bytes have BLKmode so
     they need to be handled like floating-point vectors below.  */
  if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
    mclass = MODE_FLOAT;

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, true, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Vectors up to 32 bytes are returned in FP registers.  */
      else if (VECTOR_TYPE_P (type) && mode == BLKmode)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;  /* Handled by the common regno selection below.  */

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point, except with
     -freg-struct-return.  This must match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && !(type && AGGREGATE_TYPE_P (type))
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  /* FP values go in the first FP register when an FPU is available,
     everything else in the first integer argument register.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
7864
7865 /* Handle TARGET_FUNCTION_VALUE.
7866 On the SPARC, the value is found in the first "output" register, but the
7867 called function leaves it in the first "input" register. */
7868
7869 static rtx
7870 sparc_function_value (const_tree valtype,
7871 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7872 bool outgoing)
7873 {
7874 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7875 }
7876
7877 /* Handle TARGET_LIBCALL_VALUE. */
7878
7879 static rtx
7880 sparc_libcall_value (machine_mode mode,
7881 const_rtx fun ATTRIBUTE_UNUSED)
7882 {
7883 return sparc_function_value_1 (NULL_TREE, mode, false);
7884 }
7885
7886 /* Handle FUNCTION_VALUE_REGNO_P.
7887 On the SPARC, the first "output" reg is used for integer values, and the
7888 first floating point register is used for floating point values. */
7889
7890 static bool
7891 sparc_function_value_regno_p (const unsigned int regno)
7892 {
7893 return (regno == 8 || (TARGET_FPU && regno == 32));
7894 }
7895
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdarg or varargs is used and return the address of
   the first unnamed parameter.  */

static rtx
sparc_builtin_saveregs (void)
{
  /* Number of argument words consumed by the named parameters.  */
  int first_reg = crtl->args.info.words;
  rtx address;
  int regno;

  /* Spill every remaining integer argument register into its reserved
     slot in the register save area of the caller's frame, so the
     variadic arguments can be walked in memory.  */
  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (FIRST_PARM_OFFSET (0)
							+ (UNITS_PER_WORD
							   * regno)))),
		    gen_rtx_REG (word_mode,
				 SPARC_INCOMING_INT_ARG_FIRST + regno));

  /* Address of the first unnamed parameter's slot.  */
  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (FIRST_PARM_OFFSET (0)
				   + UNITS_PER_WORD * first_reg));

  return address;
}
7924
7925 /* Implement `va_start' for stdarg. */
7926
7927 static void
7928 sparc_va_start (tree valist, rtx nextarg)
7929 {
7930 nextarg = expand_builtin_saveregs ();
7931 std_expand_builtin_va_start (valist, nextarg);
7932 }
7933
/* Implement `va_arg' for stdarg.  Build a GIMPLE expression that fetches
   the next argument of type TYPE from the va_list VALIST, appending any
   needed statements to PRE_P/POST_P.  */

static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  if (pass_va_arg_by_reference (type))
    {
      /* The slot holds a pointer to the object, not the object itself.  */
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      /* rsize is the slot size: SIZE rounded up to a whole word.  */
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  incr = valist;
  if (align)
    {
      /* Round the current va_list pointer up to ALIGN.  */
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* On big-endian targets a right-justified object starts past the
     unused leading bytes of its slot.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      /* Copy the slot into an aligned temporary and read from there.  */
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Advance the va_list pointer past the slot, after the value is read.  */
  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
8019
8020 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8022 Specify whether the vector mode is supported by the hardware. */
8023
8024 static bool
8025 sparc_vector_mode_supported_p (machine_mode mode)
8026 {
8027 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8028 }
8029
8030 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8032
8033 static machine_mode
8034 sparc_preferred_simd_mode (scalar_mode mode)
8035 {
8036 if (TARGET_VIS)
8037 switch (mode)
8038 {
8039 case E_SImode:
8040 return V2SImode;
8041 case E_HImode:
8042 return V4HImode;
8043 case E_QImode:
8044 return V8QImode;
8045
8046 default:;
8047 }
8048
8049 return word_mode;
8050 }
8051
8052 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8055
8056 static bool
8057 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8058 {
8059 /* Do not fold unconditional jumps that have been created for crossing
8060 partition boundaries. */
8061 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8062 return false;
8063
8064 return true;
8065 }
8066
/* Return the string to output an unconditional branch to LABEL, which is
   the operand number of the label.

   DEST is the destination insn (i.e. the label), INSN is the source.  */

const char *
output_ubranch (rtx dest, rtx_insn *insn)
{
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */
  delta = 5000000;
  if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond has a much shorter reach than ordinary branches.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  /* Out of cbcond range: fall back to an annulled plain branch.  */
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  /* cwbe %g0, %g0, <label> is an always-taken compare-and-branch.  */
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append the "%l0%(" operand references by hand.  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}
8132
/* Return the string to output a conditional branch to LABEL, which is
   the operand number of the label.  OP is the conditional expression.
   XEXP (OP, 0) is assumed to be a condition code register (integer or
   floating point) and its mode specifies what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   The text is assembled in a static buffer, so the returned string is
   only valid until the next call to this function.  */

const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  /* SPACES tracks the columns remaining before the operand field; it
     decides below whether a '\t' or a ' ' separator keeps the operands
     aligned.  */
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      /* Map the (possibly reversed) RTL comparison code onto the SPARC
	 floating-point branch mnemonic.  */
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;
	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      /* Integer branch.  Some condition codes have dedicated mnemonics
	 for the overflow (CCV/CCXV) and negative/zero (CCNZ/CCXNZ)
	 variants of the comparison modes.  */
      switch (code)
	{
	case NE:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvs";
	  else
	    branch = "bne";
	  break;
	case EQ:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvc";
	  else
	    branch = "be";
	  break;
	case GE:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;
	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      /* V8 is set when the branch target is out of range of the V9
	 conditional branch displacement, forcing the V8 form (which
	 takes no %icc/%fcc operand).  */
      int v8 = 0;

      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      /* Select the condition-code operand string from the CC mode.  */
      switch (mode)
	{
	case E_CCmode:
	case E_CCNZmode:
	case E_CCCmode:
	case E_CCVmode:
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	  break;
	case E_CCXmode:
	case E_CCXNZmode:
	case E_CCXCmode:
	case E_CCXVmode:
	  labelno = "%%xcc, ";
	  gcc_assert (!v8);
	  break;
	case E_CCFPmode:
	case E_CCFPEmode:
	  {
	    static char v9_fcc_labelno[] = "%%fccX, ";
	    /* Set the char indicating the number of the fcc reg to use.  */
	    v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	    labelno = v9_fcc_labelno;
	    if (v8)
	      {
		gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
		labelno = "";
	      }
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Append a ",pt"/",pn" branch prediction hint from the branch
	 probability note; the sense is inverted for the far form since
	 the condition itself was reversed above.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
		    >= profile_probability::even ()) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.
	 Patching p[3] turns the ".+12" offset into ".+16", hopping over
	 the delay-slot instruction as well.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      /* 14 is the length of ".+12\n\t nop\n\tb\t".  */
      p += 14;
    }
  /* Emit the label operand (%l<label>) followed by the %# punctuation,
     which prints a trailing nop when the delay slot is unfilled.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8397
/* Emit a library call comparison between floating point X and Y.
   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
   Return the new operator to be used in the comparison sequence.

   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
   values as arguments instead of the TFmode registers themselves,
   that's why we cannot call emit_float_lib_cmp.  */

rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the libgcc routine.  The six basic comparisons have dedicated
     entry points; the unordered-aware ones all funnel through the
     generic compare routine whose result is decoded below.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The 64-bit routines take pointers, so each operand must live in
	 memory; reuse an existing MEM, otherwise spill to a stack temp.  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit routines take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Decode the libcall result into a new comparison against RESULT.
     For the _Q*_cmp cases the decoding below presumes the convention
     0 = equal, 1 = less, 2 = greater, 3 = unordered -- NOTE(review):
     confirm against libgcc's _Q_cmp/_Qp_cmp implementation.  */
  switch (comparison)
    {
    default:
      /* The dedicated f<cond> routines return nonzero iff the
	 comparison holds.  */
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* UNLT holds for results 1 (less) and 3 (unordered): test bit 0.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* (result + 1) & 2 is zero exactly for results 0 and 3, i.e. when
	 the operands are equal or unordered.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  /* Not reached; silences missing-return warnings.  */
  gcc_unreachable ();
}
8545
8546 /* Generate an unsigned DImode to FP conversion. This is the same code
8547 optabs would emit if we didn't have TFmode patterns. */
8548
8549 void
8550 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8551 {
8552 rtx i0, i1, f0, in, out;
8553
8554 out = operands[0];
8555 in = force_reg (DImode, operands[1]);
8556 rtx_code_label *neglab = gen_label_rtx ();
8557 rtx_code_label *donelab = gen_label_rtx ();
8558 i0 = gen_reg_rtx (DImode);
8559 i1 = gen_reg_rtx (DImode);
8560 f0 = gen_reg_rtx (mode);
8561
8562 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8563
8564 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8565 emit_jump_insn (gen_jump (donelab));
8566 emit_barrier ();
8567
8568 emit_label (neglab);
8569
8570 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8571 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8572 emit_insn (gen_iordi3 (i0, i0, i1));
8573 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8574 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8575
8576 emit_label (donelab);
8577 }
8578
8579 /* Generate an FP to unsigned DImode conversion. This is the same code
8580 optabs would emit if we didn't have TFmode patterns. */
8581
8582 void
8583 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8584 {
8585 rtx i0, i1, f0, in, out, limit;
8586
8587 out = operands[0];
8588 in = force_reg (mode, operands[1]);
8589 rtx_code_label *neglab = gen_label_rtx ();
8590 rtx_code_label *donelab = gen_label_rtx ();
8591 i0 = gen_reg_rtx (DImode);
8592 i1 = gen_reg_rtx (DImode);
8593 limit = gen_reg_rtx (mode);
8594 f0 = gen_reg_rtx (mode);
8595
8596 emit_move_insn (limit,
8597 const_double_from_real_value (
8598 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8599 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8600
8601 emit_insn (gen_rtx_SET (out,
8602 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8603 emit_jump_insn (gen_jump (donelab));
8604 emit_barrier ();
8605
8606 emit_label (neglab);
8607
8608 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8609 emit_insn (gen_rtx_SET (i0,
8610 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8611 emit_insn (gen_movdi (i1, const1_rtx));
8612 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8613 emit_insn (gen_xordi3 (out, i0, i1));
8614
8615 emit_label (donelab);
8616 }
8617
/* Return the string to output a compare and branch instruction to DEST.
   DEST is the destination insn (i.e. the label), INSN is the source,
   and OP is the conditional expression.  The returned template is kept
   in a static buffer and is only valid until the next call.  */

const char *
output_cbcond (rtx op, rtx dest, rtx_insn *insn)
{
  machine_mode mode = GET_MODE (XEXP (op, 0));
  enum rtx_code code = GET_CODE (op);
  const char *cond_str, *tmpl;
  int far, emit_nop, len;
  static char string[64];
  char size_char;

  /* Compare and Branch is limited to +-2KB.  If it is too far away,
     change

     cxbne X, Y, .LC30

     to

     cxbe X, Y, .+16
     nop
     ba,pt xcc, .LC30
     nop  */

  /* The insn length attribute encodes which form is needed:
     1 = cbcond alone, 2 = cbcond plus trailing nop, 4 = the far
     four-instruction sequence above -- see the .md length attribute.  */
  len = get_attr_length (insn);

  far = len == 4;
  emit_nop = len == 2;

  /* The far form branches over the cbcond, so invert its sense.  */
  if (far)
    code = reverse_condition (code);

  /* 'w' selects the 32-bit (cwb) form, 'x' the 64-bit (cxb) form.  */
  size_char = ((mode == SImode) ? 'w' : 'x');

  /* Map the RTL comparison code onto the cbcond condition suffix.  */
  switch (code)
    {
    case NE:
      cond_str = "ne";
      break;

    case EQ:
      cond_str = "e";
      break;

    case GE:
      cond_str = "ge";
      break;

    case GT:
      cond_str = "g";
      break;

    case LE:
      cond_str = "le";
      break;

    case LT:
      cond_str = "l";
      break;

    case GEU:
      cond_str = "cc";
      break;

    case GTU:
      cond_str = "gu";
      break;

    case LEU:
      cond_str = "leu";
      break;

    case LTU:
      cond_str = "cs";
      break;

    default:
      gcc_unreachable ();
    }

  if (far)
    {
      /* VERYFAR means the target is also outside the range of the
	 unconditional "ba" displacement, forcing a plain "b".  */
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      if (veryfar)
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
    }
  else
    {
      if (emit_nop)
	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, %%3";
    }

  /* Splice the size character and condition suffix into the template;
     the doubled '%'s survive as single '%'s for final_scan_insn.  */
  snprintf (string, sizeof(string), tmpl, size_char, cond_str);

  return string;
}
8730
/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   The text is assembled in a static buffer, so the returned string is
   only valid until the next call to this function.  */

const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64-bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Append a ",pt"/",pn" prediction hint from the branch probability
     note; its sense is inverted for the far form since the condition
     itself was reversed above.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
	       >= profile_probability::even ()) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Use a tab separator while the mnemonic is short enough to keep the
     operand column aligned, a space otherwise.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      /* VERYFAR means the target is also outside the range of the
	 "ba,pt" displacement, forcing a plain "b".  */
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.
	 Patching p[3] turns ".+12" into ".+16", hopping over the
	 delay-slot instruction as well.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      /* 12 is the length of ".+12\n\t nop\n\t".  */
      p += 12;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  /* Emit the label operand (%l<label>) followed by the %# punctuation,
     which prints a trailing nop when the delay slot is unfilled.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8878
/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of return insn on v9.
   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.

   WHERE points at the RTL to be scanned (and possibly rewritten in
   place); the function recurses over all sub-expressions.  */

static int
epilogue_renumber (rtx *where, int test)
{
  const char *fmt;
  int i;
  enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      /* Hard regs 8-23 are %o0-%o7 and %l0-%l7: reject outright.  */
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)	/* oX or lX */
	return 1;
      /* Hard regs 24-31 are %i0-%i7: remap them to %o0-%o7 when
	 actually renaming (TEST == 0), preserving ORIGINAL_REGNO.  */
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	{
	  if (ORIGINAL_REGNO (*where))
	    {
	      rtx n = gen_raw_REG (GET_MODE (*where),
				   OUTGOING_REGNO (REGNO (*where)));
	      ORIGINAL_REGNO (n) = ORIGINAL_REGNO (*where);
	      *where = n;
	    }
	  else
	    *where = gen_rtx_REG (GET_MODE (*where),
				  OUTGOING_REGNO (REGNO (*where)));
	}
      return 0;

    case SCRATCH:
    case PC:
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
      /* Leaves of the RTL tree that cannot mention a register.  */
      return 0;

    /* Do not replace the frame pointer with the stack pointer because
       it can cause the delayed instruction to load below the stack.
       This occurs when instructions like:

       (set (reg/i:SI 24 %i0)
           (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
                       (const_int -20 [0xffffffec])) 0))

       are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      /* With a stack bias, a bare %fp-based MEM would likewise read
	 below the post-return stack pointer.  */
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse into every rtx ('e') and rtx vector ('E') operand,
     propagating any rejection upward.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
8968
/* Leaf functions and non-leaf functions have different needs.  */

/* Register allocation order to use when the current function is a leaf.  */
static const int reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Register allocation order for ordinary (non-leaf) functions.  */
static const int reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by 0 (leaf) / 1 (non-leaf); selected in
   sparc_order_regs_for_local_alloc below.  */
static const int *const reg_alloc_orders[] =
{
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order
};
8981
8982 void
8983 sparc_order_regs_for_local_alloc (void)
8984 {
8985 static int last_order_nonleaf = 1;
8986
8987 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8988 {
8989 last_order_nonleaf = !last_order_nonleaf;
8990 memcpy ((char *) reg_alloc_order,
8991 (const char *) reg_alloc_orders[last_order_nonleaf],
8992 FIRST_PSEUDO_REGISTER * sizeof (int));
8993 }
8994 }
8995
8996 int
8997 sparc_leaf_reg_remap (int regno)
8998 {
8999 gcc_checking_assert (regno >= 0);
9000
9001 /* Do not remap in flat mode. */
9002 if (TARGET_FLAT)
9003 return regno;
9004
9005 /* Do not remap global, stack pointer or floating-point registers. */
9006 if (regno < 8 || regno == STACK_POINTER_REGNUM || regno > SPARC_LAST_INT_REG)
9007 return regno;
9008
9009 /* Neither out nor local nor frame pointer registers must appear. */
9010 if ((regno >= 8 && regno <= 23) || regno == HARD_FRAME_POINTER_REGNUM)
9011 return -1;
9012
9013 /* Remap in to out registers. */
9014 return regno - 16;
9015 }
9016
9017 /* Return 1 if REG and MEM are legitimate enough to allow the various
9018 MEM<-->REG splits to be run. */
9019
9020 int
9021 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9022 {
9023 /* Punt if we are here by mistake. */
9024 gcc_assert (reload_completed);
9025
9026 /* We must have an offsettable memory reference. */
9027 if (!offsettable_memref_p (mem))
9028 return 0;
9029
9030 /* If we have legitimate args for ldd/std, we do not want
9031 the split to happen. */
9032 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9033 return 0;
9034
9035 /* Success. */
9036 return 1;
9037 }
9038
9039 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9040
9041 void
9042 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9043 {
9044 rtx high_part = gen_highpart (mode, dest);
9045 rtx low_part = gen_lowpart (mode, dest);
9046 rtx word0 = adjust_address (src, mode, 0);
9047 rtx word1 = adjust_address (src, mode, 4);
9048
9049 if (reg_overlap_mentioned_p (high_part, word1))
9050 {
9051 emit_move_insn_1 (low_part, word1);
9052 emit_move_insn_1 (high_part, word0);
9053 }
9054 else
9055 {
9056 emit_move_insn_1 (high_part, word0);
9057 emit_move_insn_1 (low_part, word1);
9058 }
9059 }
9060
9061 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9062
9063 void
9064 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9065 {
9066 rtx word0 = adjust_address (dest, mode, 0);
9067 rtx word1 = adjust_address (dest, mode, 4);
9068 rtx high_part = gen_highpart (mode, src);
9069 rtx low_part = gen_lowpart (mode, src);
9070
9071 emit_move_insn_1 (word0, high_part);
9072 emit_move_insn_1 (word1, low_part);
9073 }
9074
9075 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9076
9077 int
9078 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9079 {
9080 /* Punt if we are here by mistake. */
9081 gcc_assert (reload_completed);
9082
9083 if (GET_CODE (reg1) == SUBREG)
9084 reg1 = SUBREG_REG (reg1);
9085 if (GET_CODE (reg1) != REG)
9086 return 0;
9087 const int regno1 = REGNO (reg1);
9088
9089 if (GET_CODE (reg2) == SUBREG)
9090 reg2 = SUBREG_REG (reg2);
9091 if (GET_CODE (reg2) != REG)
9092 return 0;
9093 const int regno2 = REGNO (reg2);
9094
9095 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9096 return 1;
9097
9098 if (TARGET_VIS3)
9099 {
9100 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9101 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9102 return 1;
9103 }
9104
9105 return 0;
9106 }
9107
9108 /* Split a REG <--> REG move into a pair of moves in MODE. */
9109
9110 void
9111 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9112 {
9113 rtx dest1 = gen_highpart (mode, dest);
9114 rtx dest2 = gen_lowpart (mode, dest);
9115 rtx src1 = gen_highpart (mode, src);
9116 rtx src2 = gen_lowpart (mode, src);
9117
9118 /* Now emit using the real source and destination we found, swapping
9119 the order if we detect overlap. */
9120 if (reg_overlap_mentioned_p (dest1, src2))
9121 {
9122 emit_move_insn_1 (dest2, src2);
9123 emit_move_insn_1 (dest1, src1);
9124 }
9125 else
9126 {
9127 emit_move_insn_1 (dest1, src1);
9128 emit_move_insn_1 (dest2, src2);
9129 }
9130 }
9131
9132 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9133 This makes them candidates for using ldd and std insns.
9134
9135 Note reg1 and reg2 *must* be hard registers. */
9136
9137 int
9138 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9139 {
9140 /* We might have been passed a SUBREG. */
9141 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9142 return 0;
9143
9144 if (REGNO (reg1) % 2 != 0)
9145 return 0;
9146
9147 /* Integer ldd is deprecated in SPARC V9 */
9148 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9149 return 0;
9150
9151 return (REGNO (reg1) == REGNO (reg2) - 1);
9152 }
9153
/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
   an ldd or std insn.

   This can only happen when addr1 and addr2, the addresses in mem1
   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
   addr1 must also be aligned on a 64-bit boundary.

   Also iff dependent_reg_rtx is not null it should not be used to
   compute the address for mem1, i.e. we cannot optimize a sequence
   like:
	ld [%o0], %o0
	ld [%o0 + 4], %o1
   to
	ldd [%o0], %o0
   nor:
	ld [%g3 + 4], %g3
	ld [%g3], %g2
   to
	ldd [%g3], %g2

   But, note that the transformation from:
	ld [%g2 + 4], %g3
	ld [%g2], %g2
   to
	ldd [%g2], %g2
   is perfectly fine.  Thus, the peephole2 patterns always pass us
   the destination register of the first load, never the second one.

   For stores we don't have a similar problem, so dependent_reg_rtx is
   NULL_RTX.  */

int
mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
{
  rtx addr1, addr2;
  unsigned int reg1;		/* Base register of the first address.  */
  HOST_WIDE_INT offset1;	/* Constant offset of the first address.  */

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  /* MEM1 should be aligned on a 64-bit boundary.  */
  if (MEM_ALIGN (mem1) < 64)
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract a register number and offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr1, 0)) != REG)
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (GET_CODE (addr1) != REG)
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
  if (GET_CODE (addr2) != PLUS)
    return 0;

  if (GET_CODE (XEXP (addr2, 0)) != REG
      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
    return 0;

  /* Both addresses must use the same base register.  */
  if (reg1 != REGNO (XEXP (addr2, 0)))
    return 0;

  /* See the head comment: the first load must not have produced the
     base register of the pair.  */
  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
    return 0;

  /* The first offset must be evenly divisible by 8 to ensure the
     address is 64-bit aligned.  */
  if (offset1 % 8 != 0)
    return 0;

  /* The offset for the second addr must be 4 more than the first addr.  */
  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
     instructions.  */
  return 1;
}
9254
9255 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9256
9257 rtx
9258 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9259 {
9260 rtx x = widen_memory_access (mem1, mode, 0);
9261 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9262 return x;
9263 }
9264
9265 /* Return 1 if reg is a pseudo, or is the first register in
9266 a hard register pair. This makes it suitable for use in
9267 ldd and std insns. */
9268
9269 int
9270 register_ok_for_ldd (rtx reg)
9271 {
9272 /* We might have been passed a SUBREG. */
9273 if (!REG_P (reg))
9274 return 0;
9275
9276 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9277 return (REGNO (reg) % 2 == 0);
9278
9279 return 1;
9280 }
9281
9282 /* Return 1 if OP, a MEM, has an address which is known to be
9283 aligned to an 8-byte boundary. */
9284
9285 int
9286 memory_ok_for_ldd (rtx op)
9287 {
9288 if (!mem_min_alignment (op, 8))
9289 return 0;
9290
9291 /* We need to perform the job of a memory constraint. */
9292 if ((reload_in_progress || reload_completed)
9293 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9294 return 0;
9295
9296 if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
9297 return 0;
9298
9299 return 1;
9300 }
9301
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.
   The valid punctuation characters are the ones handled by
   sparc_print_operand: '#', '*', '(', ')', '_' and '&'.  */

static bool
sparc_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '#':
    case '*':
    case '(':
    case ')':
    case '_':
    case '&':
      return true;
    default:
      return false;
    }
}
9318
9319 /* Implement TARGET_PRINT_OPERAND.
9320 Print operand X (an rtx) in assembler syntax to file FILE.
9321 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9322 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9323
9324 static void
9325 sparc_print_operand (FILE *file, rtx x, int code)
9326 {
9327 const char *s;
9328
9329 switch (code)
9330 {
9331 case '#':
9332 /* Output an insn in a delay slot. */
9333 if (final_sequence)
9334 sparc_indent_opcode = 1;
9335 else
9336 fputs ("\n\t nop", file);
9337 return;
9338 case '*':
9339 /* Output an annul flag if there's nothing for the delay slot and we
9340 are optimizing. This is always used with '(' below.
9341 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9342 this is a dbx bug. So, we only do this when optimizing.
9343 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9344 Always emit a nop in case the next instruction is a branch. */
9345 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9346 fputs (",a", file);
9347 return;
9348 case '(':
9349 /* Output a 'nop' if there's nothing for the delay slot and we are
9350 not optimizing. This is always used with '*' above. */
9351 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9352 fputs ("\n\t nop", file);
9353 else if (final_sequence)
9354 sparc_indent_opcode = 1;
9355 return;
9356 case ')':
9357 /* Output the right displacement from the saved PC on function return.
9358 The caller may have placed an "unimp" insn immediately after the call
9359 so we have to account for it. This insn is used in the 32-bit ABI
9360 when calling a function that returns a non zero-sized structure. The
9361 64-bit ABI doesn't have it. Be careful to have this test be the same
9362 as that for the call. The exception is when sparc_std_struct_return
9363 is enabled, the psABI is followed exactly and the adjustment is made
9364 by the code in sparc_struct_value_rtx. The call emitted is the same
9365 when sparc_std_struct_return is enabled. */
9366 if (!TARGET_ARCH64
9367 && cfun->returns_struct
9368 && !sparc_std_struct_return
9369 && DECL_SIZE (DECL_RESULT (current_function_decl))
9370 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9371 == INTEGER_CST
9372 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9373 fputs ("12", file);
9374 else
9375 fputc ('8', file);
9376 return;
9377 case '_':
9378 /* Output the Embedded Medium/Anywhere code model base register. */
9379 fputs (EMBMEDANY_BASE_REG, file);
9380 return;
9381 case '&':
9382 /* Print some local dynamic TLS name. */
9383 if (const char *name = get_some_local_dynamic_name ())
9384 assemble_name (file, name);
9385 else
9386 output_operand_lossage ("'%%&' used without any "
9387 "local dynamic TLS references");
9388 return;
9389
9390 case 'Y':
9391 /* Adjust the operand to take into account a RESTORE operation. */
9392 if (GET_CODE (x) == CONST_INT)
9393 break;
9394 else if (GET_CODE (x) != REG)
9395 output_operand_lossage ("invalid %%Y operand");
9396 else if (REGNO (x) < 8)
9397 fputs (reg_names[REGNO (x)], file);
9398 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9399 fputs (reg_names[REGNO (x)-16], file);
9400 else
9401 output_operand_lossage ("invalid %%Y operand");
9402 return;
9403 case 'L':
9404 /* Print out the low order register name of a register pair. */
9405 if (WORDS_BIG_ENDIAN)
9406 fputs (reg_names[REGNO (x)+1], file);
9407 else
9408 fputs (reg_names[REGNO (x)], file);
9409 return;
9410 case 'H':
9411 /* Print out the high order register name of a register pair. */
9412 if (WORDS_BIG_ENDIAN)
9413 fputs (reg_names[REGNO (x)], file);
9414 else
9415 fputs (reg_names[REGNO (x)+1], file);
9416 return;
9417 case 'R':
9418 /* Print out the second register name of a register pair or quad.
9419 I.e., R (%o0) => %o1. */
9420 fputs (reg_names[REGNO (x)+1], file);
9421 return;
9422 case 'S':
9423 /* Print out the third register name of a register quad.
9424 I.e., S (%o0) => %o2. */
9425 fputs (reg_names[REGNO (x)+2], file);
9426 return;
9427 case 'T':
9428 /* Print out the fourth register name of a register quad.
9429 I.e., T (%o0) => %o3. */
9430 fputs (reg_names[REGNO (x)+3], file);
9431 return;
9432 case 'x':
9433 /* Print a condition code register. */
9434 if (REGNO (x) == SPARC_ICC_REG)
9435 {
9436 switch (GET_MODE (x))
9437 {
9438 case E_CCmode:
9439 case E_CCNZmode:
9440 case E_CCCmode:
9441 case E_CCVmode:
9442 s = "%icc";
9443 break;
9444 case E_CCXmode:
9445 case E_CCXNZmode:
9446 case E_CCXCmode:
9447 case E_CCXVmode:
9448 s = "%xcc";
9449 break;
9450 default:
9451 gcc_unreachable ();
9452 }
9453 fputs (s, file);
9454 }
9455 else
9456 /* %fccN register */
9457 fputs (reg_names[REGNO (x)], file);
9458 return;
9459 case 'm':
9460 /* Print the operand's address only. */
9461 output_address (GET_MODE (x), XEXP (x, 0));
9462 return;
9463 case 'r':
9464 /* In this case we need a register. Use %g0 if the
9465 operand is const0_rtx. */
9466 if (x == const0_rtx
9467 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9468 {
9469 fputs ("%g0", file);
9470 return;
9471 }
9472 else
9473 break;
9474
9475 case 'A':
9476 switch (GET_CODE (x))
9477 {
9478 case IOR:
9479 s = "or";
9480 break;
9481 case AND:
9482 s = "and";
9483 break;
9484 case XOR:
9485 s = "xor";
9486 break;
9487 default:
9488 output_operand_lossage ("invalid %%A operand");
9489 s = "";
9490 break;
9491 }
9492 fputs (s, file);
9493 return;
9494
9495 case 'B':
9496 switch (GET_CODE (x))
9497 {
9498 case IOR:
9499 s = "orn";
9500 break;
9501 case AND:
9502 s = "andn";
9503 break;
9504 case XOR:
9505 s = "xnor";
9506 break;
9507 default:
9508 output_operand_lossage ("invalid %%B operand");
9509 s = "";
9510 break;
9511 }
9512 fputs (s, file);
9513 return;
9514
9515 /* This is used by the conditional move instructions. */
9516 case 'C':
9517 {
9518 machine_mode mode = GET_MODE (XEXP (x, 0));
9519 switch (GET_CODE (x))
9520 {
9521 case NE:
9522 if (mode == CCVmode || mode == CCXVmode)
9523 s = "vs";
9524 else
9525 s = "ne";
9526 break;
9527 case EQ:
9528 if (mode == CCVmode || mode == CCXVmode)
9529 s = "vc";
9530 else
9531 s = "e";
9532 break;
9533 case GE:
9534 if (mode == CCNZmode || mode == CCXNZmode)
9535 s = "pos";
9536 else
9537 s = "ge";
9538 break;
9539 case GT:
9540 s = "g";
9541 break;
9542 case LE:
9543 s = "le";
9544 break;
9545 case LT:
9546 if (mode == CCNZmode || mode == CCXNZmode)
9547 s = "neg";
9548 else
9549 s = "l";
9550 break;
9551 case GEU:
9552 s = "geu";
9553 break;
9554 case GTU:
9555 s = "gu";
9556 break;
9557 case LEU:
9558 s = "leu";
9559 break;
9560 case LTU:
9561 s = "lu";
9562 break;
9563 case LTGT:
9564 s = "lg";
9565 break;
9566 case UNORDERED:
9567 s = "u";
9568 break;
9569 case ORDERED:
9570 s = "o";
9571 break;
9572 case UNLT:
9573 s = "ul";
9574 break;
9575 case UNLE:
9576 s = "ule";
9577 break;
9578 case UNGT:
9579 s = "ug";
9580 break;
9581 case UNGE:
9582 s = "uge"
9583 ; break;
9584 case UNEQ:
9585 s = "ue";
9586 break;
9587 default:
9588 output_operand_lossage ("invalid %%C operand");
9589 s = "";
9590 break;
9591 }
9592 fputs (s, file);
9593 return;
9594 }
9595
	  /* These are used by the movr instruction pattern.  */
9597 case 'D':
9598 {
9599 switch (GET_CODE (x))
9600 {
9601 case NE:
9602 s = "ne";
9603 break;
9604 case EQ:
9605 s = "e";
9606 break;
9607 case GE:
9608 s = "gez";
9609 break;
9610 case LT:
9611 s = "lz";
9612 break;
9613 case LE:
9614 s = "lez";
9615 break;
9616 case GT:
9617 s = "gz";
9618 break;
9619 default:
9620 output_operand_lossage ("invalid %%D operand");
9621 s = "";
9622 break;
9623 }
9624 fputs (s, file);
9625 return;
9626 }
9627
9628 case 'b':
9629 {
9630 /* Print a sign-extended character. */
9631 int i = trunc_int_for_mode (INTVAL (x), QImode);
9632 fprintf (file, "%d", i);
9633 return;
9634 }
9635
9636 case 'f':
9637 /* Operand must be a MEM; write its address. */
9638 if (GET_CODE (x) != MEM)
9639 output_operand_lossage ("invalid %%f operand");
9640 output_address (GET_MODE (x), XEXP (x, 0));
9641 return;
9642
9643 case 's':
9644 {
9645 /* Print a sign-extended 32-bit value. */
9646 HOST_WIDE_INT i;
9647 if (GET_CODE(x) == CONST_INT)
9648 i = INTVAL (x);
9649 else
9650 {
9651 output_operand_lossage ("invalid %%s operand");
9652 return;
9653 }
9654 i = trunc_int_for_mode (i, SImode);
9655 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9656 return;
9657 }
9658
9659 case 0:
9660 /* Do nothing special. */
9661 break;
9662
9663 default:
9664 /* Undocumented flag. */
9665 output_operand_lossage ("invalid operand output code");
9666 }
9667
9668 if (GET_CODE (x) == REG)
9669 fputs (reg_names[REGNO (x)], file);
9670 else if (GET_CODE (x) == MEM)
9671 {
9672 fputc ('[', file);
9673 /* Poor Sun assembler doesn't understand absolute addressing. */
9674 if (CONSTANT_P (XEXP (x, 0)))
9675 fputs ("%g0+", file);
9676 output_address (GET_MODE (x), XEXP (x, 0));
9677 fputc (']', file);
9678 }
9679 else if (GET_CODE (x) == HIGH)
9680 {
9681 fputs ("%hi(", file);
9682 output_addr_const (file, XEXP (x, 0));
9683 fputc (')', file);
9684 }
9685 else if (GET_CODE (x) == LO_SUM)
9686 {
9687 sparc_print_operand (file, XEXP (x, 0), 0);
9688 if (TARGET_CM_MEDMID)
9689 fputs ("+%l44(", file);
9690 else
9691 fputs ("+%lo(", file);
9692 output_addr_const (file, XEXP (x, 1));
9693 fputc (')', file);
9694 }
9695 else if (GET_CODE (x) == CONST_DOUBLE)
9696 output_operand_lossage ("floating-point constant not a valid immediate operand");
9697 else
9698 output_addr_const (file, x);
9699 }
9700
/* Implement TARGET_PRINT_OPERAND_ADDRESS.

   Write the assembly syntax for memory address X to FILE.  The mode
   argument is unused on SPARC.  Handles a plain register, reg+offset,
   reg+reg and reg+symbol sums, LO_SUM low-part relocations, label
   differences, and the PIC CONST-MINUS-CONST form; anything else is
   emitted as a constant address.  */

static void
sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
{
  rtx base, index = 0;
  int offset = 0;
  rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into a base plus either a constant offset or an
	 index (register or symbolic).  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  /* A LO_SUM base with an extra offset is only legitimate when
	     %lo can be used as an offsetable address on 64-bit and the
	     Medium/Middle code model is not in use.  */
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (VOIDmode, XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* Label difference: print "expr-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      /* Emit the low-part relocation: %l44 under the Medium/Middle code
	 model, %lo otherwise.  */
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (VOIDmode, XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}
9783
9784 /* Target hook for assembling integer objects. The sparc version has
9786 special handling for aligned DI-mode objects. */
9787
9788 static bool
9789 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9790 {
9791 /* ??? We only output .xword's for symbols and only then in environments
9792 where the assembler can handle them. */
9793 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9794 {
9795 if (TARGET_V9)
9796 {
9797 assemble_integer_with_op ("\t.xword\t", x);
9798 return true;
9799 }
9800 else
9801 {
9802 assemble_aligned_integer (4, const0_rtx);
9803 assemble_aligned_integer (4, x);
9804 return true;
9805 }
9806 }
9807 return default_assemble_integer (x, size, aligned_p);
9808 }
9809
/* Return the value of a code used in the .proc pseudo-op that says
   what kind of result this function returns.  For non-C types, we pick
   the closest C type.  */

/* Fallback definitions of the standard C type sizes, in bits, for
   configurations whose headers do not provide them.  They feed the
   TYPE_PRECISION comparisons in sparc_type_code below.  */

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
9842
unsigned long
sparc_type_code (tree type)
{
  unsigned long qualifiers = 0;
  unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  /* Each level of type derivation (array-of = 3, function-returning = 2,
     pointer-to = 1) consumes two bits starting at bit 6; the loop then
     descends into TREE_TYPE until a base type supplies the low bits.  */
  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  /* Signed integer codes are 2 (char) through 5 (long); their
	     unsigned counterparts are 12 through 15.  */
	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  /* Code 6 is float, code 7 is double (or wider).  */
	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows?  */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type!  */
	}
    }

  return qualifiers;
}
9952
/* Nested function support.  */

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
   (to store insns).  This is a bit excessive.  Perhaps a different
   mechanism would be better here.

   Emit enough FLUSH insns to synchronize the data and instruction caches.  */

static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: "sethi %hi(fn), %g1" -- opcode 0x03000000 ORed with the top
     22 bits of FNADDR (shifted right by 10) per the SETHI format above.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: "sethi %hi(static), %g2" -- same merge with CXT.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: "jmp %g1+%lo(fn)" -- opcode 0x81c06000 ORed with the low
     10 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: "or %g2, %lo(static), %g2" -- opcode 0x8410a000 ORed with
     the low 10 bits of CXT; executed in the jmp's delay slot.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Synchronize the I-cache with the words just stored.  */
  emit_insn
    (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 0))));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn
      (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif

}
10033
/* The 64-bit version is simpler because it makes more sense to load the
   values as "immediate" data out of the trampoline.  It's also easier since
   we can read the PC without clobbering a register.  */

static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed instructions from the listing above, as raw words:
     rd %pc, %g1.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  /* ldx [%g1+24], %g5 -- fetches FNADDR from the data area.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  /* jmp %g5.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  /* ldx [%g1+16], %g5 -- delay slot, fetches CXT into the static
     chain register.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  /* The 16-byte data area: static chain at +16, function address at +24.  */
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn
    (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn
      (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
10083
10084 /* Worker for TARGET_TRAMPOLINE_INIT. */
10085
10086 static void
10087 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10088 {
10089 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10090 cxt = force_reg (Pmode, cxt);
10091 if (TARGET_ARCH64)
10092 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10093 else
10094 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10095 }
10096
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.
   This is the SuperSPARC variant, used from sparc_adjust_cost.  */

static int
supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			int cost)
{
  enum attr_type insn_type;

  /* Unrecognized insns carry no type attribute; leave the cost alone.  */
  if (recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);

  if (dep_type == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN(insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}
10162
10163 static int
10164 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10165 int cost)
10166 {
10167 enum attr_type insn_type, dep_type;
10168 rtx pat = PATTERN(insn);
10169 rtx dep_pat = PATTERN (dep_insn);
10170
10171 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10172 return cost;
10173
10174 insn_type = get_attr_type (insn);
10175 dep_type = get_attr_type (dep_insn);
10176
10177 switch (dtype)
10178 {
10179 case 0:
10180 /* Data dependency; DEP_INSN writes a register that INSN reads some
10181 cycles later. */
10182
10183 switch (insn_type)
10184 {
10185 case TYPE_STORE:
10186 case TYPE_FPSTORE:
10187 /* Get the delay iff the address of the store is the dependence. */
10188 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10189 return cost;
10190
10191 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10192 return cost;
10193 return cost + 3;
10194
10195 case TYPE_LOAD:
10196 case TYPE_SLOAD:
10197 case TYPE_FPLOAD:
10198 /* If a load, then the dependence must be on the memory address. If
10199 the addresses aren't equal, then it might be a false dependency */
10200 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10201 {
10202 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10203 || GET_CODE (SET_DEST (dep_pat)) != MEM
10204 || GET_CODE (SET_SRC (pat)) != MEM
10205 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10206 XEXP (SET_SRC (pat), 0)))
10207 return cost + 2;
10208
10209 return cost + 8;
10210 }
10211 break;
10212
10213 case TYPE_BRANCH:
10214 /* Compare to branch latency is 0. There is no benefit from
10215 separating compare and branch. */
10216 if (dep_type == TYPE_COMPARE)
10217 return 0;
10218 /* Floating point compare to branch latency is less than
10219 compare to conditional move. */
10220 if (dep_type == TYPE_FPCMP)
10221 return cost - 1;
10222 break;
10223 default:
10224 break;
10225 }
10226 break;
10227
10228 case REG_DEP_ANTI:
10229 /* Anti-dependencies only penalize the fpu unit. */
10230 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10231 return 0;
10232 break;
10233
10234 default:
10235 break;
10236 }
10237
10238 return cost;
10239 }
10240
10241 static int
10242 leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10243 int cost)
10244 {
10245 enum attr_type insn_type, dep_type;
10246 rtx pat = PATTERN (insn);
10247 rtx dep_pat = PATTERN (dep_insn);
10248
10249 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10250 return cost;
10251
10252 insn_type = get_attr_type (insn);
10253 dep_type = get_attr_type (dep_insn);
10254
10255 switch (dtype)
10256 {
10257 case REG_DEP_TRUE:
10258 /* Data dependency; DEP_INSN writes a register that INSN reads some
10259 cycles later. */
10260
10261 switch (insn_type)
10262 {
10263 case TYPE_STORE:
10264 /* Try to schedule three instructions between the store and
10265 the ALU instruction that generated the data. */
10266 if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10267 {
10268 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10269 break;
10270
10271 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10272 return 4;
10273 }
10274 break;
10275 default:
10276 break;
10277 }
10278 break;
10279 case REG_DEP_ANTI:
10280 /* Penalize anti-dependencies for FPU instructions. */
10281 if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10282 return 4;
10283 break;
10284 default:
10285 break;
10286 }
10287
10288 return cost;
10289 }
10290
10291 static int
10292 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10293 unsigned int)
10294 {
10295 switch (sparc_cpu)
10296 {
10297 case PROCESSOR_LEON5:
10298 cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10299 break;
10300 case PROCESSOR_SUPERSPARC:
10301 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10302 break;
10303 case PROCESSOR_HYPERSPARC:
10304 case PROCESSOR_SPARCLITE86X:
10305 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10306 break;
10307 default:
10308 break;
10309 }
10310 return cost;
10311 }
10312
/* Scheduler initialization routine (TARGET_SCHED_INIT signature).
   The SPARC backend keeps no per-pass scheduling state, so this is
   intentionally empty.  */
static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
10318
10319 static int
10320 sparc_use_sched_lookahead (void)
10321 {
10322 switch (sparc_cpu)
10323 {
10324 case PROCESSOR_ULTRASPARC:
10325 case PROCESSOR_ULTRASPARC3:
10326 return 4;
10327 case PROCESSOR_SUPERSPARC:
10328 case PROCESSOR_HYPERSPARC:
10329 case PROCESSOR_SPARCLITE86X:
10330 return 3;
10331 case PROCESSOR_NIAGARA4:
10332 case PROCESSOR_NIAGARA7:
10333 case PROCESSOR_M8:
10334 return 2;
10335 case PROCESSOR_NIAGARA:
10336 case PROCESSOR_NIAGARA2:
10337 case PROCESSOR_NIAGARA3:
10338 default:
10339 return 0;
10340 }
10341 }
10342
10343 static int
10344 sparc_issue_rate (void)
10345 {
10346 switch (sparc_cpu)
10347 {
10348 case PROCESSOR_ULTRASPARC:
10349 case PROCESSOR_ULTRASPARC3:
10350 case PROCESSOR_M8:
10351 return 4;
10352 case PROCESSOR_SUPERSPARC:
10353 return 3;
10354 case PROCESSOR_HYPERSPARC:
10355 case PROCESSOR_SPARCLITE86X:
10356 case PROCESSOR_V9:
10357 /* Assume V9 processors are capable of at least dual-issue. */
10358 case PROCESSOR_NIAGARA4:
10359 case PROCESSOR_NIAGARA7:
10360 return 2;
10361 case PROCESSOR_NIAGARA:
10362 case PROCESSOR_NIAGARA2:
10363 case PROCESSOR_NIAGARA3:
10364 default:
10365 return 1;
10366 }
10367 }
10368
10369 int
10370 sparc_branch_cost (bool speed_p, bool predictable_p)
10371 {
10372 if (!speed_p)
10373 return 2;
10374
10375 /* For pre-V9 processors we use a single value (usually 3) to take into
10376 account the potential annulling of the delay slot (which ends up being
10377 a bubble in the pipeline slot) plus a cycle to take into consideration
10378 the instruction cache effects.
10379
10380 On V9 and later processors, which have branch prediction facilities,
10381 we take into account whether the branch is (easily) predictable. */
10382 const int cost = sparc_costs->branch_cost;
10383
10384 switch (sparc_cpu)
10385 {
10386 case PROCESSOR_V9:
10387 case PROCESSOR_ULTRASPARC:
10388 case PROCESSOR_ULTRASPARC3:
10389 case PROCESSOR_NIAGARA:
10390 case PROCESSOR_NIAGARA2:
10391 case PROCESSOR_NIAGARA3:
10392 case PROCESSOR_NIAGARA4:
10393 case PROCESSOR_NIAGARA7:
10394 case PROCESSOR_M8:
10395 return cost + (predictable_p ? 0 : 2);
10396
10397 default:
10398 return cost;
10399 }
10400 }
10401
/* Classify the extension behavior of the SET in INSN: return 1 if it
   leaves the high 32 bits of its destination zero, -1 if it sign-extends
   from SImode, 0 if nothing is known.  The caller (sparc_check_64) only
   invokes this on insns whose PATTERN is a SET.  */
static int
set_extends (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a nonnegative constant clears the high bits.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise the result is zero-extended if either register
	   operand is known to be.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* Both operands must be known zero-extended.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      /* A 32-bit logical right shift leaves the high bits zero.  */
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_INT:
      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* These sign-extend: yield -1 when done in SImode, 0 otherwise.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* A register copy inherits the source's classification.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
10462
/* We _ought_ to have only one kind per function, but...  */
/* Case vectors deferred by sparc_defer_case_vector and emitted by
   sparc_output_deferred_case_vectors: relative (ADDR_DIFF_VEC) ones
   on the first list, absolute (ADDR_VEC) ones on the second.  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;
10466
10467 void
10468 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10469 {
10470 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10471 if (diff)
10472 sparc_addr_diff_list
10473 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10474 else
10475 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10476 }
10477
/* Output one deferred absolute case vector.  VEC is the (label . body)
   EXPR_LIST entry built by sparc_defer_case_vector.  */
static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the label that heads the table.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One absolute address element per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
10505
/* Output one deferred address-difference case vector.  VEC is the
   (label . body) EXPR_LIST entry built by sparc_defer_case_vector;
   the elements are emitted relative to the vector's base label.  */
static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  /* Operand 0 of an ADDR_DIFF_VEC holds the base label reference.  */
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the label that heads the table.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One label-minus-base element per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
10537
10538 static void
10539 sparc_output_deferred_case_vectors (void)
10540 {
10541 rtx t;
10542 int align;
10543
10544 if (sparc_addr_list == NULL_RTX
10545 && sparc_addr_diff_list == NULL_RTX)
10546 return;
10547
10548 /* Align to cache line in the function's code section. */
10549 switch_to_section (current_function_section ());
10550
10551 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10552 if (align > 0)
10553 ASM_OUTPUT_ALIGN (asm_out_file, align);
10554
10555 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10556 sparc_output_addr_vec (XEXP (t, 0));
10557 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10558 sparc_output_addr_diff_vec (XEXP (t, 0));
10559
10560 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10561 }
10562
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  INSN, when non-null, is the point from which to scan
   backwards for the defining SET; a null INSN is only usable when the
   register has a single definition.  */
int
sparc_check_64 (rtx x, rtx_insn *insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register, also track its SImode low word (per the
     function comment) so sets of that subword are recognized.  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      /* Without a starting point we can only scan when the register is
	 set once; start from the very end of the function.  */
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Walk backwards looking for the SET that defines X (or its low
     word Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control flow or unknown insns invalidate the scan unless the
	     register is known to be set exactly once.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers part of the value: give up.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
10622
/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.
   Returns the final asm template (shift plus word-splitting moves),
   built in a static buffer; earlier setup insns are emitted directly.  */

const char *
output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
{
  /* Static buffer for the returned template; output_asm_insn consumes
     the result before the next call.  */
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63. */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Assemble the full 64-bit source in the scratch: high word
	 shifted up, then the low word or'd in.  The srl clears any
	 stale upper bits of the low word unless sparc_check_64 proved
	 they are already zero.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  /* Perform the shift, then split the 64-bit result back into the
     high/low word pair of the destination.  */
  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
10660
10661 /* Output rtl to increment the profiler label LABELNO
10663 for profiling a function entry. */
10664
10665 void
10666 sparc_profile_hook (int labelno)
10667 {
10668 char buf[32];
10669 rtx lab, fun;
10670
10671 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10672 if (NO_PROFILE_COUNTERS)
10673 {
10674 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10675 }
10676 else
10677 {
10678 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10679 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10680 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10681 }
10682 }
10683
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  Emits a
   .section directive for NAME, translating the generic SECTION_*
   FLAGS into the Solaris assembler's "#attribute" syntax.  */

static void
sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
				     tree decl ATTRIBUTE_UNUSED)
{
  /* COMDAT (link-once) sections need group handling; delegate to the
     common Solaris COMDAT emitter.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  /* Everything but debug sections is allocated at run time.  */
  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
#if HAVE_GAS_SECTION_EXCLUDE
  if (flags & SECTION_EXCLUDE)
    fputs (",#exclude", asm_out_file);
#endif
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_TLS)
    fputs (",#tls", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  /* NOTYPE sections get no contents-type attribute at all.  */
  if (flags & SECTION_NOTYPE)
    ;
  else if (flags & SECTION_BSS)
    fputs (",#nobits", asm_out_file);
  else
    fputs (",#progbits", asm_out_file);

  fputc ('\n', asm_out_file);
}
#endif /* TARGET_SOLARIS */
10723
10724 /* We do not allow indirect calls to be optimized into sibling calls.
10725
10726 We cannot use sibling calls when delayed branches are disabled
10727 because they will likely require the call delay slot to be filled.
10728
10729 Also, on SPARC 32-bit we cannot emit a sibling call when the
10730 current function returns a structure. This is because the "unimp
10731 after call" convention would cause the callee to return to the
10732 wrong place. The generic code already disallows cases where the
10733 function being called returns a structure.
10734
10735 It may seem strange how this last case could occur. Usually there
10736 is code after the call which jumps to epilogue code which dumps the
10737 return value into the struct return area. That ought to invalidate
10738 the sibling call right? Well, in the C++ case we can end up passing
10739 the pointer to the struct return area to a constructor (which returns
10740 void) and then nothing else happens. Such a sibling call would look
10741 valid without the added check here.
10742
10743 VxWorks PIC PLT entries require the global pointer to be initialized
10744 on entry. We therefore can't emit sibling calls to them. */
10745 static bool
10746 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10747 {
10748 return (decl
10749 && flag_delayed_branch
10750 && (TARGET_ARCH64 || ! cfun->returns_struct)
10751 && !(TARGET_VXWORKS_RTP
10752 && flag_pic
10753 && !targetm.binds_local_p (decl)));
10754 }
10755
/* libfunc renaming.  */

/* Implement TARGET_INIT_LIBFUNCS: install the SPARC-specific names for
   the integer, TFmode-arithmetic and conversion support routines, which
   differ between the 32-bit and 64-bit ABIs.  */
static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul. */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* Float <-> TFmode conversions.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      /* Integer <-> TFmode conversions.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
10852
/* SPARC builtins.  */

/* Codes identifying every SPARC machine-specific builtin; used to index
   sparc_builtins[] and sparc_builtins_icode[] below.  Grouped by the
   instruction-set extension that provides them.  */
enum sparc_builtins
{
  /* FPU builtins.  */
  SPARC_BUILTIN_LDFSR,
  SPARC_BUILTIN_STFSR,

  /* VIS 1.0 builtins.  */
  SPARC_BUILTIN_FPACK16,
  SPARC_BUILTIN_FPACK32,
  SPARC_BUILTIN_FPACKFIX,
  SPARC_BUILTIN_FEXPAND,
  SPARC_BUILTIN_FPMERGE,
  SPARC_BUILTIN_FMUL8X16,
  SPARC_BUILTIN_FMUL8X16AU,
  SPARC_BUILTIN_FMUL8X16AL,
  SPARC_BUILTIN_FMUL8SUX16,
  SPARC_BUILTIN_FMUL8ULX16,
  SPARC_BUILTIN_FMULD8SUX16,
  SPARC_BUILTIN_FMULD8ULX16,
  SPARC_BUILTIN_FALIGNDATAV4HI,
  SPARC_BUILTIN_FALIGNDATAV8QI,
  SPARC_BUILTIN_FALIGNDATAV2SI,
  SPARC_BUILTIN_FALIGNDATADI,
  SPARC_BUILTIN_WRGSR,
  SPARC_BUILTIN_RDGSR,
  SPARC_BUILTIN_ALIGNADDR,
  SPARC_BUILTIN_ALIGNADDRL,
  SPARC_BUILTIN_PDIST,
  SPARC_BUILTIN_EDGE8,
  SPARC_BUILTIN_EDGE8L,
  SPARC_BUILTIN_EDGE16,
  SPARC_BUILTIN_EDGE16L,
  SPARC_BUILTIN_EDGE32,
  SPARC_BUILTIN_EDGE32L,
  SPARC_BUILTIN_FCMPLE16,
  SPARC_BUILTIN_FCMPLE32,
  SPARC_BUILTIN_FCMPNE16,
  SPARC_BUILTIN_FCMPNE32,
  SPARC_BUILTIN_FCMPGT16,
  SPARC_BUILTIN_FCMPGT32,
  SPARC_BUILTIN_FCMPEQ16,
  SPARC_BUILTIN_FCMPEQ32,
  SPARC_BUILTIN_FPADD16,
  SPARC_BUILTIN_FPADD16S,
  SPARC_BUILTIN_FPADD32,
  SPARC_BUILTIN_FPADD32S,
  SPARC_BUILTIN_FPSUB16,
  SPARC_BUILTIN_FPSUB16S,
  SPARC_BUILTIN_FPSUB32,
  SPARC_BUILTIN_FPSUB32S,
  SPARC_BUILTIN_ARRAY8,
  SPARC_BUILTIN_ARRAY16,
  SPARC_BUILTIN_ARRAY32,

  /* VIS 2.0 builtins.  */
  SPARC_BUILTIN_EDGE8N,
  SPARC_BUILTIN_EDGE8LN,
  SPARC_BUILTIN_EDGE16N,
  SPARC_BUILTIN_EDGE16LN,
  SPARC_BUILTIN_EDGE32N,
  SPARC_BUILTIN_EDGE32LN,
  SPARC_BUILTIN_BMASK,
  SPARC_BUILTIN_BSHUFFLEV4HI,
  SPARC_BUILTIN_BSHUFFLEV8QI,
  SPARC_BUILTIN_BSHUFFLEV2SI,
  SPARC_BUILTIN_BSHUFFLEDI,

  /* VIS 3.0 builtins.  */
  SPARC_BUILTIN_CMASK8,
  SPARC_BUILTIN_CMASK16,
  SPARC_BUILTIN_CMASK32,
  SPARC_BUILTIN_FCHKSM16,
  SPARC_BUILTIN_FSLL16,
  SPARC_BUILTIN_FSLAS16,
  SPARC_BUILTIN_FSRL16,
  SPARC_BUILTIN_FSRA16,
  SPARC_BUILTIN_FSLL32,
  SPARC_BUILTIN_FSLAS32,
  SPARC_BUILTIN_FSRL32,
  SPARC_BUILTIN_FSRA32,
  SPARC_BUILTIN_PDISTN,
  SPARC_BUILTIN_FMEAN16,
  SPARC_BUILTIN_FPADD64,
  SPARC_BUILTIN_FPSUB64,
  SPARC_BUILTIN_FPADDS16,
  SPARC_BUILTIN_FPADDS16S,
  SPARC_BUILTIN_FPSUBS16,
  SPARC_BUILTIN_FPSUBS16S,
  SPARC_BUILTIN_FPADDS32,
  SPARC_BUILTIN_FPADDS32S,
  SPARC_BUILTIN_FPSUBS32,
  SPARC_BUILTIN_FPSUBS32S,
  SPARC_BUILTIN_FUCMPLE8,
  SPARC_BUILTIN_FUCMPNE8,
  SPARC_BUILTIN_FUCMPGT8,
  SPARC_BUILTIN_FUCMPEQ8,
  SPARC_BUILTIN_FHADDS,
  SPARC_BUILTIN_FHADDD,
  SPARC_BUILTIN_FHSUBS,
  SPARC_BUILTIN_FHSUBD,
  SPARC_BUILTIN_FNHADDS,
  SPARC_BUILTIN_FNHADDD,
  SPARC_BUILTIN_UMULXHI,
  SPARC_BUILTIN_XMULX,
  SPARC_BUILTIN_XMULXHI,

  /* VIS 4.0 builtins.  */
  SPARC_BUILTIN_FPADD8,
  SPARC_BUILTIN_FPADDS8,
  SPARC_BUILTIN_FPADDUS8,
  SPARC_BUILTIN_FPADDUS16,
  SPARC_BUILTIN_FPCMPLE8,
  SPARC_BUILTIN_FPCMPGT8,
  SPARC_BUILTIN_FPCMPULE16,
  SPARC_BUILTIN_FPCMPUGT16,
  SPARC_BUILTIN_FPCMPULE32,
  SPARC_BUILTIN_FPCMPUGT32,
  SPARC_BUILTIN_FPMAX8,
  SPARC_BUILTIN_FPMAX16,
  SPARC_BUILTIN_FPMAX32,
  SPARC_BUILTIN_FPMAXU8,
  SPARC_BUILTIN_FPMAXU16,
  SPARC_BUILTIN_FPMAXU32,
  SPARC_BUILTIN_FPMIN8,
  SPARC_BUILTIN_FPMIN16,
  SPARC_BUILTIN_FPMIN32,
  SPARC_BUILTIN_FPMINU8,
  SPARC_BUILTIN_FPMINU16,
  SPARC_BUILTIN_FPMINU32,
  SPARC_BUILTIN_FPSUB8,
  SPARC_BUILTIN_FPSUBS8,
  SPARC_BUILTIN_FPSUBUS8,
  SPARC_BUILTIN_FPSUBUS16,

  /* VIS 4.0B builtins.  */

  /* Note that all the DICTUNPACK* entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK16,
  SPARC_BUILTIN_DICTUNPACK32,
  SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,

  /* Note that all the FPCMP*SHL entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPGT8SHL,
  SPARC_BUILTIN_FPCMPEQ8SHL,
  SPARC_BUILTIN_FPCMPNE8SHL,
  SPARC_BUILTIN_FPCMPLE16SHL,
  SPARC_BUILTIN_FPCMPGT16SHL,
  SPARC_BUILTIN_FPCMPEQ16SHL,
  SPARC_BUILTIN_FPCMPNE16SHL,
  SPARC_BUILTIN_FPCMPLE32SHL,
  SPARC_BUILTIN_FPCMPGT32SHL,
  SPARC_BUILTIN_FPCMPEQ32SHL,
  SPARC_BUILTIN_FPCMPNE32SHL,
  SPARC_BUILTIN_FPCMPULE8SHL,
  SPARC_BUILTIN_FPCMPUGT8SHL,
  SPARC_BUILTIN_FPCMPULE16SHL,
  SPARC_BUILTIN_FPCMPUGT16SHL,
  SPARC_BUILTIN_FPCMPULE32SHL,
  SPARC_BUILTIN_FPCMPUGT32SHL,
  SPARC_BUILTIN_FPCMPDE8SHL,
  SPARC_BUILTIN_FPCMPDE16SHL,
  SPARC_BUILTIN_FPCMPDE32SHL,
  SPARC_BUILTIN_FPCMPUR8SHL,
  SPARC_BUILTIN_FPCMPUR16SHL,
  SPARC_BUILTIN_FPCMPUR32SHL,
  SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,

  SPARC_BUILTIN_MAX
};
11030
/* Decl of each SPARC builtin, indexed by sparc_builtins code; GC-rooted
   so the decls survive garbage collection.  */
static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
/* Insn code used to expand the corresponding builtin.  */
static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
11033
11034 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
11035 The instruction should require a constant operand of some sort. The
11036 function prints an error if OPVAL is not valid. */
11037
11038 static int
11039 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
11040 {
11041 if (GET_CODE (opval) != CONST_INT)
11042 {
11043 error ("%qs expects a constant argument", insn_data[icode].name);
11044 return false;
11045 }
11046
11047 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
11048 {
11049 error ("constant argument out of range for %qs", insn_data[icode].name);
11050 return false;
11051 }
11052 return true;
11053 }
11054
11055 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
11056 function decl or NULL_TREE if the builtin was not added. */
11057
11058 static tree
11059 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
11060 tree type)
11061 {
11062 tree t
11063 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
11064
11065 if (t)
11066 {
11067 sparc_builtins[code] = t;
11068 sparc_builtins_icode[code] = icode;
11069 }
11070
11071 return t;
11072 }
11073
11074 /* Likewise, but also marks the function as "const". */
11075
11076 static tree
11077 def_builtin_const (const char *name, enum insn_code icode,
11078 enum sparc_builtins code, tree type)
11079 {
11080 tree t = def_builtin (name, icode, code, type);
11081
11082 if (t)
11083 TREE_READONLY (t) = 1;
11084
11085 return t;
11086 }
11087
/* Implement the TARGET_INIT_BUILTINS target hook.
   Create builtin functions for special SPARC instructions.  */

static void
sparc_init_builtins (void)
{
  /* FSR load/store builtins require the FPU.  */
  if (TARGET_FPU)
    sparc_fpu_init_builtins ();

  /* VIS builtins are only available when the VIS extension is enabled.  */
  if (TARGET_VIS)
    sparc_vis_init_builtins ();
}
11100
11101 /* Create builtin functions for FPU instructions. */
11102
11103 static void
11104 sparc_fpu_init_builtins (void)
11105 {
11106 tree ftype
11107 = build_function_type_list (void_type_node,
11108 build_pointer_type (unsigned_type_node), 0);
11109 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11110 SPARC_BUILTIN_LDFSR, ftype);
11111 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11112 SPARC_BUILTIN_STFSR, ftype);
11113 }
11114
11115 /* Create builtin functions for VIS instructions. */
11116
11117 static void
11118 sparc_vis_init_builtins (void)
11119 {
11120 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11121 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11122 tree v4hi = build_vector_type (intHI_type_node, 4);
11123 tree v2hi = build_vector_type (intHI_type_node, 2);
11124 tree v2si = build_vector_type (intSI_type_node, 2);
11125 tree v1si = build_vector_type (intSI_type_node, 1);
11126
11127 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11128 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11129 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11130 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11131 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11132 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11133 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11134 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11135 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11136 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11137 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11138 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11139 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11140 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11141 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11142 v8qi, v8qi,
11143 intDI_type_node, 0);
11144 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11145 v8qi, v8qi, 0);
11146 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11147 v8qi, v8qi, 0);
11148 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11149 intSI_type_node, 0);
11150 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11151 intSI_type_node, 0);
11152 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11153 intDI_type_node, 0);
11154 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11155 intDI_type_node,
11156 intDI_type_node, 0);
11157 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11158 intSI_type_node,
11159 intSI_type_node, 0);
11160 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11161 ptr_type_node,
11162 intSI_type_node, 0);
11163 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11164 ptr_type_node,
11165 intDI_type_node, 0);
11166 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11167 ptr_type_node,
11168 ptr_type_node, 0);
11169 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11170 ptr_type_node,
11171 ptr_type_node, 0);
11172 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11173 v4hi, v4hi, 0);
11174 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11175 v2si, v2si, 0);
11176 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11177 v4hi, v4hi, 0);
11178 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11179 v2si, v2si, 0);
11180 tree void_ftype_di = build_function_type_list (void_type_node,
11181 intDI_type_node, 0);
11182 tree di_ftype_void = build_function_type_list (intDI_type_node,
11183 void_type_node, 0);
11184 tree void_ftype_si = build_function_type_list (void_type_node,
11185 intSI_type_node, 0);
11186 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11187 float_type_node,
11188 float_type_node, 0);
11189 tree df_ftype_df_df = build_function_type_list (double_type_node,
11190 double_type_node,
11191 double_type_node, 0);
11192
11193 /* Packing and expanding vectors. */
11194 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11195 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11196 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11197 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11198 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11199 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11200 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11201 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11202 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11203 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11204
11205 /* Multiplications. */
11206 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11207 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11208 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11209 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11210 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11211 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11212 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11213 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11214 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11215 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11216 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11217 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11218 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11219 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11220
11221 /* Data aligning. */
11222 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11223 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11224 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11225 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11226 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11227 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11228 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11229 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11230
11231 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11232 SPARC_BUILTIN_WRGSR, void_ftype_di);
11233 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11234 SPARC_BUILTIN_RDGSR, di_ftype_void);
11235
11236 if (TARGET_ARCH64)
11237 {
11238 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11239 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11240 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11241 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11242 }
11243 else
11244 {
11245 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11246 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11247 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11248 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11249 }
11250
11251 /* Pixel distance. */
11252 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11253 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11254
11255 /* Edge handling. */
11256 if (TARGET_ARCH64)
11257 {
11258 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11259 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11260 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11261 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11262 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11263 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11264 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11265 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11266 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11267 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11268 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11269 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11270 }
11271 else
11272 {
11273 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11274 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11275 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11276 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11277 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11278 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11279 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11280 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11281 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11282 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11283 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11284 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11285 }
11286
11287 /* Pixel compare. */
11288 if (TARGET_ARCH64)
11289 {
11290 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11291 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11292 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11293 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11294 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11295 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11296 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11297 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11298 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11299 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11300 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11301 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11302 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11303 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11304 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11305 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11306 }
11307 else
11308 {
11309 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11310 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11311 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11312 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11313 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11314 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11315 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11316 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11317 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11318 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11319 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11320 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11321 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11322 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11323 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11324 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11325 }
11326
11327 /* Addition and subtraction. */
11328 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11329 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11330 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11331 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11332 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11333 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11334 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11335 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11336 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11337 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11338 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11339 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11340 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11341 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11342 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11343 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11344
11345 /* Three-dimensional array addressing. */
11346 if (TARGET_ARCH64)
11347 {
11348 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11349 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11350 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11351 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11352 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11353 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11354 }
11355 else
11356 {
11357 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11358 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11359 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11360 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11361 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11362 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11363 }
11364
11365 if (TARGET_VIS2)
11366 {
11367 /* Edge handling. */
11368 if (TARGET_ARCH64)
11369 {
11370 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11371 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11372 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11373 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11374 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11375 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11376 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11377 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11378 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11379 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11380 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11381 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11382 }
11383 else
11384 {
11385 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11386 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11387 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11388 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11389 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11390 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11391 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11392 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11393 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11394 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11395 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11396 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11397 }
11398
11399 /* Byte mask and shuffle. */
11400 if (TARGET_ARCH64)
11401 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11402 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11403 else
11404 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11405 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11406 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11407 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11408 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11409 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11410 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11411 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11412 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11413 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11414 }
11415
11416 if (TARGET_VIS3)
11417 {
11418 if (TARGET_ARCH64)
11419 {
11420 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11421 SPARC_BUILTIN_CMASK8, void_ftype_di);
11422 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11423 SPARC_BUILTIN_CMASK16, void_ftype_di);
11424 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11425 SPARC_BUILTIN_CMASK32, void_ftype_di);
11426 }
11427 else
11428 {
11429 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11430 SPARC_BUILTIN_CMASK8, void_ftype_si);
11431 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11432 SPARC_BUILTIN_CMASK16, void_ftype_si);
11433 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11434 SPARC_BUILTIN_CMASK32, void_ftype_si);
11435 }
11436
11437 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11438 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11439
11440 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11441 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11442 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11443 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11444 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11445 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11446 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11447 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11448 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11449 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11450 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11451 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11452 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11453 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11454 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11455 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11456
11457 if (TARGET_ARCH64)
11458 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11459 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11460 else
11461 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11462 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11463
11464 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11465 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11466 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11467 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11468 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11469 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11470
11471 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11472 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11473 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11474 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11475 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11476 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11477 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11478 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11479 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11480 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11481 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11482 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11483 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11484 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11485 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11486 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11487
11488 if (TARGET_ARCH64)
11489 {
11490 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11491 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11492 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11493 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11494 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11495 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11496 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11497 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11498 }
11499 else
11500 {
11501 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11502 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11503 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11504 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11505 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11506 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11507 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11508 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11509 }
11510
11511 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11512 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11513 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11514 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11515 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11516 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11517 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11518 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11519 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11520 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11521 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11522 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11523
11524 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11525 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11526 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11527 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11528 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11529 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11530 }
11531
11532 if (TARGET_VIS4)
11533 {
11534 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11535 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11536 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11537 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11538 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11539 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11540 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11541 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11542
11543
11544 if (TARGET_ARCH64)
11545 {
11546 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11547 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11548 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11549 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11550 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11551 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11552 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11553 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11554 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11555 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11556 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11557 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11558 }
11559 else
11560 {
11561 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11562 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11563 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11564 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11565 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11566 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11567 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11568 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11569 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11570 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11571 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11572 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11573 }
11574
11575 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11576 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11577 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11578 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11579 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11580 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11581 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11582 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11583 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11584 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11585 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11586 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11587 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11588 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11589 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11590 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11591 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11592 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11593 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11594 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11595 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11596 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11597 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11598 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11599 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11600 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11601 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11602 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11603 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11604 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11605 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11606 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11607 }
11608
11609 if (TARGET_VIS4B)
11610 {
11611 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11612 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11613 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11614 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11615 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11616 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11617
11618 if (TARGET_ARCH64)
11619 {
11620 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11621 v8qi, v8qi,
11622 intSI_type_node, 0);
11623 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11624 v4hi, v4hi,
11625 intSI_type_node, 0);
11626 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11627 v2si, v2si,
11628 intSI_type_node, 0);
11629
11630 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11631 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11632 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11633 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11634 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11635 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11636 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11637 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11638
11639 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11640 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11641 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11642 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11643 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11644 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11645 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11646 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11647
11648 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11649 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11650 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11651 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11652 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11653 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11654 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11655 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11656
11657
11658 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11659 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11660 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11661 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11662
11663 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11664 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11665 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11666 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11667
11668 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11669 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11670 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11671 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11672
11673 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11674 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11675 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11676 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11677 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11678 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11679
11680 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11681 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11682 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11683 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11684 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11685 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11686
11687 }
11688 else
11689 {
11690 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11691 v8qi, v8qi,
11692 intSI_type_node, 0);
11693 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11694 v4hi, v4hi,
11695 intSI_type_node, 0);
11696 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11697 v2si, v2si,
11698 intSI_type_node, 0);
11699
11700 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11701 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11702 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11703 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11704 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11705 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11706 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11707 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11708
11709 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11710 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11711 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11712 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11713 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11714 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11715 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11716 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11717
11718 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11719 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11720 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11721 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11722 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11723 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11724 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11725 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11726
11727
11728 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11729 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11730 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11731 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11732
11733 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11734 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11735 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11736 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11737
11738 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11739 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11740 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11741 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11742
11743 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11744 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11745 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11746 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11747 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11748 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11749
11750 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11751 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11752 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11753 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11754 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11755 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11756 }
11757 }
11758 }
11759
11760 /* Implement TARGET_BUILTIN_DECL hook. */
11761
11762 static tree
11763 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11764 {
11765 if (code >= SPARC_BUILTIN_MAX)
11766 return error_mark_node;
11767
11768 return sparc_builtins[code];
11769 }
11770
/* Implemented TARGET_EXPAND_BUILTIN hook.  */

static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code
    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  /* For a value-returning builtin, operand 0 of the insn is the
     destination.  Reuse TARGET when its mode and predicate fit,
     otherwise fall back to a fresh pseudo.  */
  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      /* Insn operand numbering: for a void builtin the first call
	 argument is insn operand 0, for a value-returning builtin the
	 first argument is insn operand 1 (operand 0 is the result).  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      /* Some of the builtins require constant arguments.  We check
	 for this here.  */
      if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
	   && arg_count == 3)
	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
	      && arg_count == 2))
	{
	  if (!check_constant_argument (icode, idx, op[arg_count]))
	    return const0_rtx;
	}

      /* The FSR load/store builtins take a pointer; turn it into an
	 SImode memory reference, forcing the address into a register
	 first when it is not already a valid address.  */
      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      /* Wrap scalar DImode/SImode arguments as single-element vectors
	 when the insn pattern expects V1DImode/V1SImode operands.  */
      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      /* Finally force any operand the pattern's predicate rejects
	 into a register of the expected mode.  */
      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Emit the pattern with the right number of operands; op[4] bounds
     the supported argument counts (at most 3 arguments + result).  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
11879
/* Return the upper 16 bits of the 8x16 multiplication.  */

static int
sparc_vis_mul8x16 (int e8, int e16)
{
  /* Scale the product down by 256, rounding to nearest by adding
     half of the divisor before the (truncating) division.  */
  const int product = e8 * e16;
  return (product + 128) / 256;
}
11887
11888 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11889 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11890
11891 static void
11892 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11893 tree inner_type, tree cst0, tree cst1)
11894 {
11895 unsigned i, num = VECTOR_CST_NELTS (cst0);
11896 int scale;
11897
11898 switch (fncode)
11899 {
11900 case SPARC_BUILTIN_FMUL8X16:
11901 for (i = 0; i < num; ++i)
11902 {
11903 int val
11904 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11905 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11906 n_elts->quick_push (build_int_cst (inner_type, val));
11907 }
11908 break;
11909
11910 case SPARC_BUILTIN_FMUL8X16AU:
11911 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11912
11913 for (i = 0; i < num; ++i)
11914 {
11915 int val
11916 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11917 scale);
11918 n_elts->quick_push (build_int_cst (inner_type, val));
11919 }
11920 break;
11921
11922 case SPARC_BUILTIN_FMUL8X16AL:
11923 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11924
11925 for (i = 0; i < num; ++i)
11926 {
11927 int val
11928 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11929 scale);
11930 n_elts->quick_push (build_int_cst (inner_type, val));
11931 }
11932 break;
11933
11934 default:
11935 gcc_unreachable ();
11936 }
11937 }
11938
/* Implement TARGET_FOLD_BUILTIN hook.

   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
   result of the function call is ignored.  NULL_TREE is returned if the
   function could not be folded.  */

static tree
sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		    tree *args, bool ignore)
{
  enum sparc_builtins code
    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
  tree arg0, arg1, arg2;

  /* When the result is unused, fold builtins without side effects to a
     zero constant.  The listed codes have side effects (e.g. FSR/GSR
     accesses or condition-mask updates) and must survive.  */
  if (ignore)
    switch (code)
      {
      case SPARC_BUILTIN_LDFSR:
      case SPARC_BUILTIN_STFSR:
      case SPARC_BUILTIN_ALIGNADDR:
      case SPARC_BUILTIN_WRGSR:
      case SPARC_BUILTIN_BMASK:
      case SPARC_BUILTIN_CMASK8:
      case SPARC_BUILTIN_CMASK16:
      case SPARC_BUILTIN_CMASK32:
	break;

      default:
	return build_zero_cst (rtype);
      }

  switch (code)
    {
    /* fexpand: widen each constant element, shifting left by 4 bits.  */
    case SPARC_BUILTIN_FEXPAND:
      arg0 = args[0];
      STRIP_NOPS (arg0);

      if (TREE_CODE (arg0) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  unsigned i;

	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      unsigned HOST_WIDE_INT val
		= TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (build_int_cst (inner_type, val << 4));
	    }
	  return n_elts.build ();
	}
      break;

    /* 8x16 multiplies: fold only when both operands are constant
       vectors; the element math lives in sparc_handle_vis_mul8x16.  */
    case SPARC_BUILTIN_FMUL8X16:
    case SPARC_BUILTIN_FMUL8X16AU:
    case SPARC_BUILTIN_FMUL8X16AL:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
	  return n_elts.build ();
	}
      break;

    /* fpmerge: interleave the elements of the two constant vectors,
       producing a vector with twice as many elements.  */
    case SPARC_BUILTIN_FPMERGE:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
	  unsigned i;
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
	    }

	  return n_elts.build ();
	}
      break;

    /* pdist/pdistn: sum of absolute element differences; pdist also
       adds in a third accumulator argument, pdistn starts from zero.  */
    case SPARC_BUILTIN_PDIST:
    case SPARC_BUILTIN_PDISTN:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      if (code == SPARC_BUILTIN_PDIST)
	{
	  arg2 = args[2];
	  STRIP_NOPS (arg2);
	}
      else
	arg2 = integer_zero_node;

      if (TREE_CODE (arg0) == VECTOR_CST
	  && TREE_CODE (arg1) == VECTOR_CST
	  && TREE_CODE (arg2) == INTEGER_CST)
	{
	  bool overflow = false;
	  widest_int result = wi::to_widest (arg2);
	  widest_int tmp;
	  unsigned i;

	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      tree e0 = VECTOR_CST_ELT (arg0, i);
	      tree e1 = VECTOR_CST_ELT (arg1, i);

	      wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;

	      /* tmp = |e0 - e1|, tracking overflow at every step.  */
	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
	      if (wi::neg_p (tmp))
		tmp = wi::neg (tmp, &neg2_ovf);
	      else
		neg2_ovf = wi::OVF_NONE;
	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
	      overflow |= ((neg1_ovf != wi::OVF_NONE)
			   | (neg2_ovf != wi::OVF_NONE)
			   | (add1_ovf != wi::OVF_NONE)
			   | (add2_ovf != wi::OVF_NONE));
	    }

	  /* widest_int is wide enough that none of the above can
	     actually overflow for valid vector constants.  */
	  gcc_assert (!overflow);

	  return wide_int_to_tree (rtype, result);
	}
      /* Non-constant operands: fall through to "not folded".  */

    default:
      break;
    }

  return NULL_TREE;
}
12084
/* ??? This duplicates information provided to the compiler by the
   ??? scheduler description.  Some day, teach genautomata to output
   ??? the latencies and then CSE will just use that.  */

/* Implement TARGET_RTX_COSTS: estimate the cost of expression X in
   *TOTAL.  Returning true means the subexpressions of X have already
   been accounted for; false lets the caller recurse into them.
   Latencies come from the per-CPU sparc_costs table.
   NOTE(review): SPEED is marked ATTRIBUTE_UNUSED but is passed to
   rtx_cost in the FMA case below — the attribute is merely a leftover
   warning suppressor.  */

static bool
sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
		 int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    /* Constants fitting a 13-bit signed immediate are free; anything
       else needs a sethi/or style sequence.  */
    case CONST_INT:
      if (SMALL_INT (x))
	*total = 0;
      else
	*total = 2;
      return true;

    /* Charge each wide-int half that does not fit simm13 separately.
       NOTE(review): this reads exactly elements 0 and 1 — assumes a
       two-element (double-word) constant.  */
    case CONST_WIDE_INT:
      *total = 0;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
	*total += 2;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
	*total += 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      *total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case FMA:
      {
	rtx sub;

	/* An fmadd/fmsub absorbs a negation of either the first or the
	   third operand for free, so skip the NEG when costing them.  */
	gcc_assert (float_mode_p);
	*total = sparc_costs->float_mul;

	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
	/* Software multiply (libcall) on 32-bit parts without hardware
	   multiply support.  */
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      int nbits;

	      /* Multiplies by constants get cheaper as the constant has
		 fewer set bits; count them with the n &= n-1 trick.  */
	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else
		/* Non-constant multiplier: assume an average popcount.  */
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode || !TARGET_HARD_MUL)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      /* Integer negation is a single instruction; float negation is
	 charged like the other FP unary moves below.  */
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (mode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    default:
      return false;
    }
}
12296
12297 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12298
12299 static inline bool
12300 general_or_i64_p (reg_class_t rclass)
12301 {
12302 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12303 }
12304
12305 /* Implement TARGET_REGISTER_MOVE_COST. */
12306
12307 static int
12308 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12309 reg_class_t from, reg_class_t to)
12310 {
12311 bool need_memory = false;
12312
12313 /* This helps postreload CSE to eliminate redundant comparisons. */
12314 if (from == NO_REGS || to == NO_REGS)
12315 return 100;
12316
12317 if (from == FPCC_REGS || to == FPCC_REGS)
12318 need_memory = true;
12319 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12320 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12321 {
12322 if (TARGET_VIS3)
12323 {
12324 int size = GET_MODE_SIZE (mode);
12325 if (size == 8 || size == 4)
12326 {
12327 if (! TARGET_ARCH32 || size == 4)
12328 return 4;
12329 else
12330 return 6;
12331 }
12332 }
12333 need_memory = true;
12334 }
12335
12336 if (need_memory)
12337 {
12338 if (sparc_cpu == PROCESSOR_ULTRASPARC
12339 || sparc_cpu == PROCESSOR_ULTRASPARC3
12340 || sparc_cpu == PROCESSOR_NIAGARA
12341 || sparc_cpu == PROCESSOR_NIAGARA2
12342 || sparc_cpu == PROCESSOR_NIAGARA3
12343 || sparc_cpu == PROCESSOR_NIAGARA4
12344 || sparc_cpu == PROCESSOR_NIAGARA7
12345 || sparc_cpu == PROCESSOR_M8)
12346 return 12;
12347
12348 return 6;
12349 }
12350
12351 return 2;
12352 }
12353
12354 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12355 This is achieved by means of a manual dynamic stack space allocation in
12356 the current frame. We make the assumption that SEQ doesn't contain any
12357 function calls, with the possible exception of calls to the GOT helper. */
12358
12359 static void
12360 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12361 {
12362 /* We must preserve the lowest 16 words for the register save area. */
12363 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12364 /* We really need only 2 words of fresh stack space. */
12365 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12366
12367 rtx slot
12368 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12369 SPARC_STACK_BIAS + offset));
12370
12371 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12372 emit_insn (gen_rtx_SET (slot, reg));
12373 if (reg2)
12374 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12375 reg2));
12376 emit_insn (seq);
12377 if (reg2)
12378 emit_insn (gen_rtx_SET (reg2,
12379 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12380 emit_insn (gen_rtx_SET (reg, slot));
12381 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12382 }
12383
12384 /* Output the assembler code for a thunk function. THUNK_DECL is the
12385 declaration for the thunk function itself, FUNCTION is the decl for
12386 the target function. DELTA is an immediate constant offset to be
12387 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12388 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12389
static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  /* NOTE(review): despite the ATTRIBUTE_UNUSED above, THUNK_FNDECL is in
     fact used below (fnname, assemble_start/end_function).  */
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int int_arg_first;

  /* The thunk is emitted and assembled directly, bypassing the normal
     RTL pipeline, so pretend reload and the epilogue pass have run.  */
  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (TARGET_FLAT)
    {
      sparc_leaf_function_p = 1;

      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }
  else if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;

      /* This will cause final.cc to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  /* DELTA does not fit in a 13-bit signed immediate, so it is
	     materialized in the scratch register %g1 first.  */
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  /* %g5 is free: materialize the offset there.  */
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
	{
	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
	  start_sequence ();
	  load_got_register ();	 /* clobbers %o7 */
	  if (!TARGET_VXWORKS_RTP)
	    pic_offset_table_rtx = got_register_rtx;
	  scratch = sparc_legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  /* Load the function address into %g1 with a sethi/or pair.  */
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
	{
	  switch (sparc_code_model)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  reload_completed = 0;
  epilogue_completed = 0;
}
12592
12593 /* Return true if sparc_output_mi_thunk would be able to output the
12594 assembler code for the thunk function specified by the arguments
12595 it is passed, and false otherwise. */
12596 static bool
12597 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12598 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12599 HOST_WIDE_INT vcall_offset,
12600 const_tree function ATTRIBUTE_UNUSED)
12601 {
12602 /* Bound the loop used in the default method above. */
12603 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12604 }
12605
12606 /* How to allocate a 'struct machine_function'. */
12607
12608 static struct machine_function *
12609 sparc_init_machine_status (void)
12610 {
12611 return ggc_cleared_alloc<machine_function> ();
12612 }
12613
12614 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12616
12617 static unsigned HOST_WIDE_INT
12618 sparc_asan_shadow_offset (void)
12619 {
12620 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12621 }
12622
12623 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12625 We need to emit DTP-relative relocations. */
12626
12627 static void
12628 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12629 {
12630 switch (size)
12631 {
12632 case 4:
12633 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12634 break;
12635 case 8:
12636 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12637 break;
12638 default:
12639 gcc_unreachable ();
12640 }
12641 output_addr_const (file, x);
12642 fputs (")", file);
12643 }
12644
12645 /* Do whatever processing is required at the end of a file. */
12646
static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_needed)
    {
      const char *name = XSTR (got_helper_rtx, 0);
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a hidden one-only function, so copies
	     emitted in several object files get merged by the linker.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
							    NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Otherwise just emit an aligned local label in text.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
	  switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	output_asm_insn (".cfi_startproc", NULL);
#endif
      if (flag_delayed_branch)
	{
	  /* The add executes in the delay slot of the jmp.  */
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
	}
      else
	{
	  output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" nop", NULL);
	}
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	output_asm_insn (".cfi_endproc", NULL);
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}
12716
12717 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12718 /* Implement TARGET_MANGLE_TYPE. */
12719
12720 static const char *
12721 sparc_mangle_type (const_tree type)
12722 {
12723 if (TARGET_ARCH32
12724 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12725 && TARGET_LONG_DOUBLE_128)
12726 return "g";
12727
12728 /* For all other types, use normal C++ mangling. */
12729 return NULL;
12730 }
12731 #endif
12732
12733 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12734 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
12735 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12736
void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  /* MM accumulates the barrier bits required by MODEL; IMPLIED accumulates
     the bits already guaranteed by the system memory model, which are
     removed from MM at the end.  */
  int mm = 0, implied = 0;

  /* The cases below deliberately fall through: each stronger model adds
     its guarantees on top of the weaker ones.  */
  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  /* Barrier before the atomic operation: needed by release-flavored
     models.  */
  if (before_after & 1)
    {
      if (is_mm_release (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  /* Barrier after the atomic operation: needed by acquire-flavored
     models.  */
  if (before_after & 2)
    {
      if (is_mm_acquire (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
12818
12819 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12820 compare and swap on the word containing the byte or half-word. */
12821
static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  /* ADDR = address of the containing 32-bit word (ADDR1 rounded down).  */
  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* OFF = byte offset of the sub-word within that word.  */
  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  /* VAL = current contents of the containing word.  */
  val = copy_to_reg (memsi);

  /* Turn the byte offset into a bit shift count for the sub-word:
     XOR with 3 (QImode) or 2 (HImode), then multiply by 8.  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  /* MASK = sub-word mask shifted into position within the word.  */
  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL = containing word with the sub-word bits cleared.  */
  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  /* OLDV/NEWV = OLDVAL/NEWVAL shifted into position and masked.  */
  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));

  /* Loop until the word-sized CAS does not fail merely because the bits
     OUTSIDE the sub-word changed underneath us.  */
  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  /* Merge the expected/new sub-word with the current surrounding bits.  */
  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));

  /* Optimistically report success; overwritten below on retry paths.  */
  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  /* RESV = word read back by the CAS with the sub-word cleared.  */
  emit_insn (gen_rtx_SET (resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  /* Retry if only the surrounding bits differed; record them in VAL.  */
  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the sub-word from the word read back by the CAS.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
12912
12913 /* Expand code to perform a compare-and-swap. */
12914
12915 void
12916 sparc_expand_compare_and_swap (rtx operands[])
12917 {
12918 rtx bval, retval, mem, oldval, newval;
12919 machine_mode mode;
12920 enum memmodel model;
12921
12922 bval = operands[0];
12923 retval = operands[1];
12924 mem = operands[2];
12925 oldval = operands[3];
12926 newval = operands[4];
12927 model = (enum memmodel) INTVAL (operands[6]);
12928 mode = GET_MODE (mem);
12929
12930 sparc_emit_membar_for_model (model, 3, 1);
12931
12932 if (reg_overlap_mentioned_p (retval, oldval))
12933 oldval = copy_to_reg (oldval);
12934
12935 if (mode == QImode || mode == HImode)
12936 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12937 else
12938 {
12939 rtx (*gen) (rtx, rtx, rtx, rtx);
12940 rtx x;
12941
12942 if (mode == SImode)
12943 gen = gen_atomic_compare_and_swapsi_1;
12944 else
12945 gen = gen_atomic_compare_and_swapdi_1;
12946 emit_insn (gen (retval, mem, oldval, newval));
12947
12948 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12949 if (x != bval)
12950 convert_move (bval, x, 1);
12951 }
12952
12953 sparc_emit_membar_for_model (model, 3, 2);
12954 }
12955
/* Expand the variable permutation selector SEL for vector mode VMODE into
   a byte-granular selector and load it via the bmask instruction; the
   final addition/merge is folded into the bmask insn itself.  */

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case E_V2SImode:
      /* Spread the two 2-bit element indices A and B into byte positions
	 and scale them to byte offsets (each element is 4 bytes).  */
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case E_V4HImode:
      /* Spread the four 3-bit element indices A..D into byte positions
	 and scale them to byte offsets (each element is 2 bytes).  */
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case E_V8QImode:
      /* Compress the eight 4-bit element indices A..H (one nibble each,
	 already byte offsets) into the low 32 bits.  */
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
13062
13063 /* Implement TARGET_VEC_PERM_CONST. */
13064
13065 static bool
13066 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
13067 rtx op1, const vec_perm_indices &sel)
13068 {
13069 if (!TARGET_VIS2)
13070 return false;
13071
13072 /* All 8-byte permutes are supported. */
13073 if (!target)
13074 return GET_MODE_SIZE (vmode) == 8;
13075
13076 /* Force target-independent code to convert constant permutations on other
13077 modes down to V8QI. Rely on this to avoid the complexity of the byte
13078 order of the permutation. */
13079 if (vmode != V8QImode)
13080 return false;
13081
13082 rtx nop0 = force_reg (vmode, op0);
13083 if (op0 == op1)
13084 op1 = nop0;
13085 op0 = nop0;
13086 op1 = force_reg (vmode, op1);
13087
13088 unsigned int i, mask;
13089 for (i = mask = 0; i < 8; ++i)
13090 mask |= (sel[i] & 0xf) << (28 - i*4);
13091 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13092
13093 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13094 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13095 return true;
13096 }
13097
13098 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
13099
13100 static bool
13101 sparc_frame_pointer_required (void)
13102 {
13103 /* If the stack pointer is dynamically modified in the function, it cannot
13104 serve as the frame pointer. */
13105 if (cfun->calls_alloca)
13106 return true;
13107
13108 /* If the function receives nonlocal gotos, it needs to save the frame
13109 pointer in the nonlocal_goto_save_area object. */
13110 if (cfun->has_nonlocal_label)
13111 return true;
13112
13113 /* In flat mode, that's it. */
13114 if (TARGET_FLAT)
13115 return false;
13116
13117 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13118 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13119 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13120 }
13121
13122 /* The way this is structured, we can't eliminate SFP in favor of SP
13123 if the frame pointer is required: we want to use the SFP->HFP elimination
13124 in that case. But the test in update_eliminables doesn't know we are
13125 assuming below that we only do the former elimination. */
13126
13127 static bool
13128 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13129 {
13130 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13131 }
13132
13133 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13134 they won't be allocated. */
13135
static void
sparc_conditional_register_usage (void)
{
  /* The PIC register, if any, is permanently reserved.  */
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
  /* then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper FP registers and extra FP condition codes only exist
	 on V9.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      /* Without an FPU, all FP registers are off-limits.  */
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
  /* then honor it.  Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    /* %g4 holds the data segment base in the embedded medium/anywhere
       code model.  */
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;

  /* Disable leaf function optimization in flat mode.  */
  if (TARGET_FLAT)
    memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);

  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}
13186
13187 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13188
13189 static bool
13190 sparc_use_pseudo_pic_reg (void)
13191 {
13192 return !TARGET_VXWORKS_RTP && flag_pic;
13193 }
13194
13195 /* Implement TARGET_INIT_PIC_REG. */
13196
13197 static void
13198 sparc_init_pic_reg (void)
13199 {
13200 edge entry_edge;
13201 rtx_insn *seq;
13202
13203 /* In PIC mode, we need to always initialize the PIC register if optimization
13204 is enabled, because we are called from IRA and LRA may later force things
13205 to the constant pool for optimization purposes. */
13206 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13207 return;
13208
13209 start_sequence ();
13210 load_got_register ();
13211 if (!TARGET_VXWORKS_RTP)
13212 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13213 seq = get_insns ();
13214 end_sequence ();
13215
13216 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13217 insert_insn_on_edge (seq, entry_edge);
13218 commit_one_edge_insertion (entry_edge);
13219 }
13220
13221 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13222
13223 - We can't load constants into FP registers.
13224 - We can't load FP constants into integer registers when soft-float,
13225 because there is no soft-float pattern with a r/F constraint.
13226 - We can't load FP constants into integer registers for TFmode unless
13227 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13228 - Try and reload integer constants (symbolic or otherwise) back into
13229 registers directly, rather than having them dumped to memory. */
13230
static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      /* Reject FP classes outright for constants, and the other cases
	 enumerated in the comment above: FP constants under soft-float,
	 and non-zero TFmode constants.  */
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      /* Integer constants reload best directly into integer registers.  */
      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      /* Integer vector constants only go into FP registers, and then
	 only the all-zeros and all-ones bit patterns.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  /* With VIS3 on 32-bit, steer classes containing the upper FP registers
     away from them when X lives in an integer register, since fp<->int
     moves only work through the lower FP registers (see
     sparc_secondary_reload).  */
  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}
13270
13271 /* Return true if we use LRA instead of reload pass. */
13272
13273 static bool
13274 sparc_lra_p (void)
13275 {
13276 return TARGET_LRA;
13277 }
13278
13279 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13280 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13281
const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  /* Ensure the low word of operand 1 is zero-extended unless we can prove
     the register already holds a valid 64-bit value.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  /* NOTE(review): alternative 1 appears to be the case where the high part
     of operand 1 can be used as a 64-bit scratch — confirm against the
     insn pattern's constraint alternatives.  */
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Multiply by an immediate: assemble the 64-bit first operand,
	 multiply, then split the 64-bit product back into a pair.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  /* Use scratch register %3 to build the 64-bit value.  */
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Squaring: only one 64-bit value needs to be assembled.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  /* General register x register case: zero-extend the low word of
     operand 2 as well, then assemble both 64-bit values.  */
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      /* Reuse the operand-1 pair as scratch space for the assembly.  */
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      /* Use scratch registers %3 and %4 for the two 64-bit values.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
13354
13355 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13356 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13357 and INNER_MODE are the modes describing TARGET. */
13358
static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  /* Zero-extend the element to SImode and place it in the low part of T1.  */
  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart(SImode, t1), elt);

  /* Pick the byte-selector mask that replicates the element stored in
     the low bytes of T1 across every lane of the result.  */
  switch (mode)
    {
    case E_V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case E_V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case E_V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit BMASK to set up the selector (presumably stored in %gsr, which
     BSHUFFLE reads); the integer result register is a throwaway.  */
  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}
13393
13394 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13395 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13396
13397 static void
13398 vector_init_fpmerge (rtx target, rtx elt)
13399 {
13400 rtx t1, t2, t2_low, t3, t3_low;
13401
13402 t1 = gen_reg_rtx (V4QImode);
13403 elt = convert_modes (SImode, QImode, elt, true);
13404 emit_move_insn (gen_lowpart (SImode, t1), elt);
13405
13406 t2 = gen_reg_rtx (V8QImode);
13407 t2_low = gen_lowpart (V4QImode, t2);
13408 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13409
13410 t3 = gen_reg_rtx (V8QImode);
13411 t3_low = gen_lowpart (V4QImode, t3);
13412 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13413
13414 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13415 }
13416
13417 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13418 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13419
13420 static void
13421 vector_init_faligndata (rtx target, rtx elt)
13422 {
13423 rtx t1 = gen_reg_rtx (V4HImode);
13424 int i;
13425
13426 elt = convert_modes (SImode, HImode, elt, true);
13427 emit_move_insn (gen_lowpart (SImode, t1), elt);
13428
13429 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13430 force_reg (SImode, GEN_INT (6)),
13431 const0_rtx));
13432
13433 for (i = 0; i < 4; i++)
13434 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13435 }
13436
13437 /* Emit code to initialize TARGET to values for individual fields VALS. */
13438
void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  /* Count non-constant elements and detect the all-identical case.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Fully constant vectors become a single CONST_VECTOR move.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* A one-element vector is just a scalar move in disguise.  */
  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  /* Two word-sized elements can be moved as high and low word parts.  */
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  /* Splat of a single value into an 8-byte vector: use the best VIS
     idiom available (BSHUFFLE, FPMERGE or FALIGNDATA).  */
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  /* Fallback: store the elements one by one into a stack temporary and
     load the whole vector from there.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
13516
13517 /* Implement TARGET_SECONDARY_RELOAD. */
13518
static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  /* Non-PIC symbolic (resp. text-segment) addresses under the medany
     (resp. embmedany) code models need a multi-insn sequence: hand the
     reload off to the target's reload_in/reload_out patterns.  */
  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  /* No secondary reload needed.  */
  return NO_REGS;
}
13581
13582 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13583
13584 On SPARC when not VIS3 it is not possible to directly move data
13585 between GENERAL_REGS and FP_REGS. */
13586
13587 static bool
13588 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13589 reg_class_t class2)
13590 {
13591 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13592 && (! TARGET_VIS3
13593 || GET_MODE_SIZE (mode) > 8
13594 || GET_MODE_SIZE (mode) < 4));
13595 }
13596
13597 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13598
13599 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13600 because the movsi and movsf patterns don't handle r/f moves.
13601 For v8 we copy the default definition. */
13602
13603 static machine_mode
13604 sparc_secondary_memory_needed_mode (machine_mode mode)
13605 {
13606 if (TARGET_ARCH64)
13607 {
13608 if (GET_MODE_BITSIZE (mode) < 32)
13609 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13610 return mode;
13611 }
13612 else
13613 {
13614 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13615 return mode_for_size (BITS_PER_WORD,
13616 GET_MODE_CLASS (mode), 0).require ();
13617 return mode;
13618 }
13619 }
13620
13621 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13622 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13623
bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  /* DImode comparisons need 64-bit registers; punt on 32-bit.  */
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  /* Without hardware quad support, do TFmode comparisons via a library
     call and continue with the comparison it returns.  */
  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  /* The conditional move below only writes DST on one arm, so when
     neither source is already DST, pre-load the "else" value.  */
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      /* Use a scratch destination if DST appears in the comparison, to
	 avoid clobbering a comparison input.  */
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      /* DST already holds the "then" value: move the "else" value under
	 the reversed condition instead.  */
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  /* A V9 register-contents comparison against zero can use the DImode
     register directly; otherwise materialize a CC register.  */
  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  /* Copy back from the scratch destination if one was used.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
13678
13679 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13680 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13681 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13682 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13683 code to be used for the condition mask. */
13684
13685 void
13686 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13687 {
13688 enum rtx_code code = signed_condition (GET_CODE (operands[3]));
13689 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13690
13691 mask = gen_reg_rtx (Pmode);
13692 cop0 = operands[4];
13693 cop1 = operands[5];
13694 if (code == LT || code == GE)
13695 {
13696 code = swap_condition (code);
13697 std::swap (cop0, cop1);
13698 }
13699
13700 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13701
13702 fcmp = gen_rtx_UNSPEC (Pmode,
13703 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13704 fcode);
13705
13706 cmask = gen_rtx_UNSPEC (DImode,
13707 gen_rtvec (2, mask, gsr),
13708 ccode);
13709
13710 bshuf = gen_rtx_UNSPEC (mode,
13711 gen_rtvec (3, operands[1], operands[2], gsr),
13712 UNSPEC_BSHUFFLE);
13713
13714 emit_insn (gen_rtx_SET (mask, fcmp));
13715 emit_insn (gen_rtx_SET (gsr, cmask));
13716
13717 emit_insn (gen_rtx_SET (operands[0], bshuf));
13718 }
13719
13720 /* On the SPARC, any mode which naturally allocates into the single float
13721 registers should return 4 here. */
13722
13723 unsigned int
13724 sparc_regmode_natural_size (machine_mode mode)
13725 {
13726 const enum mode_class cl = GET_MODE_CLASS (mode);
13727
13728 if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
13729 return 4;
13730
13731 return UNITS_PER_WORD;
13732 }
13733
13734 /* Implement TARGET_HARD_REGNO_NREGS.
13735
13736 On SPARC, ordinary registers hold 32 bits worth; this means both
13737 integer and floating point registers. On v9, integer regs hold 64
13738 bits worth; floating point regs hold 32 bits worth (this includes the
13739 new fp regs as even the odd ones are included in the hard register
13740 count). */
13741
13742 static unsigned int
13743 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13744 {
13745 if (regno == SPARC_GSR_REG)
13746 return 1;
13747 if (TARGET_ARCH64)
13748 {
13749 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13750 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13751 return CEIL (GET_MODE_SIZE (mode), 4);
13752 }
13753 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13754 }
13755
13756 /* Implement TARGET_HARD_REGNO_MODE_OK.
13757
13758 ??? Because of the funny way we pass parameters we should allow certain
13759 ??? types of float/complex values to be in integer registers during
13760 ??? RTL generation. This only matters on arch32. */
13761
13762 static bool
13763 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13764 {
13765 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13766 }
13767
13768 /* Implement TARGET_MODES_TIEABLE_P.
13769
13770 For V9 we have to deal with the fact that only the lower 32 floating
13771 point registers are 32-bit addressable. */
13772
13773 static bool
13774 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13775 {
13776 enum mode_class mclass1, mclass2;
13777 unsigned short size1, size2;
13778
13779 if (mode1 == mode2)
13780 return true;
13781
13782 mclass1 = GET_MODE_CLASS (mode1);
13783 mclass2 = GET_MODE_CLASS (mode2);
13784 if (mclass1 != mclass2)
13785 return false;
13786
13787 if (! TARGET_V9)
13788 return true;
13789
13790 /* Classes are the same and we are V9 so we have to deal with upper
13791 vs. lower floating point registers. If one of the modes is a
13792 4-byte mode, and the other is not, we have to mark them as not
13793 tieable because only the lower 32 floating point register are
13794 addressable 32-bits at a time.
13795
13796 We can't just test explicitly for SFmode, otherwise we won't
13797 cover the vector mode cases properly. */
13798
13799 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13800 return true;
13801
13802 size1 = GET_MODE_SIZE (mode1);
13803 size2 = GET_MODE_SIZE (mode2);
13804 if ((size1 > 4 && size2 == 4)
13805 || (size2 > 4 && size1 == 4))
13806 return false;
13807
13808 return true;
13809 }
13810
13811 /* Implement TARGET_CSTORE_MODE. */
13812
13813 static scalar_int_mode
13814 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13815 {
13816 return (TARGET_ARCH64 ? DImode : SImode);
13817 }
13818
13819 /* Return the compound expression made of T1 and T2. */
13820
static inline tree
compound_expr (tree t1, tree t2)
{
  /* T1 is evaluated first, then T2; the whole expression has void type
     since callers only use it to chain statements.  */
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}
13826
13827 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13828
static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Without an FPU there is no FP environment to save and restore.  */
  if (!TARGET_FPU)
    return;

  /* Bits 5-9 of %fsr are the accrued exception flags and bits 23-27
     the trap enable mask.  */
  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  /* The TARGET_EXPR ties the STFSR call to the initialization of
     fenv_var so that it is not separated by later gimplification.  */
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  /* On targets where the FE_* values sit in the low bits, shift the
     raw %fsr accrued-exception bits down before raising them.  */
  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
13917
13918 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13919
13920 SImode loads to floating-point registers are not zero-extended.
13921 The definition for LOAD_EXTEND_OP specifies that integer loads
13922 narrower than BITS_PER_WORD will be zero-extended. As a result,
13923 we inhibit changes from SImode unless they are to a mode that is
13924 identical in size.
13925
13926 Likewise for SFmode, since word-mode paradoxical subregs are
13927 problematic on big-endian architectures. */
13928
13929 static bool
13930 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13931 reg_class_t rclass)
13932 {
13933 if (TARGET_ARCH64
13934 && GET_MODE_SIZE (from) == 4
13935 && GET_MODE_SIZE (to) != 4)
13936 return !reg_classes_intersect_p (rclass, FP_REGS);
13937 return true;
13938 }
13939
13940 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13941
13942 static HOST_WIDE_INT
13943 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13944 {
13945 if (TREE_CODE (exp) == STRING_CST)
13946 return MAX (align, FASTEST_ALIGNMENT);
13947 return align;
13948 }
13949
13950 /* Implement TARGET_ZERO_CALL_USED_REGS.
13951
13952 Generate a sequence of instructions that zero registers specified by
13953 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
13954 zeroed. */
13955
static HARD_REG_SET
sparc_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	/* Do not touch the CC registers or the FP registers if no VIS.
	   Clearing the bit reports these registers as not zeroed.  */
	if (regno >= SPARC_FCC_REG
	    || (regno >= SPARC_FIRST_FP_REG && !TARGET_VIS))
	  CLEAR_HARD_REG_BIT (need_zeroed_hardregs, regno);

	/* Do not access the odd upper FP registers individually.
	   They are covered by the DFmode clear of the preceding even
	   register, so the bit is left set.  */
	else if (regno >= SPARC_FIRST_V9_FP_REG && (regno & 1))
	  ;

	/* Use the most natural mode for the registers, which is not given by
	   regno_reg_rtx/reg_raw_mode for the FP registers on the SPARC.  */
	else
	  {
	    machine_mode mode;
	    rtx reg;

	    if (regno < SPARC_FIRST_FP_REG)
	      {
		reg = regno_reg_rtx[regno];
		mode = GET_MODE (reg);
	      }
	    else
	      {
		/* SFmode for the lower FP registers, DFmode for the
		   upper (V9) ones.  */
		mode = regno < SPARC_FIRST_V9_FP_REG ? SFmode : DFmode;
		reg = gen_raw_REG (mode, regno);
	      }

	    emit_move_insn (reg, CONST0_RTX (mode));
	  }
      }

  return need_zeroed_hardregs;
}
13995
13996 #include "gt-sparc.h"
13997