/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew (at) sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"

/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.
   The test is that X's unspec number lies in the NUM_SYMBOL_TYPES-wide
   range starting at UNSPEC_ADDRESS_FIRST.  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X, i.e. the unspec number
   rebased to UNSPEC_ADDRESS_FIRST.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))

/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching information of C records a static frm,
   or false.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)

/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)

/* Information about a function's frame layout.  */
struct GTY(())  riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct by zeroing its fields.
     NOTE(review): the definition later in this file does not assign
     multi_push_adj_base or multi_push_adj_addi; confirm whether leaving
     them untouched is intentional.  */
  void reset(void);
};

/* Privilege levels; machine_function::interrupt_mode below stores one of
   these for an interrupt handler.  */
enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

/* Mode switching state for the FRM rounding modes, embedded in
   machine_function as mode_sw_info.  */
struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding mode instruction in the
     function.  */
  bool static_frm_p;

  /* Start with no dynamic FRM rtx and no static rounding mode recorded.  */
  mode_switching_info ()
    {
      dynamic_frm = NULL_RTX;
      static_frm_p = false;
    }
};

/* Target-specific per-function state; the macros above reach it through
   the `machine' member of their argument.  */
struct GTY(())  machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8

/* Single-bit flags naming instruction pairs; the tuning tables in this file
   OR them together in riscv_tune_param::fusible_ops.  */
enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};

/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  /* Two-entry cost pairs.  NOTE(review): what the two slots select is not
     visible in this part of the file; confirm at the use sites.  */
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool use_divmod_expansion;
  /* Bitwise OR of riscv_fusion_pairs values.  */
  unsigned int fusible_ops;
  /* Vector cost table; several tuning tables in this file leave it NULL.  */
  const struct cpu_vector_cost *vec_costs;
};


/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.
*/
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;

/* Index R is the smallest register class that contains register R.
   The initializer below has 128 entries, four per row.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  /* Indices 0-31.  */
  GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
  GR_REGS,	GR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  JALR_REGS,	JALR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  /* Indices 32-63.  */
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  /* Indices 64-65 are FRAME_REGS; 66-95 belong to no class.  */
  FRAME_REGS,	FRAME_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
  /* Index 96 is VM_REGS; 97-127 are VD_REGS.  */
  VM_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
};

/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost  */
  1, /* fp_stmt_cost  */
  1, /* gather_load_cost  */
  1, /* scatter_store_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* permute_cost  */
  1, /* align_load_cost  */
  1, /* align_store_cost  */
  2, /* unalign_load_cost  */
  2, /* unalign_store_cost  */
};

/* RVV costs for VLA vector operations.  The values in the nested
   initializer are the same as in rvv_vls_vector_cost above.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost  */
    1, /* fp_stmt_cost  */
    1, /* gather_load_cost  */
    1, /* scatter_store_cost  */
    1, /* vec_to_scalar_cost  */
    1, /* scalar_to_vec_cost  */
    1, /* permute_cost  */
    1, /* align_load_cost  */
    1, /* align_store_cost  */
    2, /* unalign_load_cost  */
    2, /* unalign_store_cost  */
  },
};

/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR  */
  2, /* FR2VR  */
  2, /* VR2GR  */
  2, /* VR2FR  */
};

/* Generic costs for vector insn classes.  It is supposed to be the vector cost
   models used by default if no other cost model was specified.
*/
static const struct cpu_vector_cost generic_vector_cost = {
  1,			    /* scalar_int_stmt_cost  */
  1,			    /* scalar_fp_stmt_cost  */
  1,			    /* scalar_load_cost  */
  1,			    /* scalar_store_cost  */
  3,			    /* cond_taken_branch_cost  */
  1,			    /* cond_not_taken_branch_cost  */
  &rvv_vls_vector_cost,	    /* vls  */
  &rvv_vla_vector_cost,	    /* vla  */
  &rvv_regmove_vector_cost, /* regmove  */
};

/* Costs to use when optimizing for rocket.  No vector cost table is
   attached.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,                           /* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  No vector cost table
   is attached.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)},	/* int_div */
  2,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  8,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,                           /* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.
*/
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  3,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  The entries match
   sifive_p400_tune_info above except for issue_rate.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  4,						/* issue_rate */
  4,						/* branch_cost */
  3,						/* memory_cost */
  4,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.
*/
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)}, /* int_div */
  1,            /* issue_rate */
  3,            /* branch_cost */
  5,            /* memory_cost */
  8,		/* fmv_cost */
  false,            /* slow_unaligned_access */
  false,	/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,                           /* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  No vector cost table
   is attached.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  6,						/* issue_rate */
  3,						/* branch_cost */
  3,						/* memory_cost */
  3,						/* fmv_cost */
  true,						/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
  NULL,						/* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.
*/
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)},	/* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)},	/* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  4,						/* memory_cost */
  4,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,                           /* fusible_ops */
  &generic_vector_cost,				/* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  8,						/* fmv_cost */
  false,					/* slow_unaligned_access */
  false,					/* use_divmod_expansion */
  RISCV_FUSE_NOTHING,                           /* fusible_ops */
  NULL,						/* vector cost */
};

/* Forward declarations; the three attribute handlers are referenced by the
   attribute tables below.  */
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);

/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, to appoint the exact
     bit size of the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

/* The table above, registered under the "gnu" namespace.  */
static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};

static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, to appoint the exact
     bit size of the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};

/* The table above, registered under the "riscv" namespace.  */
static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};

/* Both scoped attribute tables.  */
static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};

/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};

/* A table describing all the processors GCC knows about.  Each RISCV_TUNE
   use in riscv-cores.def expands to one initializer of the table.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};

/* Global variable to distinguish whether we should save and restore s0/fp for
   function.  */
static bool riscv_save_frame_pointer;

/* Zcmp operation kinds, numbered from zero.  ZCMP_OP_NUM follows the last
   operation and therefore equals the number of operations.  */
typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

/* Pointer to a function that maps a machine_mode to an insn_code.  */
typedef insn_code (*code_for_push_pop_t) (machine_mode);

/* Zero the frame description.
   NOTE(review): multi_push_adj_base and multi_push_adj_addi are members of
   riscv_frame_info but are not assigned here; confirm whether they are
   meant to survive a reset.  */
void riscv_frame_info::reset(void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  Always returns 32.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

/* Get the arch string from an options object.
*/ 685 686 template <class T> 687 static const char * 688 get_arch_str (const T *opts) 689 { 690 return opts->x_riscv_arch_string; 691 } 692 693 template <class T> 694 static const char * 695 get_tune_str (const T *opts) 696 { 697 const char *tune_string = RISCV_TUNE_STRING_DEFAULT; 698 if (opts->x_riscv_tune_string) 699 tune_string = opts->x_riscv_tune_string; 700 else if (opts->x_riscv_cpu_string) 701 tune_string = opts->x_riscv_cpu_string; 702 return tune_string; 703 } 704 705 /* Return the riscv_tune_info entry for the given name string, return nullptr 706 if NULL_P is true, otherwise return an placeholder and report error. */ 707 708 const struct riscv_tune_info * 709 riscv_parse_tune (const char *tune_string, bool null_p) 710 { 711 const riscv_cpu_info *cpu = riscv_find_cpu (tune_string); 712 713 if (cpu) 714 tune_string = cpu->tune; 715 716 for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++) 717 if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0) 718 return riscv_tune_info_table + i; 719 720 if (null_p) 721 return nullptr; 722 723 error ("unknown cpu %qs for %<-mtune%>", tune_string); 724 return riscv_tune_info_table; 725 } 726 727 /* Helper function for riscv_build_integer; arguments are as for 728 riscv_build_integer. */ 729 730 static int 731 riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS], 732 HOST_WIDE_INT value, machine_mode mode) 733 { 734 HOST_WIDE_INT low_part = CONST_LOW_PART (value); 735 int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost; 736 struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS]; 737 738 if (SMALL_OPERAND (value) || LUI_OPERAND (value)) 739 { 740 /* Simply ADDI or LUI. */ 741 codes[0].code = UNKNOWN; 742 codes[0].value = value; 743 return 1; 744 } 745 if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value)) 746 { 747 /* Simply BSETI. */ 748 codes[0].code = UNKNOWN; 749 codes[0].value = value; 750 751 /* RISC-V sign-extends all 32bit values that live in a 32bit 752 register. 
To avoid paradoxes, we thus need to use the 753 sign-extended (negative) representation (-1 << 31) for the 754 value, if we want to build (1 << 31) in SImode. This will 755 then expand to an LUI instruction. */ 756 if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31)) 757 codes[0].value = (HOST_WIDE_INT_M1U << 31); 758 759 return 1; 760 } 761 762 /* End with ADDI. When constructing HImode constants, do not generate any 763 intermediate value that is not itself a valid HImode constant. The 764 XORI case below will handle those remaining HImode constants. */ 765 if (low_part != 0 766 && (mode != HImode 767 || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1))) 768 { 769 HOST_WIDE_INT upper_part = value - low_part; 770 if (mode != VOIDmode) 771 upper_part = trunc_int_for_mode (value - low_part, mode); 772 773 alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode); 774 if (alt_cost < cost) 775 { 776 alt_codes[alt_cost-1].code = PLUS; 777 alt_codes[alt_cost-1].value = low_part; 778 memcpy (codes, alt_codes, sizeof (alt_codes)); 779 cost = alt_cost; 780 } 781 } 782 783 /* End with XORI. */ 784 if (cost > 2 && (low_part < 0 || mode == HImode)) 785 { 786 alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode); 787 if (alt_cost < cost) 788 { 789 alt_codes[alt_cost-1].code = XOR; 790 alt_codes[alt_cost-1].value = low_part; 791 memcpy (codes, alt_codes, sizeof (alt_codes)); 792 cost = alt_cost; 793 } 794 } 795 796 /* Eliminate trailing zeros and end with SLLI. */ 797 if (cost > 2 && (value & 1) == 0) 798 { 799 int shift = ctz_hwi (value); 800 unsigned HOST_WIDE_INT x = value; 801 x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift); 802 803 /* Don't eliminate the lower 12 bits if LUI might apply. 
*/ 804 if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS)) 805 shift -= IMM_BITS, x <<= IMM_BITS; 806 807 alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode); 808 if (alt_cost < cost) 809 { 810 alt_codes[alt_cost-1].code = ASHIFT; 811 alt_codes[alt_cost-1].value = shift; 812 memcpy (codes, alt_codes, sizeof (alt_codes)); 813 cost = alt_cost; 814 } 815 } 816 817 if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB)) 818 { 819 int leading_ones = clz_hwi (~value); 820 int trailing_ones = ctz_hwi (~value); 821 822 /* If all bits are one except a few that are zero, and the zero bits 823 are within a range of 11 bits, then we can synthesize a constant 824 by loading a small negative constant and rotating. */ 825 if (leading_ones < 64 826 && ((64 - leading_ones - trailing_ones) < 12)) 827 { 828 codes[0].code = UNKNOWN; 829 /* The sign-bit might be zero, so just rotate to be safe. */ 830 codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones) 831 | (value << (64 - trailing_ones))); 832 codes[1].code = ROTATERT; 833 codes[1].value = 64 - trailing_ones; 834 cost = 2; 835 } 836 /* Handle the case where the 11 bit range of zero bits wraps around. */ 837 else 838 { 839 int upper_trailing_ones = ctz_hwi (~value >> 32); 840 int lower_leading_ones = clz_hwi (~value << 32); 841 842 if (upper_trailing_ones < 32 && lower_leading_ones < 32 843 && ((64 - upper_trailing_ones - lower_leading_ones) < 12)) 844 { 845 codes[0].code = UNKNOWN; 846 /* The sign-bit might be zero, so just rotate to be safe. */ 847 codes[0].value = ((value << (32 - upper_trailing_ones)) 848 | ((unsigned HOST_WIDE_INT) value 849 >> (32 + upper_trailing_ones))); 850 codes[1].code = ROTATERT; 851 codes[1].value = 32 - upper_trailing_ones; 852 cost = 2; 853 } 854 } 855 } 856 857 gcc_assert (cost <= RISCV_MAX_INTEGER_OPS); 858 return cost; 859 } 860 861 /* Fill CODES with a sequence of rtl operations to load VALUE. 862 Return the number of operations needed. 
*/

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int best_cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && best_cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      const int shift = clz_hwi (value);
      const HOST_WIDE_INT candidates[2] = {
	/* Try filling trailing bits with 1s.  */
	(value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1),
	/* Try filling trailing bits with 0s.  */
	value << shift
      };

      /* The candidates are tried in the order listed; a later one replaces
	 an earlier one only when it is strictly cheaper.  */
      for (HOST_WIDE_INT shifted_val : candidates)
	{
	  int alt_cost
	    = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
	  if (alt_cost >= best_cost)
	    continue;

	  alt_codes[alt_cost - 1].code = LSHIFTRT;
	  alt_codes[alt_cost - 1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  best_cost = alt_cost;
	}
    }

  /* On a 32-bit target, for a VALUE outside the 32-bit signed range, also
     try building the sign-extended low and high 32-bit parts separately.
     If that is cheaper, CODES receives the low-part sequence immediately
     followed by the high-part sequence.  */
  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];

      const int hi_cost = riscv_build_integer_1 (hi_codes, hival, mode);
      /* The low part is only worth building if the high part alone is
	 already cheaper than the current best.  */
      if (hi_cost < best_cost)
	{
	  const int lo_cost = riscv_build_integer_1 (lo_codes, loval, mode);
	  const int split_cost = lo_cost + hi_cost;
	  if (split_cost < best_cost)
	    {
	      memcpy (codes, lo_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hi_codes,
		      hi_cost * sizeof (struct riscv_integer_op));
	      best_cost = split_cost;
	    }
	}
    }

  return best_cost;
}

/* Return the cost of constructing
VAL in the event that a scratch 928 register is available. */ 929 930 static int 931 riscv_split_integer_cost (HOST_WIDE_INT val) 932 { 933 int cost; 934 unsigned HOST_WIDE_INT loval = sext_hwi (val, 32); 935 unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32); 936 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS]; 937 938 cost = 2 + riscv_build_integer (codes, loval, VOIDmode); 939 if (loval != hival) 940 cost += riscv_build_integer (codes, hival, VOIDmode); 941 942 return cost; 943 } 944 945 /* Return the cost of constructing the integer constant VAL. */ 946 947 static int 948 riscv_integer_cost (HOST_WIDE_INT val) 949 { 950 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS]; 951 return MIN (riscv_build_integer (codes, val, VOIDmode), 952 riscv_split_integer_cost (val)); 953 } 954 955 /* Try to split a 64b integer into 32b parts, then reassemble. */ 956 957 static rtx 958 riscv_split_integer (HOST_WIDE_INT val, machine_mode mode) 959 { 960 unsigned HOST_WIDE_INT loval = sext_hwi (val, 32); 961 unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32); 962 rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode); 963 964 riscv_move_integer (lo, lo, loval, mode); 965 966 if (loval == hival) 967 hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32)); 968 else 969 { 970 riscv_move_integer (hi, hi, hival, mode); 971 hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32)); 972 } 973 974 hi = force_reg (mode, hi); 975 return gen_rtx_PLUS (mode, hi, lo); 976 } 977 978 /* Return true if X is a thread-local symbol. */ 979 980 static bool 981 riscv_tls_symbol_p (const_rtx x) 982 { 983 return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; 984 } 985 986 /* Return true if symbol X binds locally. */ 987 988 static bool 989 riscv_symbol_binds_local_p (const_rtx x) 990 { 991 if (SYMBOL_REF_P (x)) 992 return (SYMBOL_REF_DECL (x) 993 ? 
targetm.binds_local_p (SYMBOL_REF_DECL (x)) 994 : SYMBOL_REF_LOCAL_P (x)); 995 else 996 return false; 997 } 998 999 /* Return the method that should be used to access SYMBOL_REF or 1000 LABEL_REF X. */ 1001 1002 static enum riscv_symbol_type 1003 riscv_classify_symbol (const_rtx x) 1004 { 1005 if (riscv_tls_symbol_p (x)) 1006 return SYMBOL_TLS; 1007 1008 if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x)) 1009 return SYMBOL_GOT_DISP; 1010 1011 switch (riscv_cmodel) 1012 { 1013 case CM_MEDLOW: 1014 return SYMBOL_ABSOLUTE; 1015 case CM_LARGE: 1016 if (SYMBOL_REF_P (x)) 1017 return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM; 1018 return SYMBOL_PCREL; 1019 default: 1020 return SYMBOL_PCREL; 1021 } 1022 } 1023 1024 /* Classify the base of symbolic expression X. */ 1025 1026 enum riscv_symbol_type 1027 riscv_classify_symbolic_expression (rtx x) 1028 { 1029 rtx offset; 1030 1031 split_const (x, &x, &offset); 1032 if (UNSPEC_ADDRESS_P (x)) 1033 return UNSPEC_ADDRESS_TYPE (x); 1034 1035 return riscv_classify_symbol (x); 1036 } 1037 1038 /* Return true if X is a symbolic constant. If it is, store the type of 1039 the symbol in *SYMBOL_TYPE. */ 1040 1041 bool 1042 riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type) 1043 { 1044 rtx offset; 1045 1046 split_const (x, &x, &offset); 1047 if (UNSPEC_ADDRESS_P (x)) 1048 { 1049 *symbol_type = UNSPEC_ADDRESS_TYPE (x); 1050 x = UNSPEC_ADDRESS (x); 1051 } 1052 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) 1053 *symbol_type = riscv_classify_symbol (x); 1054 else 1055 return false; 1056 1057 if (offset == const0_rtx) 1058 return true; 1059 1060 /* Nonzero offsets are only valid for references that don't use the GOT. */ 1061 switch (*symbol_type) 1062 { 1063 case SYMBOL_ABSOLUTE: 1064 case SYMBOL_PCREL: 1065 case SYMBOL_TLS_LE: 1066 /* GAS rejects offsets outside the range [-2^31, 2^31-1]. 
*/ 1067 return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); 1068 1069 default: 1070 return false; 1071 } 1072 } 1073 1074 /* Returns the number of instructions necessary to reference a symbol. */ 1075 1076 static int riscv_symbol_insns (enum riscv_symbol_type type) 1077 { 1078 switch (type) 1079 { 1080 case SYMBOL_TLS: return 0; /* Depends on the TLS model. */ 1081 case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference. */ 1082 case SYMBOL_PCREL: return 2; /* AUIPC + the reference. */ 1083 case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference. */ 1084 case SYMBOL_TLSDESC: return 6; /* 4-instruction call + ADD TP + the reference. */ 1085 case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference. */ 1086 case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference. */ 1087 default: gcc_unreachable (); 1088 } 1089 } 1090 1091 /* Immediate values loaded by the FLI.S instruction in Chapter 25 of the latest RISC-V ISA 1092 Manual draft. For details, please see: 1093 https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c */ 1094 1095 static unsigned HOST_WIDE_INT fli_value_hf[32] = 1096 { 1097 0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8, 1098 0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8, 1099 0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8, 1100 0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8, 1101 /* Only used for filling, ensuring that 29 and 30 of HF are the same. 
*/ 1102 0x78p8, 1103 0x7cp8, 0x7ep8 1104 }; 1105 1106 static unsigned HOST_WIDE_INT fli_value_sf[32] = 1107 { 1108 0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20, 1109 0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20, 1110 0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20, 1111 0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20 1112 }; 1113 1114 static unsigned HOST_WIDE_INT fli_value_df[32] = 1115 { 1116 0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48, 1117 0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48, 1118 0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48, 1119 0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48, 1120 0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48, 1121 0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48, 1122 0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48, 1123 0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48 1124 }; 1125 1126 /* Display floating-point values at the assembly level, which is consistent 1127 with the zfa extension of llvm: 1128 https://reviews.llvm.org/D145645. */ 1129 1130 const char *fli_value_print[32] = 1131 { 1132 "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125", 1133 "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875", 1134 "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0", 1135 "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan" 1136 }; 1137 1138 /* Return index of the FLI instruction table if rtx X is an immediate constant that can 1139 be moved using a single FLI instruction in zfa extension. Return -1 if not found. 
*/

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  const machine_mode mode = GET_MODE (x);

  /* Zfa must be enabled, X must be a floating-point constant with a
     real mode, and the FP extension matching that mode must be on.  */
  if (!TARGET_ZFA || !CONST_DOUBLE_P (x) || mode == VOIDmode)
    return -1;
  if (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
    return -1;
  if (mode == SFmode && !TARGET_HARD_FLOAT)
    return -1;
  if (mode == DFmode && !TARGET_DOUBLE_FLOAT)
    return -1;

  /* Only scalar float modes up to DF that fit in a HOST_WIDE_INT are
     supported.  The size checks run only for scalar float modes.  */
  if (!SCALAR_FLOAT_MODE_P (mode))
    return -1;
  if (GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  /* Fetch the target bit pattern of the constant.  */
  long words[2];
  real_to_target (words,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  unsigned HOST_WIDE_INT bits = 0;
  if (mode != DFmode)
    bits = zext_hwi (words[0], 32);
  else
    {
      /* Glue the two 32-bit words together in target order.  */
      const int low = BYTES_BIG_ENDIAN ? 1 : 0;
      bits = zext_hwi (words[low], 32);
      bits |= (zext_hwi (words[1 - low], 32) << 32);

      /* Every DF table entry has an all-zero low word, so anything else
	 cannot match.  */
      if (bits & (unsigned HOST_WIDE_INT) 0xffffffff)
	return -1;
    }

  /* Pick the table for this mode.  */
  unsigned HOST_WIDE_INT *table;
  if (mode == HFmode)
    table = fli_value_hf;
  else if (mode == SFmode)
    table = fli_value_sf;
  else if (mode == DFmode)
    table = fli_value_df;
  else
    return -1;

  /* The first two entries are compared directly; the search below
     covers entries 2..31 only.  */
  if (table[0] == bits)
    return 0;
  if (table[1] == bits)
    return 1;

  /* Binary search over entries 2..31.  MID is never below 2, so
     HI = MID - 1 cannot wrap around.  */
  for (unsigned lo = 2, hi = 31; lo <= hi;)
    {
      unsigned mid = (lo + hi) / 2;

      if (table[mid] == bits)
	return mid;

      if (table[mid] < bits)
	lo = mid + 1;
      else
	hi = mid - 1;
    }

  return -1;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.
*/ 1224 1225 static bool 1226 riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 1227 { 1228 return riscv_const_insns (x) > 0; 1229 } 1230 1231 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ 1232 1233 static bool 1234 riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 1235 { 1236 enum riscv_symbol_type type; 1237 rtx base, offset; 1238 1239 /* There's no way to calculate VL-based values using relocations. */ 1240 subrtx_iterator::array_type array; 1241 FOR_EACH_SUBRTX (iter, array, x, ALL) 1242 if (GET_CODE (*iter) == CONST_POLY_INT) 1243 return true; 1244 1245 /* There is no assembler syntax for expressing an address-sized 1246 high part. */ 1247 if (GET_CODE (x) == HIGH) 1248 return true; 1249 1250 if (satisfies_constraint_zfli (x)) 1251 return true; 1252 1253 split_const (x, &base, &offset); 1254 if (riscv_symbolic_constant_p (base, &type)) 1255 { 1256 if (type == SYMBOL_FORCE_TO_MEM) 1257 return false; 1258 1259 /* As an optimization, don't spill symbolic constants that are as 1260 cheap to rematerialize as to access in the constant pool. */ 1261 if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0) 1262 return true; 1263 1264 /* As an optimization, avoid needlessly generate dynamic relocations. */ 1265 if (flag_pic) 1266 return true; 1267 } 1268 1269 /* TLS symbols must be computed by riscv_legitimize_move. */ 1270 if (tls_referenced_p (x)) 1271 return true; 1272 1273 return false; 1274 } 1275 1276 /* Return true if register REGNO is a valid base register for mode MODE. 1277 STRICT_P is true if REG_OK_STRICT is in effect. 
*/ 1278 1279 int 1280 riscv_regno_mode_ok_for_base_p (int regno, 1281 machine_mode mode ATTRIBUTE_UNUSED, 1282 bool strict_p) 1283 { 1284 if (!HARD_REGISTER_NUM_P (regno)) 1285 { 1286 if (!strict_p) 1287 return true; 1288 regno = reg_renumber[regno]; 1289 } 1290 1291 /* These fake registers will be eliminated to either the stack or 1292 hard frame pointer, both of which are usually valid base registers. 1293 Reload deals with the cases where the eliminated form isn't valid. */ 1294 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) 1295 return true; 1296 1297 return GP_REG_P (regno); 1298 } 1299 1300 /* Get valid index register class. 1301 The RISC-V base instructions don't support index registers, 1302 but extensions might support that. */ 1303 1304 enum reg_class 1305 riscv_index_reg_class () 1306 { 1307 if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX) 1308 return GR_REGS; 1309 1310 return NO_REGS; 1311 } 1312 1313 /* Return true if register REGNO is a valid index register. 1314 The RISC-V base instructions don't support index registers, 1315 but extensions might support that. */ 1316 1317 int 1318 riscv_regno_ok_for_index_p (int regno) 1319 { 1320 if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX) 1321 return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1); 1322 1323 return 0; 1324 } 1325 1326 /* Return true if X is a valid base register for mode MODE. 1327 STRICT_P is true if REG_OK_STRICT is in effect. */ 1328 1329 bool 1330 riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) 1331 { 1332 if (!strict_p && GET_CODE (x) == SUBREG) 1333 x = SUBREG_REG (x); 1334 1335 return (REG_P (x) 1336 && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); 1337 } 1338 1339 /* Return true if, for every base register BASE_REG, (plus BASE_REG X) 1340 can address a value of mode MODE. */ 1341 1342 static bool 1343 riscv_valid_offset_p (rtx x, machine_mode mode) 1344 { 1345 /* Check that X is a signed 12-bit number. 
*/ 1346 if (!const_arith_operand (x, Pmode)) 1347 return false; 1348 1349 /* We may need to split multiword moves, so make sure that every word 1350 is accessible. */ 1351 if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD 1352 && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD)) 1353 return false; 1354 1355 return true; 1356 } 1357 1358 /* Should a symbol of type SYMBOL_TYPE should be split in two? */ 1359 1360 bool 1361 riscv_split_symbol_type (enum riscv_symbol_type symbol_type) 1362 { 1363 if (symbol_type == SYMBOL_TLS_LE) 1364 return true; 1365 1366 if (!TARGET_EXPLICIT_RELOCS) 1367 return false; 1368 1369 return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL; 1370 } 1371 1372 /* Return true if a LO_SUM can address a value of mode MODE when the 1373 LO_SUM symbol has type SYM_TYPE. X is the LO_SUM second operand, which 1374 is used when the mode is BLKmode. */ 1375 1376 static bool 1377 riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode, 1378 rtx x) 1379 { 1380 int align, size; 1381 1382 /* Check that symbols of type SYMBOL_TYPE can be used to access values 1383 of mode MODE. */ 1384 if (riscv_symbol_insns (sym_type) == 0) 1385 return false; 1386 1387 /* Check that there is a known low-part relocation. */ 1388 if (!riscv_split_symbol_type (sym_type)) 1389 return false; 1390 1391 /* We can't tell size or alignment when we have BLKmode, so try extracing a 1392 decl from the symbol if possible. */ 1393 if (mode == BLKmode) 1394 { 1395 rtx offset; 1396 1397 /* Extract the symbol from the LO_SUM operand, if any. */ 1398 split_const (x, &x, &offset); 1399 1400 /* Might be a CODE_LABEL. We can compute align but not size for that, 1401 so don't bother trying to handle it. */ 1402 if (!SYMBOL_REF_P (x)) 1403 return false; 1404 1405 /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL. */ 1406 align = (SYMBOL_REF_DECL (x) 1407 ? 
DECL_ALIGN (SYMBOL_REF_DECL (x)) 1408 : 1); 1409 size = (SYMBOL_REF_DECL (x) 1410 && DECL_SIZE (SYMBOL_REF_DECL (x)) 1411 && tree_fits_uhwi_p (DECL_SIZE (SYMBOL_REF_DECL (x))) 1412 ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x))) 1413 : 2*BITS_PER_WORD); 1414 } 1415 else 1416 { 1417 align = GET_MODE_ALIGNMENT (mode); 1418 size = GET_MODE_BITSIZE (mode).to_constant (); 1419 } 1420 1421 /* We may need to split multiword moves, so make sure that each word 1422 can be accessed without inducing a carry. */ 1423 if (size > BITS_PER_WORD 1424 && (!TARGET_STRICT_ALIGN || size > align)) 1425 return false; 1426 1427 return true; 1428 } 1429 1430 /* Return true if mode is the RVV enabled mode. 1431 For example: 'RVVMF2SI' mode is disabled, 1432 wheras 'RVVM1SI' mode is enabled if MIN_VLEN == 32. */ 1433 1434 bool 1435 riscv_v_ext_vector_mode_p (machine_mode mode) 1436 { 1437 #define ENTRY(MODE, REQUIREMENT, ...) \ 1438 case MODE##mode: \ 1439 return REQUIREMENT; 1440 switch (mode) 1441 { 1442 #include "riscv-vector-switch.def" 1443 default: 1444 return false; 1445 } 1446 1447 return false; 1448 } 1449 1450 /* Return true if mode is the RVV enabled tuple mode. */ 1451 1452 bool 1453 riscv_v_ext_tuple_mode_p (machine_mode mode) 1454 { 1455 #define TUPLE_ENTRY(MODE, REQUIREMENT, ...) \ 1456 case MODE##mode: \ 1457 return REQUIREMENT; 1458 switch (mode) 1459 { 1460 #include "riscv-vector-switch.def" 1461 default: 1462 return false; 1463 } 1464 1465 return false; 1466 } 1467 1468 /* Return true if mode is the RVV enabled vls mode. */ 1469 1470 bool 1471 riscv_v_ext_vls_mode_p (machine_mode mode) 1472 { 1473 #define VLS_ENTRY(MODE, REQUIREMENT) \ 1474 case MODE##mode: \ 1475 return REQUIREMENT; 1476 switch (mode) 1477 { 1478 #include "riscv-vector-switch.def" 1479 default: 1480 return false; 1481 } 1482 1483 return false; 1484 } 1485 1486 /* Return true if it is either of below modes. 1487 1. RVV vector mode. 1488 2. RVV tuple mode. 1489 3. RVV vls mode. 
*/ 1490 1491 static bool 1492 riscv_v_ext_mode_p (machine_mode mode) 1493 { 1494 return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode) 1495 || riscv_v_ext_vls_mode_p (mode); 1496 } 1497 1498 static unsigned 1499 riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size, 1500 unsigned scalar_unit_size) 1501 { 1502 gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0); 1503 1504 if (vls_unit_size < scalar_unit_size) 1505 return 1; 1506 1507 /* Ensure the vls mode is exact_div by scalar_unit_size. */ 1508 gcc_assert ((vls_unit_size % scalar_unit_size) == 0); 1509 1510 return vls_unit_size / scalar_unit_size; 1511 } 1512 1513 static machine_mode 1514 riscv_v_vls_to_gpr_mode (unsigned vls_mode_size) 1515 { 1516 switch (vls_mode_size) 1517 { 1518 case 16: 1519 return TImode; 1520 case 8: 1521 return DImode; 1522 case 4: 1523 return SImode; 1524 case 2: 1525 return HImode; 1526 case 1: 1527 return QImode; 1528 default: 1529 gcc_unreachable (); 1530 } 1531 } 1532 1533 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct 1534 NUNITS size for corresponding machine_mode. */ 1535 1536 poly_int64 1537 riscv_v_adjust_nunits (machine_mode mode, int scale) 1538 { 1539 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); 1540 if (riscv_v_ext_mode_p (mode)) 1541 { 1542 if (TARGET_MIN_VLEN == 32) 1543 scale = scale / 2; 1544 return riscv_vector_chunks * scale; 1545 } 1546 return scale; 1547 } 1548 1549 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct 1550 NUNITS size for corresponding machine_mode. 
*/ 1551 1552 poly_int64 1553 riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf) 1554 { 1555 if (riscv_v_ext_mode_p (mode)) 1556 { 1557 scalar_mode smode = GET_MODE_INNER (mode); 1558 int size = GET_MODE_SIZE (smode); 1559 int nunits_per_chunk = riscv_bytes_per_vector_chunk / size; 1560 if (fractional_p) 1561 return nunits_per_chunk / lmul * riscv_vector_chunks * nf; 1562 else 1563 return nunits_per_chunk * lmul * riscv_vector_chunks * nf; 1564 } 1565 /* Set the disabled RVV modes size as 1 by default. */ 1566 return 1; 1567 } 1568 1569 /* Call from ADJUST_BYTESIZE in riscv-modes.def. Return the correct 1570 BYTE size for corresponding machine_mode. */ 1571 1572 poly_int64 1573 riscv_v_adjust_bytesize (machine_mode mode, int scale) 1574 { 1575 if (riscv_v_ext_vector_mode_p (mode)) 1576 { 1577 if (TARGET_XTHEADVECTOR) 1578 return BYTES_PER_RISCV_VECTOR; 1579 1580 poly_int64 nunits = GET_MODE_NUNITS (mode); 1581 1582 if (nunits.coeffs[0] > 8) 1583 return exact_div (nunits, 8); 1584 else if (nunits.is_constant ()) 1585 return 1; 1586 else 1587 return poly_int64 (1, 1); 1588 } 1589 1590 return scale; 1591 } 1592 1593 /* Call from ADJUST_PRECISION in riscv-modes.def. Return the correct 1594 PRECISION size for corresponding machine_mode. */ 1595 1596 poly_int64 1597 riscv_v_adjust_precision (machine_mode mode, int scale) 1598 { 1599 return riscv_v_adjust_nunits (mode, scale); 1600 } 1601 1602 /* Return true if X is a valid address for machine mode MODE. If it is, 1603 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in 1604 effect. 
*/ 1605 1606 static bool 1607 riscv_classify_address (struct riscv_address_info *info, rtx x, 1608 machine_mode mode, bool strict_p) 1609 { 1610 if (th_classify_address (info, x, mode, strict_p)) 1611 return true; 1612 1613 switch (GET_CODE (x)) 1614 { 1615 case REG: 1616 case SUBREG: 1617 info->type = ADDRESS_REG; 1618 info->reg = x; 1619 info->offset = const0_rtx; 1620 return riscv_valid_base_register_p (info->reg, mode, strict_p); 1621 1622 case PLUS: 1623 /* RVV load/store disallow any offset. */ 1624 if (riscv_v_ext_mode_p (mode)) 1625 return false; 1626 1627 info->type = ADDRESS_REG; 1628 info->reg = XEXP (x, 0); 1629 info->offset = XEXP (x, 1); 1630 return (riscv_valid_base_register_p (info->reg, mode, strict_p) 1631 && riscv_valid_offset_p (info->offset, mode)); 1632 1633 case LO_SUM: 1634 /* RVV load/store disallow LO_SUM. */ 1635 if (riscv_v_ext_mode_p (mode)) 1636 return false; 1637 1638 info->type = ADDRESS_LO_SUM; 1639 info->reg = XEXP (x, 0); 1640 info->offset = XEXP (x, 1); 1641 /* We have to trust the creator of the LO_SUM to do something vaguely 1642 sane. Target-independent code that creates a LO_SUM should also 1643 create and verify the matching HIGH. Target-independent code that 1644 adds an offset to a LO_SUM must prove that the offset will not 1645 induce a carry. Failure to do either of these things would be 1646 a bug, and we are not required to check for it here. The RISC-V 1647 backend itself should only create LO_SUMs for valid symbolic 1648 constants, with the high part being either a HIGH or a copy 1649 of _gp. */ 1650 info->symbol_type 1651 = riscv_classify_symbolic_expression (info->offset); 1652 return (riscv_valid_base_register_p (info->reg, mode, strict_p) 1653 && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset)); 1654 1655 case CONST_INT: 1656 /* We only allow the const0_rtx for the RVV load/store. 
For example: 1657 +----------------------------------------------------------+ 1658 | li a5,0 | 1659 | vsetvli zero,a1,e32,m1,ta,ma | 1660 | vle32.v v24,0(a5) <- propagate the const 0 to a5 here. | 1661 | vs1r.v v24,0(a0) | 1662 +----------------------------------------------------------+ 1663 It can be folded to: 1664 +----------------------------------------------------------+ 1665 | vsetvli zero,a1,e32,m1,ta,ma | 1666 | vle32.v v24,0(zero) | 1667 | vs1r.v v24,0(a0) | 1668 +----------------------------------------------------------+ 1669 This behavior will benefit the underlying RVV auto vectorization. */ 1670 if (riscv_v_ext_mode_p (mode)) 1671 return x == const0_rtx; 1672 1673 /* Small-integer addresses don't occur very often, but they 1674 are legitimate if x0 is a valid base register. */ 1675 info->type = ADDRESS_CONST_INT; 1676 return SMALL_OPERAND (INTVAL (x)); 1677 1678 default: 1679 return false; 1680 } 1681 } 1682 1683 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ 1684 1685 static bool 1686 riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, 1687 code_helper = ERROR_MARK) 1688 { 1689 /* Disallow RVV modes base address. 1690 E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0). */ 1691 if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x)))) 1692 return false; 1693 struct riscv_address_info addr; 1694 1695 return riscv_classify_address (&addr, x, mode, strict_p); 1696 } 1697 1698 /* Return true if hard reg REGNO can be used in compressed instructions. */ 1699 1700 static bool 1701 riscv_compressed_reg_p (int regno) 1702 { 1703 /* x8-x15/f8-f15 are compressible registers. */ 1704 return ((TARGET_RVC || TARGET_ZCA) 1705 && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15) 1706 || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15))); 1707 } 1708 1709 /* Return true if x is an unsigned 5-bit immediate scaled by 4. 
*/ 1710 1711 static bool 1712 riscv_compressed_lw_offset_p (rtx x) 1713 { 1714 return (CONST_INT_P (x) 1715 && (INTVAL (x) & 3) == 0 1716 && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET)); 1717 } 1718 1719 /* Return true if load/store from/to address x can be compressed. */ 1720 1721 static bool 1722 riscv_compressed_lw_address_p (rtx x) 1723 { 1724 struct riscv_address_info addr; 1725 bool result = riscv_classify_address (&addr, x, GET_MODE (x), 1726 reload_completed); 1727 1728 /* Return false if address is not compressed_reg + small_offset. */ 1729 if (!result 1730 || addr.type != ADDRESS_REG 1731 /* Before reload, assume all registers are OK. */ 1732 || (reload_completed 1733 && !riscv_compressed_reg_p (REGNO (addr.reg)) 1734 && addr.reg != stack_pointer_rtx) 1735 || !riscv_compressed_lw_offset_p (addr.offset)) 1736 return false; 1737 1738 return result; 1739 } 1740 1741 /* Return the number of instructions needed to load or store a value 1742 of mode MODE at address X. Return 0 if X isn't valid for MODE. 1743 Assume that multiword moves may need to be split into word moves 1744 if MIGHT_SPLIT_P, otherwise assume that a single load or store is 1745 enough. */ 1746 1747 int 1748 riscv_address_insns (rtx x, machine_mode mode, bool might_split_p) 1749 { 1750 struct riscv_address_info addr = {}; 1751 int n = 1; 1752 1753 if (!riscv_classify_address (&addr, x, mode, false)) 1754 { 1755 /* This could be a pattern from the pic.md file. In which case we want 1756 this address to always have a cost of 3 to make it as expensive as the 1757 most expensive symbol. This prevents constant propagation from 1758 preferring symbols over register plus offset. */ 1759 return 3; 1760 } 1761 1762 /* BLKmode is used for single unaligned loads and stores and should 1763 not count as a multiword mode. 
*/ 1764 if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p) 1765 n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1766 1767 if (addr.type == ADDRESS_LO_SUM) 1768 n += riscv_symbol_insns (addr.symbol_type) - 1; 1769 1770 return n; 1771 } 1772 1773 /* Return the number of instructions needed to load constant X. 1774 Return 0 if X isn't a valid constant. */ 1775 1776 int 1777 riscv_const_insns (rtx x) 1778 { 1779 enum riscv_symbol_type symbol_type; 1780 rtx offset; 1781 1782 switch (GET_CODE (x)) 1783 { 1784 case HIGH: 1785 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type) 1786 || !riscv_split_symbol_type (symbol_type)) 1787 return 0; 1788 1789 /* This is simply an LUI. */ 1790 return 1; 1791 1792 case CONST_INT: 1793 { 1794 int cost = riscv_integer_cost (INTVAL (x)); 1795 /* Force complicated constants to memory. */ 1796 return cost < 4 ? cost : 0; 1797 } 1798 1799 case CONST_DOUBLE: 1800 /* See if we can use FMV directly. */ 1801 if (satisfies_constraint_zfli (x)) 1802 return 1; 1803 1804 /* We can use x0 to load floating-point zero. */ 1805 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; 1806 case CONST_VECTOR: 1807 { 1808 /* TODO: This is not accurate, we will need to 1809 adapt the COST of CONST_VECTOR in the future 1810 for the following cases: 1811 1812 - 1. const duplicate vector with element value 1813 in range of [-16, 15]. 1814 - 2. const duplicate vector with element value 1815 out range of [-16, 15]. 1816 - 3. const series vector. 1817 ...etc. */ 1818 if (riscv_v_ext_mode_p (GET_MODE (x))) 1819 { 1820 /* const series vector. */ 1821 rtx base, step; 1822 if (const_vec_series_p (x, &base, &step)) 1823 { 1824 /* This is not accurate, we will need to adapt the COST 1825 * accurately according to BASE && STEP. 
*/ 1826 return 1; 1827 } 1828 1829 rtx elt; 1830 if (const_vec_duplicate_p (x, &elt)) 1831 { 1832 /* We don't allow CONST_VECTOR for DI vector on RV32 1833 system since the ELT constant value can not held 1834 within a single register to disable reload a DI 1835 register vec_duplicate into vmv.v.x. */ 1836 scalar_mode smode = GET_MODE_INNER (GET_MODE (x)); 1837 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD) 1838 && !immediate_operand (elt, Pmode)) 1839 return 0; 1840 /* Constants from -16 to 15 can be loaded with vmv.v.i. 1841 The Wc0, Wc1 constraints are already covered by the 1842 vi constraint so we do not need to check them here 1843 separately. */ 1844 if (satisfies_constraint_vi (x)) 1845 return 1; 1846 1847 /* Any int/FP constants can always be broadcast from a 1848 scalar register. Loading of a floating-point 1849 constant incurs a literal-pool access. Allow this in 1850 order to increase vectorization possibilities. */ 1851 int n = riscv_const_insns (elt); 1852 if (CONST_DOUBLE_P (elt)) 1853 return 1 + 4; /* vfmv.v.f + memory access. */ 1854 else 1855 { 1856 /* We need as many insns as it takes to load the constant 1857 into a GPR and one vmv.v.x. */ 1858 if (n != 0) 1859 return 1 + n; 1860 else 1861 return 1 + 4; /*vmv.v.x + memory access. */ 1862 } 1863 } 1864 } 1865 1866 /* TODO: We may support more const vector in the future. */ 1867 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; 1868 } 1869 1870 case CONST: 1871 /* See if we can refer to X directly. */ 1872 if (riscv_symbolic_constant_p (x, &symbol_type)) 1873 return riscv_symbol_insns (symbol_type); 1874 1875 /* Otherwise try splitting the constant into a base and offset. 
*/ 1876 split_const (x, &x, &offset); 1877 if (offset != 0) 1878 { 1879 int n = riscv_const_insns (x); 1880 if (n != 0) 1881 return n + riscv_integer_cost (INTVAL (offset)); 1882 } 1883 return 0; 1884 1885 case SYMBOL_REF: 1886 case LABEL_REF: 1887 return riscv_symbol_insns (riscv_classify_symbol (x)); 1888 1889 /* TODO: In RVV, we get CONST_POLY_INT by using csrr VLENB 1890 instruction and several scalar shift or mult instructions, 1891 it is so far unknown. We set it to 4 temporarily. */ 1892 case CONST_POLY_INT: 1893 return 4; 1894 1895 default: 1896 return 0; 1897 } 1898 } 1899 1900 /* X is a doubleword constant that can be handled by splitting it into 1901 two words and loading each word separately. Return the number of 1902 instructions required to do this. */ 1903 1904 int 1905 riscv_split_const_insns (rtx x) 1906 { 1907 unsigned int low, high; 1908 1909 low = riscv_const_insns (riscv_subword (x, false)); 1910 high = riscv_const_insns (riscv_subword (x, true)); 1911 gcc_assert (low > 0 && high > 0); 1912 return low + high; 1913 } 1914 1915 /* Return the number of instructions needed to implement INSN, 1916 given that it loads from or stores to MEM. */ 1917 1918 int 1919 riscv_load_store_insns (rtx mem, rtx_insn *insn) 1920 { 1921 machine_mode mode; 1922 bool might_split_p; 1923 rtx set; 1924 1925 gcc_assert (MEM_P (mem)); 1926 mode = GET_MODE (mem); 1927 1928 /* Try to prove that INSN does not need to be split. */ 1929 might_split_p = true; 1930 if (GET_MODE_BITSIZE (mode).to_constant () <= 32) 1931 might_split_p = false; 1932 else if (GET_MODE_BITSIZE (mode).to_constant () == 64) 1933 { 1934 set = single_set (insn); 1935 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set))) 1936 might_split_p = false; 1937 } 1938 1939 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p); 1940 } 1941 1942 /* Emit a move from SRC to DEST. Assume that the move expanders can 1943 handle all moves if !can_create_pseudo_p (). 
The distinction is 1944 important because, unlike emit_move_insn, the move expanders know 1945 how to force Pmode objects into the constant pool even when the 1946 constant pool address is not itself legitimate. */ 1947 1948 rtx 1949 riscv_emit_move (rtx dest, rtx src) 1950 { 1951 return (can_create_pseudo_p () 1952 ? emit_move_insn (dest, src) 1953 : emit_move_insn_1 (dest, src)); 1954 } 1955 1956 /* Emit an instruction of the form (set TARGET SRC). */ 1957 1958 static rtx 1959 riscv_emit_set (rtx target, rtx src) 1960 { 1961 emit_insn (gen_rtx_SET (target, src)); 1962 return target; 1963 } 1964 1965 /* Emit an instruction of the form (set DEST (CODE X)). */ 1966 1967 rtx 1968 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x) 1969 { 1970 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x)); 1971 } 1972 1973 /* Emit an instruction of the form (set DEST (CODE X Y)). */ 1974 1975 rtx 1976 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y) 1977 { 1978 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y)); 1979 } 1980 1981 /* Compute (CODE X Y) and store the result in a new register 1982 of mode MODE. Return that new register. */ 1983 1984 static rtx 1985 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y) 1986 { 1987 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y); 1988 } 1989 1990 static rtx 1991 riscv_swap_instruction (rtx inst) 1992 { 1993 gcc_assert (GET_MODE (inst) == SImode); 1994 if (BYTES_BIG_ENDIAN) 1995 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1); 1996 return inst; 1997 } 1998 1999 /* Copy VALUE to a register and return that register. If new pseudos 2000 are allowed, copy it into a new register, otherwise use DEST. 
*/ 2001 2002 static rtx 2003 riscv_force_temporary (rtx dest, rtx value) 2004 { 2005 if (can_create_pseudo_p ()) 2006 return force_reg (Pmode, value); 2007 else 2008 { 2009 riscv_emit_move (dest, value); 2010 return dest; 2011 } 2012 } 2013 2014 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, 2015 then add CONST_INT OFFSET to the result. */ 2016 2017 static rtx 2018 riscv_unspec_address_offset (rtx base, rtx offset, 2019 enum riscv_symbol_type symbol_type) 2020 { 2021 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), 2022 UNSPEC_ADDRESS_FIRST + symbol_type); 2023 if (offset != const0_rtx) 2024 base = gen_rtx_PLUS (Pmode, base, offset); 2025 return gen_rtx_CONST (Pmode, base); 2026 } 2027 2028 /* Return an UNSPEC address with underlying address ADDRESS and symbol 2029 type SYMBOL_TYPE. */ 2030 2031 rtx 2032 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type) 2033 { 2034 rtx base, offset; 2035 2036 split_const (address, &base, &offset); 2037 return riscv_unspec_address_offset (base, offset, symbol_type); 2038 } 2039 2040 /* If OP is an UNSPEC address, return the address to which it refers, 2041 otherwise return OP itself. */ 2042 2043 static rtx 2044 riscv_strip_unspec_address (rtx op) 2045 { 2046 rtx base, offset; 2047 2048 split_const (op, &base, &offset); 2049 if (UNSPEC_ADDRESS_P (base)) 2050 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); 2051 return op; 2052 } 2053 2054 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the 2055 high part to BASE and return the result. Just return BASE otherwise. 2056 TEMP is as for riscv_force_temporary. 2057 2058 The returned expression can be used as the first operand to a LO_SUM. 
*/ 2059 2060 static rtx 2061 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type) 2062 { 2063 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type)); 2064 return riscv_force_temporary (temp, addr); 2065 } 2066 2067 /* Load an entry from the GOT for a TLS GD access. */ 2068 2069 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym) 2070 { 2071 if (Pmode == DImode) 2072 return gen_got_load_tls_gddi (dest, sym); 2073 else 2074 return gen_got_load_tls_gdsi (dest, sym); 2075 } 2076 2077 /* Load an entry from the GOT for a TLS IE access. */ 2078 2079 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym) 2080 { 2081 if (Pmode == DImode) 2082 return gen_got_load_tls_iedi (dest, sym); 2083 else 2084 return gen_got_load_tls_iesi (dest, sym); 2085 } 2086 2087 /* Add in the thread pointer for a TLS LE access. */ 2088 2089 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym) 2090 { 2091 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); 2092 if (Pmode == DImode) 2093 return gen_tls_add_tp_ledi (dest, base, tp, sym); 2094 else 2095 return gen_tls_add_tp_lesi (dest, base, tp, sym); 2096 } 2097 2098 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise 2099 it appears in a MEM of that mode. Return true if ADDR is a legitimate 2100 constant in that context and can be split into high and low parts. 2101 If so, and if LOW_OUT is nonnull, emit the high part and store the 2102 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. 2103 2104 TEMP is as for riscv_force_temporary and is used to load the high 2105 part into a register. 2106 2107 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be 2108 a legitimize SET_SRC for an .md pattern, otherwise the low part 2109 is guaranteed to be a legitimate address for mode MODE. 
*/

bool
riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
{
  enum riscv_symbol_type symbol_type;

  /* Reject a HIGH used as a move operand, anything that is not a
     symbolic constant, and symbol types that cannot be loaded or
     split.  */
  if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
      || !riscv_symbolic_constant_p (addr, &symbol_type)
      || riscv_symbol_insns (symbol_type) == 0
      || !riscv_split_symbol_type (symbol_type))
    return false;

  /* With no LOW_OUT the caller only asks whether a split is possible;
     nothing is emitted.  */
  if (!low_out)
    return true;

  switch (symbol_type)
    {
    case SYMBOL_FORCE_TO_MEM:
      return false;

    case SYMBOL_ABSOLUTE:
      {
        /* HIGH part in a register, LO_SUM of that register and ADDR.  */
        rtx high_part = gen_rtx_HIGH (Pmode, copy_rtx (addr));
        high_part = riscv_force_temporary (temp, high_part);
        *low_out = gen_rtx_LO_SUM (Pmode, high_part, addr);
      }
      break;

    case SYMBOL_PCREL:
      {
        /* Counter that gives each emitted auipc its own ".LA<n>" label
           name.  */
        static unsigned seqno;
        char name[32];

        ssize_t len = snprintf (name, sizeof (name), ".LA%u", seqno);
        gcc_assert ((size_t) len < sizeof (name));

        rtx label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
        SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
        /* ??? Ugly hack to make weak symbols work.  May need to change the
           RTL for the auipc and/or low patterns to get a better fix for
           this.  */
        if (!nonzero_address_p (addr))
          SYMBOL_REF_WEAK (label) = 1;

        if (temp == NULL)
          temp = gen_reg_rtx (Pmode);

        /* The auipc pattern records SEQNO so that the low part can refer
           back to it through LABEL.  */
        rtx auipc = (Pmode == DImode
                     ? gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno))
                     : gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
        emit_insn (auipc);

        *low_out = gen_rtx_LO_SUM (Pmode, temp, label);

        seqno++;
      }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}

/* Return a legitimate address for REG + OFFSET.  TEMP is as for
   riscv_force_temporary; it is only needed when OFFSET is not a
   SMALL_OPERAND.
*/ 2177 2178 static rtx 2179 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) 2180 { 2181 if (!SMALL_OPERAND (offset)) 2182 { 2183 rtx high; 2184 2185 /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. 2186 The addition inside the macro CONST_HIGH_PART may cause an 2187 overflow, so we need to force a sign-extension check. */ 2188 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); 2189 offset = CONST_LOW_PART (offset); 2190 high = riscv_force_temporary (temp, high); 2191 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); 2192 } 2193 return plus_constant (Pmode, reg, offset); 2194 } 2195 2196 /* The __tls_get_attr symbol. */ 2197 static GTY(()) rtx riscv_tls_symbol; 2198 2199 /* Return an instruction sequence that calls __tls_get_addr. SYM is 2200 the TLS symbol we are referencing and TYPE is the symbol type to use 2201 (either global dynamic or local dynamic). RESULT is an RTX for the 2202 return value location. */ 2203 2204 static rtx_insn * 2205 riscv_call_tls_get_addr (rtx sym, rtx result) 2206 { 2207 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func; 2208 rtx_insn *insn; 2209 2210 if (!riscv_tls_symbol) 2211 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr"); 2212 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol); 2213 2214 start_sequence (); 2215 2216 emit_insn (riscv_got_load_tls_gd (a0, sym)); 2217 insn = emit_call_insn (gen_call_value (result, func, const0_rtx, 2218 gen_int_mode (RISCV_CC_BASE, SImode))); 2219 RTL_CONST_CALL_P (insn) = 1; 2220 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); 2221 insn = get_insns (); 2222 2223 end_sequence (); 2224 2225 return insn; 2226 } 2227 2228 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return 2229 its address. The return value will be both a valid address and a valid 2230 SET_SRC (either a REG or a LO_SUM). 
*/ 2231 2232 static rtx 2233 riscv_legitimize_tls_address (rtx loc) 2234 { 2235 rtx dest, tp, tmp, a0; 2236 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); 2237 2238 #if 0 2239 /* TLS copy relocs are now deprecated and should not be used. */ 2240 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */ 2241 if (!flag_pic) 2242 model = TLS_MODEL_LOCAL_EXEC; 2243 #endif 2244 2245 switch (model) 2246 { 2247 case TLS_MODEL_LOCAL_DYNAMIC: 2248 /* Rely on section anchors for the optimization that LDM TLS 2249 provides. The anchor's address is loaded with GD TLS. */ 2250 case TLS_MODEL_GLOBAL_DYNAMIC: 2251 if (TARGET_TLSDESC) 2252 { 2253 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); 2254 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); 2255 dest = gen_reg_rtx (Pmode); 2256 2257 emit_insn (gen_tlsdesc (Pmode, loc)); 2258 emit_insn (gen_add3_insn (dest, a0, tp)); 2259 } 2260 else 2261 { 2262 tmp = gen_rtx_REG (Pmode, GP_RETURN); 2263 dest = gen_reg_rtx (Pmode); 2264 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, 2265 loc); 2266 } 2267 break; 2268 2269 case TLS_MODEL_INITIAL_EXEC: 2270 /* la.tls.ie; tp-relative add */ 2271 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); 2272 tmp = gen_reg_rtx (Pmode); 2273 emit_insn (riscv_got_load_tls_ie (tmp, loc)); 2274 dest = gen_reg_rtx (Pmode); 2275 emit_insn (gen_add3_insn (dest, tmp, tp)); 2276 break; 2277 2278 case TLS_MODEL_LOCAL_EXEC: 2279 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE); 2280 dest = gen_reg_rtx (Pmode); 2281 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc)); 2282 dest = gen_rtx_LO_SUM (Pmode, dest, 2283 riscv_unspec_address (loc, SYMBOL_TLS_LE)); 2284 break; 2285 2286 default: 2287 gcc_unreachable (); 2288 } 2289 return dest; 2290 } 2291 2292 /* If X is not a valid address for mode MODE, force it into a register. 
*/ 2294 2295 static rtx 2296 riscv_force_address (rtx x, machine_mode mode) 2297 { 2298 if (!riscv_legitimate_address_p (mode, x, false)) 2299 { 2300 if (can_create_pseudo_p ()) 2301 return force_reg (Pmode, x); 2302 else 2303 { 2304 /* It's only safe for the thunk function. 2305 Use ra as the temp regiater. */ 2306 gcc_assert (riscv_in_thunk_func); 2307 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode); 2308 riscv_emit_move (reg, x); 2309 return reg; 2310 } 2311 } 2312 2313 return x; 2314 } 2315 2316 /* Modify base + offset so that offset fits within a compressed load/store insn 2317 and the excess is added to base. */ 2318 2319 static rtx 2320 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset) 2321 { 2322 rtx addr, high; 2323 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess 2324 into HIGH. */ 2325 high = GEN_INT (offset & ~CSW_MAX_OFFSET); 2326 offset &= CSW_MAX_OFFSET; 2327 if (!SMALL_OPERAND (INTVAL (high))) 2328 high = force_reg (Pmode, high); 2329 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base)); 2330 addr = plus_constant (Pmode, base, offset); 2331 return addr; 2332 } 2333 2334 /* Helper for riscv_legitimize_address. Given X, return true if it 2335 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. 2336 2337 This respectively represent canonical shift-add rtxs or scaled 2338 memory addresses. */ 2339 static bool 2340 mem_shadd_or_shadd_rtx_p (rtx x) 2341 { 2342 return ((GET_CODE (x) == ASHIFT 2343 || GET_CODE (x) == MULT) 2344 && CONST_INT_P (XEXP (x, 1)) 2345 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) 2346 || (GET_CODE (x) == MULT 2347 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); 2348 } 2349 2350 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can 2351 be legitimized in a way that the generic machinery might not expect, 2352 return a new address, otherwise return NULL. MODE is the mode of 2353 the memory being accessed. 
*/ 2354 2355 static rtx 2356 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 2357 machine_mode mode) 2358 { 2359 rtx addr; 2360 2361 if (riscv_tls_symbol_p (x)) 2362 return riscv_legitimize_tls_address (x); 2363 2364 /* See if the address can split into a high part and a LO_SUM. */ 2365 if (riscv_split_symbol (NULL, x, mode, &addr)) 2366 return riscv_force_address (addr, mode); 2367 2368 /* Handle BASE + OFFSET. */ 2369 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) 2370 && INTVAL (XEXP (x, 1)) != 0) 2371 { 2372 rtx base = XEXP (x, 0); 2373 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); 2374 2375 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */ 2376 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) 2377 && SMALL_OPERAND (offset)) 2378 { 2379 rtx index = XEXP (base, 0); 2380 rtx fp = XEXP (base, 1); 2381 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) 2382 { 2383 2384 /* If we were given a MULT, we must fix the constant 2385 as we're going to create the ASHIFT form. */ 2386 int shift_val = INTVAL (XEXP (index, 1)); 2387 if (GET_CODE (index) == MULT) 2388 shift_val = exact_log2 (shift_val); 2389 2390 rtx reg1 = gen_reg_rtx (Pmode); 2391 rtx reg2 = gen_reg_rtx (Pmode); 2392 rtx reg3 = gen_reg_rtx (Pmode); 2393 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); 2394 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val)); 2395 riscv_emit_binary (PLUS, reg3, reg2, reg1); 2396 2397 return reg3; 2398 } 2399 } 2400 2401 if (!riscv_valid_base_register_p (base, mode, false)) 2402 base = copy_to_mode_reg (Pmode, base); 2403 if (optimize_function_for_size_p (cfun) 2404 && (strcmp (current_pass->name, "shorten_memrefs") == 0) 2405 && mode == SImode) 2406 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow 2407 possible compressed load/store. 
*/ 2408 addr = riscv_shorten_lw_offset (base, offset); 2409 else 2410 addr = riscv_add_offset (NULL, base, offset); 2411 return riscv_force_address (addr, mode); 2412 } 2413 2414 return x; 2415 } 2416 2417 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE 2418 is the original src mode before promotion. */ 2419 2420 void 2421 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value, 2422 machine_mode orig_mode) 2423 { 2424 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS]; 2425 machine_mode mode; 2426 int i, num_ops; 2427 rtx x = NULL_RTX; 2428 2429 mode = GET_MODE (dest); 2430 /* We use the original mode for the riscv_build_integer call, because HImode 2431 values are given special treatment. */ 2432 num_ops = riscv_build_integer (codes, value, orig_mode); 2433 2434 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */ 2435 && num_ops >= riscv_split_integer_cost (value)) 2436 x = riscv_split_integer (value, mode); 2437 else 2438 { 2439 codes[0].value = trunc_int_for_mode (codes[0].value, mode); 2440 /* Apply each binary operation to X. */ 2441 x = GEN_INT (codes[0].value); 2442 2443 for (i = 1; i < num_ops; i++) 2444 { 2445 if (!can_create_pseudo_p ()) 2446 x = riscv_emit_set (temp, x); 2447 else 2448 x = force_reg (mode, x); 2449 codes[i].value = trunc_int_for_mode (codes[i].value, mode); 2450 x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); 2451 } 2452 } 2453 2454 riscv_emit_set (dest, x); 2455 } 2456 2457 /* Subroutine of riscv_legitimize_move. Move constant SRC into register 2458 DEST given that SRC satisfies immediate_operand but doesn't satisfy 2459 move_operand. */ 2460 2461 static void 2462 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src) 2463 { 2464 rtx base, offset; 2465 2466 /* Split moves of big integers into smaller pieces. 
*/ 2467 if (splittable_const_int_operand (src, mode)) 2468 { 2469 riscv_move_integer (dest, dest, INTVAL (src), mode); 2470 return; 2471 } 2472 2473 if (satisfies_constraint_zfli (src)) 2474 { 2475 riscv_emit_set (dest, src); 2476 return; 2477 } 2478 2479 /* Split moves of symbolic constants into high/low pairs. */ 2480 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src)) 2481 { 2482 riscv_emit_set (dest, src); 2483 return; 2484 } 2485 2486 /* Generate the appropriate access sequences for TLS symbols. */ 2487 if (riscv_tls_symbol_p (src)) 2488 { 2489 riscv_emit_move (dest, riscv_legitimize_tls_address (src)); 2490 return; 2491 } 2492 2493 /* If we have (const (plus symbol offset)), and that expression cannot 2494 be forced into memory, load the symbol first and add in the offset. Also 2495 prefer to do this even if the constant _can_ be forced into memory, as it 2496 usually produces better code. */ 2497 split_const (src, &base, &offset); 2498 if (offset != const0_rtx 2499 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ())) 2500 { 2501 base = riscv_force_temporary (dest, base); 2502 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset))); 2503 return; 2504 } 2505 2506 /* Handle below format. 
2507 (const:DI 2508 (plus:DI 2509 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0 2510 (const_poly_int:DI [16, 16]) // <- op_1 2511 )) 2512 */ 2513 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS 2514 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1))) 2515 { 2516 rtx dest_tmp = gen_reg_rtx (mode); 2517 rtx tmp = gen_reg_rtx (mode); 2518 2519 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0)); 2520 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1)); 2521 2522 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp))); 2523 return; 2524 } 2525 2526 src = force_const_mem (mode, src); 2527 2528 /* When using explicit relocs, constant pool references are sometimes 2529 not legitimate addresses. */ 2530 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0)); 2531 riscv_emit_move (dest, src); 2532 } 2533 2534 /* Report when we try to do something that requires vector when vector is 2535 disabled. This is an error of last resort and isn't very high-quality. It 2536 usually involves attempts to measure the vector length in some way. */ 2537 2538 static void 2539 riscv_report_v_required (void) 2540 { 2541 static bool reported_p = false; 2542 2543 /* Avoid reporting a slew of messages for a single oversight. */ 2544 if (reported_p) 2545 return; 2546 2547 error ("this operation requires the RVV ISA extension"); 2548 inform (input_location, "you can enable RVV using the command-line" 2549 " option %<-march%>, or by using the %<target%>" 2550 " attribute or pragma"); 2551 reported_p = true; 2552 } 2553 2554 /* Helper function to operation for rtx_code CODE. 
*/ 2555 static void 2556 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1, 2557 rtx op2) 2558 { 2559 if (can_create_pseudo_p ()) 2560 { 2561 rtx result; 2562 if (GET_RTX_CLASS (code) == RTX_UNARY) 2563 result = expand_simple_unop (mode, code, op1, NULL_RTX, false); 2564 else 2565 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false, 2566 OPTAB_DIRECT); 2567 riscv_emit_move (op0, result); 2568 } 2569 else 2570 { 2571 rtx pat; 2572 /* The following implementation is for prologue and epilogue. 2573 Because prologue and epilogue can not use pseudo register. 2574 We can't using expand_simple_binop or expand_simple_unop. */ 2575 if (GET_RTX_CLASS (code) == RTX_UNARY) 2576 pat = gen_rtx_fmt_e (code, mode, op1); 2577 else 2578 pat = gen_rtx_fmt_ee (code, mode, op1, op2); 2579 emit_insn (gen_rtx_SET (op0, pat)); 2580 } 2581 } 2582 2583 /* Expand mult operation with constant integer, multiplicand also used as a 2584 * temporary register. */ 2585 2586 static void 2587 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand, 2588 HOST_WIDE_INT multiplier) 2589 { 2590 if (multiplier == 0) 2591 { 2592 riscv_emit_move (dest, GEN_INT (0)); 2593 return; 2594 } 2595 2596 bool neg_p = multiplier < 0; 2597 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier); 2598 2599 if (multiplier_abs == 1) 2600 { 2601 if (neg_p) 2602 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX); 2603 else 2604 riscv_emit_move (dest, multiplicand); 2605 } 2606 else 2607 { 2608 if (pow2p_hwi (multiplier_abs)) 2609 { 2610 /* 2611 multiplicand = [BYTES_PER_RISCV_VECTOR]. 2612 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8]. 2613 Sequence: 2614 csrr a5, vlenb 2615 slli a5, a5, 3 2616 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8]. 
2617 Sequence: 2618 csrr a5, vlenb 2619 slli a5, a5, 3 2620 neg a5, a5 2621 */ 2622 riscv_expand_op (ASHIFT, mode, dest, multiplicand, 2623 gen_int_mode (exact_log2 (multiplier_abs), QImode)); 2624 if (neg_p) 2625 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX); 2626 } 2627 else if (pow2p_hwi (multiplier_abs + 1)) 2628 { 2629 /* 2630 multiplicand = [BYTES_PER_RISCV_VECTOR]. 2631 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7]. 2632 Sequence: 2633 csrr a5, vlenb 2634 slli a4, a5, 3 2635 sub a5, a4, a5 2636 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7]. 2637 Sequence: 2638 csrr a5, vlenb 2639 slli a4, a5, 3 2640 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4 2641 */ 2642 riscv_expand_op (ASHIFT, mode, dest, multiplicand, 2643 gen_int_mode (exact_log2 (multiplier_abs + 1), 2644 QImode)); 2645 if (neg_p) 2646 riscv_expand_op (MINUS, mode, dest, multiplicand, dest); 2647 else 2648 riscv_expand_op (MINUS, mode, dest, dest, multiplicand); 2649 } 2650 else if (pow2p_hwi (multiplier - 1)) 2651 { 2652 /* 2653 multiplicand = [BYTES_PER_RISCV_VECTOR]. 2654 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9]. 2655 Sequence: 2656 csrr a5, vlenb 2657 slli a4, a5, 3 2658 add a5, a4, a5 2659 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9]. 2660 Sequence: 2661 csrr a5, vlenb 2662 slli a4, a5, 3 2663 add a5, a4, a5 2664 neg a5, a5 2665 */ 2666 riscv_expand_op (ASHIFT, mode, dest, multiplicand, 2667 gen_int_mode (exact_log2 (multiplier_abs - 1), 2668 QImode)); 2669 riscv_expand_op (PLUS, mode, dest, dest, multiplicand); 2670 if (neg_p) 2671 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX); 2672 } 2673 else 2674 { 2675 /* We use multiplication for remaining cases. 
*/ 2676 gcc_assert ( 2677 TARGET_MUL 2678 && "M-extension must be enabled to calculate the poly_int " 2679 "size/offset."); 2680 riscv_emit_move (dest, gen_int_mode (multiplier, mode)); 2681 riscv_expand_op (MULT, mode, dest, dest, multiplicand); 2682 } 2683 } 2684 } 2685 2686 /* Analyze src and emit const_poly_int mov sequence. */ 2687 2688 void 2689 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src) 2690 { 2691 poly_int64 value = rtx_to_poly_int64 (src); 2692 /* It use HOST_WIDE_INT intead of int since 32bit type is not enough 2693 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */ 2694 HOST_WIDE_INT offset = value.coeffs[0]; 2695 HOST_WIDE_INT factor = value.coeffs[1]; 2696 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1]; 2697 int div_factor = 0; 2698 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions. 2699 For any (const_poly_int:MODE [m, n]), the calculation formula is as 2700 follows. 2701 constant = m - n. 2702 When minimum VLEN = 32, poly of VLENB = (4, 4). 2703 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1). 2704 When minimum VLEN > 32, poly of VLENB = (8, 8). 2705 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1). 2706 magn = (n, n) / base. 2707 (m, n) = base * magn + constant. 2708 This calculation doesn't need div operation. 
*/ 2709 2710 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))) 2711 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode)); 2712 else 2713 { 2714 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode)); 2715 emit_move_insn (gen_lowpart (Pmode, tmp), 2716 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode)); 2717 } 2718 2719 if (BYTES_PER_RISCV_VECTOR.is_constant ()) 2720 { 2721 gcc_assert (value.is_constant ()); 2722 riscv_emit_move (dest, GEN_INT (value.to_constant ())); 2723 return; 2724 } 2725 else 2726 { 2727 int max_power = exact_log2 (MAX_POLY_VARIANT); 2728 for (int i = 0; i <= max_power; i++) 2729 { 2730 int possible_div_factor = 1 << i; 2731 if (factor % (vlenb / possible_div_factor) == 0) 2732 { 2733 div_factor = possible_div_factor; 2734 break; 2735 } 2736 } 2737 gcc_assert (div_factor != 0); 2738 } 2739 2740 if (div_factor != 1) 2741 riscv_expand_op (LSHIFTRT, mode, tmp, tmp, 2742 gen_int_mode (exact_log2 (div_factor), QImode)); 2743 2744 riscv_expand_mult_with_const_int (mode, dest, tmp, 2745 factor / (vlenb / div_factor)); 2746 HOST_WIDE_INT constant = offset - factor; 2747 2748 if (constant == 0) 2749 return; 2750 else if (SMALL_OPERAND (constant)) 2751 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode)); 2752 else 2753 { 2754 /* Handle the constant value is not a 12-bit value. */ 2755 rtx high; 2756 2757 /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. 2758 The addition inside the macro CONST_HIGH_PART may cause an 2759 overflow, so we need to force a sign-extension check. */ 2760 high = gen_int_mode (CONST_HIGH_PART (constant), mode); 2761 constant = CONST_LOW_PART (constant); 2762 riscv_emit_move (tmp, high); 2763 riscv_expand_op (PLUS, mode, dest, tmp, dest); 2764 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode)); 2765 } 2766 } 2767 2768 /* Adjust scalable frame of vector for prologue && epilogue. 
*/ 2769 2770 static void 2771 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue) 2772 { 2773 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode); 2774 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode); 2775 rtx insn, dwarf, adjust_frame_rtx; 2776 2777 riscv_legitimize_poly_move (Pmode, adjust_size, tmp, 2778 gen_int_mode (offset, Pmode)); 2779 2780 if (epilogue) 2781 insn = gen_add3_insn (target, target, adjust_size); 2782 else 2783 insn = gen_sub3_insn (target, target, adjust_size); 2784 2785 insn = emit_insn (insn); 2786 2787 RTX_FRAME_RELATED_P (insn) = 1; 2788 2789 adjust_frame_rtx 2790 = gen_rtx_SET (target, 2791 plus_constant (Pmode, target, epilogue ? offset : -offset)); 2792 2793 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx), 2794 NULL_RTX); 2795 2796 REG_NOTES (insn) = dwarf; 2797 } 2798 2799 /* Take care below subreg const_poly_int move: 2800 2801 1. (set (subreg:DI (reg:TI 237) 8) 2802 (subreg:DI (const_poly_int:TI [4, 2]) 8)) 2803 => 2804 (set (subreg:DI (reg:TI 237) 8) 2805 (const_int 0)) */ 2806 2807 static bool 2808 riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src) 2809 { 2810 gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))); 2811 gcc_assert (SUBREG_BYTE (src).is_constant ()); 2812 2813 int byte_offset = SUBREG_BYTE (src).to_constant (); 2814 rtx const_poly = SUBREG_REG (src); 2815 machine_mode subreg_mode = GET_MODE (const_poly); 2816 2817 if (subreg_mode != TImode) /* Only TImode is needed for now. */ 2818 return false; 2819 2820 if (byte_offset == 8) 2821 { 2822 /* The const_poly_int cannot exceed int64, just set zero here. */ 2823 emit_move_insn (dest, CONST0_RTX (mode)); 2824 return true; 2825 } 2826 2827 /* The below transform will be covered in somewhere else. 2828 Thus, ignore this here. 
2829 (set (subreg:DI (reg:TI 237) 0) 2830 (subreg:DI (const_poly_int:TI [4, 2]) 0)) 2831 => 2832 (set (subreg:DI (reg:TI 237) 0) 2833 (const_poly_int:DI [4, 2])) */ 2834 2835 return false; 2836 } 2837 2838 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent 2839 sequence that is valid. */ 2840 2841 bool 2842 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) 2843 { 2844 if (CONST_POLY_INT_P (src)) 2845 { 2846 /* 2847 Handle: 2848 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156) 2849 (const_int 96 [0x60])) [0 S1 A8]) 2850 (const_poly_int:QI [8, 8])) 2851 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil)) 2852 */ 2853 if (MEM_P (dest)) 2854 { 2855 emit_move_insn (dest, force_reg (mode, src)); 2856 return true; 2857 } 2858 poly_int64 value = rtx_to_poly_int64 (src); 2859 if (!value.is_constant () && !TARGET_VECTOR) 2860 { 2861 riscv_report_v_required (); 2862 return false; 2863 } 2864 2865 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode) 2866 return false; 2867 2868 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode)) 2869 { 2870 /* In RV32 system, handle (const_poly_int:QI [m, n]) 2871 (const_poly_int:HI [m, n]). 2872 In RV64 system, handle (const_poly_int:QI [m, n]) 2873 (const_poly_int:HI [m, n]) 2874 (const_poly_int:SI [m, n]). */ 2875 rtx tmp = gen_reg_rtx (Pmode); 2876 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp, 2877 src); 2878 } 2879 else 2880 { 2881 /* In RV32 system, handle (const_poly_int:SI [m, n]) 2882 (const_poly_int:DI [m, n]). 2883 In RV64 system, handle (const_poly_int:DI [m, n]). 2884 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode, 2885 the offset should not exceed 4GiB in general. 
*/ 2886 rtx tmp = gen_reg_rtx (mode); 2887 riscv_legitimize_poly_move (mode, dest, tmp, src); 2888 } 2889 return true; 2890 } 2891 2892 if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)) 2893 && riscv_legitimize_subreg_const_poly_move (mode, dest, src)) 2894 return true; 2895 2896 /* Expand 2897 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0)) 2898 Expand this data movement instead of simply forbid it since 2899 we can improve the code generation for this following scenario 2900 by RVV auto-vectorization: 2901 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI)) 2902 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0)) 2903 Since RVV mode and scalar mode are in different REG_CLASS, 2904 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */ 2905 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src)))) 2906 { 2907 machine_mode vmode = GET_MODE (SUBREG_REG (src)); 2908 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant (); 2909 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant (); 2910 /* We should be able to handle both partial and paradoxical subreg. */ 2911 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1; 2912 scalar_mode smode = as_a<scalar_mode> (mode); 2913 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size; 2914 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8) 2915 && !TARGET_VECTOR_ELEN_64 ? 2 : 1; 2916 bool need_int_reg_p = false; 2917 2918 if (num == 2) 2919 { 2920 /* If we want to extract 64bit value but ELEN < 64, 2921 we use RVV vector mode with EEW = 32 to extract 2922 the highpart and lowpart. 
*/ 2923 need_int_reg_p = smode == DFmode; 2924 smode = SImode; 2925 nunits = nunits * 2; 2926 } 2927 2928 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)) 2929 { 2930 rtx v = gen_lowpart (vmode, SUBREG_REG (src)); 2931 rtx int_reg = dest; 2932 2933 if (need_int_reg_p) 2934 { 2935 int_reg = gen_reg_rtx (DImode); 2936 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest)); 2937 } 2938 2939 for (unsigned int i = 0; i < num; i++) 2940 { 2941 rtx result; 2942 if (num == 1) 2943 result = int_reg; 2944 else if (i == 0) 2945 result = gen_lowpart (smode, int_reg); 2946 else 2947 result = gen_reg_rtx (smode); 2948 2949 riscv_vector::emit_vec_extract (result, v, 2950 gen_int_mode (index + i, Pmode)); 2951 2952 if (i == 1) 2953 { 2954 if (UNITS_PER_WORD < mode_size) 2955 /* If Pmode = SImode and mode = DImode, we just need to 2956 extract element of index = 1 from the vector and move it 2957 into the highpart of the DEST since DEST consists of 2 2958 scalar registers. */ 2959 emit_move_insn (gen_highpart (smode, int_reg), result); 2960 else 2961 { 2962 rtx tmp = expand_binop (Pmode, ashl_optab, 2963 gen_lowpart (Pmode, result), 2964 gen_int_mode (32, Pmode), 2965 NULL_RTX, 0, OPTAB_DIRECT); 2966 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg, 2967 NULL_RTX, 0, OPTAB_DIRECT); 2968 emit_move_insn (int_reg, tmp2); 2969 } 2970 } 2971 } 2972 2973 if (need_int_reg_p) 2974 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg)); 2975 else 2976 emit_move_insn (dest, int_reg); 2977 } 2978 else 2979 gcc_unreachable (); 2980 2981 return true; 2982 } 2983 /* Expand 2984 (set (reg:QI target) (mem:QI (address))) 2985 to 2986 (set (reg:DI temp) (zero_extend:DI (mem:QI (address)))) 2987 (set (reg:QI target) (subreg:QI (reg:DI temp) 0)) 2988 with auto-sign/zero extend. 
*/ 2989 if (GET_MODE_CLASS (mode) == MODE_INT 2990 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD 2991 && can_create_pseudo_p () 2992 && MEM_P (src)) 2993 { 2994 rtx temp_reg; 2995 int zero_extend_p; 2996 2997 temp_reg = gen_reg_rtx (word_mode); 2998 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND); 2999 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode, 3000 zero_extend_p)); 3001 riscv_emit_move (dest, gen_lowpart (mode, temp_reg)); 3002 return true; 3003 } 3004 3005 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) 3006 { 3007 rtx reg; 3008 3009 if (GET_CODE (src) == CONST_INT) 3010 { 3011 /* Apply the equivalent of PROMOTE_MODE here for constants to 3012 improve cse. */ 3013 machine_mode promoted_mode = mode; 3014 if (GET_MODE_CLASS (mode) == MODE_INT 3015 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD) 3016 promoted_mode = word_mode; 3017 3018 if (splittable_const_int_operand (src, mode)) 3019 { 3020 reg = gen_reg_rtx (promoted_mode); 3021 riscv_move_integer (reg, reg, INTVAL (src), mode); 3022 } 3023 else 3024 reg = force_reg (promoted_mode, src); 3025 3026 if (promoted_mode != mode) 3027 reg = gen_lowpart (mode, reg); 3028 } 3029 else 3030 reg = force_reg (mode, src); 3031 riscv_emit_move (dest, reg); 3032 return true; 3033 } 3034 3035 /* In order to fit NaN boxing, expand 3036 (set FP_REG (reg:HF src)) 3037 to 3038 (set (reg:SI/DI mask) (const_int -65536) 3039 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0))) 3040 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp))) 3041 (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X)) 3042 */ 3043 3044 if (TARGET_HARD_FLOAT 3045 && !TARGET_ZFHMIN && mode == HFmode 3046 && REG_P (dest) && FP_REG_P (REGNO (dest)) 3047 && REG_P (src) && !FP_REG_P (REGNO (src)) 3048 && can_create_pseudo_p ()) 3049 { 3050 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode)); 3051 rtx temp = gen_reg_rtx (word_mode); 3052 
emit_insn (gen_extend_insn (temp, 3053 simplify_gen_subreg (HImode, src, mode, 0), 3054 word_mode, HImode, 1)); 3055 if (word_mode == SImode) 3056 emit_insn (gen_iorsi3 (temp, mask, temp)); 3057 else 3058 emit_insn (gen_iordi3 (temp, mask, temp)); 3059 3060 riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp), 3061 UNSPEC_FMV_SFP16_X)); 3062 3063 return true; 3064 } 3065 3066 /* We need to deal with constants that would be legitimate 3067 immediate_operands but aren't legitimate move_operands. */ 3068 if (CONSTANT_P (src) && !move_operand (src, mode)) 3069 { 3070 riscv_legitimize_const_move (mode, dest, src); 3071 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src)); 3072 return true; 3073 } 3074 3075 /* RISC-V GCC may generate non-legitimate address due to we provide some 3076 pattern for optimize access PIC local symbol and it's make GCC generate 3077 unrecognizable instruction during optmizing. */ 3078 3079 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0), 3080 reload_completed)) 3081 { 3082 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode); 3083 } 3084 3085 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0), 3086 reload_completed)) 3087 { 3088 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode); 3089 } 3090 3091 return false; 3092 } 3093 3094 /* Return true if there is an instruction that implements CODE and accepts 3095 X as an immediate operand. */ 3096 3097 static int 3098 riscv_immediate_operand_p (int code, HOST_WIDE_INT x) 3099 { 3100 switch (code) 3101 { 3102 case ASHIFT: 3103 case ASHIFTRT: 3104 case LSHIFTRT: 3105 /* All shift counts are truncated to a valid constant. */ 3106 return true; 3107 3108 case AND: 3109 case IOR: 3110 case XOR: 3111 case PLUS: 3112 case LT: 3113 case LTU: 3114 /* These instructions take 12-bit signed immediates. */ 3115 return SMALL_OPERAND (x); 3116 3117 case LE: 3118 /* We add 1 to the immediate and use SLT. 
*/ 3119 return SMALL_OPERAND (x + 1); 3120 3121 case LEU: 3122 /* Likewise SLTU, but reject the always-true case. */ 3123 return SMALL_OPERAND (x + 1) && x + 1 != 0; 3124 3125 case GE: 3126 case GEU: 3127 /* We can emulate an immediate of 1 by using GT/GTU against x0. */ 3128 return x == 1; 3129 3130 default: 3131 /* By default assume that x0 can be used for 0. */ 3132 return x == 0; 3133 } 3134 } 3135 3136 /* Return the cost of binary operation X, given that the instruction 3137 sequence for a word-sized or smaller operation takes SIGNLE_INSNS 3138 instructions and that the sequence of a double-word operation takes 3139 DOUBLE_INSNS instructions. */ 3140 3141 static int 3142 riscv_binary_cost (rtx x, int single_insns, int double_insns) 3143 { 3144 if (!riscv_v_ext_mode_p (GET_MODE (x)) 3145 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2) 3146 return COSTS_N_INSNS (double_insns); 3147 return COSTS_N_INSNS (single_insns); 3148 } 3149 3150 /* Return the cost of sign- or zero-extending OP. */ 3151 3152 static int 3153 riscv_extend_cost (rtx op, bool unsigned_p) 3154 { 3155 if (MEM_P (op)) 3156 return 0; 3157 3158 if (unsigned_p && GET_MODE (op) == QImode) 3159 /* We can use ANDI. */ 3160 return COSTS_N_INSNS (1); 3161 3162 /* ZBA provide zext.w. */ 3163 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode) 3164 return COSTS_N_INSNS (1); 3165 3166 /* ZBB provide zext.h, sext.b and sext.h. */ 3167 if (TARGET_ZBB) 3168 { 3169 if (!unsigned_p && GET_MODE (op) == QImode) 3170 return COSTS_N_INSNS (1); 3171 3172 if (GET_MODE (op) == HImode) 3173 return COSTS_N_INSNS (1); 3174 } 3175 3176 if (!unsigned_p && GET_MODE (op) == SImode) 3177 /* We can use SEXT.W. */ 3178 return COSTS_N_INSNS (1); 3179 3180 /* We need to use a shift left and a shift right. */ 3181 return COSTS_N_INSNS (2); 3182 } 3183 3184 /* Implement TARGET_RTX_COSTS. 
*/ 3185 3186 #define SINGLE_SHIFT_COST 1 3187 3188 static bool 3189 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED, 3190 int *total, bool speed) 3191 { 3192 /* TODO: We set RVV instruction cost as 1 by default. 3193 Cost Model need to be well analyzed and supported in the future. */ 3194 if (riscv_v_ext_mode_p (mode)) 3195 { 3196 *total = COSTS_N_INSNS (1); 3197 return true; 3198 } 3199 3200 bool float_mode_p = FLOAT_MODE_P (mode); 3201 int cost; 3202 3203 switch (GET_CODE (x)) 3204 { 3205 case SET: 3206 /* If we are called for an INSN that's a simple set of a register, 3207 then cost based on the SET_SRC alone. */ 3208 if (outer_code == INSN 3209 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x)))) 3210 { 3211 riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed); 3212 return true; 3213 } 3214 3215 /* Otherwise return FALSE indicating we should recurse into both the 3216 SET_DEST and SET_SRC combining the cost of both. */ 3217 return false; 3218 3219 case CONST_INT: 3220 /* trivial constants checked using OUTER_CODE in case they are 3221 encodable in insn itself w/o need for additional insn(s). */ 3222 if (riscv_immediate_operand_p (outer_code, INTVAL (x))) 3223 { 3224 *total = 0; 3225 return true; 3226 } 3227 /* Fall through. */ 3228 3229 case SYMBOL_REF: 3230 case LABEL_REF: 3231 case CONST_DOUBLE: 3232 /* With TARGET_SUPPORTS_WIDE_INT const int can't be in CONST_DOUBLE 3233 rtl object. Weird recheck due to switch-case fall through above. */ 3234 if (GET_CODE (x) == CONST_DOUBLE) 3235 gcc_assert (GET_MODE (x) != VOIDmode); 3236 /* Fall through. */ 3237 3238 case CONST: 3239 /* Non trivial CONST_INT Fall through: check if need multiple insns. */ 3240 if ((cost = riscv_const_insns (x)) > 0) 3241 { 3242 /* 1. Hoist will GCSE constants only if TOTAL returned is non-zero. 3243 2. For constants loaded more than once, the approach so far has 3244 been to duplicate the operation than to CSE the constant. 3245 3. 
TODO: make cost more accurate specially if riscv_const_insns 3246 returns > 1. */ 3247 if (outer_code == SET || GET_MODE (x) == VOIDmode) 3248 *total = COSTS_N_INSNS (1); 3249 } 3250 else /* The instruction will be fetched from the constant pool. */ 3251 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE)); 3252 return true; 3253 3254 case MEM: 3255 /* If the address is legitimate, return the number of 3256 instructions it needs. */ 3257 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0) 3258 { 3259 /* When optimizing for size, make uncompressible 32-bit addresses 3260 more expensive so that compressible 32-bit addresses are 3261 preferred. */ 3262 if ((TARGET_RVC || TARGET_ZCA) 3263 && !speed && riscv_mshorten_memrefs && mode == SImode 3264 && !riscv_compressed_lw_address_p (XEXP (x, 0))) 3265 cost++; 3266 3267 *total = COSTS_N_INSNS (cost + tune_param->memory_cost); 3268 return true; 3269 } 3270 /* Otherwise use the default handling. */ 3271 return false; 3272 3273 case IF_THEN_ELSE: 3274 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) 3275 && reg_or_0_operand (XEXP (x, 1), mode) 3276 && sfb_alu_operand (XEXP (x, 2), mode) 3277 && comparison_operator (XEXP (x, 0), VOIDmode)) 3278 { 3279 /* For predicated conditional-move operations we assume the cost 3280 of a single instruction even though there are actually two. 
*/ 3281 *total = COSTS_N_INSNS (1); 3282 return true; 3283 } 3284 else if (TARGET_ZICOND_LIKE 3285 && outer_code == SET 3286 && ((GET_CODE (XEXP (x, 1)) == REG 3287 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1)))) 3288 || (GET_CODE (XEXP (x, 2)) == REG 3289 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2)))) 3290 || (COMPARISON_P (XEXP (x, 0)) 3291 && GET_CODE (XEXP (x, 1)) == REG 3292 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0))) 3293 || (COMPARISON_P (XEXP (x, 0)) 3294 && GET_CODE (XEXP (x, 1)) == REG 3295 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0))))) 3296 { 3297 *total = COSTS_N_INSNS (1); 3298 return true; 3299 } 3300 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx) 3301 { 3302 if (equality_operator (XEXP (x, 0), mode) 3303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT) 3304 { 3305 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1); 3306 return true; 3307 } 3308 if (ordered_comparison_operator (XEXP (x, 0), mode)) 3309 { 3310 *total = COSTS_N_INSNS (1); 3311 return true; 3312 } 3313 } 3314 return false; 3315 3316 case NOT: 3317 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1); 3318 return false; 3319 3320 case AND: 3321 /* slli.uw pattern for zba. */ 3322 if (TARGET_ZBA && TARGET_64BIT && mode == DImode 3323 && GET_CODE (XEXP (x, 0)) == ASHIFT) 3324 { 3325 rtx and_rhs = XEXP (x, 1); 3326 rtx ashift_lhs = XEXP (XEXP (x, 0), 0); 3327 rtx ashift_rhs = XEXP (XEXP (x, 0), 1); 3328 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs)) 3329 && CONST_INT_P (ashift_rhs) 3330 && CONST_INT_P (and_rhs) 3331 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff)) 3332 *total = COSTS_N_INSNS (1); 3333 return true; 3334 } 3335 /* bclri pattern for zbs. */ 3336 if (TARGET_ZBS 3337 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode)) 3338 { 3339 *total = COSTS_N_INSNS (1); 3340 return true; 3341 } 3342 /* bclr pattern for zbs. 
*/ 3343 if (TARGET_ZBS 3344 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))) 3345 && GET_CODE (XEXP (x, 0)) == ROTATE 3346 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0)) 3347 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2) 3348 { 3349 *total = COSTS_N_INSNS (1); 3350 return true; 3351 } 3352 3353 gcc_fallthrough (); 3354 case IOR: 3355 case XOR: 3356 /* orn, andn and xorn pattern for zbb. */ 3357 if (TARGET_ZBB 3358 && GET_CODE (XEXP (x, 0)) == NOT) 3359 { 3360 *total = riscv_binary_cost (x, 1, 2); 3361 return true; 3362 } 3363 3364 /* bset[i] and binv[i] pattern for zbs. */ 3365 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR) 3366 && TARGET_ZBS 3367 && ((GET_CODE (XEXP (x, 0)) == ASHIFT 3368 && CONST_INT_P (XEXP (XEXP (x, 0), 0))) 3369 || single_bit_mask_operand (XEXP (x, 1), VOIDmode))) 3370 { 3371 *total = COSTS_N_INSNS (1); 3372 return true; 3373 } 3374 3375 /* Double-word operations use two single-word operations. */ 3376 *total = riscv_binary_cost (x, 1, 2); 3377 return false; 3378 3379 case ZERO_EXTRACT: 3380 /* This is an SImode shift. */ 3381 if (outer_code == SET 3382 && CONST_INT_P (XEXP (x, 1)) 3383 && CONST_INT_P (XEXP (x, 2)) 3384 && (INTVAL (XEXP (x, 2)) > 0) 3385 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32)) 3386 { 3387 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); 3388 return true; 3389 } 3390 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */ 3391 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET 3392 && GET_CODE (XEXP (x, 1)) == CONST_INT 3393 && INTVAL (XEXP (x, 1)) == 1) 3394 { 3395 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); 3396 return true; 3397 } 3398 gcc_fallthrough (); 3399 case SIGN_EXTRACT: 3400 if (TARGET_XTHEADBB && outer_code == SET 3401 && CONST_INT_P (XEXP (x, 1)) 3402 && CONST_INT_P (XEXP (x, 2))) 3403 { 3404 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); 3405 return true; 3406 } 3407 return false; 3408 3409 case ASHIFT: 3410 /* bset pattern for zbs. 
*/ 3411 if (TARGET_ZBS 3412 && CONST_INT_P (XEXP (x, 0)) 3413 && INTVAL (XEXP (x, 0)) == 1) 3414 { 3415 *total = COSTS_N_INSNS (1); 3416 return true; 3417 } 3418 gcc_fallthrough (); 3419 case ASHIFTRT: 3420 case LSHIFTRT: 3421 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST, 3422 CONSTANT_P (XEXP (x, 1)) ? 4 : 9); 3423 return false; 3424 3425 case ABS: 3426 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3); 3427 return false; 3428 3429 case LO_SUM: 3430 *total = set_src_cost (XEXP (x, 0), mode, speed); 3431 return true; 3432 3433 case LT: 3434 /* This is an SImode shift. */ 3435 if (outer_code == SET && GET_MODE (x) == DImode 3436 && GET_MODE (XEXP (x, 0)) == SImode) 3437 { 3438 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); 3439 return true; 3440 } 3441 /* Fall through. */ 3442 case LTU: 3443 case LE: 3444 case LEU: 3445 case GT: 3446 case GTU: 3447 case GE: 3448 case GEU: 3449 case EQ: 3450 case NE: 3451 /* Branch comparisons have VOIDmode, so use the first operand's 3452 mode instead. */ 3453 mode = GET_MODE (XEXP (x, 0)); 3454 if (float_mode_p) 3455 *total = tune_param->fp_add[mode == DFmode]; 3456 else 3457 *total = riscv_binary_cost (x, 1, 3); 3458 return false; 3459 3460 case UNORDERED: 3461 case ORDERED: 3462 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */ 3463 mode = GET_MODE (XEXP (x, 0)); 3464 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2); 3465 return false; 3466 3467 case UNEQ: 3468 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */ 3469 mode = GET_MODE (XEXP (x, 0)); 3470 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3); 3471 return false; 3472 3473 case LTGT: 3474 /* (FLT(A, A) || FGT(B, B)). */ 3475 mode = GET_MODE (XEXP (x, 0)); 3476 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2); 3477 return false; 3478 3479 case UNGE: 3480 case UNGT: 3481 case UNLE: 3482 case UNLT: 3483 /* FLT or FLE, but guarded by an FFLAGS read and write. 
*/ 3484 mode = GET_MODE (XEXP (x, 0)); 3485 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4); 3486 return false; 3487 3488 case MINUS: 3489 if (float_mode_p) 3490 *total = tune_param->fp_add[mode == DFmode]; 3491 else 3492 *total = riscv_binary_cost (x, 1, 4); 3493 return false; 3494 3495 case PLUS: 3496 /* add.uw pattern for zba. */ 3497 if (TARGET_ZBA 3498 && (TARGET_64BIT && (mode == DImode)) 3499 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 3500 && register_operand (XEXP (XEXP (x, 0), 0), 3501 GET_MODE (XEXP (XEXP (x, 0), 0))) 3502 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) 3503 { 3504 *total = COSTS_N_INSNS (1); 3505 return true; 3506 } 3507 /* shNadd pattern for zba. */ 3508 if (TARGET_ZBA 3509 && ((!TARGET_64BIT && (mode == SImode)) || 3510 (TARGET_64BIT && (mode == DImode))) 3511 && (GET_CODE (XEXP (x, 0)) == ASHIFT) 3512 && register_operand (XEXP (XEXP (x, 0), 0), 3513 GET_MODE (XEXP (XEXP (x, 0), 0))) 3514 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3515 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3)) 3516 { 3517 *total = COSTS_N_INSNS (1); 3518 return true; 3519 } 3520 /* Before strength-reduction, the shNadd can be expressed as the addition 3521 of a multiplication with a power-of-two. If this case is not handled, 3522 the strength-reduction in expmed.c will calculate an inflated cost. */ 3523 if (TARGET_ZBA 3524 && mode == word_mode 3525 && GET_CODE (XEXP (x, 0)) == MULT 3526 && register_operand (XEXP (XEXP (x, 0), 0), 3527 GET_MODE (XEXP (XEXP (x, 0), 0))) 3528 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3529 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1))) 3530 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3)) 3531 { 3532 *total = COSTS_N_INSNS (1); 3533 return true; 3534 } 3535 /* shNadd.uw pattern for zba. 
3536 [(set (match_operand:DI 0 "register_operand" "=r") 3537 (plus:DI 3538 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") 3539 (match_operand:QI 2 "immediate_operand" "I")) 3540 (match_operand 3 "immediate_operand" "")) 3541 (match_operand:DI 4 "register_operand" "r")))] 3542 "TARGET_64BIT && TARGET_ZBA 3543 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3) 3544 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff" 3545 */ 3546 if (TARGET_ZBA 3547 && (TARGET_64BIT && (mode == DImode)) 3548 && (GET_CODE (XEXP (x, 0)) == AND) 3549 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))) 3550 { 3551 do { 3552 rtx and_lhs = XEXP (XEXP (x, 0), 0); 3553 rtx and_rhs = XEXP (XEXP (x, 0), 1); 3554 if (GET_CODE (and_lhs) != ASHIFT) 3555 break; 3556 if (!CONST_INT_P (and_rhs)) 3557 break; 3558 3559 rtx ashift_rhs = XEXP (and_lhs, 1); 3560 3561 if (!CONST_INT_P (ashift_rhs) 3562 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3)) 3563 break; 3564 3565 if (CONST_INT_P (and_rhs) 3566 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff)) 3567 { 3568 *total = COSTS_N_INSNS (1); 3569 return true; 3570 } 3571 } while (false); 3572 } 3573 3574 if (float_mode_p) 3575 *total = tune_param->fp_add[mode == DFmode]; 3576 else 3577 *total = riscv_binary_cost (x, 1, 4); 3578 return false; 3579 3580 case NEG: 3581 { 3582 rtx op = XEXP (x, 0); 3583 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode)) 3584 { 3585 *total = (tune_param->fp_mul[mode == DFmode] 3586 + set_src_cost (XEXP (op, 0), mode, speed) 3587 + set_src_cost (XEXP (op, 1), mode, speed) 3588 + set_src_cost (XEXP (op, 2), mode, speed)); 3589 return true; 3590 } 3591 } 3592 3593 if (float_mode_p) 3594 *total = tune_param->fp_add[mode == DFmode]; 3595 else 3596 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 
4 : 1); 3597 return false; 3598 3599 case MULT: 3600 if (float_mode_p) 3601 *total = tune_param->fp_mul[mode == DFmode]; 3602 else if (!(TARGET_MUL || TARGET_ZMMUL)) 3603 /* Estimate the cost of a library call. */ 3604 *total = COSTS_N_INSNS (speed ? 32 : 6); 3605 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD) 3606 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2); 3607 else if (!speed) 3608 *total = COSTS_N_INSNS (1); 3609 else 3610 *total = tune_param->int_mul[mode == DImode]; 3611 return false; 3612 3613 case DIV: 3614 case SQRT: 3615 case MOD: 3616 if (float_mode_p) 3617 { 3618 *total = tune_param->fp_div[mode == DFmode]; 3619 return false; 3620 } 3621 /* Fall through. */ 3622 3623 case UDIV: 3624 case UMOD: 3625 if (!TARGET_DIV) 3626 /* Estimate the cost of a library call. */ 3627 *total = COSTS_N_INSNS (speed ? 32 : 6); 3628 else if (speed) 3629 *total = tune_param->int_div[mode == DImode]; 3630 else 3631 *total = COSTS_N_INSNS (1); 3632 return false; 3633 3634 case ZERO_EXTEND: 3635 /* This is an SImode shift. */ 3636 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT) 3637 { 3638 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); 3639 return true; 3640 } 3641 /* Fall through. */ 3642 case SIGN_EXTEND: 3643 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND); 3644 return false; 3645 3646 case BSWAP: 3647 if (TARGET_ZBB) 3648 { 3649 /* RISC-V only defines rev8 for XLEN, so we will need an extra 3650 shift-right instruction for smaller modes. */ 3651 *total = COSTS_N_INSNS (mode == word_mode ? 
1 : 2); 3652 return true; 3653 } 3654 return false; 3655 3656 case FLOAT: 3657 case UNSIGNED_FLOAT: 3658 case FIX: 3659 case FLOAT_EXTEND: 3660 case FLOAT_TRUNCATE: 3661 *total = tune_param->fp_add[mode == DFmode]; 3662 return false; 3663 3664 case FMA: 3665 *total = (tune_param->fp_mul[mode == DFmode] 3666 + set_src_cost (XEXP (x, 0), mode, speed) 3667 + set_src_cost (XEXP (x, 1), mode, speed) 3668 + set_src_cost (XEXP (x, 2), mode, speed)); 3669 return true; 3670 3671 case UNSPEC: 3672 if (XINT (x, 1) == UNSPEC_AUIPC) 3673 { 3674 /* Make AUIPC cheap to avoid spilling its result to the stack. */ 3675 *total = 1; 3676 return true; 3677 } 3678 return false; 3679 3680 default: 3681 return false; 3682 } 3683 } 3684 3685 /* Implement TARGET_ADDRESS_COST. */ 3686 3687 static int 3688 riscv_address_cost (rtx addr, machine_mode mode, 3689 addr_space_t as ATTRIBUTE_UNUSED, 3690 bool speed ATTRIBUTE_UNUSED) 3691 { 3692 /* When optimizing for size, make uncompressible 32-bit addresses more 3693 * expensive so that compressible 32-bit addresses are preferred. */ 3694 if ((TARGET_RVC || TARGET_ZCA) 3695 && !speed && riscv_mshorten_memrefs && mode == SImode 3696 && !riscv_compressed_lw_address_p (addr)) 3697 return riscv_address_insns (addr, mode, false) + 1; 3698 return riscv_address_insns (addr, mode, false); 3699 } 3700 3701 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost 3702 calculation for conditional branches: one unit is considered the cost 3703 of microarchitecture-dependent actual branch execution and therefore 3704 multiplied by BRANCH_COST and any remaining units are considered fixed 3705 branch overhead. Branches on a floating-point condition incur an extra 3706 instruction cost as they will be split into an FCMP operation followed 3707 by a branch on an integer condition. 
*/ 3708 3709 static int 3710 riscv_insn_cost (rtx_insn *insn, bool speed) 3711 { 3712 rtx x = PATTERN (insn); 3713 int cost = pattern_cost (x, speed); 3714 3715 if (JUMP_P (insn)) 3716 { 3717 if (GET_CODE (x) == PARALLEL) 3718 x = XVECEXP (x, 0, 0); 3719 if (GET_CODE (x) == SET 3720 && GET_CODE (SET_DEST (x)) == PC 3721 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE) 3722 { 3723 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1); 3724 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0)))) 3725 cost += COSTS_N_INSNS (1); 3726 } 3727 } 3728 return cost; 3729 } 3730 3731 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation, 3732 but we consider cost units of branch instructions equal to cost units of 3733 other instructions. */ 3734 3735 static unsigned int 3736 riscv_max_noce_ifcvt_seq_cost (edge e) 3737 { 3738 bool predictable_p = predictable_edge_p (e); 3739 3740 if (predictable_p) 3741 { 3742 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost)) 3743 return param_max_rtl_if_conversion_predictable_cost; 3744 } 3745 else 3746 { 3747 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost)) 3748 return param_max_rtl_if_conversion_unpredictable_cost; 3749 } 3750 3751 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p)); 3752 } 3753 3754 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a 3755 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)' 3756 by our actual conditional branch cost, observing that our branches test 3757 conditions directly, so there is no preparatory extra condition-set 3758 instruction. */ 3759 3760 static bool 3761 riscv_noce_conversion_profitable_p (rtx_insn *seq, 3762 struct noce_if_info *if_info) 3763 { 3764 struct noce_if_info riscv_if_info = *if_info; 3765 3766 riscv_if_info.original_cost -= COSTS_N_INSNS (2); 3767 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p); 3768 3769 /* Hack alert! 
When `noce_try_store_flag_mask' uses `cstore<mode>4' 3770 to emit a conditional set operation on DImode output it comes up 3771 with a sequence such as: 3772 3773 (insn 26 0 27 (set (reg:SI 140) 3774 (eq:SI (reg/v:DI 137 [ c ]) 3775 (const_int 0 [0]))) 302 {*seq_zero_disi} 3776 (nil)) 3777 (insn 27 26 28 (set (reg:DI 139) 3778 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal} 3779 (nil)) 3780 3781 because our `cstore<mode>4' pattern expands to an insn that gives 3782 a SImode output. The output of conditional set is 0 or 1 boolean, 3783 so it is valid for input in any scalar integer mode and therefore 3784 combine later folds the zero extend operation into an equivalent 3785 conditional set operation that produces a DImode output, however 3786 this redundant zero extend operation counts towards the cost of 3787 the replacement sequence. Compensate for that by incrementing the 3788 cost of the original sequence as well as the maximum sequence cost 3789 accordingly. Likewise for sign extension. */ 3790 rtx last_dest = NULL_RTX; 3791 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) 3792 { 3793 if (!NONDEBUG_INSN_P (insn)) 3794 continue; 3795 3796 rtx x = PATTERN (insn); 3797 if (NONJUMP_INSN_P (insn) 3798 && GET_CODE (x) == SET) 3799 { 3800 rtx src = SET_SRC (x); 3801 enum rtx_code code = GET_CODE (src); 3802 if (last_dest != NULL_RTX 3803 && (code == SIGN_EXTEND || code == ZERO_EXTEND) 3804 && REG_P (XEXP (src, 0)) 3805 && REGNO (XEXP (src, 0)) == REGNO (last_dest)) 3806 { 3807 riscv_if_info.original_cost += COSTS_N_INSNS (1); 3808 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); 3809 } 3810 last_dest = NULL_RTX; 3811 rtx dest = SET_DEST (x); 3812 if (COMPARISON_P (src) 3813 && REG_P (dest) 3814 && GET_MODE (dest) == SImode) 3815 last_dest = dest; 3816 } 3817 else 3818 last_dest = NULL_RTX; 3819 } 3820 3821 return default_noce_conversion_profitable_p (seq, &riscv_if_info); 3822 } 3823 3824 /* Return one word of double-word value OP. 
HIGH_P is true to select the 3825 high part or false to select the low part. */ 3826 3827 rtx 3828 riscv_subword (rtx op, bool high_p) 3829 { 3830 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0; 3831 machine_mode mode = GET_MODE (op); 3832 3833 if (mode == VOIDmode) 3834 mode = TARGET_64BIT ? TImode : DImode; 3835 3836 if (MEM_P (op)) 3837 return adjust_address (op, word_mode, byte); 3838 3839 if (REG_P (op)) 3840 gcc_assert (!FP_REG_RTX_P (op)); 3841 3842 return simplify_gen_subreg (word_mode, op, mode, byte); 3843 } 3844 3845 /* Return true if a 64-bit move from SRC to DEST should be split into two. */ 3846 3847 bool 3848 riscv_split_64bit_move_p (rtx dest, rtx src) 3849 { 3850 if (TARGET_64BIT) 3851 return false; 3852 3853 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */ 3854 if (satisfies_constraint_zfli (src)) 3855 return false; 3856 3857 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case 3858 of zeroing an FPR with FCVT.D.W. */ 3859 if (TARGET_DOUBLE_FLOAT 3860 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) 3861 || (FP_REG_RTX_P (dest) && MEM_P (src)) 3862 || (FP_REG_RTX_P (src) && MEM_P (dest)) 3863 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))))) 3864 return false; 3865 3866 return true; 3867 } 3868 3869 /* Split a doubleword move from SRC to DEST. On 32-bit targets, 3870 this function handles 64-bit moves for which riscv_split_64bit_move_p 3871 holds. For 64-bit targets, this function handles 128-bit moves. */ 3872 3873 void 3874 riscv_split_doubleword_move (rtx dest, rtx src) 3875 { 3876 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. 
*/ 3877 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV)) 3878 { 3879 if (FP_REG_RTX_P (dest)) 3880 { 3881 rtx low_src = riscv_subword (src, false); 3882 rtx high_src = riscv_subword (src, true); 3883 3884 if (TARGET_ZFA) 3885 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src)); 3886 else 3887 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src)); 3888 return; 3889 } 3890 if (FP_REG_RTX_P (src)) 3891 { 3892 rtx low_dest = riscv_subword (dest, false); 3893 rtx high_dest = riscv_subword (dest, true); 3894 3895 if (TARGET_ZFA) 3896 { 3897 emit_insn (gen_movsidf2_low_rv32 (low_dest, src)); 3898 emit_insn (gen_movsidf2_high_rv32 (high_dest, src)); 3899 return; 3900 } 3901 else 3902 { 3903 emit_insn (gen_th_fmv_x_w (low_dest, src)); 3904 emit_insn (gen_th_fmv_x_hw (high_dest, src)); 3905 } 3906 return; 3907 } 3908 } 3909 3910 /* The operation can be split into two normal moves. Decide in 3911 which order to do them. */ 3912 rtx low_dest = riscv_subword (dest, false); 3913 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) 3914 { 3915 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true)); 3916 riscv_emit_move (low_dest, riscv_subword (src, false)); 3917 } 3918 else 3919 { 3920 riscv_emit_move (low_dest, riscv_subword (src, false)); 3921 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true)); 3922 } 3923 } 3924 3925 /* Return the appropriate instructions to move SRC into DEST. Assume 3927 that SRC is operand 1 and DEST is operand 0. 
*/ 3928 3929 const char * 3930 riscv_output_move (rtx dest, rtx src) 3931 { 3932 enum rtx_code dest_code, src_code; 3933 machine_mode mode; 3934 bool dbl_p; 3935 unsigned width; 3936 const char *insn; 3937 3938 if ((insn = th_output_move (dest, src))) 3939 return insn; 3940 3941 dest_code = GET_CODE (dest); 3942 src_code = GET_CODE (src); 3943 mode = GET_MODE (dest); 3944 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8); 3945 width = GET_MODE_SIZE (mode).to_constant (); 3946 3947 if (dbl_p && riscv_split_64bit_move_p (dest, src)) 3948 return "#"; 3949 3950 if (dest_code == REG && GP_REG_P (REGNO (dest))) 3951 { 3952 if (src_code == REG && FP_REG_P (REGNO (src))) 3953 switch (width) 3954 { 3955 case 2: 3956 if (TARGET_ZFHMIN) 3957 return "fmv.x.h\t%0,%1"; 3958 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */ 3959 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16"; 3960 case 4: 3961 return "fmv.x.s\t%0,%1"; 3962 case 8: 3963 return "fmv.x.d\t%0,%1"; 3964 } 3965 3966 if (src_code == MEM) 3967 switch (width) 3968 { 3969 case 1: return "lbu\t%0,%1"; 3970 case 2: return "lhu\t%0,%1"; 3971 case 4: return "lw\t%0,%1"; 3972 case 8: return "ld\t%0,%1"; 3973 } 3974 3975 if (src_code == CONST_INT) 3976 { 3977 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src))) 3978 return "li\t%0,%1"; 3979 3980 if (TARGET_ZBS 3981 && SINGLE_BIT_MASK_OPERAND (INTVAL (src))) 3982 return "bseti\t%0,zero,%S1"; 3983 3984 /* Should never reach here. 
*/ 3985 abort (); 3986 } 3987 3988 if (src_code == HIGH) 3989 return "lui\t%0,%h1"; 3990 3991 if (symbolic_operand (src, VOIDmode)) 3992 switch (riscv_classify_symbolic_expression (src)) 3993 { 3994 case SYMBOL_GOT_DISP: return "la\t%0,%1"; 3995 case SYMBOL_ABSOLUTE: return "lla\t%0,%1"; 3996 case SYMBOL_PCREL: return "lla\t%0,%1"; 3997 default: gcc_unreachable (); 3998 } 3999 } 4000 if ((src_code == REG && GP_REG_P (REGNO (src))) 4001 || (src == CONST0_RTX (mode))) 4002 { 4003 if (dest_code == REG) 4004 { 4005 if (GP_REG_P (REGNO (dest))) 4006 return "mv\t%0,%z1"; 4007 4008 if (FP_REG_P (REGNO (dest))) 4009 switch (width) 4010 { 4011 case 2: 4012 if (TARGET_ZFHMIN) 4013 return "fmv.h.x\t%0,%z1"; 4014 /* High 16 bits should be all-1, otherwise HW will treated 4015 as a n-bit canonical NaN, but isn't matter for softfloat. */ 4016 return "fmv.s.x\t%0,%1"; 4017 case 4: 4018 return "fmv.s.x\t%0,%z1"; 4019 case 8: 4020 if (TARGET_64BIT) 4021 return "fmv.d.x\t%0,%z1"; 4022 /* in RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w */ 4023 gcc_assert (src == CONST0_RTX (mode)); 4024 return "fcvt.d.w\t%0,x0"; 4025 } 4026 } 4027 if (dest_code == MEM) 4028 switch (width) 4029 { 4030 case 1: return "sb\t%z1,%0"; 4031 case 2: return "sh\t%z1,%0"; 4032 case 4: return "sw\t%z1,%0"; 4033 case 8: return "sd\t%z1,%0"; 4034 } 4035 } 4036 if (src_code == REG && FP_REG_P (REGNO (src))) 4037 { 4038 if (dest_code == REG && FP_REG_P (REGNO (dest))) 4039 switch (width) 4040 { 4041 case 2: 4042 if (TARGET_ZFH) 4043 return "fmv.h\t%0,%1"; 4044 return "fmv.s\t%0,%1"; 4045 case 4: 4046 return "fmv.s\t%0,%1"; 4047 case 8: 4048 return "fmv.d\t%0,%1"; 4049 } 4050 4051 if (dest_code == MEM) 4052 switch (width) 4053 { 4054 case 2: 4055 return "fsh\t%1,%0"; 4056 case 4: 4057 return "fsw\t%1,%0"; 4058 case 8: 4059 return "fsd\t%1,%0"; 4060 } 4061 } 4062 if (dest_code == REG && FP_REG_P (REGNO (dest))) 4063 { 4064 if (src_code == MEM) 4065 switch (width) 4066 { 4067 case 2: 4068 return "flh\t%0,%1"; 
4069 case 4: 4070 return "flw\t%0,%1"; 4071 case 8: 4072 return "fld\t%0,%1"; 4073 } 4074 4075 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src)) 4076 switch (width) 4077 { 4078 case 2: 4079 return "fli.h\t%0,%1"; 4080 case 4: 4081 return "fli.s\t%0,%1"; 4082 case 8: 4083 return "fli.d\t%0,%1"; 4084 } 4085 } 4086 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT) 4087 { 4088 /* We only want a single full vector register VLEN read after reload. */ 4089 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR)); 4090 return "csrr\t%0,vlenb"; 4091 } 4092 gcc_unreachable (); 4093 } 4094 4095 const char * 4096 riscv_output_return () 4097 { 4098 if (cfun->machine->naked_p) 4099 return ""; 4100 4101 return "ret"; 4102 } 4103 4104 4105 /* Return true if CMP1 is a suitable second operand for integer ordering 4107 test CODE. See also the *sCC patterns in riscv.md. */ 4108 4109 static bool 4110 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) 4111 { 4112 switch (code) 4113 { 4114 case GT: 4115 case GTU: 4116 return reg_or_0_operand (cmp1, VOIDmode); 4117 4118 case GE: 4119 case GEU: 4120 return cmp1 == const1_rtx; 4121 4122 case LT: 4123 case LTU: 4124 return arith_operand (cmp1, VOIDmode); 4125 4126 case LE: 4127 return sle_operand (cmp1, VOIDmode); 4128 4129 case LEU: 4130 return sleu_operand (cmp1, VOIDmode); 4131 4132 default: 4133 gcc_unreachable (); 4134 } 4135 } 4136 4137 /* Return true if *CMP1 (of mode MODE) is a valid second operand for 4138 integer ordering test *CODE, or if an equivalent combination can 4139 be formed by adjusting *CODE and *CMP1. When returning true, update 4140 *CODE and *CMP1 with the chosen code and operand, otherwise leave 4141 them alone. 
*/ 4142 4143 static bool 4144 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, 4145 machine_mode mode) 4146 { 4147 HOST_WIDE_INT plus_one; 4148 4149 if (riscv_int_order_operand_ok_p (*code, *cmp1)) 4150 return true; 4151 4152 if (CONST_INT_P (*cmp1)) 4153 switch (*code) 4154 { 4155 case LE: 4156 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); 4157 if (INTVAL (*cmp1) < plus_one) 4158 { 4159 *code = LT; 4160 *cmp1 = force_reg (mode, GEN_INT (plus_one)); 4161 return true; 4162 } 4163 break; 4164 4165 case LEU: 4166 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); 4167 if (plus_one != 0) 4168 { 4169 *code = LTU; 4170 *cmp1 = force_reg (mode, GEN_INT (plus_one)); 4171 return true; 4172 } 4173 break; 4174 4175 default: 4176 break; 4177 } 4178 return false; 4179 } 4180 4181 /* Compare CMP0 and CMP1 using ordering test CODE and store the result 4182 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR 4183 is nonnull, it's OK to set TARGET to the inverse of the result and 4184 flip *INVERT_PTR instead. */ 4185 4186 static void 4187 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr, 4188 rtx target, rtx cmp0, rtx cmp1) 4189 { 4190 machine_mode mode; 4191 4192 /* First see if there is a RISCV instruction that can do this operation. 4193 If not, try doing the same for the inverse operation. If that also 4194 fails, force CMP1 into a register and try again. 
*/ 4195 mode = GET_MODE (cmp0); 4196 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode)) 4197 riscv_emit_binary (code, target, cmp0, cmp1); 4198 else 4199 { 4200 enum rtx_code inv_code = reverse_condition (code); 4201 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode)) 4202 { 4203 cmp1 = force_reg (mode, cmp1); 4204 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1); 4205 } 4206 else if (invert_ptr == 0) 4207 { 4208 rtx inv_target = riscv_force_binary (word_mode, 4209 inv_code, cmp0, cmp1); 4210 riscv_emit_binary (EQ, target, inv_target, const0_rtx); 4211 } 4212 else 4213 { 4214 *invert_ptr = !*invert_ptr; 4215 riscv_emit_binary (inv_code, target, cmp0, cmp1); 4216 } 4217 } 4218 } 4219 4220 /* Return a register that is zero iff CMP0 and CMP1 are equal. 4221 The register will have the same mode as CMP0. */ 4222 4223 static rtx 4224 riscv_zero_if_equal (rtx cmp0, rtx cmp1) 4225 { 4226 if (cmp1 == const0_rtx) 4227 return cmp0; 4228 4229 return expand_binop (GET_MODE (cmp0), sub_optab, 4230 cmp0, cmp1, 0, 0, OPTAB_DIRECT); 4231 } 4232 4233 /* Helper function for riscv_extend_comparands to Sign-extend the OP. 4234 However if the OP is SI subreg promoted with an inner DI, such as 4235 (subreg/s/v:SI (reg/v:DI) 0) 4236 just peel off the SUBREG to get DI, avoiding extraneous extension. */ 4237 4238 static void 4239 riscv_sign_extend_if_not_subreg_prom (rtx *op) 4240 { 4241 if (GET_CODE (*op) == SUBREG 4242 && SUBREG_PROMOTED_VAR_P (*op) 4243 && SUBREG_PROMOTED_SIGNED_P (*op) 4244 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant () 4245 == GET_MODE_SIZE (word_mode))) 4246 *op = XEXP (*op, 0); 4247 else 4248 *op = gen_rtx_SIGN_EXTEND (word_mode, *op); 4249 } 4250 4251 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ 4252 4253 static void 4254 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1) 4255 { 4256 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. 
*/ 4257 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ()) 4258 { 4259 /* It is more profitable to zero-extend QImode values. But not if the 4260 first operand has already been sign-extended, and the second one is 4261 is a constant or has already been sign-extended also. */ 4262 if (unsigned_condition (code) == code 4263 && (GET_MODE (*op0) == QImode 4264 && ! (GET_CODE (*op0) == SUBREG 4265 && SUBREG_PROMOTED_VAR_P (*op0) 4266 && SUBREG_PROMOTED_SIGNED_P (*op0) 4267 && (CONST_INT_P (*op1) 4268 || (GET_CODE (*op1) == SUBREG 4269 && SUBREG_PROMOTED_VAR_P (*op1) 4270 && SUBREG_PROMOTED_SIGNED_P (*op1)))))) 4271 { 4272 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0); 4273 if (CONST_INT_P (*op1)) 4274 *op1 = GEN_INT ((uint8_t) INTVAL (*op1)); 4275 else 4276 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1); 4277 } 4278 else 4279 { 4280 riscv_sign_extend_if_not_subreg_prom (op0); 4281 4282 if (*op1 != const0_rtx) 4283 riscv_sign_extend_if_not_subreg_prom (op1); 4284 } 4285 } 4286 } 4287 4288 /* Convert a comparison into something that can be used in a branch or 4289 conditional move. On entry, *OP0 and *OP1 are the values being 4290 compared and *CODE is the code used to compare them. 4291 4292 Update *CODE, *OP0 and *OP1 so that they describe the final comparison. 4293 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are 4294 emitted. */ 4295 4296 static void 4297 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1, 4298 bool need_eq_ne_p = false) 4299 { 4300 if (need_eq_ne_p) 4301 { 4302 rtx cmp_op0 = *op0; 4303 rtx cmp_op1 = *op1; 4304 if (*code == EQ || *code == NE) 4305 { 4306 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1); 4307 *op1 = const0_rtx; 4308 return; 4309 } 4310 gcc_unreachable (); 4311 } 4312 4313 if (splittable_const_int_operand (*op1, VOIDmode)) 4314 { 4315 HOST_WIDE_INT rhs = INTVAL (*op1); 4316 4317 if (*code == EQ || *code == NE) 4318 { 4319 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. 
*/ 4320 if (SMALL_OPERAND (-rhs)) 4321 { 4322 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0, 4323 GEN_INT (-rhs)); 4324 *op1 = const0_rtx; 4325 } 4326 } 4327 else 4328 { 4329 static const enum rtx_code mag_comparisons[][2] = { 4330 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE} 4331 }; 4332 4333 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */ 4334 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++) 4335 { 4336 HOST_WIDE_INT new_rhs; 4337 bool increment = *code == mag_comparisons[i][0]; 4338 bool decrement = *code == mag_comparisons[i][1]; 4339 if (!increment && !decrement) 4340 continue; 4341 4342 new_rhs = rhs + (increment ? 1 : -1); 4343 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0)); 4344 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs) 4345 && (rhs < 0) == (new_rhs < 0)) 4346 { 4347 *op1 = GEN_INT (new_rhs); 4348 *code = mag_comparisons[i][increment]; 4349 } 4350 break; 4351 } 4352 } 4353 } 4354 4355 riscv_extend_comparands (*code, op0, op1); 4356 4357 *op0 = force_reg (word_mode, *op0); 4358 if (*op1 != const0_rtx) 4359 *op1 = force_reg (word_mode, *op1); 4360 } 4361 4362 /* Like riscv_emit_int_compare, but for floating-point comparisons. */ 4363 4364 static void 4365 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1, 4366 bool *invert_ptr = nullptr) 4367 { 4368 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1; 4369 enum rtx_code fp_code = *code; 4370 *code = NE; 4371 4372 switch (fp_code) 4373 { 4374 case UNORDERED: 4375 *code = EQ; 4376 /* Fall through. 
*/ 4377 4378 case ORDERED: 4379 /* a == a && b == b */ 4380 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0); 4381 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1); 4382 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1); 4383 *op1 = const0_rtx; 4384 break; 4385 4386 case UNEQ: 4387 /* ordered(a, b) > (a == b) */ 4388 *code = EQ; 4389 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0); 4390 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1); 4391 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1); 4392 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1); 4393 break; 4394 4395 #define UNORDERED_COMPARISON(CODE, CMP) \ 4396 case CODE: \ 4397 *code = EQ; \ 4398 *op0 = gen_reg_rtx (word_mode); \ 4399 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \ 4400 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \ 4401 else if (GET_MODE (cmp_op0) == SFmode) \ 4402 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \ 4403 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \ 4404 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \ 4405 else if (GET_MODE (cmp_op0) == DFmode) \ 4406 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \ 4407 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \ 4408 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \ 4409 else if (GET_MODE (cmp_op0) == HFmode) \ 4410 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \ 4411 else \ 4412 gcc_unreachable (); \ 4413 *op1 = const0_rtx; \ 4414 break; 4415 4416 case UNLT: 4417 std::swap (cmp_op0, cmp_op1); 4418 gcc_fallthrough (); 4419 4420 UNORDERED_COMPARISON(UNGT, le) 4421 4422 case UNLE: 4423 std::swap (cmp_op0, cmp_op1); 4424 gcc_fallthrough (); 4425 4426 UNORDERED_COMPARISON(UNGE, lt) 4427 #undef UNORDERED_COMPARISON 4428 4429 case NE: 4430 fp_code = EQ; 4431 if (invert_ptr != nullptr) 4432 *invert_ptr = !*invert_ptr; 4433 else 4434 { 4435 cmp_op0 = riscv_force_binary 
(word_mode, fp_code, cmp_op0, cmp_op1); 4436 cmp_op1 = const0_rtx; 4437 } 4438 gcc_fallthrough (); 4439 4440 case EQ: 4441 case LE: 4442 case LT: 4443 case GE: 4444 case GT: 4445 /* We have instructions for these cases. */ 4446 *code = fp_code; 4447 *op0 = cmp_op0; 4448 *op1 = cmp_op1; 4449 break; 4450 4451 case LTGT: 4452 /* (a < b) | (a > b) */ 4453 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1); 4454 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1); 4455 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1); 4456 *op1 = const0_rtx; 4457 break; 4458 4459 default: 4460 gcc_unreachable (); 4461 } 4462 } 4463 4464 /* CODE-compare OP0 and OP1. Store the result in TARGET. */ 4465 4466 void 4467 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr) 4468 { 4469 riscv_extend_comparands (code, &op0, &op1); 4470 op0 = force_reg (word_mode, op0); 4471 4472 if (code == EQ || code == NE) 4473 { 4474 rtx zie = riscv_zero_if_equal (op0, op1); 4475 riscv_emit_binary (code, target, zie, const0_rtx); 4476 } 4477 else 4478 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1); 4479 } 4480 4481 /* Like riscv_expand_int_scc, but for floating-point comparisons. */ 4482 4483 void 4484 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, 4485 bool *invert_ptr) 4486 { 4487 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr); 4488 4489 machine_mode mode = GET_MODE (target); 4490 if (mode != word_mode) 4491 { 4492 rtx cmp = riscv_force_binary (word_mode, code, op0, op1); 4493 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode)); 4494 } 4495 else 4496 riscv_emit_binary (code, target, op0, op1); 4497 } 4498 4499 /* Jump to LABEL if (CODE OP0 OP1) holds. 
*/ 4500 4501 void 4502 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1) 4503 { 4504 if (FLOAT_MODE_P (GET_MODE (op1))) 4505 riscv_emit_float_compare (&code, &op0, &op1); 4506 else 4507 riscv_emit_int_compare (&code, &op0, &op1); 4508 4509 if (FLOAT_MODE_P (GET_MODE (op0))) 4510 { 4511 op0 = riscv_force_binary (word_mode, code, op0, op1); 4512 op1 = const0_rtx; 4513 code = NE; 4514 } 4515 4516 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 4517 emit_jump_insn (gen_condjump (condition, label)); 4518 } 4519 4520 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST. 4521 Return 0 if expansion failed. */ 4522 4523 bool 4524 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) 4525 { 4526 machine_mode mode = GET_MODE (dest); 4527 rtx_code code = GET_CODE (op); 4528 rtx op0 = XEXP (op, 0); 4529 rtx op1 = XEXP (op, 1); 4530 4531 if (((TARGET_ZICOND_LIKE 4532 || (arith_operand (cons, mode) && arith_operand (alt, mode))) 4533 && (GET_MODE_CLASS (mode) == MODE_INT)) 4534 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) 4535 { 4536 machine_mode mode0 = GET_MODE (op0); 4537 machine_mode mode1 = GET_MODE (op1); 4538 4539 /* An integer comparison must be comparing WORD_MODE objects. We 4540 must enforce that so that we don't strip away a sign_extension 4541 thinking it is unnecessary. We might consider using 4542 riscv_extend_operands if they are not already properly extended. */ 4543 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode) 4544 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode)) 4545 return false; 4546 4547 /* In the fallback generic case use MODE rather than WORD_MODE for 4548 the output of the SCC instruction, to match the mode of the NEG 4549 operation below. The output of SCC is 0 or 1 boolean, so it is 4550 valid for input in any scalar integer mode. */ 4551 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE 4552 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) 4553 ? 
word_mode : mode); 4554 bool invert = false; 4555 4556 /* Canonicalize the comparison. It must be an equality comparison 4557 of integer operands, or with SFB it can be any comparison of 4558 integer operands. If it isn't, then emit an SCC instruction 4559 so that we can then use an equality comparison against zero. */ 4560 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode)) 4561 || !INTEGRAL_MODE_P (mode0)) 4562 { 4563 bool *invert_ptr = nullptr; 4564 4565 /* If riscv_expand_int_scc inverts the condition, then it will 4566 flip the value of INVERT. We need to know where so that 4567 we can adjust it for our needs. */ 4568 if (code == LE || code == LEU || code == GE || code == GEU) 4569 invert_ptr = &invert; 4570 4571 /* Emit an SCC-like instruction into a temporary so that we can 4572 use an EQ/NE comparison. We can support both FP and integer 4573 conditional moves. */ 4574 if (INTEGRAL_MODE_P (mode0)) 4575 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr); 4576 else if (FLOAT_MODE_P (mode0) 4577 && fp_scc_comparison (op, GET_MODE (op))) 4578 riscv_expand_float_scc (tmp, code, op0, op1, &invert); 4579 else 4580 return false; 4581 4582 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx); 4583 4584 /* We've generated a new comparison. Update the local variables. */ 4585 code = GET_CODE (op); 4586 op0 = XEXP (op, 0); 4587 op1 = XEXP (op, 1); 4588 } 4589 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV) 4590 riscv_expand_int_scc (tmp, code, op0, op1, &invert); 4591 4592 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) 4593 { 4594 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU); 4595 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4596 4597 /* The expander is a bit loose in its specification of the true 4598 arm of the conditional move. That allows us to support more 4599 cases for extensions which are more general than SFB. But 4600 does mean we need to force CONS into a register at this point. 
*/ 4601 cons = force_reg (mode, cons); 4602 /* With XTheadCondMov we need to force ALT into a register too. */ 4603 alt = force_reg (mode, alt); 4604 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond, 4605 cons, alt))); 4606 return true; 4607 } 4608 else if (!TARGET_ZICOND_LIKE) 4609 { 4610 if (invert) 4611 std::swap (cons, alt); 4612 4613 rtx reg1 = gen_reg_rtx (mode); 4614 rtx reg2 = gen_reg_rtx (mode); 4615 rtx reg3 = gen_reg_rtx (mode); 4616 rtx reg4 = gen_reg_rtx (mode); 4617 4618 riscv_emit_unary (NEG, reg1, tmp); 4619 riscv_emit_binary (AND, reg2, reg1, cons); 4620 riscv_emit_unary (NOT, reg3, reg1); 4621 riscv_emit_binary (AND, reg4, reg3, alt); 4622 riscv_emit_binary (IOR, dest, reg2, reg4); 4623 return true; 4624 } 4625 /* 0, reg or 0, imm */ 4626 else if (cons == CONST0_RTX (mode) 4627 && (REG_P (alt) 4628 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode)))) 4629 { 4630 riscv_emit_int_compare (&code, &op0, &op1, true); 4631 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4632 alt = force_reg (mode, alt); 4633 emit_insn (gen_rtx_SET (dest, 4634 gen_rtx_IF_THEN_ELSE (mode, cond, 4635 cons, alt))); 4636 return true; 4637 } 4638 /* imm, imm */ 4639 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) 4640 && CONST_INT_P (alt) && alt != CONST0_RTX (mode)) 4641 { 4642 riscv_emit_int_compare (&code, &op0, &op1, true); 4643 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4644 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons); 4645 alt = force_reg (mode, gen_int_mode (t, mode)); 4646 emit_insn (gen_rtx_SET (dest, 4647 gen_rtx_IF_THEN_ELSE (mode, cond, 4648 CONST0_RTX (mode), 4649 alt))); 4650 /* CONS might not fit into a signed 12 bit immediate suitable 4651 for an addi instruction. If that's the case, force it 4652 into a register. 
*/ 4653 if (!SMALL_OPERAND (INTVAL (cons))) 4654 cons = force_reg (mode, cons); 4655 riscv_emit_binary (PLUS, dest, dest, cons); 4656 return true; 4657 } 4658 /* imm, reg */ 4659 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt)) 4660 { 4661 /* Optimize for register value of 0. */ 4662 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode)) 4663 { 4664 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4665 cons = force_reg (mode, cons); 4666 emit_insn (gen_rtx_SET (dest, 4667 gen_rtx_IF_THEN_ELSE (mode, cond, 4668 cons, alt))); 4669 return true; 4670 } 4671 4672 riscv_emit_int_compare (&code, &op0, &op1, true); 4673 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4674 4675 rtx temp1 = gen_reg_rtx (mode); 4676 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode); 4677 4678 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate 4679 suitable for an addi instruction. If that's the case, force it 4680 into a register. */ 4681 if (!SMALL_OPERAND (INTVAL (temp2))) 4682 temp2 = force_reg (mode, temp2); 4683 if (!SMALL_OPERAND (INTVAL (cons))) 4684 cons = force_reg (mode, cons); 4685 4686 riscv_emit_binary (PLUS, temp1, alt, temp2); 4687 emit_insn (gen_rtx_SET (dest, 4688 gen_rtx_IF_THEN_ELSE (mode, cond, 4689 CONST0_RTX (mode), 4690 temp1))); 4691 riscv_emit_binary (PLUS, dest, dest, cons); 4692 return true; 4693 } 4694 /* reg, 0 or imm, 0 */ 4695 else if ((REG_P (cons) 4696 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode))) 4697 && alt == CONST0_RTX (mode)) 4698 { 4699 riscv_emit_int_compare (&code, &op0, &op1, true); 4700 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4701 cons = force_reg (mode, cons); 4702 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond, 4703 cons, alt))); 4704 return true; 4705 } 4706 /* reg, imm */ 4707 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode)) 4708 { 4709 /* Optimize for register value of 0. 
*/ 4710 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode)) 4711 { 4712 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4713 alt = force_reg (mode, alt); 4714 emit_insn (gen_rtx_SET (dest, 4715 gen_rtx_IF_THEN_ELSE (mode, cond, 4716 cons, alt))); 4717 return true; 4718 } 4719 4720 riscv_emit_int_compare (&code, &op0, &op1, true); 4721 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4722 4723 rtx temp1 = gen_reg_rtx (mode); 4724 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode); 4725 4726 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate 4727 suitable for an addi instruction. If that's the case, force it 4728 into a register. */ 4729 if (!SMALL_OPERAND (INTVAL (temp2))) 4730 temp2 = force_reg (mode, temp2); 4731 if (!SMALL_OPERAND (INTVAL (alt))) 4732 alt = force_reg (mode, alt); 4733 4734 riscv_emit_binary (PLUS, temp1, cons, temp2); 4735 emit_insn (gen_rtx_SET (dest, 4736 gen_rtx_IF_THEN_ELSE (mode, cond, 4737 temp1, 4738 CONST0_RTX (mode)))); 4739 riscv_emit_binary (PLUS, dest, dest, alt); 4740 return true; 4741 } 4742 /* reg, reg */ 4743 else if (REG_P (cons) && REG_P (alt)) 4744 { 4745 if (((code == EQ && rtx_equal_p (cons, op0)) 4746 || (code == NE && rtx_equal_p (alt, op0))) 4747 && op1 == CONST0_RTX (mode)) 4748 { 4749 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4750 alt = force_reg (mode, alt); 4751 emit_insn (gen_rtx_SET (dest, 4752 gen_rtx_IF_THEN_ELSE (mode, cond, 4753 cons, alt))); 4754 return true; 4755 } 4756 4757 rtx reg1 = gen_reg_rtx (mode); 4758 rtx reg2 = gen_reg_rtx (mode); 4759 riscv_emit_int_compare (&code, &op0, &op1, true); 4760 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); 4761 rtx cond2 = gen_rtx_fmt_ee (code == NE ? 
EQ : NE, 4762 GET_MODE (op0), op0, op1); 4763 emit_insn (gen_rtx_SET (reg2, 4764 gen_rtx_IF_THEN_ELSE (mode, cond2, 4765 CONST0_RTX (mode), 4766 cons))); 4767 emit_insn (gen_rtx_SET (reg1, 4768 gen_rtx_IF_THEN_ELSE (mode, cond1, 4769 CONST0_RTX (mode), 4770 alt))); 4771 riscv_emit_binary (PLUS, dest, reg1, reg2); 4772 return true; 4773 } 4774 } 4775 4776 return false; 4777 } 4778 4779 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at 4780 least PARM_BOUNDARY bits of alignment, but will be given anything up 4781 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ 4782 4783 static unsigned int 4784 riscv_function_arg_boundary (machine_mode mode, const_tree type) 4785 { 4786 unsigned int alignment; 4787 4788 /* Use natural alignment if the type is not aggregate data. */ 4789 if (type && !AGGREGATE_TYPE_P (type)) 4790 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type)); 4791 else 4792 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); 4793 4794 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment)); 4795 } 4796 4797 /* If MODE represents an argument that can be passed or returned in 4798 floating-point registers, return the number of registers, else 0. */ 4799 4800 static unsigned 4801 riscv_pass_mode_in_fpr_p (machine_mode mode) 4802 { 4803 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG) 4804 { 4805 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 4806 return 1; 4807 4808 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 4809 return 2; 4810 } 4811 4812 return 0; 4813 } 4814 4815 typedef struct { 4816 const_tree type; 4817 HOST_WIDE_INT offset; 4818 } riscv_aggregate_field; 4819 4820 /* Identify subfields of aggregates that are candidates for passing in 4821 floating-point registers. 
*/

static int
riscv_flatten_aggregate_field (const_tree type,
                               riscv_aggregate_field fields[2],
                               int n, HOST_WIDE_INT offset,
                               bool ignore_zero_width_bit_field_p)
{
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      /* Can't handle incomplete types nor sizes that are not fixed.  */
      if (!COMPLETE_TYPE_P (type)
          || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
          || !tree_fits_uhwi_p (TYPE_SIZE (type)))
        return -1;

      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
        {
          if (TREE_CODE (f) != FIELD_DECL)
            continue;

          if (!TYPE_P (TREE_TYPE (f)))
            return -1;

          /* The C++ front end strips zero-length bit-fields from structs.
             So we need to ignore them in the C front end to make C code
             compatible with C++ code.  */
          bool skip_p = (ignore_zero_width_bit_field_p
                         && DECL_BIT_FIELD (f)
                         && (DECL_SIZE (f) == NULL_TREE
                             || integer_zerop (DECL_SIZE (f))));
          if (!skip_p)
            {
              HOST_WIDE_INT pos = offset + int_byte_position (f);
              n = riscv_flatten_aggregate_field (TREE_TYPE (f),
                                                 fields, n, pos,
                                                 ignore_zero_width_bit_field_p);
            }

          if (n < 0)
            return -1;
        }
      return n;

    case ARRAY_TYPE:
      {
        riscv_aggregate_field subfields[2];
        tree index = TYPE_DOMAIN (type);
        tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));

        /* Flatten a single element first; its fields are then replicated
           once per array element.  */
        int n_subfields
          = riscv_flatten_aggregate_field (TREE_TYPE (type),
                                           subfields, 0, offset,
                                           ignore_zero_width_bit_field_p);

        /* Can't handle incomplete types nor sizes that are not fixed.  */
        if (n_subfields <= 0
            || !COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || !tree_fits_uhwi_p (elt_size))
          return -1;

        HOST_WIDE_INT n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                               - tree_to_uhwi (TYPE_MIN_VALUE (index));
        gcc_assert (n_elts >= 0);

        unsigned HOST_WIDE_INT elt_bytes = tree_to_uhwi (elt_size);

        for (HOST_WIDE_INT i = 0; i < n_elts; i++)
          for (int j = 0; j < n_subfields; j++)
            {
              /* FIELDS only has room for two entries.  */
              if (n >= 2)
                return -1;

              fields[n] = subfields[j];
              fields[n].offset += i * elt_bytes;
              n++;
            }

        return n;
      }

    case COMPLEX_TYPE:
      {
        /* A complex type needs to consume 2 fields, so N must be 0.  */
        if (n != 0)
          return -1;

        HOST_WIDE_INT elt_size
          = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();

        if (elt_size > UNITS_PER_FP_ARG)
          return -1;

        /* Two fields of the component type, ELT_SIZE bytes apart.  */
        fields[0].type = TREE_TYPE (type);
        fields[0].offset = offset;
        fields[1].type = TREE_TYPE (type);
        fields[1].offset = offset + elt_size;

        return 2;
      }

    default:
      /* A scalar float that fits in an FP argument register, or an integer
         that fits in a word, takes the next free slot if there is one.  */
      if (n < 2
          && ((SCALAR_FLOAT_TYPE_P (type)
               && (GET_MODE_SIZE (TYPE_MODE (type)).to_constant ()
                   <= UNITS_PER_FP_ARG))
              || (INTEGRAL_TYPE_P (type)
                  && (GET_MODE_SIZE (TYPE_MODE (type)).to_constant ()
                      <= UNITS_PER_WORD))))
        {
          fields[n].type = type;
          fields[n].offset = offset;
          return n + 1;
        }

      return -1;
    }
}

/* Identify candidate aggregates for passing in floating-point registers.
   Candidates have at most two fields after flattening.
*/ 4942 4943 static int 4944 riscv_flatten_aggregate_argument (const_tree type, 4945 riscv_aggregate_field fields[2], 4946 bool ignore_zero_width_bit_field_p) 4947 { 4948 if (!type || TREE_CODE (type) != RECORD_TYPE) 4949 return -1; 4950 4951 return riscv_flatten_aggregate_field (type, fields, 0, 0, 4952 ignore_zero_width_bit_field_p); 4953 } 4954 4955 /* See whether TYPE is a record whose fields should be returned in one or 4956 two floating-point registers. If so, populate FIELDS accordingly. */ 4957 4958 static unsigned 4959 riscv_pass_aggregate_in_fpr_pair_p (const_tree type, 4960 riscv_aggregate_field fields[2]) 4961 { 4962 static int warned = 0; 4963 4964 /* This is the old ABI, which differs for C++ and C. */ 4965 int n_old = riscv_flatten_aggregate_argument (type, fields, false); 4966 for (int i = 0; i < n_old; i++) 4967 if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) 4968 { 4969 n_old = -1; 4970 break; 4971 } 4972 4973 /* This is the new ABI, which is the same for C++ and C. */ 4974 int n_new = riscv_flatten_aggregate_argument (type, fields, true); 4975 for (int i = 0; i < n_new; i++) 4976 if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) 4977 { 4978 n_new = -1; 4979 break; 4980 } 4981 4982 if ((n_old != n_new) && (warned == 0)) 4983 { 4984 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length " 4985 "bit-fields changed in GCC 10"); 4986 warned = 1; 4987 } 4988 4989 return n_new > 0 ? n_new : 0; 4990 } 4991 4992 /* See whether TYPE is a record whose fields should be returned in one or 4993 floating-point register and one integer register. If so, populate 4994 FIELDS accordingly. */ 4995 4996 static bool 4997 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type, 4998 riscv_aggregate_field fields[2]) 4999 { 5000 static int warned = 0; 5001 5002 /* This is the old ABI, which differs for C++ and C. 
*/ 5003 unsigned num_int_old = 0, num_float_old = 0; 5004 int n_old = riscv_flatten_aggregate_argument (type, fields, false); 5005 for (int i = 0; i < n_old; i++) 5006 { 5007 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type); 5008 num_int_old += INTEGRAL_TYPE_P (fields[i].type); 5009 } 5010 5011 /* This is the new ABI, which is the same for C++ and C. */ 5012 unsigned num_int_new = 0, num_float_new = 0; 5013 int n_new = riscv_flatten_aggregate_argument (type, fields, true); 5014 for (int i = 0; i < n_new; i++) 5015 { 5016 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type); 5017 num_int_new += INTEGRAL_TYPE_P (fields[i].type); 5018 } 5019 5020 if (((num_int_old == 1 && num_float_old == 1 5021 && (num_int_old != num_int_new || num_float_old != num_float_new)) 5022 || (num_int_new == 1 && num_float_new == 1 5023 && (num_int_old != num_int_new || num_float_old != num_float_new))) 5024 && (warned == 0)) 5025 { 5026 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length " 5027 "bit-fields changed in GCC 10"); 5028 warned = 1; 5029 } 5030 5031 return num_int_new == 1 && num_float_new == 1; 5032 } 5033 5034 /* Return the representation of an argument passed or returned in an FPR 5035 when the value has mode VALUE_MODE and the type has TYPE_MODE. The 5036 two modes may be different for structures like: 5037 5038 struct __attribute__((packed)) foo { float f; } 5039 5040 where the SFmode value "f" is passed in REGNO but the struct itself 5041 has mode BLKmode. */ 5042 5043 static rtx 5044 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno, 5045 machine_mode value_mode, 5046 HOST_WIDE_INT offset) 5047 { 5048 rtx x = gen_rtx_REG (value_mode, regno); 5049 5050 if (type_mode != value_mode) 5051 { 5052 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset)); 5053 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); 5054 } 5055 return x; 5056 } 5057 5058 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1. 
5059 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and 5060 byte offset for the first value, likewise MODE2 and OFFSET2 for the 5061 second value. */ 5062 5063 static rtx 5064 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1, 5065 machine_mode mode1, HOST_WIDE_INT offset1, 5066 unsigned regno2, machine_mode mode2, 5067 HOST_WIDE_INT offset2) 5068 { 5069 return gen_rtx_PARALLEL 5070 (mode, 5071 gen_rtvec (2, 5072 gen_rtx_EXPR_LIST (VOIDmode, 5073 gen_rtx_REG (mode1, regno1), 5074 GEN_INT (offset1)), 5075 gen_rtx_EXPR_LIST (VOIDmode, 5076 gen_rtx_REG (mode2, regno2), 5077 GEN_INT (offset2)))); 5078 } 5079 5080 static rtx 5081 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, 5082 unsigned gpr_base) 5083 { 5084 gcc_assert (riscv_v_ext_vls_mode_p (mode)); 5085 5086 unsigned count = 0; 5087 unsigned regnum = 0; 5088 machine_mode gpr_mode = VOIDmode; 5089 unsigned vls_size = GET_MODE_SIZE (mode).to_constant (); 5090 unsigned gpr_size = GET_MODE_SIZE (Xmode); 5091 5092 if (IN_RANGE (vls_size, 0, gpr_size * 2)) 5093 { 5094 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size); 5095 5096 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS) 5097 { 5098 regnum = gpr_base + info->gpr_offset; 5099 info->num_gprs = count; 5100 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size); 5101 } 5102 } 5103 5104 if (!regnum) 5105 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */ 5106 5107 gcc_assert (gpr_mode != VOIDmode); 5108 5109 rtx reg = gen_rtx_REG (gpr_mode, regnum); 5110 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode)); 5111 5112 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x)); 5113 } 5114 5115 /* Initialize a variable CUM of type CUMULATIVE_ARGS 5116 for a call to a function whose data type is FNTYPE. 5117 For a library call, FNTYPE is 0. 
*/ 5118 5119 void 5120 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int) 5121 { 5122 memset (cum, 0, sizeof (*cum)); 5123 5124 if (fntype) 5125 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id (); 5126 else 5127 cum->variant_cc = RISCV_CC_BASE; 5128 } 5129 5130 /* Return true if TYPE is a vector type that can be passed in vector registers. 5131 */ 5132 5133 static bool 5134 riscv_vector_type_p (const_tree type) 5135 { 5136 /* Currently, only builtin scalabler vector type is allowed, in the future, 5137 more vector types may be allowed, such as GNU vector type, etc. */ 5138 return riscv_vector::builtin_type_p (type); 5139 } 5140 5141 static unsigned int 5142 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode); 5143 5144 /* Subroutine of riscv_get_arg_info. */ 5145 5146 static rtx 5147 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, 5148 machine_mode mode, bool return_p) 5149 { 5150 gcc_assert (riscv_v_ext_mode_p (mode)); 5151 5152 info->mr_offset = cum->num_mrs; 5153 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) 5154 { 5155 /* For scalable mask return value. */ 5156 if (return_p) 5157 return gen_rtx_REG (mode, V_REG_FIRST); 5158 5159 /* For the first scalable mask argument. */ 5160 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS) 5161 { 5162 info->num_mrs = 1; 5163 return gen_rtx_REG (mode, V_REG_FIRST); 5164 } 5165 else 5166 { 5167 /* Rest scalable mask arguments are treated as scalable data 5168 arguments. */ 5169 } 5170 } 5171 5172 /* The number and alignment of vector registers need for this scalable vector 5173 argument. When the mode size is less than a full vector, we use 1 vector 5174 register to pass. Just call TARGET_HARD_REGNO_NREGS for the number 5175 information. */ 5176 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode); 5177 int LMUL = riscv_v_ext_tuple_mode_p (mode) 5178 ? 
nregs / riscv_vector::get_nf (mode) 5179 : nregs; 5180 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST; 5181 int arg_reg_end = V_ARG_LAST - V_REG_FIRST; 5182 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL); 5183 5184 /* For scalable data and scalable tuple return value. */ 5185 if (return_p) 5186 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST); 5187 5188 /* Iterate through the USED_VRS array to find vector register groups that have 5189 not been allocated and the first register is aligned with LMUL. */ 5190 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL) 5191 { 5192 /* The index in USED_VRS array. */ 5193 int idx = i - arg_reg_start; 5194 /* Find the first register unused. */ 5195 if (!cum->used_vrs[idx]) 5196 { 5197 bool find_set = true; 5198 /* Ensure there are NREGS continuous unused registers. */ 5199 for (int j = 1; j < nregs; j++) 5200 if (cum->used_vrs[idx + j]) 5201 { 5202 find_set = false; 5203 /* Update I to the last aligned register which 5204 cannot be used and the next iteration will add 5205 LMUL step to I. */ 5206 i += (j / LMUL) * LMUL; 5207 break; 5208 } 5209 5210 if (find_set) 5211 { 5212 info->num_vrs = nregs; 5213 info->vr_offset = idx; 5214 return gen_rtx_REG (mode, i + V_REG_FIRST); 5215 } 5216 } 5217 } 5218 5219 return NULL_RTX; 5220 } 5221 5222 /* Fill INFO with information about a single argument, and return an RTL 5223 pattern to pass or return the argument. Return NULL_RTX if argument cannot 5224 pass or return in registers, then the argument may be passed by reference or 5225 through the stack or . CUM is the cumulative state for earlier arguments. 5226 MODE is the mode of this argument and TYPE is its type (if known). NAMED is 5227 true if this is a named (fixed) argument rather than a variable one. RETURN_P 5228 is true if returning the argument, or false if passing the argument. 
*/

static rtx
riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
                    machine_mode mode, const_tree type, bool named,
                    bool return_p)
{
  unsigned num_bytes, num_words;
  /* Return values and arguments use different base registers.  */
  unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
  unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
  unsigned alignment = riscv_function_arg_boundary (mode, type);

  /* Start from a zeroed INFO; the offsets continue from where the earlier
     arguments recorded in CUM stopped.  */
  memset (info, 0, sizeof (*info));
  info->gpr_offset = cum->num_gprs;
  info->fpr_offset = cum->num_fprs;

  /* Passed by reference when the scalable vector argument is anonymous.  */
  if (riscv_v_ext_mode_p (mode) && !named)
    return NULL_RTX;

  if (named)
    {
      riscv_aggregate_field fields[2];
      unsigned fregno = fpr_base + info->fpr_offset;
      unsigned gregno = gpr_base + info->gpr_offset;

      /* Pass one- or two-element floating-point aggregates in FPRs.  */
      if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
          && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
        switch (info->num_fprs)
          {
          case 1:
            return riscv_pass_fpr_single (mode, fregno,
                                          TYPE_MODE (fields[0].type),
                                          fields[0].offset);

          case 2:
            return riscv_pass_fpr_pair (mode, fregno,
                                        TYPE_MODE (fields[0].type),
                                        fields[0].offset,
                                        fregno + 1,
                                        TYPE_MODE (fields[1].type),
                                        fields[1].offset);

          default:
            gcc_unreachable ();
          }

      /* Pass real and complex floating-point numbers in FPRs.  */
      if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
          && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
        switch (GET_MODE_CLASS (mode))
          {
          case MODE_FLOAT:
            return gen_rtx_REG (mode, fregno);

          case MODE_COMPLEX_FLOAT:
            return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
                                        fregno + 1, GET_MODE_INNER (mode),
                                        GET_MODE_UNIT_SIZE (mode));

          default:
            gcc_unreachable ();
          }

      /* Pass structs with one float and one integer in an FPR and a GPR.  */
      if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
          && info->gpr_offset < MAX_ARGS_IN_REGISTERS
          && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
        {
          info->num_gprs = 1;
          info->num_fprs = 1;

          /* When the first field is not the floating-point one, exchange
             the register numbers so that each field still gets the
             register class matching its type.  */
          if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
            std::swap (fregno, gregno);

          return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
                                      fields[0].offset,
                                      gregno, TYPE_MODE (fields[1].type),
                                      fields[1].offset);
        }

      /* For scalable vector argument.  */
      if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
        return riscv_get_vector_arg (info, cum, mode, return_p);

      /* For vls mode aggregated in gpr.  */
      if (riscv_v_ext_vls_mode_p (mode))
        return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
    }

  /* Work out the size of the argument.  */
  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
  num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Doubleword-aligned varargs start on an even register boundary.  */
  if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
    info->gpr_offset += info->gpr_offset & 1;

  /* Partition the argument between registers and stack.  */
  info->num_fprs = 0;
  info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
  info->stack_p = (num_words - info->num_gprs) != 0;

  /* A return value always gets a register rtx, even when NUM_GPRS is 0.  */
  if (info->num_gprs || return_p)
    return gen_rtx_REG (mode, gpr_base + info->gpr_offset);

  return NULL_RTX;
}

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  struct riscv_arg_info info;

  if (arg.end_marker_p ())
    /* Return the calling convention that is used by the current function.  */
    return gen_int_mode (cum->variant_cc, SImode);

  return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
}

/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */

static void
riscv_function_arg_advance (cumulative_args_t cum_v,
                            const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  struct riscv_arg_info info;

  riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);

  /* Set the corresponding register in USED_VRS to used status.  */
  for (unsigned int i = 0; i < info.num_vrs; i++)
    {
      gcc_assert (!cum->used_vrs[info.vr_offset + i]);
      cum->used_vrs[info.vr_offset + i] = true;
    }

  if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
    {
      error ("RVV type %qT cannot be passed to an unprototyped function",
             arg.type);
      /* Avoid repeating the message */
      cum->variant_cc = RISCV_CC_V;
    }

  /* Advance the register count.  This has the effect of setting
     num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
     argument required us to skip the final GPR and pass the whole
     argument on the stack.  */
  cum->num_fprs = info.fpr_offset + info.num_fprs;
  cum->num_gprs = info.gpr_offset + info.num_gprs;
  cum->num_mrs = info.mr_offset + info.num_mrs;
}

/* Implement TARGET_ARG_PARTIAL_BYTES.  */

static int
riscv_arg_partial_bytes (cumulative_args_t cum,
                         const function_arg_info &generic_arg)
{
  struct riscv_arg_info arg;

  riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
                      generic_arg.type, generic_arg.named, false);
  /* Only an argument that also spills to the stack is "partial"; the
     register part then covers NUM_GPRS words.  */
  return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}

/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
   VALTYPE is null and MODE is the mode of the return value.  */

rtx
riscv_function_value (const_tree type, const_tree func, machine_mode mode)
{
  struct riscv_arg_info info;
  CUMULATIVE_ARGS args;

  if (type)
    {
      int unsigned_p = TYPE_UNSIGNED (type);

      mode = TYPE_MODE (type);

      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
         return values, promote the mode here too.  */
      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
    }

  /* A return value is classified like a first, named argument: use an
     empty cumulative state.  */
  memset (&args, 0, sizeof args);

  return riscv_get_arg_info (&info, &args, mode, type, true, true);
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  /* NOTE(review): the second ';' below is a stray empty statement.  */
  HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();;
  struct riscv_arg_info info;
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
     never pass variadic arguments in floating-point and vector registers,
     so we can avoid the call to riscv_get_arg_info in this case.  */
  if (cum != NULL)
    {
      riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);

      /* Don't pass by reference if we can use a floating-point register.  */
      if (info.num_fprs)
        return false;

      /* Don't pass by reference if we can use general register(s) for vls.  */
      if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
        return false;

      /* Don't pass by reference if we can use vector register groups.  */
      if (info.num_vrs > 0 || info.num_mrs > 0)
        return false;
    }

  /* Passed by reference when:
     1. The scalable vector argument is anonymous.
     2. Args cannot be passed through vector registers.  */
  if (riscv_v_ext_mode_p (arg.mode))
    return true;

  /* Pass by reference if the data do not fit in two integer registers.  */
  return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
}

/* Implement TARGET_RETURN_IN_MEMORY.  */

static bool
riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS args;
  cumulative_args_t cum = pack_cumulative_args (&args);

  /* The rules for returning in memory are the same as for passing the
     first named argument by reference.  */
  memset (&args, 0, sizeof args);
  function_arg_info arg (const_cast<tree> (type), /*named=*/true);
  return riscv_pass_by_reference (cum, arg);
}

/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
riscv_setup_incoming_varargs (cumulative_args_t cum,
                              const function_arg_info &arg,
                              int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
{
  CUMULATIVE_ARGS local_cum;
  int gp_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *get_cumulative_args (cum);
  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
      || arg.type != NULL_TREE)
    riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);

  /* Find out how many registers we need to save.  */
  gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;

  if (!no_rtl && gp_saved > 0)
    {
      /* Store the remaining argument GPRs as one block placed
         GP_SAVED words below the end of the REG_PARM_STACK_SPACE area.  */
      rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
                               REG_PARM_STACK_SPACE (cfun->decl)
                               - gp_saved * UNITS_PER_WORD);
      rtx mem = gen_frame_mem (BLKmode, ptr);
      set_mem_alias_set (mem, get_varargs_alias_set ());

      move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
                           mem, gp_saved);
    }
  /* Record the size of the save area; riscv_va_start reads it back.  */
  if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
    cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
}

/* Return the descriptor of the Standard Vector Calling Convention Variant.  */

static const predefined_function_abi &
riscv_v_abi ()
{
  predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
  /* The descriptor is built on first use and then reused.  */
  if (!v_abi.initialized_p ())
    {
      HARD_REG_SET full_reg_clobbers
        = default_function_abi.full_reg_clobbers ();
      /* Callee-saved vector registers: v1-v7, v24-v31.  */
      for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
        CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
      for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
        CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
      v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
    }
  return v_abi;
}

/* Return true if vector type TYPE has integer elements.  When TYPE has a
   vector mode the inner mode decides; otherwise the decision is made from
   the type's name, which must then contain "int".  */

static bool
riscv_vector_int_type_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (VECTOR_MODE_P (mode))
    return INTEGRAL_MODE_P (GET_MODE_INNER (mode));

  /* NOTE(review): the "uint" test is subsumed by the "int" test.  */
  const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
  return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
}

/* Return true if vector type TYPE has floating-point elements.  When TYPE
   has a vector mode the inner mode decides; otherwise the type's name must
   contain "vfloat".  */

static bool
riscv_vector_float_type_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (VECTOR_MODE_P (mode))
    return FLOAT_MODE_P (GET_MODE_INNER (mode));

  const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
  return strstr (name, "vfloat") != NULL;
}

/* Return the element size in bits of vector type TYPE.  When TYPE has a
   vector mode this is the bit size of the inner mode; otherwise it is
   derived from the type's name ("bool" gives 1, "int8" gives 8, and so
   on).  A name that matches none of the patterns is unreachable.  */

static int
riscv_vector_element_bitsize (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (VECTOR_MODE_P (mode))
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));

  const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));

  if (strstr (name, "bool") != NULL)
    return 1;
  else if (strstr (name, "int8") != NULL)
    return 8;
  else if (strstr (name, "int16") != NULL || strstr (name, "float16") != NULL)
    return 16;
  else if (strstr (name, "int32") != NULL || strstr (name, "float32") != NULL)
    return 32;
  else if (strstr (name, "int64") != NULL || strstr (name, "float64") != NULL)
    return 64;

  gcc_unreachable ();
}

/* Return the minimal VLEN that vector type TYPE requires.  A type whose
   mode is already an RVV mode needs nothing beyond TARGET_MIN_VLEN;
   otherwise the element size is scaled by the factor encoded in the type's
   name ("boolN" or a fractional LMUL suffix "mfN").  */

static int
riscv_vector_required_min_vlen (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (riscv_v_ext_mode_p (mode))
    return TARGET_MIN_VLEN;

  int element_bitsize = riscv_vector_element_bitsize (type);
  const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));

  /* The longer "boolNN" patterns are tested before the shorter ones that
     are their prefixes ("bool64" before "bool6"-like matches such as
     "bool4" cannot occur, but "bool16" must precede nothing shorter that
     it contains).  */
  if (strstr (name, "bool64") != NULL)
    return element_bitsize * 64;
  else if (strstr (name, "bool32") != NULL)
    return element_bitsize * 32;
  else if (strstr (name, "bool16") != NULL)
    return element_bitsize * 16;
  else if (strstr (name, "bool8") != NULL)
    return element_bitsize * 8;
  else if (strstr (name, "bool4") != NULL)
    return element_bitsize * 4;
  else if (strstr (name, "bool2") != NULL)
    return element_bitsize * 2;

  if (strstr (name, "mf8") != NULL)
    return element_bitsize * 8;
  else if (strstr (name, "mf4") != NULL)
    return element_bitsize * 4;
  else if (strstr (name, "mf2") != NULL)
    return element_bitsize * 2;

  return element_bitsize;
}

/* Check that the RVV type TYPE can be used with the enabled ISA extensions
   and report an error at input_location otherwise.  HINT is inserted at
   the start of the message and names the role of TYPE (the callers below
   pass "return type" and "argument type").  At most one error is reported
   per call.  */

static void
riscv_validate_vector_type (const_tree type, const char *hint)
{
  gcc_assert (riscv_vector_type_p (type));

  if (!TARGET_VECTOR)
    {
      error_at (input_location, "%s %qT requires the V ISA extension",
                hint, type);
      return;
    }

  int element_bitsize = riscv_vector_element_bitsize (type);
  bool int_type_p = riscv_vector_int_type_p (type);

  if (int_type_p && element_bitsize == 64
      && !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
    {
      error_at (input_location,
                "%s %qT requires the zve64x, zve64f, zve64d or v ISA extension",
                hint, type);
      return;
    }

  bool float_type_p = riscv_vector_float_type_p (type);

  if (float_type_p && element_bitsize == 16
      && !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
    {
      error_at (input_location,
                "%s %qT requires the zvfhmin or zvfh ISA extension",
                hint, type);
      return;
    }

  if (float_type_p && element_bitsize == 32
      && !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
    {
      error_at (input_location,
                "%s %qT requires the zve32f, zve64f, zve64d or v ISA extension",
                hint, type);
      return;
    }

  if (float_type_p && element_bitsize == 64
      && !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
    {
      error_at (input_location,
                "%s %qT requires the zve64d or v ISA extension", hint, type);
      return;
    }

  int required_min_vlen = riscv_vector_required_min_vlen (type);

  if (TARGET_MIN_VLEN < required_min_vlen)
    {
      error_at (
        input_location,
        "%s %qT requires the minimal vector length %qd but %qd is given",
        hint, type, required_min_vlen, TARGET_MIN_VLEN);
      return;
    }
}

/* Return true if a function with type FNTYPE returns its value in
   RISC-V V registers.  As a side effect the return type is validated,
   which may report an error.  */

static bool
riscv_return_value_is_vector_type_p (const_tree fntype)
{
  tree return_type = TREE_TYPE (fntype);

  if (riscv_vector_type_p (return_type))
    {
      riscv_validate_vector_type (return_type, "return type");
      return true;
    }
  else
    return false;
}

/* Return true if a function with type FNTYPE takes arguments in
   RISC-V V registers.  Only the first vector-typed argument found is
   validated (which may report an error) before returning.  */

static bool
riscv_arguments_is_vector_type_p (const_tree fntype)
{
  for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
       chain = TREE_CHAIN (chain))
    {
      tree arg_type = TREE_VALUE (chain);
      if (riscv_vector_type_p (arg_type))
        {
          riscv_validate_vector_type (arg_type, "argument type");
          return true;
        }
    }

  return false;
}

/* Return true if FUNC is a riscv_vector_cc function.
   For more details please reference the below link.
5722 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */ 5723 static bool 5724 riscv_vector_cc_function_p (const_tree fntype) 5725 { 5726 tree attr = TYPE_ATTRIBUTES (fntype); 5727 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE 5728 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE; 5729 5730 if (vector_cc_p && !TARGET_VECTOR) 5731 error_at (input_location, 5732 "function attribute %qs requires the V ISA extension", 5733 "riscv_vector_cc"); 5734 5735 return vector_cc_p; 5736 } 5737 5738 /* Implement TARGET_FNTYPE_ABI. */ 5739 5740 static const predefined_function_abi & 5741 riscv_fntype_abi (const_tree fntype) 5742 { 5743 /* Implement the vector calling convention. For more details please 5744 reference the below link. 5745 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */ 5746 if (riscv_return_value_is_vector_type_p (fntype) 5747 || riscv_arguments_is_vector_type_p (fntype) 5748 || riscv_vector_cc_function_p (fntype)) 5749 return riscv_v_abi (); 5750 5751 return default_function_abi; 5752 } 5753 5754 /* Return riscv calling convention of call_insn. */ 5755 riscv_cc 5756 get_riscv_cc (const rtx use) 5757 { 5758 gcc_assert (GET_CODE (use) == USE); 5759 rtx unspec = XEXP (use, 0); 5760 gcc_assert (GET_CODE (unspec) == UNSPEC 5761 && XINT (unspec, 1) == UNSPEC_CALLEE_CC); 5762 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0)); 5763 gcc_assert (cc < RISCV_CC_UNKNOWN); 5764 return cc; 5765 } 5766 5767 /* Implement TARGET_INSN_CALLEE_ABI. */ 5768 5769 const predefined_function_abi & 5770 riscv_insn_callee_abi (const rtx_insn *insn) 5771 { 5772 rtx pat = PATTERN (insn); 5773 gcc_assert (GET_CODE (pat) == PARALLEL); 5774 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1)); 5775 return function_abis[cc]; 5776 } 5777 5778 /* Handle an attribute requiring a FUNCTION_DECL; 5779 arguments as in struct attribute_spec.handler. 
*/ 5780 static tree 5781 riscv_handle_fndecl_attribute (tree *node, tree name, 5782 tree args ATTRIBUTE_UNUSED, 5783 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 5784 { 5785 if (TREE_CODE (*node) != FUNCTION_DECL) 5786 { 5787 warning (OPT_Wattributes, "%qE attribute only applies to functions", 5788 name); 5789 *no_add_attrs = true; 5790 } 5791 5792 return NULL_TREE; 5793 } 5794 5795 /* Verify type based attributes. NODE is the what the attribute is being 5796 applied to. NAME is the attribute name. ARGS are the attribute args. 5797 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if 5798 the attribute should be ignored. */ 5799 5800 static tree 5801 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, 5802 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 5803 { 5804 /* Check for an argument. */ 5805 if (is_attribute_p ("interrupt", name)) 5806 { 5807 if (args) 5808 { 5809 tree cst = TREE_VALUE (args); 5810 const char *string; 5811 5812 if (TREE_CODE (cst) != STRING_CST) 5813 { 5814 warning (OPT_Wattributes, 5815 "%qE attribute requires a string argument", 5816 name); 5817 *no_add_attrs = true; 5818 return NULL_TREE; 5819 } 5820 5821 string = TREE_STRING_POINTER (cst); 5822 if (strcmp (string, "user") && strcmp (string, "supervisor") 5823 && strcmp (string, "machine")) 5824 { 5825 warning (OPT_Wattributes, 5826 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, " 5827 "or %<\"machine\"%>", name); 5828 *no_add_attrs = true; 5829 } 5830 } 5831 } 5832 5833 return NULL_TREE; 5834 } 5835 5836 static tree 5837 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args, 5838 ATTRIBUTE_UNUSED int flags, 5839 bool *no_add_attrs) 5840 { 5841 if (!is_attribute_p ("riscv_rvv_vector_bits", name)) 5842 return NULL_TREE; 5843 5844 *no_add_attrs = true; 5845 5846 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL) 5847 { 5848 error ( 5849 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified", 
5850 "riscv_rvv_vector_bits"); 5851 return NULL_TREE; 5852 } 5853 5854 tree type = *node; 5855 5856 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type)) 5857 { 5858 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type); 5859 return NULL_TREE; 5860 } 5861 5862 tree size = TREE_VALUE (args); 5863 5864 if (TREE_CODE (size) != INTEGER_CST) 5865 { 5866 error ("%qs requires an integer constant", "riscv_rvv_vector_bits"); 5867 return NULL_TREE; 5868 } 5869 5870 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size); 5871 unsigned HOST_WIDE_INT type_mode_bits 5872 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant (); 5873 5874 if (args_in_bits != type_mode_bits) 5875 { 5876 error ("invalid RVV vector size %qd, " 5877 "expected size is %qd based on LMUL of type and %qs", 5878 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl"); 5879 return NULL_TREE; 5880 } 5881 5882 type = build_distinct_type_copy (type); 5883 TYPE_ATTRIBUTES (type) 5884 = remove_attribute ("RVV sizeless type", 5885 copy_list (TYPE_ATTRIBUTES (type))); 5886 5887 /* The operations like alu/cmp on vbool*_t is not well defined, 5888 continue to treat vbool*_t as indivisible. */ 5889 if (!VECTOR_BOOLEAN_TYPE_P (type)) 5890 TYPE_INDIVISIBLE_P (type) = 0; 5891 5892 *node = type; 5893 5894 return NULL_TREE; 5895 } 5896 5897 /* Return true if function TYPE is an interrupt function. */ 5898 static bool 5899 riscv_interrupt_type_p (tree type) 5900 { 5901 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL; 5902 } 5903 5904 /* Return true if FUNC is a naked function. */ 5905 static bool 5906 riscv_naked_function_p (tree func) 5907 { 5908 tree func_decl = func; 5909 if (func == NULL_TREE) 5910 func_decl = current_function_decl; 5911 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl)); 5912 } 5913 5914 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. 
*/ 5915 static bool 5916 riscv_allocate_stack_slots_for_args () 5917 { 5918 /* Naked functions should not allocate stack slots for arguments. */ 5919 return !riscv_naked_function_p (current_function_decl); 5920 } 5921 5922 /* Implement TARGET_WARN_FUNC_RETURN. */ 5923 static bool 5924 riscv_warn_func_return (tree decl) 5925 { 5926 /* Naked functions are implemented entirely in assembly, including the 5927 return sequence, so suppress warnings about this. */ 5928 return !riscv_naked_function_p (decl); 5929 } 5930 5931 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ 5932 5933 static void 5934 riscv_va_start (tree valist, rtx nextarg) 5935 { 5936 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size); 5937 std_expand_builtin_va_start (valist, nextarg); 5938 } 5939 5940 /* Make ADDR suitable for use as a call or sibcall target. */ 5941 5942 rtx 5943 riscv_legitimize_call_address (rtx addr) 5944 { 5945 if (!call_insn_operand (addr, VOIDmode)) 5946 { 5947 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode); 5948 riscv_emit_move (reg, addr); 5949 return reg; 5950 } 5951 return addr; 5952 } 5953 5954 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM 5955 in context CONTEXT. HI_RELOC indicates a high-part reloc. */ 5956 5957 static void 5958 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc) 5959 { 5960 const char *reloc; 5961 5962 switch (riscv_classify_symbolic_expression (op)) 5963 { 5964 case SYMBOL_ABSOLUTE: 5965 reloc = hi_reloc ? "%hi" : "%lo"; 5966 break; 5967 5968 case SYMBOL_PCREL: 5969 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo"; 5970 break; 5971 5972 case SYMBOL_TLS_LE: 5973 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo"; 5974 break; 5975 5976 default: 5977 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 
'h' : 'R'); 5978 return; 5979 } 5980 5981 fprintf (file, "%s(", reloc); 5982 output_addr_const (file, riscv_strip_unspec_address (op)); 5983 fputc (')', file); 5984 } 5985 5986 /* Return the memory model that encapuslates both given models. */ 5987 5988 enum memmodel 5989 riscv_union_memmodels (enum memmodel model1, enum memmodel model2) 5990 { 5991 model1 = memmodel_base (model1); 5992 model2 = memmodel_base (model2); 5993 5994 enum memmodel weaker = model1 <= model2 ? model1: model2; 5995 enum memmodel stronger = model1 > model2 ? model1: model2; 5996 5997 switch (stronger) 5998 { 5999 case MEMMODEL_SEQ_CST: 6000 case MEMMODEL_ACQ_REL: 6001 return stronger; 6002 case MEMMODEL_RELEASE: 6003 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME) 6004 return MEMMODEL_ACQ_REL; 6005 else 6006 return stronger; 6007 case MEMMODEL_ACQUIRE: 6008 case MEMMODEL_CONSUME: 6009 case MEMMODEL_RELAXED: 6010 return stronger; 6011 default: 6012 gcc_unreachable (); 6013 } 6014 } 6015 6016 /* Return true if the .AQ suffix should be added to an AMO to implement the 6017 acquire portion of memory model MODEL. */ 6018 6019 static bool 6020 riscv_memmodel_needs_amo_acquire (enum memmodel model) 6021 { 6022 /* ZTSO amo mappings require no annotations. */ 6023 if (TARGET_ZTSO) 6024 return false; 6025 6026 switch (model) 6027 { 6028 case MEMMODEL_ACQ_REL: 6029 case MEMMODEL_SEQ_CST: 6030 case MEMMODEL_ACQUIRE: 6031 case MEMMODEL_CONSUME: 6032 return true; 6033 6034 case MEMMODEL_RELEASE: 6035 case MEMMODEL_RELAXED: 6036 return false; 6037 6038 default: 6039 gcc_unreachable (); 6040 } 6041 } 6042 6043 /* Return true if the .RL suffix should be added to an AMO to implement the 6044 release portion of memory model MODEL. */ 6045 6046 static bool 6047 riscv_memmodel_needs_amo_release (enum memmodel model) 6048 { 6049 /* ZTSO amo mappings require no annotations. 
*/ 6050 if (TARGET_ZTSO) 6051 return false; 6052 6053 switch (model) 6054 { 6055 case MEMMODEL_ACQ_REL: 6056 case MEMMODEL_SEQ_CST: 6057 case MEMMODEL_RELEASE: 6058 return true; 6059 6060 case MEMMODEL_ACQUIRE: 6061 case MEMMODEL_CONSUME: 6062 case MEMMODEL_RELAXED: 6063 return false; 6064 6065 default: 6066 gcc_unreachable (); 6067 } 6068 } 6069 6070 /* Get REGNO alignment of vector mode. 6071 The alignment = LMUL when the LMUL >= 1. 6072 Otherwise, alignment = 1. */ 6073 int 6074 riscv_get_v_regno_alignment (machine_mode mode) 6075 { 6076 /* 3.3.2. LMUL = 2,4,8, register numbers should be multiple of 2,4,8. 6077 but for mask vector register, register numbers can be any number. */ 6078 int lmul = 1; 6079 machine_mode rvv_mode = mode; 6080 if (riscv_v_ext_vls_mode_p (rvv_mode)) 6081 { 6082 int size = GET_MODE_BITSIZE (rvv_mode).to_constant (); 6083 if (size < TARGET_MIN_VLEN) 6084 return 1; 6085 else 6086 return size / TARGET_MIN_VLEN; 6087 } 6088 if (riscv_v_ext_tuple_mode_p (rvv_mode)) 6089 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode); 6090 poly_int64 size = GET_MODE_SIZE (rvv_mode); 6091 if (known_gt (size, UNITS_PER_V_REG)) 6092 lmul = exact_div (size, UNITS_PER_V_REG).to_constant (); 6093 return lmul; 6094 } 6095 6096 /* Define ASM_OUTPUT_OPCODE to do anything special before 6097 emitting an opcode. */ 6098 const char * 6099 riscv_asm_output_opcode (FILE *asm_out_file, const char *p) 6100 { 6101 if (TARGET_XTHEADVECTOR) 6102 return th_asm_output_opcode (asm_out_file, p); 6103 6104 return p; 6105 } 6106 6107 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are: 6108 6109 'h' Print the high-part relocation associated with OP, after stripping 6110 any outermost HIGH. 6111 'R' Print the low-part relocation associated with OP. 6112 'C' Print the integer branch condition for comparison OP. 6113 'N' Print the inverse of the integer branch condition for comparison OP. 6114 'A' Print the atomic operation suffix for memory model OP. 
6115 'I' Print the LR suffix for memory model OP. 6116 'J' Print the SC suffix for memory model OP. 6117 'z' Print x0 if OP is zero, otherwise print OP normally. 6118 'i' Print i if the operand is not a register. 6119 'S' Print shift-index of single-bit mask OP. 6120 'T' Print shift-index of inverted single-bit mask OP. 6121 '~' Print w if TARGET_64BIT is true; otherwise not print anything. 6122 6123 Note please keep this list and the list in riscv.md in sync. */ 6124 6125 static void 6126 riscv_print_operand (FILE *file, rtx op, int letter) 6127 { 6128 /* `~` does not take an operand so op will be null 6129 Check for before accessing op. 6130 */ 6131 if (letter == '~') 6132 { 6133 if (TARGET_64BIT) 6134 fputc('w', file); 6135 return; 6136 } 6137 machine_mode mode = GET_MODE (op); 6138 enum rtx_code code = GET_CODE (op); 6139 6140 switch (letter) 6141 { 6142 case 'o': { 6143 /* Print 'OP' variant for RVV instructions. 6144 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv). 6145 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi). 6146 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). 
*/ 6147 if (riscv_v_ext_mode_p (mode)) 6148 { 6149 if (REG_P (op)) 6150 asm_fprintf (file, "v"); 6151 else if (CONST_VECTOR_P (op)) 6152 asm_fprintf (file, "i"); 6153 else 6154 output_operand_lossage ("invalid vector operand"); 6155 } 6156 else 6157 { 6158 if (CONST_INT_P (op)) 6159 asm_fprintf (file, "i"); 6160 else 6161 asm_fprintf (file, "x"); 6162 } 6163 break; 6164 } 6165 case 'v': { 6166 rtx elt; 6167 6168 if (REG_P (op)) 6169 asm_fprintf (file, "%s", reg_names[REGNO (op)]); 6170 else 6171 { 6172 if (!const_vec_duplicate_p (op, &elt)) 6173 output_operand_lossage ("invalid vector constant"); 6174 else if (satisfies_constraint_Wc0 (op)) 6175 asm_fprintf (file, "0"); 6176 else if (satisfies_constraint_vi (op) 6177 || satisfies_constraint_vj (op) 6178 || satisfies_constraint_vk (op)) 6179 asm_fprintf (file, "%wd", INTVAL (elt)); 6180 else 6181 output_operand_lossage ("invalid vector constant"); 6182 } 6183 break; 6184 } 6185 case 'V': { 6186 rtx elt; 6187 if (!const_vec_duplicate_p (op, &elt)) 6188 output_operand_lossage ("invalid vector constant"); 6189 else if (satisfies_constraint_vj (op)) 6190 asm_fprintf (file, "%wd", -INTVAL (elt)); 6191 else 6192 output_operand_lossage ("invalid vector constant"); 6193 break; 6194 } 6195 case 'm': { 6196 if (riscv_v_ext_mode_p (mode)) 6197 { 6198 /* Calculate lmul according to mode and print the value. */ 6199 int lmul = riscv_get_v_regno_alignment (mode); 6200 asm_fprintf (file, "%d", lmul); 6201 } 6202 else if (code == CONST_INT) 6203 { 6204 /* If it is a const_int value, it denotes the VLMUL field enum. 
*/ 6205 unsigned int vlmul = UINTVAL (op); 6206 switch (vlmul) 6207 { 6208 case riscv_vector::LMUL_1: 6209 asm_fprintf (file, "%s", "m1"); 6210 break; 6211 case riscv_vector::LMUL_2: 6212 asm_fprintf (file, "%s", "m2"); 6213 break; 6214 case riscv_vector::LMUL_4: 6215 asm_fprintf (file, "%s", "m4"); 6216 break; 6217 case riscv_vector::LMUL_8: 6218 asm_fprintf (file, "%s", "m8"); 6219 break; 6220 case riscv_vector::LMUL_F8: 6221 asm_fprintf (file, "%s", "mf8"); 6222 break; 6223 case riscv_vector::LMUL_F4: 6224 asm_fprintf (file, "%s", "mf4"); 6225 break; 6226 case riscv_vector::LMUL_F2: 6227 asm_fprintf (file, "%s", "mf2"); 6228 break; 6229 default: 6230 gcc_unreachable (); 6231 } 6232 } 6233 else 6234 output_operand_lossage ("invalid vector constant"); 6235 break; 6236 } 6237 case 'p': { 6238 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) 6239 { 6240 /* Print for RVV mask operand. 6241 If op is reg, print ",v0.t". 6242 Otherwise, don't print anything. */ 6243 if (code == REG) 6244 fprintf (file, ",%s.t", reg_names[REGNO (op)]); 6245 } 6246 else if (code == CONST_INT) 6247 { 6248 /* Tail && Mask policy. */ 6249 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u"); 6250 } 6251 else 6252 output_operand_lossage ("invalid vector constant"); 6253 break; 6254 } 6255 case 'h': 6256 if (code == HIGH) 6257 op = XEXP (op, 0); 6258 riscv_print_operand_reloc (file, op, true); 6259 break; 6260 6261 case 'R': 6262 riscv_print_operand_reloc (file, op, false); 6263 break; 6264 6265 case 'C': 6266 /* The RTL names match the instruction names. */ 6267 fputs (GET_RTX_NAME (code), file); 6268 break; 6269 6270 case 'N': 6271 /* The RTL names match the instruction names. 
*/ 6272 fputs (GET_RTX_NAME (reverse_condition (code)), file); 6273 break; 6274 6275 case 'A': 6276 if (!CONST_INT_P (op)) 6277 output_operand_lossage ("invalid operand for '%%%c'", letter); 6278 else 6279 { 6280 const enum memmodel model = memmodel_base (INTVAL (op)); 6281 if (riscv_memmodel_needs_amo_acquire (model) 6282 && riscv_memmodel_needs_amo_release (model)) 6283 fputs (".aqrl", file); 6284 else if (riscv_memmodel_needs_amo_acquire (model)) 6285 fputs (".aq", file); 6286 else if (riscv_memmodel_needs_amo_release (model)) 6287 fputs (".rl", file); 6288 } 6289 break; 6290 6291 case 'I': 6292 if (!CONST_INT_P (op)) 6293 output_operand_lossage ("invalid operand for '%%%c'", letter); 6294 else 6295 { 6296 const enum memmodel model = memmodel_base (INTVAL (op)); 6297 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST) 6298 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */ 6299 break; 6300 else if (model == MEMMODEL_SEQ_CST) 6301 fputs (".aqrl", file); 6302 else if (riscv_memmodel_needs_amo_acquire (model)) 6303 fputs (".aq", file); 6304 } 6305 break; 6306 6307 case 'J': 6308 if (!CONST_INT_P (op)) 6309 output_operand_lossage ("invalid operand for '%%%c'", letter); 6310 else 6311 { 6312 const enum memmodel model = memmodel_base (INTVAL (op)); 6313 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST) 6314 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. 
*/ 6315 fputs (".rl", file); 6316 else if (TARGET_ZTSO) 6317 break; 6318 else if (riscv_memmodel_needs_amo_release (model)) 6319 fputs (".rl", file); 6320 } 6321 break; 6322 6323 case 'L': 6324 { 6325 const char *ntl_hint = NULL; 6326 switch (INTVAL (op)) 6327 { 6328 case 0: 6329 ntl_hint = "ntl.all"; 6330 break; 6331 case 1: 6332 ntl_hint = "ntl.pall"; 6333 break; 6334 case 2: 6335 ntl_hint = "ntl.p1"; 6336 break; 6337 } 6338 6339 if (ntl_hint) 6340 asm_fprintf (file, "%s\n\t", ntl_hint); 6341 break; 6342 } 6343 6344 case 'i': 6345 if (code != REG) 6346 fputs ("i", file); 6347 break; 6348 6349 case 'B': 6350 fputs (GET_RTX_NAME (code), file); 6351 break; 6352 6353 case 'S': 6354 if (!CONST_INT_P (op)) 6355 output_operand_lossage ("invalid operand for '%%%c'", letter); 6356 else 6357 { 6358 rtx newop = GEN_INT (ctz_hwi (INTVAL (op))); 6359 output_addr_const (file, newop); 6360 } 6361 break; 6362 case 'T': 6363 if (!CONST_INT_P (op)) 6364 output_operand_lossage ("invalid operand for '%%%c'", letter); 6365 else 6366 { 6367 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op))); 6368 output_addr_const (file, newop); 6369 } 6370 break; 6371 case 'X': 6372 if (!CONST_INT_P (op)) 6373 output_operand_lossage ("invalid operand for '%%%c'", letter); 6374 else 6375 { 6376 int ival = INTVAL (op) + 1; 6377 rtx newop = GEN_INT (ctz_hwi (ival) + 1); 6378 output_addr_const (file, newop); 6379 } 6380 break; 6381 case 'Y': 6382 if (!CONST_INT_P (op)) 6383 output_operand_lossage ("invalid operand for '%%%c'", letter); 6384 else 6385 { 6386 unsigned int imm = (UINTVAL (op) & 63); 6387 gcc_assert (imm <= 63); 6388 rtx newop = GEN_INT (imm); 6389 output_addr_const (file, newop); 6390 } 6391 break; 6392 default: 6393 switch (code) 6394 { 6395 case REG: 6396 if (letter && letter != 'z') 6397 output_operand_lossage ("invalid use of '%%%c'", letter); 6398 fprintf (file, "%s", reg_names[REGNO (op)]); 6399 break; 6400 6401 case MEM: 6402 if (letter && letter != 'z') 6403 output_operand_lossage 
("invalid use of '%%%c'", letter); 6404 else 6405 output_address (mode, XEXP (op, 0)); 6406 break; 6407 6408 case CONST_DOUBLE: 6409 { 6410 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op))) 6411 { 6412 fputs (reg_names[GP_REG_FIRST], file); 6413 break; 6414 } 6415 6416 int fli_index = riscv_float_const_rtx_index_for_fli (op); 6417 if (fli_index == -1 || fli_index > 31) 6418 { 6419 output_operand_lossage ("invalid use of '%%%c'", letter); 6420 break; 6421 } 6422 asm_fprintf (file, "%s", fli_value_print[fli_index]); 6423 break; 6424 } 6425 6426 default: 6427 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op))) 6428 fputs (reg_names[GP_REG_FIRST], file); 6429 else if (letter && letter != 'z') 6430 output_operand_lossage ("invalid use of '%%%c'", letter); 6431 else 6432 output_addr_const (file, riscv_strip_unspec_address (op)); 6433 break; 6434 } 6435 } 6436 } 6437 6438 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P */ 6439 static bool 6440 riscv_print_operand_punct_valid_p (unsigned char code) 6441 { 6442 return (code == '~'); 6443 } 6444 6445 /* Implement TARGET_PRINT_OPERAND_ADDRESS. 
*/ 6446 6447 static void 6448 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x) 6449 { 6450 struct riscv_address_info addr; 6451 6452 if (th_print_operand_address (file, mode, x)) 6453 return; 6454 6455 if (riscv_classify_address (&addr, x, word_mode, true)) 6456 switch (addr.type) 6457 { 6458 case ADDRESS_REG: 6459 output_addr_const (file, riscv_strip_unspec_address (addr.offset)); 6460 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]); 6461 return; 6462 6463 case ADDRESS_LO_SUM: 6464 riscv_print_operand_reloc (file, addr.offset, false); 6465 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]); 6466 return; 6467 6468 case ADDRESS_CONST_INT: 6469 output_addr_const (file, x); 6470 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]); 6471 return; 6472 6473 case ADDRESS_SYMBOLIC: 6474 output_addr_const (file, riscv_strip_unspec_address (x)); 6475 return; 6476 6477 default: 6478 gcc_unreachable (); 6479 } 6480 6481 gcc_unreachable (); 6482 } 6483 6484 static bool 6485 riscv_size_ok_for_small_data_p (int size) 6486 { 6487 return g_switch_value && IN_RANGE (size, 1, g_switch_value); 6488 } 6489 6490 /* Return true if EXP should be placed in the small data section. */ 6491 6492 static bool 6493 riscv_in_small_data_p (const_tree x) 6494 { 6495 /* Because default_use_anchors_for_symbol_p doesn't gather small data to use 6496 the anchor symbol to address nearby objects. In large model, it can get 6497 the better result using the anchor optiomization. */ 6498 if (riscv_cmodel == CM_LARGE) 6499 return false; 6500 6501 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL) 6502 return false; 6503 6504 if (VAR_P (x) && DECL_SECTION_NAME (x)) 6505 { 6506 const char *sec = DECL_SECTION_NAME (x); 6507 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0; 6508 } 6509 6510 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x))); 6511 } 6512 6513 /* Switch to the appropriate section for output of DECL. 
*/ 6514 6515 static section * 6516 riscv_select_section (tree decl, int reloc, 6517 unsigned HOST_WIDE_INT align) 6518 { 6519 switch (categorize_decl_for_section (decl, reloc)) 6520 { 6521 case SECCAT_SRODATA: 6522 return get_named_section (decl, ".srodata", reloc); 6523 6524 default: 6525 return default_elf_select_section (decl, reloc, align); 6526 } 6527 } 6528 6529 /* Switch to the appropriate section for output of DECL. */ 6530 6531 static void 6532 riscv_unique_section (tree decl, int reloc) 6533 { 6534 const char *prefix = NULL; 6535 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 6536 6537 switch (categorize_decl_for_section (decl, reloc)) 6538 { 6539 case SECCAT_SRODATA: 6540 prefix = one_only ? ".sr" : ".srodata"; 6541 break; 6542 6543 default: 6544 break; 6545 } 6546 if (prefix) 6547 { 6548 const char *name, *linkonce; 6549 char *string; 6550 6551 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 6552 name = targetm.strip_name_encoding (name); 6553 6554 /* If we're using one_only, then there needs to be a .gnu.linkonce 6555 prefix to the section name. */ 6556 linkonce = one_only ? ".gnu.linkonce" : ""; 6557 6558 string = ACONCAT ((linkonce, prefix, ".", name, NULL)); 6559 6560 set_decl_section_name (decl, string); 6561 return; 6562 } 6563 default_unique_section (decl, reloc); 6564 } 6565 6566 /* Constant pools are per-function when in large code model. */ 6567 6568 static inline bool 6569 riscv_can_use_per_function_literal_pools_p (void) 6570 { 6571 return riscv_cmodel == CM_LARGE; 6572 } 6573 6574 static bool 6575 riscv_use_blocks_for_constant_p (machine_mode, const_rtx) 6576 { 6577 /* We can't use blocks for constants when we're using a per-function 6578 constant pool. */ 6579 return !riscv_can_use_per_function_literal_pools_p (); 6580 } 6581 6582 /* Return a section for X, handling small data. 
*/ 6583 6584 static section * 6585 riscv_elf_select_rtx_section (machine_mode mode, rtx x, 6586 unsigned HOST_WIDE_INT align) 6587 { 6588 /* The literal pool stays with the function. */ 6589 if (riscv_can_use_per_function_literal_pools_p ()) 6590 return function_section (current_function_decl); 6591 6592 section *s = default_elf_select_rtx_section (mode, x, align); 6593 6594 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ())) 6595 { 6596 if (startswith (s->named.name, ".rodata.cst")) 6597 { 6598 /* Rename .rodata.cst* to .srodata.cst*. */ 6599 char *name = (char *) alloca (strlen (s->named.name) + 2); 6600 sprintf (name, ".s%s", s->named.name + 1); 6601 return get_section (name, s->named.common.flags, NULL); 6602 } 6603 6604 if (s == data_section) 6605 return sdata_section; 6606 } 6607 6608 return s; 6609 } 6610 6611 /* Make the last instruction frame-related and note that it performs 6612 the operation described by FRAME_PATTERN. */ 6613 6614 static void 6615 riscv_set_frame_expr (rtx frame_pattern) 6616 { 6617 rtx insn; 6618 6619 insn = get_last_insn (); 6620 RTX_FRAME_RELATED_P (insn) = 1; 6621 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, 6622 frame_pattern, 6623 REG_NOTES (insn)); 6624 } 6625 6626 /* Return a frame-related rtx that stores REG at MEM. 6627 REG must be a single register. */ 6628 6629 static rtx 6630 riscv_frame_set (rtx mem, rtx reg) 6631 { 6632 rtx set = gen_rtx_SET (mem, reg); 6633 RTX_FRAME_RELATED_P (set) = 1; 6634 return set; 6635 } 6636 6637 /* Returns true if the current function might contain a far jump. */ 6638 6639 static bool 6640 riscv_far_jump_used_p () 6641 { 6642 size_t func_size = 0; 6643 6644 if (cfun->machine->far_jump_used) 6645 return true; 6646 6647 /* We can't change far_jump_used during or after reload, as there is 6648 no chance to change stack frame layout. So we must rely on the 6649 conservative heuristic below having done the right thing. 
*/ 6650 if (reload_in_progress || reload_completed) 6651 return false; 6652 6653 /* Estimate the function length. */ 6654 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6655 func_size += get_attr_length (insn); 6656 6657 /* Conservatively determine whether some jump might exceed 1 MiB 6658 displacement. */ 6659 if (func_size * 2 >= 0x100000) 6660 cfun->machine->far_jump_used = true; 6661 6662 return cfun->machine->far_jump_used; 6663 } 6664 6665 /* Return true, if the current function must save the incoming return 6666 address. */ 6667 6668 static bool 6669 riscv_save_return_addr_reg_p (void) 6670 { 6671 /* The $ra register is call-clobbered: if this is not a leaf function, 6672 save it. */ 6673 if (!crtl->is_leaf) 6674 return true; 6675 6676 /* We need to save the incoming return address if __builtin_eh_return 6677 is being used to set a different return address. */ 6678 if (crtl->calls_eh_return) 6679 return true; 6680 6681 /* Far jumps/branches use $ra as a temporary to set up the target jump 6682 location (clobbering the incoming return address). */ 6683 if (riscv_far_jump_used_p ()) 6684 return true; 6685 6686 /* We need to save it if anyone has used that. */ 6687 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM)) 6688 return true; 6689 6690 /* Need not to use ra for leaf when frame pointer is turned off by 6691 option whatever the omit-leaf-frame's value. */ 6692 if (frame_pointer_needed && crtl->is_leaf 6693 && !TARGET_OMIT_LEAF_FRAME_POINTER) 6694 return true; 6695 6696 return false; 6697 } 6698 6699 /* Return true if the current function must save register REGNO. */ 6700 6701 static bool 6702 riscv_save_reg_p (unsigned int regno) 6703 { 6704 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno); 6705 bool might_clobber = crtl->saves_all_registers 6706 || df_regs_ever_live_p (regno); 6707 6708 if (call_saved && might_clobber) 6709 return true; 6710 6711 /* Save callee-saved V registers. 
*/ 6712 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno) 6713 && might_clobber) 6714 return true; 6715 6716 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) 6717 return true; 6718 6719 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ()) 6720 return true; 6721 6722 /* If this is an interrupt handler, then must save extra registers. */ 6723 if (cfun->machine->interrupt_handler_p) 6724 { 6725 /* zero register is always zero. */ 6726 if (regno == GP_REG_FIRST) 6727 return false; 6728 6729 /* The function will return the stack pointer to its original value. */ 6730 if (regno == STACK_POINTER_REGNUM) 6731 return false; 6732 6733 /* By convention, we assume that gp and tp are safe. */ 6734 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM) 6735 return false; 6736 6737 /* We must save every register used in this function. If this is not a 6738 leaf function, then we must save all temporary registers. */ 6739 if (df_regs_ever_live_p (regno) 6740 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno))) 6741 return true; 6742 } 6743 6744 return false; 6745 } 6746 6747 /* Return TRUE if Zcmp push and pop insns should be 6748 avoided. FALSE otherwise. 6749 Only use multi push & pop if all GPRs masked can be covered, 6750 and stack access is SP based, 6751 and GPRs are at top of the stack frame, 6752 and no conflicts in stack allocation with other features */ 6753 static bool 6754 riscv_avoid_multi_push (const struct riscv_frame_info *frame) 6755 { 6756 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed 6757 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0 6758 || crtl->args.pretend_args_size != 0 6759 || (use_shrink_wrapping_separate () 6760 && !riscv_avoid_shrink_wrapping_separate ()) 6761 || (frame->mask & ~MULTI_PUSH_GPR_MASK)) 6762 return true; 6763 6764 return false; 6765 } 6766 6767 /* Determine whether to use multi push insn. 
*/ 6768 static bool 6769 riscv_use_multi_push (const struct riscv_frame_info *frame) 6770 { 6771 if (riscv_avoid_multi_push (frame)) 6772 return false; 6773 6774 return (frame->multi_push_adj_base != 0); 6775 } 6776 6777 /* Return TRUE if a libcall to save/restore GPRs should be 6778 avoided. FALSE otherwise. */ 6779 static bool 6780 riscv_avoid_save_libcall (void) 6781 { 6782 if (!TARGET_SAVE_RESTORE 6783 || crtl->calls_eh_return 6784 || frame_pointer_needed 6785 || cfun->machine->interrupt_handler_p 6786 || cfun->machine->varargs_size != 0 6787 || crtl->args.pretend_args_size != 0) 6788 return true; 6789 6790 return false; 6791 } 6792 6793 /* Determine whether to call GPR save/restore routines. */ 6794 static bool 6795 riscv_use_save_libcall (const struct riscv_frame_info *frame) 6796 { 6797 if (riscv_avoid_save_libcall ()) 6798 return false; 6799 6800 return frame->save_libcall_adjustment != 0; 6801 } 6802 6803 /* Determine which GPR save/restore routine to call. */ 6804 6805 static unsigned 6806 riscv_save_libcall_count (unsigned mask) 6807 { 6808 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) 6809 if (BITSET_P (mask, n)) 6810 return CALLEE_SAVED_REG_NUMBER (n) + 1; 6811 abort (); 6812 } 6813 6814 /* calculate number of s regs in multi push and pop. 6815 Note that {s0-s10} is not valid in Zcmp, use {s0-s11} instead. */ 6816 static unsigned 6817 riscv_multi_push_sregs_count (unsigned mask) 6818 { 6819 unsigned num = riscv_save_libcall_count (mask); 6820 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS 6821 : num; 6822 } 6823 6824 /* calculate number of regs(ra, s0-sx) in multi push and pop. */ 6825 static unsigned 6826 riscv_multi_push_regs_count (unsigned mask) 6827 { 6828 /* 1 is for ra */ 6829 return riscv_multi_push_sregs_count (mask) + 1; 6830 } 6831 6832 /* Handle 16 bytes align for poly_int. 
*/
/* Return the aligned upper bound of VALUE for an alignment of 16.  */
static poly_int64
riscv_16bytes_align (poly_int64 value)
{
  return aligned_upper_bound (value, 16);
}

/* Likewise for a compile-time constant: round VALUE up to a multiple
   of 16.  */
static HOST_WIDE_INT
riscv_16bytes_align (HOST_WIDE_INT value)
{
  return ROUND_UP (value, 16);
}

/* Handle stack align for poly_int.  The alignment used is
   PREFERRED_STACK_BOUNDARY / 8.  */
static poly_int64
riscv_stack_align (poly_int64 value)
{
  return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
}

/* Likewise for a compile-time constant, using RISCV_STACK_ALIGN.  */
static HOST_WIDE_INT
riscv_stack_align (HOST_WIDE_INT value)
{
  return RISCV_STACK_ALIGN (value);
}

/* Populate the current function's riscv_frame_info structure.

   RISC-V stack frames grown downward.  High addresses are at the top.

        +-------------------------------+
        |                               |
        |  incoming stack arguments     |
        |                               |
        +-------------------------------+ <-- incoming stack pointer
        |                               |
        |  callee-allocated save area   |
        |  for arguments that are       |
        |  split between registers and  |
        |  the stack                    |
        |                               |
        +-------------------------------+ <-- arg_pointer_rtx
        |                               |
        |  callee-allocated save area   |
        |  for register varargs         |
        |                               |
        +-------------------------------+ <-- hard_frame_pointer_rtx;
        |                               |     stack_pointer_rtx + gp_sp_offset
        |  GPR save area                |       + UNITS_PER_WORD
        |                               |
        +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
        |                               |       + UNITS_PER_FP_REG
        |  FPR save area                |
        |                               |
        +-------------------------------+ <-- stack_pointer_rtx
        |                               |       + v_sp_offset_top
        |  Vector Registers save area   |
        |                               |
        | ----------------------------- | <-- stack_pointer_rtx
        | padding                       |       + v_sp_offset_bottom
        +-------------------------------+ <-- frame_pointer_rtx (virtual)
        |                               |
        |  local variables              |
        |                               |
      P +-------------------------------+
        |                               |
        |  outgoing stack arguments     |
        |                               |
        +-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack
allocations such as alloca insert data at point P. 6903 They decrease stack_pointer_rtx but leave frame_pointer_rtx and 6904 hard_frame_pointer_rtx unchanged. */ 6905 6906 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); 6907 6908 static void 6909 riscv_compute_frame_info (void) 6910 { 6911 struct riscv_frame_info *frame; 6912 poly_int64 offset; 6913 bool interrupt_save_prologue_temp = false; 6914 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0; 6915 unsigned int num_v_saved = 0; 6916 6917 frame = &cfun->machine->frame; 6918 6919 /* In an interrupt function, there are two cases in which t0 needs to be used: 6920 1, If we have a large frame, then we need to save/restore t0. We check for 6921 this before clearing the frame struct. 6922 2, Need to save and restore some CSRs in the frame. */ 6923 if (cfun->machine->interrupt_handler_p) 6924 { 6925 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); 6926 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)) 6927 || (TARGET_HARD_FLOAT || TARGET_ZFINX)) 6928 interrupt_save_prologue_temp = true; 6929 } 6930 6931 frame->reset(); 6932 6933 if (!cfun->machine->naked_p) 6934 { 6935 /* Find out which GPRs we need to save. */ 6936 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 6937 if (riscv_save_reg_p (regno) 6938 || (interrupt_save_prologue_temp 6939 && (regno == RISCV_PROLOGUE_TEMP_REGNUM))) 6940 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; 6941 6942 /* If this function calls eh_return, we must also save and restore the 6943 EH data registers. */ 6944 if (crtl->calls_eh_return) 6945 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) 6946 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; 6947 6948 /* Find out which FPRs we need to save. This loop must iterate over 6949 the same space as its companion in riscv_for_each_saved_reg. 
*/ 6950 if (TARGET_HARD_FLOAT) 6951 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 6952 if (riscv_save_reg_p (regno)) 6953 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; 6954 6955 /* Find out which V registers we need to save. */ 6956 if (TARGET_VECTOR) 6957 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++) 6958 if (riscv_save_reg_p (regno)) 6959 { 6960 frame->vmask |= 1 << (regno - V_REG_FIRST); 6961 num_v_saved++; 6962 } 6963 } 6964 6965 if (frame->mask) 6966 { 6967 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD); 6968 6969 /* 1 is for ra */ 6970 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask); 6971 /* Only use save/restore routines if they don't alter the stack size. */ 6972 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size 6973 && !riscv_avoid_save_libcall ()) 6974 { 6975 /* Libcall saves/restores 3 registers at once, so we need to 6976 allocate 12 bytes for callee-saved register. */ 6977 if (TARGET_RVE) 6978 x_save_size = 3 * UNITS_PER_WORD; 6979 6980 frame->save_libcall_adjustment = x_save_size; 6981 } 6982 6983 if (!riscv_avoid_multi_push (frame)) 6984 { 6985 /* num(ra, s0-sx) */ 6986 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask); 6987 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD); 6988 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size); 6989 } 6990 } 6991 6992 /* In an interrupt function, we need extra space for the initial saves of CSRs. */ 6993 if (cfun->machine->interrupt_handler_p 6994 && ((TARGET_HARD_FLOAT && frame->fmask) 6995 || (TARGET_ZFINX 6996 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */ 6997 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM))))) 6998 /* Save and restore FCSR. */ 6999 /* TODO: When P or V extensions support interrupts, some of their CSRs 7000 may also need to be saved and restored. 
*/ 7001 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD); 7002 7003 /* At the bottom of the frame are any outgoing stack arguments. */ 7004 offset = riscv_stack_align (crtl->outgoing_args_size); 7005 /* Next are local stack variables. */ 7006 offset += riscv_stack_align (get_frame_size ()); 7007 /* The virtual frame pointer points above the local variables. */ 7008 frame->frame_pointer_offset = offset; 7009 /* Next are the callee-saved VRs. */ 7010 if (frame->vmask) 7011 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG); 7012 frame->v_sp_offset_top = offset; 7013 frame->v_sp_offset_bottom 7014 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG; 7015 /* Next are the callee-saved FPRs. */ 7016 if (frame->fmask) 7017 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG); 7018 frame->fp_sp_offset = offset - UNITS_PER_FP_REG; 7019 /* Next are the callee-saved GPRs. */ 7020 if (frame->mask) 7021 { 7022 offset += x_save_size; 7023 /* align to 16 bytes and add paddings to GPR part to honor 7024 both stack alignment and zcmp pus/pop size alignment. */ 7025 if (riscv_use_multi_push (frame) 7026 && known_lt (offset, frame->multi_push_adj_base 7027 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM)) 7028 offset = riscv_16bytes_align (offset); 7029 } 7030 frame->gp_sp_offset = offset - UNITS_PER_WORD; 7031 /* The hard frame pointer points above the callee-saved GPRs. */ 7032 frame->hard_frame_pointer_offset = offset; 7033 /* Above the hard frame pointer is the callee-allocated varags save area. */ 7034 offset += riscv_stack_align (cfun->machine->varargs_size); 7035 /* Next is the callee-allocated area for pretend stack arguments. */ 7036 offset += riscv_stack_align (crtl->args.pretend_args_size); 7037 /* Arg pointer must be below pretend args, but must be above alignment 7038 padding. 
*/ 7039 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; 7040 frame->total_size = offset; 7041 7042 /* Next points the incoming stack pointer and any incoming arguments. */ 7043 } 7044 7045 /* Make sure that we're not trying to eliminate to the wrong hard frame 7046 pointer. */ 7047 7048 static bool 7049 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 7050 { 7051 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); 7052 } 7053 7054 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer 7055 or argument pointer. TO is either the stack pointer or hard frame 7056 pointer. */ 7057 7058 poly_int64 7059 riscv_initial_elimination_offset (int from, int to) 7060 { 7061 poly_int64 src, dest; 7062 7063 riscv_compute_frame_info (); 7064 7065 if (to == HARD_FRAME_POINTER_REGNUM) 7066 dest = cfun->machine->frame.hard_frame_pointer_offset; 7067 else if (to == STACK_POINTER_REGNUM) 7068 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */ 7069 else 7070 gcc_unreachable (); 7071 7072 if (from == FRAME_POINTER_REGNUM) 7073 src = cfun->machine->frame.frame_pointer_offset; 7074 else if (from == ARG_POINTER_REGNUM) 7075 src = cfun->machine->frame.arg_pointer_offset; 7076 else 7077 gcc_unreachable (); 7078 7079 return src - dest; 7080 } 7081 7082 /* Implement RETURN_ADDR_RTX. We do not support moving back to a 7083 previous frame. */ 7084 7085 rtx 7086 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 7087 { 7088 if (count != 0) 7089 return const0_rtx; 7090 7091 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); 7092 } 7093 7094 /* Emit code to change the current function's return address to 7095 ADDRESS. SCRATCH is available as a scratch register, if needed. 7096 ADDRESS and SCRATCH are both word-mode GPRs. 
*/ 7097 7098 void 7099 riscv_set_return_address (rtx address, rtx scratch) 7100 { 7101 rtx slot_address; 7102 7103 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)); 7104 slot_address = riscv_add_offset (scratch, stack_pointer_rtx, 7105 cfun->machine->frame.gp_sp_offset.to_constant()); 7106 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address); 7107 } 7108 7109 /* Save register REG to MEM. Make the instruction frame-related. */ 7110 7111 static void 7112 riscv_save_reg (rtx reg, rtx mem) 7113 { 7114 riscv_emit_move (mem, reg); 7115 riscv_set_frame_expr (riscv_frame_set (mem, reg)); 7116 } 7117 7118 /* Restore register REG from MEM. */ 7119 7120 static void 7121 riscv_restore_reg (rtx reg, rtx mem) 7122 { 7123 rtx insn = riscv_emit_move (reg, mem); 7124 rtx dwarf = NULL_RTX; 7125 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); 7126 7127 if (known_gt (epilogue_cfa_sp_offset, 0) 7128 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM) 7129 { 7130 rtx cfa_adjust_rtx 7131 = gen_rtx_PLUS (Pmode, stack_pointer_rtx, 7132 gen_int_mode (epilogue_cfa_sp_offset, Pmode)); 7133 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); 7134 } 7135 7136 REG_NOTES (insn) = dwarf; 7137 RTX_FRAME_RELATED_P (insn) = 1; 7138 } 7139 7140 /* A function to save or store a register. The first argument is the 7141 register and the second is the stack slot. */ 7142 typedef void (*riscv_save_restore_fn) (rtx, rtx); 7143 7144 /* Use FN to save or restore register REGNO. MODE is the register's 7145 mode and OFFSET is the offset of its save slot from the current 7146 stack pointer. 
*/

static void
riscv_save_restore_reg (machine_mode mode, int regno,
                       HOST_WIDE_INT offset, riscv_save_restore_fn fn)
{
  rtx mem;

  /* The save slot is a frame memory reference at sp + OFFSET.  */
  mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
  fn (gen_rtx_REG (mode, regno), mem);
}

/* Return the next register up from REGNO up to LIMIT for the callee
   to save or restore.  OFFSET will be adjusted accordingly: it is
   decreased by UNITS_PER_WORD when a register is found and left alone
   otherwise.  If INC is set, then REGNO will be incremented first.
   Returns INVALID_REGNUM if there is no such next register.  */

static unsigned int
riscv_next_saved_reg (unsigned int regno, unsigned int limit,
                      HOST_WIDE_INT *offset, bool inc = true)
{
  if (inc)
    regno++;

  while (regno <= limit)
    {
      /* Only registers recorded in the frame's GPR mask count.  */
      if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
        {
          *offset = *offset - UNITS_PER_WORD;
          return regno;
        }

      regno++;
    }
  return INVALID_REGNUM;
}

/* Return TRUE if provided REGNO is eh return data register.  Always
   FALSE when the current function does not call eh_return.  */

static bool
riscv_is_eh_return_data_register (unsigned int regno)
{
  unsigned int i, regnum;

  if (!crtl->calls_eh_return)
    return false;

  /* EH_RETURN_DATA_REGNO yields INVALID_REGNUM past the last data
     register, which terminates the scan.  */
  for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
    if (regno == regnum)
      {
        return true;
      }

  return false;
}

/* Call FN for each register that is saved by the current function.
   SP_OFFSET is the offset of the current stack pointer from the start
   of the frame.  EPILOGUE is true when restoring; together with
   MAYBE_EH_RETURN it decides whether the EH return data registers are
   skipped.  GPRs are handled first, then FPRs.  */

static void
riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
                          bool epilogue, bool maybe_eh_return)
{
  HOST_WIDE_INT offset, first_fp_offset;
  unsigned int regno, num_masked_fp = 0;
  unsigned int start = GP_REG_FIRST;
  unsigned int limit = GP_REG_LAST;

  /* Save the link register and s-registers.  OFFSET starts one word
     above the first slot because riscv_next_saved_reg subtracts
     UNITS_PER_WORD each time it finds a register.  */
  offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
           + UNITS_PER_WORD;
  for (regno = riscv_next_saved_reg (start, limit, &offset, false);
       regno != INVALID_REGNUM;
       regno = riscv_next_saved_reg (regno, limit, &offset))
    {
      /* Registers wrapped separately are not handled here; their slot
         has already been accounted for in OFFSET.  */
      if (cfun->machine->reg_is_wrapped_separately[regno])
        continue;

      /* If this is a normal return in a function that calls the eh_return
         builtin, then do not restore the eh return data registers as that
         would clobber the return value.  But we do still need to save them
         in the prologue, and restore them for an exception return, so we
         need special handling here.  */
      if (epilogue && !maybe_eh_return
          && riscv_is_eh_return_data_register (regno))
        continue;

      /* In an interrupt function, save and restore some necessary CSRs in the stack
         to avoid changes in CSRs.  This condition mirrors the one that
         reserves the extra slot in riscv_compute_frame_info.  */
      if (regno == RISCV_PROLOGUE_TEMP_REGNUM
          && cfun->machine->interrupt_handler_p
          && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
              || (TARGET_ZFINX
                  && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
        {
          /* Always assume FCSR occupy UNITS_PER_WORD to prevent stack
             offset misaligned later.  */
          unsigned int fcsr_size = UNITS_PER_WORD;
          if (!epilogue)
            {
              /* Prologue: save the temporary register first, then read
                 FCSR into it and store that in the slot just below.  */
              riscv_save_restore_reg (word_mode, regno, offset, fn);
              offset -= fcsr_size;
              emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
              riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
                                      offset, riscv_save_reg);
            }
          else
            {
              /* Epilogue: the reverse order.  Reload the saved FCSR
                 value through the temporary register, write it back,
                 and only then restore the temporary register itself.  */
              riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
                                      offset - fcsr_size, riscv_restore_reg);
              emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
              riscv_save_restore_reg (word_mode, regno, offset, fn);
              offset -= fcsr_size;
            }
          continue;
        }

      if (TARGET_XTHEADMEMPAIR)
        {
          /* Get the next reg/offset pair.  A copy of OFFSET is used so
             that nothing changes if no pair is emitted.  */
          HOST_WIDE_INT offset2 = offset;
          unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);

          /* Validate everything before emitting a mempair instruction.
             The second register is subject to the same two skip
             conditions as the first.  */
          if (regno2 != INVALID_REGNUM
              && !cfun->machine->reg_is_wrapped_separately[regno2]
              && !(epilogue && !maybe_eh_return
                   && riscv_is_eh_return_data_register (regno2)))
            {
              bool load_p = (fn == riscv_restore_reg);
              rtx operands[4];
              th_mempair_prepare_save_restore_operands (operands,
                                                        load_p, word_mode,
                                                        regno, offset,
                                                        regno2, offset2);

              /* If the operands fit into a mempair insn, then emit one
                 and advance past the second register as well.  */
              if (th_mempair_operands_p (operands, load_p, word_mode))
                {
                  th_mempair_save_restore_regs (operands, load_p, word_mode);
                  offset = offset2;
                  regno = regno2;
                  continue;
                }
            }
        }

      riscv_save_restore_reg (word_mode, regno, offset, fn);
    }

  /* This loop must iterate over the same space as its companion in
     riscv_compute_frame_info.  */
  first_fp_offset
    = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
        bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
        machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
        /* With multi push the slot index comes from the register's
           callee-saved number; otherwise slots are handed out in the
           order the saved registers are met.  */
        unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
                              ? CALLEE_SAVED_FREG_NUMBER (regno)
                              : num_masked_fp;
        offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
        if (handle_reg)
          riscv_save_restore_reg (mode, regno, offset, fn);
        /* Counted even when the register is wrapped separately, so the
           remaining registers keep their slots.  */
        num_masked_fp++;
      }
}

/* Call FN for each V register that is saved by the current function.
*/

/* PROLOGUE selects saving (true) or restoring (false).  Each handled
   register moves the stack pointer by one vector register size and
   subtracts UNITS_PER_V_REG from REMAINING_SIZE, which is updated in
   place for the caller.  */

static void
riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
                            riscv_save_restore_fn fn, bool prologue)
{
  /* Size of one vector register as an rtx: an immediate when it is a
     small constant, otherwise the prologue temporary register loaded
     with the value.  It stays NULL_RTX when no vector register is
     saved, in which case neither loop below uses it.  */
  rtx vlen = NULL_RTX;
  if (cfun->machine->frame.vmask != 0)
    {
      if (UNITS_PER_V_REG.is_constant ()
          && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
        vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
      else
        {
          vlen = RISCV_PROLOGUE_TEMP (Pmode);
          rtx insn
            = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Select the mode where LMUL is 1 and SEW is largest.  */
  machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;

  if (prologue)
    {
      /* This loop must iterate over the same space as its companion in
         riscv_compute_frame_info.  */
      for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
        if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
          {
            bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
            if (handle_reg)
              {
                /* Lower the stack pointer by one vector register, then
                   store the register at the new stack pointer.  */
                rtx insn = NULL_RTX;
                if (CONST_INT_P (vlen))
                  {
                    gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
                    insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
                                                     stack_pointer_rtx,
                                                     GEN_INT (-INTVAL (vlen))));
                  }
                else
                  insn = emit_insn (
                    gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
                gcc_assert (insn != NULL_RTX);
                RTX_FRAME_RELATED_P (insn) = 1;
                riscv_save_restore_reg (m1_mode, regno, 0, fn);
                remaining_size -= UNITS_PER_V_REG;
              }
          }
    }
  else
    {
      /* This loop must iterate over the same space as its companion in
         riscv_compute_frame_info.  It walks the registers in the
         opposite order to the prologue loop.  */
      for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
        if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
          {
            bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
            if (handle_reg)
              {
                /* Load the register from the current stack pointer,
                   then raise the stack pointer past its slot.  */
                riscv_save_restore_reg (m1_mode, regno, 0, fn);
                rtx insn = emit_insn (
                  gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
                gcc_assert (insn != NULL_RTX);
                RTX_FRAME_RELATED_P (insn) = 1;
                remaining_size -= UNITS_PER_V_REG;
              }
          }
    }
}

/* For stack frames that can't be allocated with a single ADDI instruction,
   compute the best value to initially allocate.  It must at a minimum
   allocate enough space to spill the callee-saved registers.  If TARGET_RVC,
   try to pick a value that will allow compression of the register saves
   without adding extra instructions.  FRAME is the frame layout to use
   and REMAINING_SIZE the amount of stack still to be allocated.  */

static HOST_WIDE_INT
riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
{
  /* Reduce REMAINING_SIZE to a compile-time constant: its value when it
     is constant, otherwise the difference of its two stack-aligned
     coefficients.  */
  HOST_WIDE_INT remaining_const_size;
  if (!remaining_size.is_constant ())
    remaining_const_size
      = riscv_stack_align (remaining_size.coeffs[0])
        - riscv_stack_align (remaining_size.coeffs[1]);
  else
    remaining_const_size = remaining_size.to_constant ();

  /* First step must be set to the top of vector registers save area if any
     vector registers need be preserved.  */
  if (frame->vmask != 0)
    return (remaining_size - frame->v_sp_offset_top).to_constant ();

  /* A frame that fits in one immediate needs no second step.  */
  if (SMALL_OPERAND (remaining_const_size))
    return remaining_const_size;

  /* The first step has to cover everything above the virtual frame
     pointer (MIN_FIRST_STEP).  MAX_FIRST_STEP is half of IMM_REACH
     less one stack alignment unit.  */
  poly_int64 callee_saved_first_step =
    remaining_size - frame->frame_pointer_offset;
  gcc_assert(callee_saved_first_step.is_constant ());
  HOST_WIDE_INT min_first_step =
    riscv_stack_align (callee_saved_first_step.to_constant ());
  HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
  HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
  gcc_assert (min_first_step <= max_first_step);

  /* As an optimization, use the least-significant bits of the total frame
     size, so that the second adjustment step is just LUI + ADD.  */
  if (!SMALL_OPERAND (min_second_step)
      && remaining_const_size % IMM_REACH <= max_first_step
      && remaining_const_size % IMM_REACH >= min_first_step)
    return remaining_const_size % IMM_REACH;

  if (TARGET_RVC || TARGET_ZCA)
    {
      /* If we need two subtracts, and one is small enough to allow compressed
         loads and stores, then put that one first.  */
      if (IN_RANGE (min_second_step, 0,
                    (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
        return MAX (min_second_step, min_first_step);

      /* If we need LUI + ADDI + ADD for the second adjustment step, then start
         with the minimum first step, so that we can get compressed loads and
         stores.  */
      else if (!SMALL_OPERAND (min_second_step))
        return min_first_step;
    }

  return max_first_step;
}

/* Build and return the list of CFA notes for a prologue that uses the
   save libcall: one REG_CFA_OFFSET note per saved GPR, followed by a
   REG_CFA_ADJUST_CFA note for the stack pointer adjustment of
   save_libcall_adjustment bytes.  */

static rtx
riscv_adjust_libcall_cfi_prologue ()
{
  rtx dwarf = NULL_RTX;
  rtx adjust_sp_rtx, reg, mem, insn;
  int saved_size = cfun->machine->frame.save_libcall_adjustment;
  int offset;

  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
        /* The save order is ra, s0, s1, s2 to s11.  ra, s0 and s1 are
           handled by name; the remaining registers are placed by their
           distance from S2_REGNUM.  */
        if (regno == RETURN_ADDR_REGNUM)
          offset = saved_size - UNITS_PER_WORD;
        else if (regno == S0_REGNUM)
          offset = saved_size - UNITS_PER_WORD * 2;
        else if (regno == S1_REGNUM)
          offset = saved_size - UNITS_PER_WORD * 3;
        else
          offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);

        reg = gen_rtx_REG (Pmode, regno);
        mem = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));

        insn = gen_rtx_SET (mem, reg);
        dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
      }

  /* Debug info for adjust sp.  */
  adjust_sp_rtx =
    gen_rtx_SET (stack_pointer_rtx,
                 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
                          dwarf);
  return dwarf;
}

static rtx
riscv_adjust_multi_push_cfi_prologue (int saved_size)
{
  rtx dwarf = NULL_RTX;
  rtx adjust_sp_rtx, reg, mem, insn;
  unsigned int mask = cfun->machine->frame.mask;
  int offset;
  int saved_cnt = 0;

  if (mask & S10_MASK)
    mask |= S11_MASK;

  for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
    if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
      {
        /* The save order is s11-s0, ra
           from high to low addr.
*/ 7504 offset = saved_size - UNITS_PER_WORD * (++saved_cnt); 7505 7506 reg = gen_rtx_REG (Pmode, regno); 7507 mem = gen_frame_mem (Pmode, 7508 plus_constant (Pmode, stack_pointer_rtx, offset)); 7509 7510 insn = gen_rtx_SET (mem, reg); 7511 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf); 7512 } 7513 7514 /* Debug info for adjust sp. */ 7515 adjust_sp_rtx 7516 = gen_rtx_SET (stack_pointer_rtx, 7517 plus_constant (Pmode, stack_pointer_rtx, -saved_size)); 7518 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf); 7519 return dwarf; 7520 } 7521 7522 static void 7523 riscv_emit_stack_tie (void) 7524 { 7525 if (Pmode == SImode) 7526 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx)); 7527 else 7528 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); 7529 } 7530 7531 /*zcmp multi push and pop code_for_push_pop function ptr array */ 7532 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM] 7533 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra, 7534 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra}, 7535 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0, 7536 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0}, 7537 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1, 7538 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1}, 7539 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2, 7540 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2}, 7541 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3, 7542 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3}, 7543 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4, 7544 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4}, 7545 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5, 7546 
code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5}, 7547 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6, 7548 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6}, 7549 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7, 7550 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7}, 7551 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8, 7552 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8}, 7553 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9, 7554 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9}, 7555 {nullptr, nullptr, nullptr, nullptr}, 7556 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11, 7557 code_for_gpr_multi_popret_up_to_s11, 7558 code_for_gpr_multi_popretz_up_to_s11}}; 7559 7560 static rtx 7561 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size, 7562 unsigned int regs_num) 7563 { 7564 gcc_assert (op < ZCMP_OP_NUM); 7565 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS 7566 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra*/ 7567 rtx stack_adj = GEN_INT (adj_size); 7568 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj); 7569 } 7570 7571 static unsigned 7572 get_multi_push_fpr_mask (unsigned max_fprs_push) 7573 { 7574 unsigned mask_fprs_push = 0, num_f_pushed = 0; 7575 for (unsigned regno = FP_REG_FIRST; 7576 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++) 7577 if (riscv_save_reg_p (regno)) 7578 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++; 7579 return mask_fprs_push; 7580 } 7581 7582 /* Expand the "prologue" pattern. 
*/ 7583 7584 void 7585 riscv_expand_prologue (void) 7586 { 7587 struct riscv_frame_info *frame = &cfun->machine->frame; 7588 poly_int64 remaining_size = frame->total_size; 7589 unsigned mask = frame->mask; 7590 unsigned fmask = frame->fmask; 7591 int spimm, multi_push_additional, stack_adj; 7592 rtx insn, dwarf = NULL_RTX; 7593 unsigned th_int_mask = 0; 7594 7595 if (flag_stack_usage_info) 7596 current_function_static_stack_size = constant_lower_bound (remaining_size); 7597 7598 if (cfun->machine->naked_p) 7599 return; 7600 7601 /* prefer muti-push to save-restore libcall. */ 7602 if (riscv_use_multi_push (frame)) 7603 { 7604 remaining_size -= frame->multi_push_adj_base; 7605 /* If there are vector registers that need to be saved, then it can only 7606 be reduced to the frame->v_sp_offset_top position at most, since the 7607 vector registers will need to be saved one by one by decreasing the SP 7608 later. */ 7609 poly_int64 remaining_size_above_varea 7610 = frame->vmask != 0 7611 ? remaining_size - frame->v_sp_offset_top 7612 : remaining_size; 7613 7614 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP)) 7615 spimm = 3; 7616 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP)) 7617 spimm = 2; 7618 else if (known_gt (remaining_size_above_varea, 0)) 7619 spimm = 1; 7620 else 7621 spimm = 0; 7622 multi_push_additional = spimm * ZCMP_SP_INC_STEP; 7623 frame->multi_push_adj_addi = multi_push_additional; 7624 remaining_size -= multi_push_additional; 7625 7626 /* emit multi push insn & dwarf along with it. */ 7627 stack_adj = frame->multi_push_adj_base + multi_push_additional; 7628 insn = emit_insn (riscv_gen_multi_push_pop_insn ( 7629 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask))); 7630 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj); 7631 RTX_FRAME_RELATED_P (insn) = 1; 7632 REG_NOTES (insn) = dwarf; 7633 7634 /* Temporarily fib that we need not save GPRs. 
*/ 7635 frame->mask = 0; 7636 7637 /* push FPRs into the addtional reserved space by cm.push. */ 7638 if (fmask) 7639 { 7640 unsigned mask_fprs_push 7641 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD); 7642 frame->fmask &= mask_fprs_push; 7643 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, 7644 false); 7645 frame->fmask = fmask & ~mask_fprs_push; /* mask for the rest FPRs. */ 7646 } 7647 } 7648 /* When optimizing for size, call a subroutine to save the registers. */ 7649 else if (riscv_use_save_libcall (frame)) 7650 { 7651 rtx dwarf = NULL_RTX; 7652 dwarf = riscv_adjust_libcall_cfi_prologue (); 7653 7654 remaining_size -= frame->save_libcall_adjustment; 7655 insn = emit_insn (riscv_gen_gpr_save_insn (frame)); 7656 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ 7657 7658 RTX_FRAME_RELATED_P (insn) = 1; 7659 REG_NOTES (insn) = dwarf; 7660 } 7661 7662 th_int_mask = th_int_get_mask (frame->mask); 7663 if (th_int_mask && TH_INT_INTERRUPT (cfun)) 7664 { 7665 frame->mask &= ~th_int_mask; 7666 7667 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for 7668 interrupts, such as fcsr. */ 7669 if ((TARGET_HARD_FLOAT && frame->fmask) 7670 || (TARGET_ZFINX && frame->mask)) 7671 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM); 7672 7673 unsigned save_adjustment = th_int_get_save_adjustment (); 7674 frame->gp_sp_offset -= save_adjustment; 7675 remaining_size -= save_adjustment; 7676 7677 insn = emit_insn (gen_th_int_push ()); 7678 7679 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask); 7680 RTX_FRAME_RELATED_P (insn) = 1; 7681 REG_NOTES (insn) = dwarf; 7682 } 7683 7684 /* Save the GP, FP registers. 
*/ 7685 if ((frame->mask | frame->fmask) != 0) 7686 { 7687 if (known_gt (remaining_size, frame->frame_pointer_offset)) 7688 { 7689 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size); 7690 remaining_size -= step1; 7691 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, 7692 GEN_INT (-step1)); 7693 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; 7694 } 7695 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false); 7696 } 7697 7698 /* Undo the above fib. */ 7699 frame->mask = mask; 7700 frame->fmask = fmask; 7701 7702 /* Set up the frame pointer, if we're using one. */ 7703 if (frame_pointer_needed) 7704 { 7705 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, 7706 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ())); 7707 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; 7708 7709 riscv_emit_stack_tie (); 7710 } 7711 7712 /* Save the V registers. */ 7713 if (frame->vmask != 0) 7714 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true); 7715 7716 /* Allocate the rest of the frame. */ 7717 if (known_gt (remaining_size, 0)) 7718 { 7719 /* Two step adjustment: 7720 1.scalable frame. 2.constant frame. */ 7721 poly_int64 scalable_frame (0, 0); 7722 if (!remaining_size.is_constant ()) 7723 { 7724 /* First for scalable frame. */ 7725 poly_int64 scalable_frame = remaining_size; 7726 scalable_frame.coeffs[0] = remaining_size.coeffs[1]; 7727 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false); 7728 remaining_size -= scalable_frame; 7729 } 7730 7731 /* Second step for constant frame. */ 7732 HOST_WIDE_INT constant_frame = remaining_size.to_constant (); 7733 if (constant_frame == 0) 7734 { 7735 /* We must have allocated stack space for the scalable frame. 7736 Emit a stack tie if we have a frame pointer so that the 7737 allocation is ordered WRT fp setup and subsequent writes 7738 into the frame. 
*/ 7739 if (frame_pointer_needed) 7740 riscv_emit_stack_tie (); 7741 return; 7742 } 7743 7744 if (SMALL_OPERAND (-constant_frame)) 7745 { 7746 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, 7747 GEN_INT (-constant_frame)); 7748 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; 7749 } 7750 else 7751 { 7752 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame)); 7753 emit_insn (gen_add3_insn (stack_pointer_rtx, 7754 stack_pointer_rtx, 7755 RISCV_PROLOGUE_TEMP (Pmode))); 7756 7757 /* Describe the effect of the previous instructions. */ 7758 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame); 7759 insn = gen_rtx_SET (stack_pointer_rtx, insn); 7760 riscv_set_frame_expr (insn); 7761 } 7762 7763 /* We must have allocated the remainder of the stack frame. 7764 Emit a stack tie if we have a frame pointer so that the 7765 allocation is ordered WRT fp setup and subsequent writes 7766 into the frame. */ 7767 if (frame_pointer_needed) 7768 riscv_emit_stack_tie (); 7769 } 7770 } 7771 7772 static rtx 7773 riscv_adjust_multi_pop_cfi_epilogue (int saved_size) 7774 { 7775 rtx dwarf = NULL_RTX; 7776 rtx adjust_sp_rtx, reg; 7777 unsigned int mask = cfun->machine->frame.mask; 7778 7779 if (mask & S10_MASK) 7780 mask |= S11_MASK; 7781 7782 /* Debug info for adjust sp. */ 7783 adjust_sp_rtx 7784 = gen_rtx_SET (stack_pointer_rtx, 7785 plus_constant (Pmode, stack_pointer_rtx, saved_size)); 7786 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf); 7787 7788 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 7789 if (BITSET_P (mask, regno - GP_REG_FIRST)) 7790 { 7791 reg = gen_rtx_REG (Pmode, regno); 7792 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); 7793 } 7794 7795 return dwarf; 7796 } 7797 7798 static rtx 7799 riscv_adjust_libcall_cfi_epilogue () 7800 { 7801 rtx dwarf = NULL_RTX; 7802 rtx adjust_sp_rtx, reg; 7803 int saved_size = cfun->machine->frame.save_libcall_adjustment; 7804 7805 /* Debug info for adjust sp. 
*/ 7806 adjust_sp_rtx = 7807 gen_rtx_SET (stack_pointer_rtx, 7808 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size))); 7809 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, 7810 dwarf); 7811 7812 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 7813 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) 7814 { 7815 reg = gen_rtx_REG (Pmode, regno); 7816 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); 7817 } 7818 7819 return dwarf; 7820 } 7821 7822 static void 7823 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask, 7824 unsigned multipop_size) 7825 { 7826 rtx insn; 7827 unsigned regs_count = riscv_multi_push_regs_count (mask); 7828 7829 if (!use_multi_pop_normal) 7830 insn = emit_insn ( 7831 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count)); 7832 else 7833 insn = emit_jump_insn ( 7834 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count)); 7835 7836 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size); 7837 RTX_FRAME_RELATED_P (insn) = 1; 7838 REG_NOTES (insn) = dwarf; 7839 } 7840 7841 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern; 7842 style says which. */ 7843 7844 void 7845 riscv_expand_epilogue (int style) 7846 { 7847 /* Split the frame into 3 steps. STEP1 is the amount of stack we should 7848 deallocate before restoring the registers. STEP2 is the amount we 7849 should deallocate afterwards including the callee saved regs. STEP3 7850 is the amount deallocated by save-restore libcall. 7851 7852 Start off by assuming that no registers need to be restored. 
*/ 7853 struct riscv_frame_info *frame = &cfun->machine->frame; 7854 unsigned mask = frame->mask; 7855 unsigned fmask = frame->fmask; 7856 unsigned mask_fprs_push = 0; 7857 poly_int64 step2 = 0; 7858 bool use_multi_pop_normal 7859 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame)); 7860 bool use_multi_pop_sibcall 7861 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame)); 7862 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall; 7863 7864 bool use_restore_libcall 7865 = !use_multi_pop 7866 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); 7867 unsigned libcall_size = use_restore_libcall && !use_multi_pop 7868 ? frame->save_libcall_adjustment 7869 : 0; 7870 unsigned multipop_size 7871 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi 7872 : 0; 7873 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 7874 unsigned th_int_mask = 0; 7875 rtx insn; 7876 7877 /* We need to add memory barrier to prevent read from deallocated stack. */ 7878 bool need_barrier_p = known_ne (get_frame_size () 7879 + cfun->machine->frame.arg_pointer_offset, 0); 7880 7881 if (cfun->machine->naked_p) 7882 { 7883 gcc_assert (style == NORMAL_RETURN); 7884 7885 emit_jump_insn (gen_return ()); 7886 7887 return; 7888 } 7889 7890 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ()) 7891 { 7892 emit_jump_insn (gen_return ()); 7893 return; 7894 } 7895 7896 /* Reset the epilogue cfa info before starting to emit the epilogue. */ 7897 epilogue_cfa_sp_offset = 0; 7898 7899 /* Move past any dynamic stack allocations. */ 7900 if (cfun->calls_alloca) 7901 { 7902 /* Emit a barrier to prevent loads from a deallocated stack. 
*/ 7903 riscv_emit_stack_tie (); 7904 need_barrier_p = false; 7905 7906 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset; 7907 rtx adjust = NULL_RTX; 7908 7909 if (!adjust_offset.is_constant ()) 7910 { 7911 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode); 7912 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode); 7913 riscv_legitimize_poly_move (Pmode, tmp1, tmp2, 7914 gen_int_mode (adjust_offset, Pmode)); 7915 adjust = tmp1; 7916 } 7917 else 7918 { 7919 if (!SMALL_OPERAND (adjust_offset.to_constant ())) 7920 { 7921 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), 7922 GEN_INT (adjust_offset.to_constant ())); 7923 adjust = RISCV_PROLOGUE_TEMP (Pmode); 7924 } 7925 else 7926 adjust = GEN_INT (adjust_offset.to_constant ()); 7927 } 7928 7929 insn = emit_insn ( 7930 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, 7931 adjust)); 7932 7933 rtx dwarf = NULL_RTX; 7934 rtx cfa_adjust_value = gen_rtx_PLUS ( 7935 Pmode, hard_frame_pointer_rtx, 7936 gen_int_mode (-frame->hard_frame_pointer_offset, Pmode)); 7937 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); 7938 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); 7939 RTX_FRAME_RELATED_P (insn) = 1; 7940 7941 REG_NOTES (insn) = dwarf; 7942 } 7943 7944 if (use_restore_libcall || use_multi_pop) 7945 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */ 7946 7947 /* If we need to restore registers, deallocate as much stack as 7948 possible in the second step without going out of range. */ 7949 if (use_multi_pop) 7950 { 7951 if (frame->fmask 7952 && known_gt (frame->total_size - multipop_size, 7953 frame->frame_pointer_offset)) 7954 step2 7955 = riscv_first_stack_step (frame, frame->total_size - multipop_size); 7956 } 7957 else if ((frame->mask | frame->fmask) != 0) 7958 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); 7959 7960 if (use_restore_libcall || use_multi_pop) 7961 frame->mask = mask; /* Undo the above fib. 
*/ 7962 7963 poly_int64 step1; 7964 /* STEP1 must be set to the bottom of vector registers save area if any 7965 vector registers need be preversed. */ 7966 if (frame->vmask != 0) 7967 { 7968 step1 = frame->v_sp_offset_bottom; 7969 step2 = frame->total_size - step1 - libcall_size - multipop_size; 7970 } 7971 else 7972 step1 = frame->total_size - step2 - libcall_size - multipop_size; 7973 7974 /* Set TARGET to BASE + STEP1. */ 7975 if (known_gt (step1, 0)) 7976 { 7977 /* Emit a barrier to prevent loads from a deallocated stack. */ 7978 riscv_emit_stack_tie (); 7979 need_barrier_p = false; 7980 7981 /* Restore the scalable frame which is assigned in prologue. */ 7982 if (!step1.is_constant ()) 7983 { 7984 poly_int64 scalable_frame = step1; 7985 scalable_frame.coeffs[0] = step1.coeffs[1]; 7986 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, 7987 true); 7988 step1 -= scalable_frame; 7989 } 7990 7991 /* Get an rtx for STEP1 that we can add to BASE. 7992 Skip if adjust equal to zero. */ 7993 if (step1.to_constant () != 0) 7994 { 7995 rtx adjust = GEN_INT (step1.to_constant ()); 7996 if (!SMALL_OPERAND (step1.to_constant ())) 7997 { 7998 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust); 7999 adjust = RISCV_PROLOGUE_TEMP (Pmode); 8000 } 8001 8002 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, 8003 stack_pointer_rtx, 8004 adjust)); 8005 rtx dwarf = NULL_RTX; 8006 rtx cfa_adjust_rtx 8007 = gen_rtx_PLUS (Pmode, stack_pointer_rtx, 8008 gen_int_mode (step2 + libcall_size + multipop_size, 8009 Pmode)); 8010 8011 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); 8012 RTX_FRAME_RELATED_P (insn) = 1; 8013 8014 REG_NOTES (insn) = dwarf; 8015 } 8016 } 8017 else if (frame_pointer_needed) 8018 { 8019 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring 8020 old value of FP. */ 8021 epilogue_cfa_sp_offset = step2; 8022 } 8023 8024 if (use_multi_pop) 8025 { 8026 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. 
*/ 8027 if (fmask) 8028 { 8029 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi 8030 / UNITS_PER_WORD); 8031 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push */ 8032 } 8033 } 8034 else if (use_restore_libcall) 8035 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */ 8036 8037 th_int_mask = th_int_get_mask (frame->mask); 8038 if (th_int_mask && TH_INT_INTERRUPT (cfun)) 8039 { 8040 frame->mask &= ~th_int_mask; 8041 8042 /* RISCV_PROLOGUE_TEMP may be used to handle some CSR for 8043 interrupts, such as fcsr. */ 8044 if ((TARGET_HARD_FLOAT && frame->fmask) 8045 || (TARGET_ZFINX && frame->mask)) 8046 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM); 8047 } 8048 8049 /* Restore the registers. */ 8050 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false); 8051 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size 8052 - multipop_size, 8053 riscv_restore_reg, true, style == EXCEPTION_RETURN); 8054 8055 if (th_int_mask && TH_INT_INTERRUPT (cfun)) 8056 { 8057 frame->mask = mask; /* Undo the above fib. */ 8058 unsigned save_adjustment = th_int_get_save_adjustment (); 8059 gcc_assert (step2.to_constant () >= save_adjustment); 8060 step2 -= save_adjustment; 8061 } 8062 8063 if (use_restore_libcall) 8064 frame->mask = mask; /* Undo the above fib. */ 8065 8066 if (need_barrier_p) 8067 riscv_emit_stack_tie (); 8068 8069 /* Deallocate the final bit of the frame. */ 8070 if (step2.to_constant () > 0) 8071 { 8072 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, 8073 GEN_INT (step2.to_constant ()))); 8074 8075 rtx dwarf = NULL_RTX; 8076 rtx cfa_adjust_rtx 8077 = gen_rtx_PLUS (Pmode, stack_pointer_rtx, 8078 GEN_INT (libcall_size + multipop_size)); 8079 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); 8080 RTX_FRAME_RELATED_P (insn) = 1; 8081 8082 REG_NOTES (insn) = dwarf; 8083 } 8084 8085 if (use_multi_pop) 8086 { 8087 /* restore FPRs pushed by cm.push. 
*/ 8088 frame->fmask = fmask & mask_fprs_push; 8089 if (frame->fmask) 8090 riscv_for_each_saved_reg (frame->total_size - libcall_size 8091 - multipop_size, 8092 riscv_restore_reg, true, 8093 style == EXCEPTION_RETURN); 8094 /* Undo the above fib. */ 8095 frame->mask = mask; 8096 frame->fmask = fmask; 8097 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask, 8098 multipop_size); 8099 if (use_multi_pop_normal) 8100 return; 8101 } 8102 else if (use_restore_libcall) 8103 { 8104 rtx dwarf = riscv_adjust_libcall_cfi_epilogue (); 8105 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask)))); 8106 RTX_FRAME_RELATED_P (insn) = 1; 8107 REG_NOTES (insn) = dwarf; 8108 8109 emit_jump_insn (gen_gpr_restore_return (ra)); 8110 return; 8111 } 8112 8113 /* Add in the __builtin_eh_return stack adjustment. */ 8114 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return) 8115 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, 8116 EH_RETURN_STACKADJ_RTX)); 8117 8118 /* Return from interrupt. */ 8119 if (cfun->machine->interrupt_handler_p) 8120 { 8121 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode; 8122 8123 gcc_assert (mode != UNKNOWN_MODE); 8124 8125 if (th_int_mask && TH_INT_INTERRUPT (cfun)) 8126 emit_jump_insn (gen_th_int_pop ()); 8127 else if (mode == MACHINE_MODE) 8128 emit_jump_insn (gen_riscv_mret ()); 8129 else if (mode == SUPERVISOR_MODE) 8130 emit_jump_insn (gen_riscv_sret ()); 8131 else 8132 emit_jump_insn (gen_riscv_uret ()); 8133 } 8134 else if (style != SIBCALL_RETURN) 8135 emit_jump_insn (gen_simple_return_internal (ra)); 8136 } 8137 8138 /* Implement EPILOGUE_USES. */ 8139 8140 bool 8141 riscv_epilogue_uses (unsigned int regno) 8142 { 8143 if (regno == RETURN_ADDR_REGNUM) 8144 return true; 8145 8146 if (epilogue_completed && cfun->machine->interrupt_handler_p) 8147 { 8148 /* An interrupt function restores temp regs, so we must indicate that 8149 they are live at function end. 
*/ 8150 if (df_regs_ever_live_p (regno) 8151 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno))) 8152 return true; 8153 } 8154 8155 return false; 8156 } 8157 8158 static bool 8159 riscv_avoid_shrink_wrapping_separate () 8160 { 8161 if (riscv_use_save_libcall (&cfun->machine->frame) 8162 || cfun->machine->interrupt_handler_p 8163 || !cfun->machine->frame.gp_sp_offset.is_constant ()) 8164 return true; 8165 8166 return false; 8167 } 8168 8169 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ 8170 8171 static sbitmap 8172 riscv_get_separate_components (void) 8173 { 8174 HOST_WIDE_INT offset; 8175 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); 8176 bitmap_clear (components); 8177 8178 if (riscv_avoid_shrink_wrapping_separate ()) 8179 return components; 8180 8181 offset = cfun->machine->frame.gp_sp_offset.to_constant (); 8182 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 8183 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) 8184 { 8185 /* We can only wrap registers that have small operand offsets. 8186 For large offsets a pseudo register might be needed which 8187 cannot be created during the shrink wrapping pass. */ 8188 if (SMALL_OPERAND (offset)) 8189 bitmap_set_bit (components, regno); 8190 8191 offset -= UNITS_PER_WORD; 8192 } 8193 8194 offset = cfun->machine->frame.fp_sp_offset.to_constant (); 8195 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 8196 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) 8197 { 8198 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode; 8199 8200 /* We can only wrap registers that have small operand offsets. 8201 For large offsets a pseudo register might be needed which 8202 cannot be created during the shrink wrapping pass. */ 8203 if (SMALL_OPERAND (offset)) 8204 bitmap_set_bit (components, regno); 8205 8206 offset -= GET_MODE_SIZE (mode).to_constant (); 8207 } 8208 8209 /* Don't mess with the hard frame pointer. 
*/ 8210 if (frame_pointer_needed) 8211 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); 8212 8213 bitmap_clear_bit (components, RETURN_ADDR_REGNUM); 8214 8215 return components; 8216 } 8217 8218 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ 8219 8220 static sbitmap 8221 riscv_components_for_bb (basic_block bb) 8222 { 8223 bitmap in = DF_LIVE_IN (bb); 8224 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; 8225 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; 8226 8227 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); 8228 bitmap_clear (components); 8229 8230 function_abi_aggregator callee_abis; 8231 rtx_insn *insn; 8232 FOR_BB_INSNS (bb, insn) 8233 if (CALL_P (insn)) 8234 callee_abis.note_callee_abi (insn_callee_abi (insn)); 8235 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); 8236 8237 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ 8238 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 8239 if (!fixed_regs[regno] 8240 && !crtl->abi->clobbers_full_reg_p (regno) 8241 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) 8242 || bitmap_bit_p (in, regno) 8243 || bitmap_bit_p (gen, regno) 8244 || bitmap_bit_p (kill, regno))) 8245 bitmap_set_bit (components, regno); 8246 8247 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 8248 if (!fixed_regs[regno] 8249 && !crtl->abi->clobbers_full_reg_p (regno) 8250 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) 8251 || bitmap_bit_p (in, regno) 8252 || bitmap_bit_p (gen, regno) 8253 || bitmap_bit_p (kill, regno))) 8254 bitmap_set_bit (components, regno); 8255 8256 return components; 8257 } 8258 8259 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ 8260 8261 static void 8262 riscv_disqualify_components (sbitmap, edge, sbitmap, bool) 8263 { 8264 /* Nothing to do for riscv. 
*/ 8265 } 8266 8267 static void 8268 riscv_process_components (sbitmap components, bool prologue_p) 8269 { 8270 HOST_WIDE_INT offset; 8271 riscv_save_restore_fn fn = prologue_p? riscv_save_reg : riscv_restore_reg; 8272 8273 offset = cfun->machine->frame.gp_sp_offset.to_constant (); 8274 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 8275 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) 8276 { 8277 if (bitmap_bit_p (components, regno)) 8278 riscv_save_restore_reg (word_mode, regno, offset, fn); 8279 8280 offset -= UNITS_PER_WORD; 8281 } 8282 8283 offset = cfun->machine->frame.fp_sp_offset.to_constant (); 8284 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 8285 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) 8286 { 8287 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode; 8288 8289 if (bitmap_bit_p (components, regno)) 8290 riscv_save_restore_reg (mode, regno, offset, fn); 8291 8292 offset -= GET_MODE_SIZE (mode).to_constant (); 8293 } 8294 } 8295 8296 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ 8297 8298 static void 8299 riscv_emit_prologue_components (sbitmap components) 8300 { 8301 riscv_process_components (components, true); 8302 } 8303 8304 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. 
*/ 8305 8306 static void 8307 riscv_emit_epilogue_components (sbitmap components) 8308 { 8309 riscv_process_components (components, false); 8310 } 8311 8312 static void 8313 riscv_set_handled_components (sbitmap components) 8314 { 8315 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) 8316 if (bitmap_bit_p (components, regno)) 8317 cfun->machine->reg_is_wrapped_separately[regno] = true; 8318 8319 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 8320 if (bitmap_bit_p (components, regno)) 8321 cfun->machine->reg_is_wrapped_separately[regno] = true; 8322 } 8323 8324 /* Return nonzero if this function is known to have a null epilogue. 8325 This allows the optimizer to omit jumps to jumps if no stack 8326 was created. */ 8327 8328 bool 8329 riscv_can_use_return_insn (void) 8330 { 8331 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0) 8332 && ! cfun->machine->interrupt_handler_p); 8333 } 8334 8335 /* Given that there exists at least one variable that is set (produced) 8336 by OUT_INSN and read (consumed) by IN_INSN, return true iff 8337 IN_INSN represents one or more memory store operations and none of 8338 the variables set by OUT_INSN is used by IN_INSN as the address of a 8339 store operation. If either IN_INSN or OUT_INSN does not represent 8340 a "single" RTL SET expression (as loosely defined by the 8341 implementation of the single_set function) or a PARALLEL with only 8342 SETs, CLOBBERs, and USEs inside, this function returns false. 8343 8344 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain 8345 conditions that result in assertion failures in the generic 8346 store_data_bypass_p function and returns FALSE in such cases. 8347 8348 This is required to make -msave-restore work with the sifive-7 8349 pipeline description. 
*/

bool
riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx in_set = single_set (in_insn);

  if (in_set)
    {
      /* Single-set consumer.  Only a store whose producer is not a single
         set needs vetting; a PARALLEL producer may contain nothing but
         SETs, CLOBBERs and USEs.  */
      if (MEM_P (SET_DEST (in_set)) && !single_set (out_insn))
        {
          rtx out_pat = PATTERN (out_insn);
          if (GET_CODE (out_pat) == PARALLEL)
            for (int i = 0; i < XVECLEN (out_pat, 0); i++)
              {
                rtx_code out_code = GET_CODE (XVECEXP (out_pat, 0, i));
                if (out_code != CLOBBER && out_code != USE
                    && out_code != SET)
                  return false;
              }
        }

      return store_data_bypass_p (out_insn, in_insn);
    }

  /* Otherwise the consumer has to be a PARALLEL of SETs, CLOBBERs and
     USEs.  */
  rtx in_pat = PATTERN (in_insn);
  if (GET_CODE (in_pat) != PARALLEL)
    return false;

  for (int i = 0; i < XVECLEN (in_pat, 0); i++)
    {
      rtx in_exp = XVECEXP (in_pat, 0, i);
      rtx_code in_code = GET_CODE (in_exp);

      if (in_code == CLOBBER || in_code == USE)
        continue;
      if (in_code != SET)
        return false;

      /* Only stores whose producer is not a single set need the extra
         producer check.  */
      if (!MEM_P (SET_DEST (in_exp)) || single_set (out_insn))
        continue;

      rtx out_pat = PATTERN (out_insn);
      if (GET_CODE (out_pat) != PARALLEL)
        return false;

      for (int j = 0; j < XVECLEN (out_pat, 0); j++)
        {
          rtx_code out_code = GET_CODE (XVECEXP (out_pat, 0, j));
          if (out_code != CLOBBER && out_code != USE && out_code != SET)
            return false;
        }
    }

  return store_data_bypass_p (out_insn, in_insn);
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   When floating-point registers are wider than integer ones, moves between
   them must go through memory.
*/ 8426 8427 static bool 8428 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1, 8429 reg_class_t class2) 8430 { 8431 return (!riscv_v_ext_mode_p (mode) 8432 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD 8433 && (class1 == FP_REGS) != (class2 == FP_REGS) 8434 && !TARGET_XTHEADFMV 8435 && !TARGET_ZFA); 8436 } 8437 8438 /* Implement TARGET_REGISTER_MOVE_COST. */ 8439 8440 static int 8441 riscv_register_move_cost (machine_mode mode, 8442 reg_class_t from, reg_class_t to) 8443 { 8444 if ((from == FP_REGS && to == GR_REGS) || 8445 (from == GR_REGS && to == FP_REGS)) 8446 return tune_param->fmv_cost; 8447 8448 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2; 8449 } 8450 8451 /* Implement TARGET_HARD_REGNO_NREGS. */ 8452 8453 static unsigned int 8454 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode) 8455 { 8456 if (riscv_v_ext_vector_mode_p (mode)) 8457 { 8458 /* Handle fractional LMUL, it only occupy part of vector register but 8459 still need one vector register to hold. */ 8460 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG)) 8461 return 1; 8462 8463 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant (); 8464 } 8465 8466 /* For tuple modes, the number of register = NF * LMUL. */ 8467 if (riscv_v_ext_tuple_mode_p (mode)) 8468 { 8469 unsigned int nf = riscv_vector::get_nf (mode); 8470 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode); 8471 poly_int64 size = GET_MODE_SIZE (subpart_mode); 8472 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode))); 8473 if (maybe_lt (size, UNITS_PER_V_REG)) 8474 return nf; 8475 else 8476 { 8477 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant (); 8478 return nf * lmul; 8479 } 8480 } 8481 8482 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. 
*/ 8483 if (riscv_v_ext_vls_mode_p (mode)) 8484 { 8485 int size = GET_MODE_BITSIZE (mode).to_constant (); 8486 if (size < TARGET_MIN_VLEN) 8487 return 1; 8488 else 8489 return size / TARGET_MIN_VLEN; 8490 } 8491 8492 /* mode for VL or VTYPE are just a marker, not holding value, 8493 so it always consume one register. */ 8494 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno) 8495 || FRM_REG_P (regno)) 8496 return 1; 8497 8498 /* Assume every valid non-vector mode fits in one vector register. */ 8499 if (V_REG_P (regno)) 8500 return 1; 8501 8502 if (FP_REG_P (regno)) 8503 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG; 8504 8505 /* All other registers are word-sized. */ 8506 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 8507 } 8508 8509 /* Implement TARGET_HARD_REGNO_MODE_OK. */ 8510 8511 static bool 8512 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 8513 { 8514 unsigned int nregs = riscv_hard_regno_nregs (regno, mode); 8515 8516 if (GP_REG_P (regno)) 8517 { 8518 if (riscv_v_ext_mode_p (mode)) 8519 return false; 8520 8521 if (!GP_REG_P (regno + nregs - 1)) 8522 return false; 8523 } 8524 else if (FP_REG_P (regno)) 8525 { 8526 if (riscv_v_ext_mode_p (mode)) 8527 return false; 8528 8529 if (!FP_REG_P (regno + nregs - 1)) 8530 return false; 8531 8532 if (GET_MODE_CLASS (mode) != MODE_FLOAT 8533 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT) 8534 return false; 8535 8536 /* Only use callee-saved registers if a potential callee is guaranteed 8537 to spill the requisite width. 
*/ 8538 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG 8539 || (!call_used_or_fixed_reg_p (regno) 8540 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG)) 8541 return false; 8542 } 8543 else if (V_REG_P (regno)) 8544 { 8545 if (!riscv_v_ext_mode_p (mode)) 8546 return false; 8547 8548 if (!V_REG_P (regno + nregs - 1)) 8549 return false; 8550 8551 int regno_alignment = riscv_get_v_regno_alignment (mode); 8552 if (regno_alignment != 1) 8553 return ((regno % regno_alignment) == 0); 8554 } 8555 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno) 8556 || FRM_REG_P (regno)) 8557 return true; 8558 else 8559 return false; 8560 8561 /* Require same callee-savedness for all registers. */ 8562 for (unsigned i = 1; i < nregs; i++) 8563 if (call_used_or_fixed_reg_p (regno) 8564 != call_used_or_fixed_reg_p (regno + i)) 8565 return false; 8566 8567 /* Only use even registers in RV32 ZDINX */ 8568 if (!TARGET_64BIT && TARGET_ZDINX){ 8569 if (GET_MODE_CLASS (mode) == MODE_FLOAT && 8570 GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode)) 8571 return !(regno & 1); 8572 } 8573 8574 return true; 8575 } 8576 8577 /* Implement TARGET_MODES_TIEABLE_P. 8578 8579 Don't allow floating-point modes to be tied, since type punning of 8580 single-precision and double-precision is implementation defined. */ 8581 8582 static bool 8583 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) 8584 { 8585 /* We don't allow different REG_CLASS modes tieable since it 8586 will cause ICE in register allocation (RA). 8587 E.g. V2SI and DI are not tieable. */ 8588 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) 8589 return false; 8590 return (mode1 == mode2 8591 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT 8592 && GET_MODE_CLASS (mode2) == MODE_FLOAT)); 8593 } 8594 8595 /* Implement CLASS_MAX_NREGS. 
*/ 8596 8597 static unsigned char 8598 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode) 8599 { 8600 if (reg_class_subset_p (rclass, FP_REGS)) 8601 return riscv_hard_regno_nregs (FP_REG_FIRST, mode); 8602 8603 if (reg_class_subset_p (rclass, GR_REGS)) 8604 return riscv_hard_regno_nregs (GP_REG_FIRST, mode); 8605 8606 if (reg_class_subset_p (rclass, V_REGS)) 8607 return riscv_hard_regno_nregs (V_REG_FIRST, mode); 8608 8609 return 0; 8610 } 8611 8612 /* Implement TARGET_MEMORY_MOVE_COST. */ 8613 8614 static int 8615 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) 8616 { 8617 return (tune_param->memory_cost 8618 + memory_move_secondary_cost (mode, rclass, in)); 8619 } 8620 8621 /* Return the number of instructions that can be issued per cycle. */ 8622 8623 static int 8624 riscv_issue_rate (void) 8625 { 8626 return tune_param->issue_rate; 8627 } 8628 8629 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ 8630 static int 8631 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) 8632 { 8633 if (DEBUG_INSN_P (insn)) 8634 return more; 8635 8636 rtx_code code = GET_CODE (PATTERN (insn)); 8637 if (code == USE || code == CLOBBER) 8638 return more; 8639 8640 /* GHOST insns are used for blockage and similar cases which 8641 effectively end a cycle. */ 8642 if (get_attr_type (insn) == TYPE_GHOST) 8643 return 0; 8644 8645 /* If we ever encounter an insn with an unknown type, trip 8646 an assert so we can find and fix this problem. */ 8647 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN); 8648 8649 /* If we ever encounter an insn without an insn reservation, trip 8650 an assert so we can find and fix this problem. */ 8651 gcc_assert (insn_has_dfa_reservation_p (insn)); 8652 8653 return more - 1; 8654 } 8655 8656 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports 8657 instruction fusion of some sort. 
*/ 8658 8659 static bool 8660 riscv_macro_fusion_p (void) 8661 { 8662 return tune_param->fusible_ops != RISCV_FUSE_NOTHING; 8663 } 8664 8665 /* Return true iff the instruction fusion described by OP is enabled. */ 8666 8667 static bool 8668 riscv_fusion_enabled_p(enum riscv_fusion_pairs op) 8669 { 8670 return tune_param->fusible_ops & op; 8671 } 8672 8673 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR 8674 should be kept together during scheduling. */ 8675 8676 static bool 8677 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) 8678 { 8679 rtx prev_set = single_set (prev); 8680 rtx curr_set = single_set (curr); 8681 /* prev and curr are simple SET insns i.e. no flag setting or branching. */ 8682 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); 8683 8684 if (!riscv_macro_fusion_p ()) 8685 return false; 8686 8687 if (simple_sets_p 8688 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) 8689 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))) 8690 { 8691 /* We are trying to match the following: 8692 prev (slli) == (set (reg:DI rD) 8693 (ashift:DI (reg:DI rS) (const_int 32))) 8694 curr (slri) == (set (reg:DI rD) 8695 (lshiftrt:DI (reg:DI rD) (const_int <shift>))) 8696 with <shift> being either 32 for FUSE_ZEXTW, or 8697 `less than 32 for FUSE_ZEXTWS. 
*/ 8698 8699 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT 8700 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT 8701 && REG_P (SET_DEST (prev_set)) 8702 && REG_P (SET_DEST (curr_set)) 8703 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) 8704 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set)) 8705 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) 8706 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) 8707 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32 8708 && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32 8709 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) ) 8710 || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32 8711 && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS)))) 8712 return true; 8713 } 8714 8715 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)) 8716 { 8717 /* We are trying to match the following: 8718 prev (slli) == (set (reg:DI rD) 8719 (ashift:DI (reg:DI rS) (const_int 48))) 8720 curr (slri) == (set (reg:DI rD) 8721 (lshiftrt:DI (reg:DI rD) (const_int 48))) */ 8722 8723 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT 8724 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT 8725 && REG_P (SET_DEST (prev_set)) 8726 && REG_P (SET_DEST (curr_set)) 8727 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) 8728 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set)) 8729 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) 8730 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) 8731 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48 8732 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48) 8733 return true; 8734 } 8735 8736 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED)) 8737 { 8738 /* We are trying to match the following: 8739 prev (add) == (set (reg:DI rD) 8740 (plus:DI (reg:DI rS1) (reg:DI rS2)) 8741 curr (ld) == (set (reg:DI rD) 8742 (mem:DI (reg:DI rD))) */ 8743 8744 if (MEM_P (SET_SRC (curr_set)) 8745 && REG_P (XEXP (SET_SRC (curr_set), 0)) 8746 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set)) 8747 
&& GET_CODE (SET_SRC (prev_set)) == PLUS 8748 && REG_P (XEXP (SET_SRC (prev_set), 0)) 8749 && REG_P (XEXP (SET_SRC (prev_set), 1))) 8750 return true; 8751 8752 /* We are trying to match the following: 8753 prev (add) == (set (reg:DI rD) 8754 (plus:DI (reg:DI rS1) (reg:DI rS2))) 8755 curr (lw) == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */ 8756 8757 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND 8758 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)) 8759 && MEM_P (XEXP (SET_SRC (curr_set), 0)) 8760 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) 8761 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set)) 8762 && GET_CODE (SET_SRC (prev_set)) == PLUS 8763 && REG_P (XEXP (SET_SRC (prev_set), 0)) 8764 && REG_P (XEXP (SET_SRC (prev_set), 1))) 8765 return true; 8766 } 8767 8768 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT)) 8769 { 8770 /* We are trying to match the following: 8771 prev (add) == (set (reg:DI rS) 8772 (plus:DI (reg:DI rS) (const_int)) 8773 curr (ld) == (set (reg:DI rD) 8774 (mem:DI (reg:DI rS))) */ 8775 8776 if (MEM_P (SET_SRC (curr_set)) 8777 && REG_P (XEXP (SET_SRC (curr_set), 0)) 8778 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set)) 8779 && GET_CODE (SET_SRC (prev_set)) == PLUS 8780 && REG_P (XEXP (SET_SRC (prev_set), 0)) 8781 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))) 8782 return true; 8783 } 8784 8785 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI)) 8786 { 8787 /* We are trying to match the following: 8788 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) 8789 curr (addi) == (set (reg:DI rD) 8790 (plus:DI (reg:DI rD) (const_int IMM12))) */ 8791 8792 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM 8793 || (GET_CODE (SET_SRC (curr_set)) == PLUS 8794 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) 8795 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))) 8796 && (GET_CODE (SET_SRC (prev_set)) == HIGH 8797 || (CONST_INT_P (SET_SRC (prev_set)) 
8798 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))))) 8799 return true; 8800 } 8801 8802 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI)) 8803 { 8804 /* We are trying to match the following: 8805 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC)) 8806 curr (addi) == (set (reg:DI rD) 8807 (plus:DI (reg:DI rD) (const_int IMM12))) 8808 and 8809 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC)) 8810 curr (addi) == (set (reg:DI rD) 8811 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */ 8812 8813 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC 8814 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC 8815 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM 8816 || (GET_CODE (SET_SRC (curr_set)) == PLUS 8817 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))) 8818 8819 return true; 8820 } 8821 8822 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD)) 8823 { 8824 /* We are trying to match the following: 8825 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) 8826 curr (ld) == (set (reg:DI rD) 8827 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */ 8828 8829 if (CONST_INT_P (SET_SRC (prev_set)) 8830 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))) 8831 && MEM_P (SET_SRC (curr_set)) 8832 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) 8833 return true; 8834 8835 if (GET_CODE (SET_SRC (prev_set)) == HIGH 8836 && MEM_P (SET_SRC (curr_set)) 8837 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM 8838 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0))) 8839 return true; 8840 8841 if (GET_CODE (SET_SRC (prev_set)) == HIGH 8842 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND 8843 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND) 8844 && MEM_P (XEXP (SET_SRC (curr_set), 0)) 8845 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM 8846 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0)))) 8847 return true; 8848 } 8849 8850 if (simple_sets_p && 
riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD)) 8851 { 8852 /* We are trying to match the following: 8853 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC)) 8854 curr (ld) == (set (reg:DI rD) 8855 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */ 8856 8857 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC 8858 && XINT (prev_set, 1) == UNSPEC_AUIPC 8859 && MEM_P (SET_SRC (curr_set)) 8860 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) 8861 return true; 8862 } 8863 8864 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD)) 8865 { 8866 /* We are trying to match the following: 8867 prev (sd) == (set (mem (plus (reg sp|fp) (const_int))) 8868 (reg rS1)) 8869 curr (sd) == (set (mem (plus (reg sp|fp) (const_int))) 8870 (reg rS2)) */ 8871 8872 if (MEM_P (SET_DEST (prev_set)) 8873 && MEM_P (SET_DEST (curr_set)) 8874 /* We can probably relax this condition. The documentation is a bit 8875 unclear about sub-word cases. So we just model DImode for now. */ 8876 && GET_MODE (SET_DEST (curr_set)) == DImode 8877 && GET_MODE (SET_DEST (prev_set)) == DImode) 8878 { 8879 rtx base_prev, base_curr, offset_prev, offset_curr; 8880 8881 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev); 8882 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr); 8883 8884 /* The two stores must be contained within opposite halves of the same 8885 16 byte aligned block of memory. We know that the stack pointer and 8886 the frame pointer have suitable alignment. So we just need to check 8887 the offsets of the two stores for suitable alignment. 8888 8889 Originally the thought was to check MEM_ALIGN, but that was reporting 8890 incorrect alignments, even for SP/FP accesses, so we gave up on that 8891 approach. 
*/ 8892 if (base_prev != NULL_RTX 8893 && base_curr != NULL_RTX 8894 && REG_P (base_prev) 8895 && REG_P (base_curr) 8896 && REGNO (base_prev) == REGNO (base_curr) 8897 && (REGNO (base_prev) == STACK_POINTER_REGNUM 8898 || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM) 8899 && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8 8900 && (INTVAL (offset_prev) % 16) == 0) 8901 || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8) 8902 && (INTVAL (offset_curr) % 16) == 0))) 8903 return true; 8904 } 8905 } 8906 8907 return false; 8908 } 8909 8910 /* Adjust the cost/latency of instructions for scheduling. 8911 For now this is just used to change the latency of vector instructions 8912 according to their LMUL. We assume that an insn with LMUL == 8 requires 8913 eight times more execution cycles than the same insn with LMUL == 1. 8914 As this may cause very high latencies which lead to scheduling artifacts 8915 we currently only perform the adjustment when -madjust-lmul-cost is given. 8916 */ 8917 static int 8918 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, 8919 unsigned int) 8920 { 8921 /* Only do adjustments for the generic out-of-order scheduling model. */ 8922 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo) 8923 return cost; 8924 8925 if (recog_memoized (insn) < 0) 8926 return cost; 8927 8928 enum attr_type type = get_attr_type (insn); 8929 8930 if (type == TYPE_VFREDO || type == TYPE_VFWREDO) 8931 { 8932 /* TODO: For ordered reductions scale the base cost relative to the 8933 number of units. */ 8934 ; 8935 } 8936 8937 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */ 8938 if (!TARGET_ADJUST_LMUL_COST) 8939 return cost; 8940 8941 /* vsetvl has a vlmul attribute but its latency does not depend on it. 
*/ 8942 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE) 8943 return cost; 8944 8945 enum riscv_vector::vlmul_type lmul = 8946 (riscv_vector::vlmul_type)get_attr_vlmul (insn); 8947 8948 double factor = 1; 8949 switch (lmul) 8950 { 8951 case riscv_vector::LMUL_2: 8952 factor = 2; 8953 break; 8954 case riscv_vector::LMUL_4: 8955 factor = 4; 8956 break; 8957 case riscv_vector::LMUL_8: 8958 factor = 8; 8959 break; 8960 case riscv_vector::LMUL_F2: 8961 factor = 0.5; 8962 break; 8963 case riscv_vector::LMUL_F4: 8964 factor = 0.25; 8965 break; 8966 case riscv_vector::LMUL_F8: 8967 factor = 0.125; 8968 break; 8969 default: 8970 factor = 1; 8971 } 8972 8973 /* If the latency was nonzero, keep it that way. */ 8974 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor); 8975 8976 return new_cost; 8977 } 8978 8979 /* Auxiliary function to emit RISC-V ELF attribute. */ 8980 static void 8981 riscv_emit_attribute () 8982 { 8983 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n", 8984 riscv_arch_str ().c_str ()); 8985 8986 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n", 8987 TARGET_STRICT_ALIGN ? 0 : 1); 8988 8989 fprintf (asm_out_file, "\t.attribute stack_align, %d\n", 8990 riscv_stack_boundary / 8); 8991 } 8992 8993 /* Output .variant_cc for function symbol which follows vector calling 8994 convention. */ 8995 8996 static void 8997 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name) 8998 { 8999 if (TREE_CODE (decl) == FUNCTION_DECL) 9000 { 9001 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id (); 9002 if (cc == RISCV_CC_V) 9003 { 9004 fprintf (stream, "\t.variant_cc\t"); 9005 assemble_name (stream, name); 9006 fprintf (stream, "\n"); 9007 } 9008 } 9009 } 9010 9011 /* Implement ASM_DECLARE_FUNCTION_NAME. 
*/ 9012 9013 void 9014 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl) 9015 { 9016 riscv_asm_output_variant_cc (stream, fndecl, name); 9017 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); 9018 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl); 9019 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl)) 9020 { 9021 fprintf (stream, "\t.option push\n"); 9022 9023 struct cl_target_option *local_cl_target = 9024 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl)); 9025 struct cl_target_option *global_cl_target = 9026 TREE_TARGET_OPTION (target_option_default_node); 9027 9028 const char *local_arch_str = get_arch_str (local_cl_target); 9029 const char *arch_str = local_arch_str != NULL 9030 ? local_arch_str 9031 : riscv_arch_str (true).c_str (); 9032 fprintf (stream, "\t.option arch, %s\n", arch_str); 9033 const char *local_tune_str = get_tune_str (local_cl_target); 9034 const char *global_tune_str = get_tune_str (global_cl_target); 9035 if (strcmp (local_tune_str, global_tune_str) != 0) 9036 fprintf (stream, "\t# tune = %s\n", local_tune_str); 9037 } 9038 } 9039 9040 void 9041 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl) 9042 { 9043 if (!flag_inhibit_size_directive) 9044 ASM_OUTPUT_MEASURED_SIZE (stream, name); 9045 9046 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl)) 9047 { 9048 fprintf (stream, "\t.option pop\n"); 9049 } 9050 } 9051 9052 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */ 9053 9054 void 9055 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target) 9056 { 9057 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 9058 const char *value = IDENTIFIER_POINTER (target); 9059 riscv_asm_output_variant_cc (stream, decl, name); 9060 ASM_OUTPUT_DEF (stream, name, value); 9061 } 9062 9063 /* Implement ASM_OUTPUT_EXTERNAL. 
*/ 9064 9065 void 9066 riscv_asm_output_external (FILE *stream, tree decl, const char *name) 9067 { 9068 default_elf_asm_output_external (stream, decl, name); 9069 riscv_asm_output_variant_cc (stream, decl, name); 9070 } 9071 9072 /* Implement TARGET_ASM_FILE_START. */ 9073 9074 static void 9075 riscv_file_start (void) 9076 { 9077 default_file_start (); 9078 9079 /* Instruct GAS to generate position-[in]dependent code. */ 9080 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no")); 9081 9082 /* If the user specifies "-mno-relax" on the command line then disable linker 9083 relaxation in the assembler. */ 9084 if (! riscv_mrelax) 9085 fprintf (asm_out_file, "\t.option norelax\n"); 9086 9087 /* If the user specifies "-mcsr-check" on the command line then enable csr 9088 check in the assembler. */ 9089 if (riscv_mcsr_check) 9090 fprintf (asm_out_file, "\t.option csr-check\n"); 9091 9092 if (riscv_emit_attribute_p) 9093 riscv_emit_attribute (); 9094 } 9095 9096 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text 9097 in order to avoid duplicating too much logic from elsewhere. */ 9098 9099 static void 9100 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 9101 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 9102 tree function) 9103 { 9104 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); 9105 rtx this_rtx, temp1, temp2, fnaddr; 9106 rtx_insn *insn; 9107 9108 riscv_in_thunk_func = true; 9109 9110 /* Pretend to be a post-reload pass while generating rtl. */ 9111 reload_completed = 1; 9112 9113 /* Mark the end of the (empty) prologue. */ 9114 emit_note (NOTE_INSN_PROLOGUE_END); 9115 9116 /* Determine if we can use a sibcall to call FUNCTION directly. */ 9117 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0)); 9118 9119 /* We need two temporary registers in some cases. 
*/ 9120 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM); 9121 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); 9122 9123 /* Find out which register contains the "this" pointer. */ 9124 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 9125 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); 9126 else 9127 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); 9128 9129 /* Add DELTA to THIS_RTX. */ 9130 if (delta != 0) 9131 { 9132 rtx offset = GEN_INT (delta); 9133 if (!SMALL_OPERAND (delta)) 9134 { 9135 riscv_emit_move (temp1, offset); 9136 offset = temp1; 9137 } 9138 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); 9139 } 9140 9141 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ 9142 if (vcall_offset != 0) 9143 { 9144 rtx addr; 9145 9146 /* Set TEMP1 to *THIS_RTX. */ 9147 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); 9148 9149 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ 9150 addr = riscv_add_offset (temp2, temp1, vcall_offset); 9151 9152 /* Load the offset and add it to THIS_RTX. */ 9153 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); 9154 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); 9155 } 9156 9157 /* Jump to the target function. */ 9158 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode); 9159 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc)); 9160 SIBLING_CALL_P (insn) = 1; 9161 9162 /* Run just enough of rest_of_compilation. This sequence was 9163 "borrowed" from alpha.cc. */ 9164 insn = get_insns (); 9165 split_all_insns_noflow (); 9166 shorten_branches (insn); 9167 assemble_start_function (thunk_fndecl, fnname); 9168 final_start_function (insn, file, 1); 9169 final (insn, file, 1); 9170 final_end_function (); 9171 assemble_end_function (thunk_fndecl, fnname); 9172 9173 /* Clean up the vars set above. Note that final_end_function resets 9174 the global pointer for us. 
*/ 9175 reload_completed = 0; 9176 riscv_in_thunk_func = false; 9177 } 9178 9179 /* Allocate a chunk of memory for per-function machine-dependent data. */ 9180 9181 static struct machine_function * 9182 riscv_init_machine_status (void) 9183 { 9184 return ggc_cleared_alloc<machine_function> (); 9185 } 9186 9187 /* Return the VLEN value associated with -march and -mwrvv-vector-bits. 9188 TODO: So far we only support length-agnostic value. */ 9189 static poly_uint16 9190 riscv_convert_vector_chunks (struct gcc_options *opts) 9191 { 9192 int chunk_num; 9193 int min_vlen = TARGET_MIN_VLEN_OPTS (opts); 9194 if (min_vlen > 32) 9195 { 9196 /* When targetting minimum VLEN > 32, we should use 64-bit chunk size. 9197 Otherwise we can not include SEW = 64bits. 9198 Runtime invariant: The single indeterminate represent the 9199 number of 64-bit chunks in a vector beyond minimum length of 64 bits. 9200 Thus the number of bytes in a vector is 8 + 8 * x1 which is 9201 riscv_vector_chunks * 8 = poly_int (8, 8). */ 9202 riscv_bytes_per_vector_chunk = 8; 9203 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN: 9204 - TARGET_MIN_VLEN = 64bit: [8,8] 9205 - TARGET_MIN_VLEN = 128bit: [16,16] 9206 - TARGET_MIN_VLEN = 256bit: [32,32] 9207 - TARGET_MIN_VLEN = 512bit: [64,64] 9208 - TARGET_MIN_VLEN = 1024bit: [128,128] 9209 - TARGET_MIN_VLEN = 2048bit: [256,256] 9210 - TARGET_MIN_VLEN = 4096bit: [512,512] 9211 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */ 9212 chunk_num = min_vlen / 64; 9213 } 9214 else 9215 { 9216 /* When targetting minimum VLEN = 32, we should use 32-bit 9217 chunk size. Runtime invariant: The single indeterminate represent the 9218 number of 32-bit chunks in a vector beyond minimum length of 32 bits. 9219 Thus the number of bytes in a vector is 4 + 4 * x1 which is 9220 riscv_vector_chunks * 4 = poly_int (4, 4). 
*/ 9221 riscv_bytes_per_vector_chunk = 4; 9222 chunk_num = 1; 9223 } 9224 9225 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR 9226 is enabled. Set riscv_vector_chunks as 1 compile-time constant if 9227 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def" 9228 to set RVV mode size. The RVV machine modes size are run-time constant if 9229 TARGET_VECTOR is enabled. The RVV machine modes size remains default 9230 compile-time constant if TARGET_VECTOR is disabled. */ 9231 if (TARGET_VECTOR_OPTS_P (opts)) 9232 { 9233 switch (opts->x_rvv_vector_bits) 9234 { 9235 case RVV_VECTOR_BITS_SCALABLE: 9236 return poly_uint16 (chunk_num, chunk_num); 9237 case RVV_VECTOR_BITS_ZVL: 9238 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8); 9239 default: 9240 gcc_unreachable (); 9241 } 9242 } 9243 else 9244 return 1; 9245 } 9246 9247 /* 'Unpack' up the internal tuning structs and update the options 9248 in OPTS. The caller must have set up selected_tune and selected_arch 9249 as all the other target-specific codegen decisions are 9250 derived from them. */ 9251 void 9252 riscv_override_options_internal (struct gcc_options *opts) 9253 { 9254 const struct riscv_tune_info *cpu; 9255 9256 /* The presence of the M extension implies that division instructions 9257 are present, so include them unless explicitly disabled. */ 9258 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0) 9259 opts->x_target_flags |= MASK_DIV; 9260 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts)) 9261 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension"); 9262 9263 /* We might use a multiplication to calculate the scalable vector length at 9264 runtime. Therefore, require the M extension. */ 9265 if (TARGET_VECTOR && !TARGET_MUL) 9266 sorry ("GCC's current %<V%> implementation requires the %<M%> extension"); 9267 9268 /* Likewise floating-point division and square root. 
*/ 9269 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts)) 9270 && ((target_flags_explicit & MASK_FDIV) == 0)) 9271 opts->x_target_flags |= MASK_FDIV; 9272 9273 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune 9274 if both -mtune and -mcpu are not given. */ 9275 const char *tune_string = get_tune_str (opts); 9276 cpu = riscv_parse_tune (tune_string, false); 9277 riscv_microarchitecture = cpu->microarchitecture; 9278 tune_param = opts->x_optimize_size 9279 ? &optimize_size_tune_info 9280 : cpu->tune_param; 9281 9282 /* Use -mtune's setting for slow_unaligned_access, even when optimizing 9283 for size. For architectures that trap and emulate unaligned accesses, 9284 the performance cost is too great, even for -Os. Similarly, if 9285 -m[no-]strict-align is left unspecified, heed -mtune's advice. */ 9286 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access 9287 || TARGET_STRICT_ALIGN); 9288 9289 /* Make a note if user explicity passed -mstrict-align for later 9290 builtin macro generation. Can't use target_flags_explicitly since 9291 it is set even for -mno-strict-align. */ 9292 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts); 9293 9294 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0 9295 && cpu->tune_param->slow_unaligned_access) 9296 opts->x_target_flags |= MASK_STRICT_ALIGN; 9297 9298 /* If the user hasn't specified a branch cost, use the processor's 9299 default. */ 9300 if (opts->x_riscv_branch_cost == 0) 9301 opts->x_riscv_branch_cost = tune_param->branch_cost; 9302 9303 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of 9304 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16. 9305 9306 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. 
*/ 9307 if (TARGET_MIN_VLEN_OPTS (opts) > 4096) 9308 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for " 9309 "'V' Extension"); 9310 9311 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with 9312 big-endian after finishing full coverage testing. */ 9313 if (TARGET_VECTOR && TARGET_BIG_ENDIAN) 9314 sorry ("Current RISC-V GCC does not support RVV in big-endian mode"); 9315 9316 /* Convert -march and -mrvv-vector-bits to a chunks count. */ 9317 riscv_vector_chunks = riscv_convert_vector_chunks (opts); 9318 } 9319 9320 /* Implement TARGET_OPTION_OVERRIDE. */ 9321 9322 void 9323 riscv_option_override (void) 9324 { 9325 #ifdef SUBTARGET_OVERRIDE_OPTIONS 9326 SUBTARGET_OVERRIDE_OPTIONS; 9327 #endif 9328 9329 flag_pcc_struct_return = 0; 9330 9331 if (flag_pic) 9332 g_switch_value = 0; 9333 9334 /* Always prefer medlow than medany for RV32 since medlow can access 9335 full address space. */ 9336 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT) 9337 riscv_cmodel = CM_MEDLOW; 9338 9339 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS) 9340 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs"); 9341 9342 if (riscv_cmodel == CM_LARGE && flag_pic) 9343 sorry ("code model %qs with %qs", "large", 9344 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic"); 9345 9346 if (flag_pic) 9347 riscv_cmodel = CM_PIC; 9348 9349 /* We need to save the fp with ra for non-leaf functions with no fp and ra 9350 for leaf functions while no-omit-frame-pointer with 9351 omit-leaf-frame-pointer. The x_flag_omit_frame_pointer has the first 9352 priority to determine whether the frame pointer is needed. If we do not 9353 override it, the fp and ra will be stored for leaf functions, which is not 9354 our wanted. 
*/ 9355 riscv_save_frame_pointer = false; 9356 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags)) 9357 { 9358 if (!global_options.x_flag_omit_frame_pointer) 9359 riscv_save_frame_pointer = true; 9360 9361 global_options.x_flag_omit_frame_pointer = 1; 9362 } 9363 9364 /* We get better code with explicit relocs for CM_MEDLOW, but 9365 worse code for the others (for now). Pick the best default. */ 9366 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0) 9367 if (riscv_cmodel == CM_MEDLOW) 9368 target_flags |= MASK_EXPLICIT_RELOCS; 9369 9370 /* Require that the ISA supports the requested floating-point ABI. */ 9371 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0)) 9372 error ("requested ABI requires %<-march%> to subsume the %qc extension", 9373 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F')); 9374 9375 /* RVE requires specific ABI. */ 9376 if (TARGET_RVE) 9377 { 9378 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E) 9379 error ("rv32e requires ilp32e ABI"); 9380 else if (TARGET_64BIT && riscv_abi != ABI_LP64E) 9381 error ("rv64e requires lp64e ABI"); 9382 } 9383 9384 /* ILP32E does not support the 'd' extension. */ 9385 if (riscv_abi == ABI_ILP32E && UNITS_PER_FP_REG > 4) 9386 error ("ILP32E ABI does not support the %qc extension", 9387 UNITS_PER_FP_REG > 8 ? 'Q' : 'D'); 9388 9389 /* Zfinx require abi ilp32, ilp32e, lp64 or lp64e. */ 9390 if (TARGET_ZFINX 9391 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64 9392 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E) 9393 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e"); 9394 9395 /* We do not yet support ILP32 on RV64. */ 9396 if (BITS_PER_WORD != POINTER_SIZE) 9397 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE); 9398 9399 /* Validate -mpreferred-stack-boundary= value. 
*/ 9400 riscv_stack_boundary = ABI_STACK_BOUNDARY; 9401 if (riscv_preferred_stack_boundary_arg) 9402 { 9403 int min = ctz_hwi (STACK_BOUNDARY / 8); 9404 int max = 8; 9405 9406 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max)) 9407 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d", 9408 riscv_preferred_stack_boundary_arg, min, max); 9409 9410 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg; 9411 } 9412 9413 if (riscv_emit_attribute_p < 0) 9414 #ifdef HAVE_AS_RISCV_ATTRIBUTE 9415 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE; 9416 #else 9417 riscv_emit_attribute_p = 0; 9418 9419 if (riscv_emit_attribute_p) 9420 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32" 9421 " [%<-mriscv-attribute%>]"); 9422 #endif 9423 9424 if (riscv_stack_protector_guard == SSP_GLOBAL 9425 && OPTION_SET_P (riscv_stack_protector_guard_offset_str)) 9426 { 9427 error ("incompatible options %<-mstack-protector-guard=global%> and " 9428 "%<-mstack-protector-guard-offset=%s%>", 9429 riscv_stack_protector_guard_offset_str); 9430 } 9431 9432 if (riscv_stack_protector_guard == SSP_TLS 9433 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str) 9434 && OPTION_SET_P (riscv_stack_protector_guard_reg_str))) 9435 { 9436 error ("both %<-mstack-protector-guard-offset%> and " 9437 "%<-mstack-protector-guard-reg%> must be used " 9438 "with %<-mstack-protector-guard=sysreg%>"); 9439 } 9440 9441 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str)) 9442 { 9443 const char *str = riscv_stack_protector_guard_reg_str; 9444 int reg = decode_reg_name (str); 9445 9446 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST)) 9447 error ("%qs is not a valid base register in %qs", str, 9448 "-mstack-protector-guard-reg="); 9449 9450 riscv_stack_protector_guard_reg = reg; 9451 } 9452 9453 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str)) 9454 { 9455 char *end; 9456 const char *str = riscv_stack_protector_guard_offset_str; 9457 errno = 0; 9458 
long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0); 9459 9460 if (!*str || *end || errno) 9461 error ("%qs is not a valid number in %qs", str, 9462 "-mstack-protector-guard-offset="); 9463 9464 if (!SMALL_OPERAND (offs)) 9465 error ("%qs is not a valid offset in %qs", str, 9466 "-mstack-protector-guard-offset="); 9467 9468 riscv_stack_protector_guard_offset = offs; 9469 } 9470 9471 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 9472 param_sched_pressure_algorithm, 9473 SCHED_PRESSURE_MODEL); 9474 9475 /* Function to allocate machine-dependent function status. */ 9476 init_machine_status = &riscv_init_machine_status; 9477 9478 riscv_override_options_internal (&global_options); 9479 9480 /* Save these options as the default ones in case we push and pop them later 9481 while processing functions with potential target attributes. */ 9482 target_option_default_node = target_option_current_node 9483 = build_target_option_node (&global_options, &global_options_set); 9484 } 9485 9486 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. 9487 Used by riscv_set_current_function to 9488 make sure optab availability predicates are recomputed when necessary. */ 9489 9490 void 9491 riscv_save_restore_target_globals (tree new_tree) 9492 { 9493 if (TREE_TARGET_GLOBALS (new_tree)) 9494 restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); 9495 else if (new_tree == target_option_default_node) 9496 restore_target_globals (&default_target_globals); 9497 else 9498 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); 9499 } 9500 9501 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions 9502 using the information saved in PTR. 
*/ 9503 9504 static void 9505 riscv_option_restore (struct gcc_options *opts, 9506 struct gcc_options * /* opts_set */, 9507 struct cl_target_option * /* ptr */) 9508 { 9509 riscv_override_options_internal (opts); 9510 } 9511 9512 static GTY (()) tree riscv_previous_fndecl; 9513 9514 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ 9515 9516 static void 9517 riscv_conditional_register_usage (void) 9518 { 9519 /* We have only x0~x15 on RV32E/RV64E. */ 9520 if (TARGET_RVE) 9521 { 9522 for (int r = 16; r <= 31; r++) 9523 fixed_regs[r] = 1; 9524 } 9525 9526 if (riscv_abi == ABI_ILP32E) 9527 { 9528 for (int r = 16; r <= 31; r++) 9529 call_used_regs[r] = 1; 9530 } 9531 9532 if (!TARGET_HARD_FLOAT) 9533 { 9534 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 9535 fixed_regs[regno] = call_used_regs[regno] = 1; 9536 } 9537 9538 /* In the soft-float ABI, there are no callee-saved FP registers. */ 9539 if (UNITS_PER_FP_ARG == 0) 9540 { 9541 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) 9542 call_used_regs[regno] = 1; 9543 } 9544 9545 if (!TARGET_VECTOR) 9546 { 9547 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++) 9548 fixed_regs[regno] = call_used_regs[regno] = 1; 9549 9550 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1; 9551 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1; 9552 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1; 9553 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1; 9554 } 9555 } 9556 9557 /* Return a register priority for hard reg REGNO. */ 9558 9559 static int 9560 riscv_register_priority (int regno) 9561 { 9562 /* Favor compressed registers to improve the odds of RVC instruction 9563 selection. */ 9564 if (riscv_compressed_reg_p (regno)) 9565 return 1; 9566 9567 return 0; 9568 } 9569 9570 /* Implement TARGET_TRAMPOLINE_INIT. 
     Emit RTL that fills in the trampoline at M_TRAMP so that it loads
     CHAIN_VALUE into the static chain register and jumps to FNDECL.  When
     Pmode is SImode both values are patched into the immediates of a
     lui/lui/addi/jr sequence at run time; otherwise a fixed
     auipc/load/load/jr sequence is stored and the two pointers are placed
     in data words following the code.  */

static void
riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx addr, end_addr, mem;
  uint32_t trampoline[4];
  unsigned int i;
  HOST_WIDE_INT static_chain_offset, target_function_offset;

  /* The code part of the trampoline is exactly the four instruction words
     held in TRAMPOLINE.  */
  gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);

  /* Get pointers to the beginning and end of the code block.  */
  addr = force_reg (Pmode, XEXP (m_tramp, 0));
  end_addr = riscv_force_binary (Pmode, PLUS, addr,
                                 GEN_INT (TRAMPOLINE_CODE_SIZE));


  if (Pmode == SImode)
    {
      chain_value = force_reg (Pmode, chain_value);

      rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
      /* lui     t2, hi(chain)
         lui     t0, hi(func)
         addi    t2, t2, lo(chain)
         jr      t0, lo(func)
      */
      unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
      unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;

      /* Mask selecting the bits above the low 12-bit immediate.  */
      rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));

      /* 0xfff.  */
      rtx imm12_mask = gen_reg_rtx (SImode);
      emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));

      /* Added before masking so that the high part is rounded; this
         compensates for the low 12 bits being sign-extended by
         addi/jr.  */
      rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));

      /* Gen lui t2, hi(chain).  */
      rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
                                         fixup_value);
      hi_chain = riscv_force_binary (SImode, AND, hi_chain,
                                     uimm_mask);
      lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
      rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
                                             gen_int_mode (lui_hi_chain_code, SImode));

      mem = adjust_address (m_tramp, SImode, 0);
      riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));

      /* Gen lui t0, hi(func).  */
      rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
                                        fixup_value);
      hi_func = riscv_force_binary (SImode, AND, hi_func,
                                    uimm_mask);
      lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
      rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
                                            gen_int_mode (lui_hi_func_code, SImode));

      mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));

      /* Gen addi t2, t2, lo(chain).  The low 12 bits are shifted into the
         I-type immediate field (bits 31:20).  */
      rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
                                         imm12_mask);
      lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));

      lo_chain_code = OPCODE_ADDI
                      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
                      | (STATIC_CHAIN_REGNUM << SHIFT_RS1);

      rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
                                              force_reg (SImode, GEN_INT (lo_chain_code)));

      mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));

      /* Gen jr t0, lo(func).  */
      rtx lo_func = riscv_force_binary (SImode, AND, target_function,
                                        imm12_mask);
      lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));

      lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);

      rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
                                           force_reg (SImode, GEN_INT (lo_func_code)));

      mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
    }
  else
    {
      /* The two pointer-sized data words follow the code directly.  */
      static_chain_offset = TRAMPOLINE_CODE_SIZE;
      target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);

      /* auipc   t2, 0
         l[wd]   t0, target_function_offset(t2)
         l[wd]   t2, static_chain_offset(t2)
         jr      t0
      */
      trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
      trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
                      | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
                      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
                      | (target_function_offset << SHIFT_IMM);
      trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
                      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
                      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
                      | (static_chain_offset << SHIFT_IMM);
      trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);

      /* Copy the trampoline code.  */
      for (i = 0; i < ARRAY_SIZE (trampoline); i++)
        {
          if (BYTES_BIG_ENDIAN)
            trampoline[i] = __builtin_bswap32(trampoline[i]);
          mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
          riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
        }

      /* Set up the static chain pointer field.  */
      mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
      riscv_emit_move (mem, chain_value);

      /* Set up the target function field.  */
      mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
      riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
    }

  /* Flush the code part of the trampoline.  END_ADDR is recomputed here
     from TRAMPOLINE_SIZE, replacing the value calculated above.  */
  emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
  emit_insn (gen_clear_cache (addr, end_addr));
}

/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */

static bool
riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                               tree exp ATTRIBUTE_UNUSED)
{
  /* Don't use sibcalls when use save-restore routine.  */
  if (TARGET_SAVE_RESTORE)
    return false;

  /* Don't use sibcall for naked functions.  */
  if (cfun->machine->naked_p)
    return false;

  /* Don't use sibcall for interrupt functions.  */
  if (cfun->machine->interrupt_handler_p)
    return false;

  /* Don't use sibcalls in the large model, because a sibcall instruction
     expanding and a epilogue expanding both use RISCV_PROLOGUE_TEMP
     register.  */
  if (riscv_cmodel == CM_LARGE)
    return false;

  return true;
}

/* Get the interrupt type, return UNKNOWN_MODE if it's not
   interrupt function.  DECL must not be NULL_TREE.  */
static enum riscv_privilege_levels
riscv_get_interrupt_type (tree decl)
{
  gcc_assert (decl != NULL_TREE);

  if ((TREE_CODE(decl) != FUNCTION_DECL)
      || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
    return UNKNOWN_MODE;

  tree attr_args
    = TREE_VALUE (lookup_attribute ("interrupt",
                                    TYPE_ATTRIBUTES (TREE_TYPE (decl))));

  if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
    {
      const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));

      if (!strcmp (string, "user"))
        return USER_MODE;
      else if (!strcmp (string, "supervisor"))
        return SUPERVISOR_MODE;
      else /* Must be "machine".  */
        return MACHINE_MODE;
    }
  else
    /* Interrupt attributes are machine mode by default.  */
    return MACHINE_MODE;
}

/* Implement `TARGET_SET_CURRENT_FUNCTION'.  Unpack the codegen decisions
   like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
   of the function, if such exists.  This function may be called multiple
   times on a single function so use riscv_previous_fndecl to avoid
   setting up identical state.  */

/* The first time it sees a function this also sanity-checks its "naked"
   and "interrupt" attributes and caches the result in cfun->machine.  */
static void
riscv_set_current_function (tree decl)
{
  if (decl == NULL_TREE
      || current_function_decl == NULL_TREE
      || current_function_decl == error_mark_node
      || ! cfun->machine)
    return;

  if (!cfun->machine->attributes_checked_p)
    {
      cfun->machine->naked_p = riscv_naked_function_p (decl);
      cfun->machine->interrupt_handler_p
        = riscv_interrupt_type_p (TREE_TYPE (decl));

      if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
        error ("function attributes %qs and %qs are mutually exclusive",
               "interrupt", "naked");

      if (cfun->machine->interrupt_handler_p)
        {
          tree ret = TREE_TYPE (TREE_TYPE (decl));
          tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));

          if (TREE_CODE (ret) != VOID_TYPE)
            error ("%qs function cannot return a value", "interrupt");

          if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
            error ("%qs function cannot have arguments", "interrupt");

          cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);

          gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
        }

      /* Don't print the above diagnostics more than once.  */
      cfun->machine->attributes_checked_p = 1;
    }

  /* DECL is already known to be non-null here; only the comparison with
     the previously processed function can trigger.  */
  if (!decl || decl == riscv_previous_fndecl)
    return;

  tree old_tree = (riscv_previous_fndecl
                     ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
                     : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  /* If current function has no attributes but the previous one did,
     use the default node.  */
  if (!new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default are presumably handled through
     riscv_save_restore_target_globals by the pragma parsing code
     (NOTE(review): this comment used to name the aarch64 functions;
     confirm the RISC-V caller).  */
  if (old_tree == new_tree)
    return;

  riscv_previous_fndecl = decl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));

  /* The ISA extension can vary based on the function extension like target.
     Thus, make sure that the machine modes are reflected correctly here.  */
  init_adjust_machine_modes ();

  riscv_save_restore_target_globals (new_tree);
}

/* Implement TARGET_MERGE_DECL_ATTRIBUTES.  Diagnose two declarations of
   the same function that carry different interrupt types, then return the
   merged attribute list of OLDDECL and NEWDECL.  */
static tree
riscv_merge_decl_attributes (tree olddecl, tree newdecl)
{
  tree combined_attrs;

  enum riscv_privilege_levels old_interrupt_type
    = riscv_get_interrupt_type (olddecl);
  enum riscv_privilege_levels new_interrupt_type
    = riscv_get_interrupt_type (newdecl);

  /* Check old and new has same interrupt type.  */
  if ((old_interrupt_type != UNKNOWN_MODE)
      && (new_interrupt_type != UNKNOWN_MODE)
      && (old_interrupt_type != new_interrupt_type))
    error ("%qs function cannot have different interrupt type", "interrupt");

  /* Create combined attributes.  */
  combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
                                     DECL_ATTRIBUTES (newdecl));

  return combined_attrs;
}

/* Implement TARGET_CANNOT_COPY_INSN_P.
*/ 9869 9870 static bool 9871 riscv_cannot_copy_insn_p (rtx_insn *insn) 9872 { 9873 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn); 9874 } 9875 9876 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */ 9877 9878 static bool 9879 riscv_slow_unaligned_access (machine_mode, unsigned int) 9880 { 9881 return riscv_slow_unaligned_access_p; 9882 } 9883 9884 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 9885 9886 static bool 9887 riscv_can_change_mode_class (machine_mode from, machine_mode to, 9888 reg_class_t rclass) 9889 { 9890 /* We have RVV VLS modes and VLA modes sharing same REG_CLASS. 9891 In 'cprop_hardreg' stage, we will try to do hard reg copy propagation 9892 between wider mode (FROM) and narrow mode (TO). 9893 9894 E.g. We should not allow copy propagation 9895 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0]) 9896 since we can't order their size which will cause ICE in regcprop. 9897 9898 TODO: Even though they are have different size, they always change 9899 the whole register. We may enhance such case in regcprop to optimize 9900 it in the future. */ 9901 if (reg_classes_intersect_p (V_REGS, rclass) 9902 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to))) 9903 return false; 9904 9905 /* Subregs of modes larger than one vector are ambiguous. 9906 A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one 9907 register group of two at VLEN = 128 or one register at VLEN >= 256 and 9908 we cannot, statically, determine which part of it to extract. 9909 Therefore prevent that. */ 9910 if (reg_classes_intersect_p (V_REGS, rclass) 9911 && riscv_v_ext_vls_mode_p (from) 9912 && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from))) 9913 return false; 9914 9915 return !reg_classes_intersect_p (FP_REGS, rclass); 9916 } 9917 9918 /* Implement TARGET_CONSTANT_ALIGNMENT. 
*/ 9919 9920 static HOST_WIDE_INT 9921 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align) 9922 { 9923 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) 9924 && (riscv_align_data_type == riscv_align_data_type_xlen)) 9925 return MAX (align, BITS_PER_WORD); 9926 return align; 9927 } 9928 9929 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */ 9930 9931 /* This function is equivalent to default_promote_function_mode_always_promote 9932 except that it returns a promoted mode even if type is NULL_TREE. This is 9933 needed by libcalls which have no type (only a mode) such as fixed conversion 9934 routines that take a signed or unsigned char/short/int argument and convert 9935 it to a fixed type. */ 9936 9937 static machine_mode 9938 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 9939 machine_mode mode, 9940 int *punsignedp ATTRIBUTE_UNUSED, 9941 const_tree fntype ATTRIBUTE_UNUSED, 9942 int for_return ATTRIBUTE_UNUSED) 9943 { 9944 int unsignedp; 9945 9946 if (type != NULL_TREE) 9947 return promote_mode (type, mode, punsignedp); 9948 9949 unsignedp = *punsignedp; 9950 scalar_mode smode = as_a <scalar_mode> (mode); 9951 PROMOTE_MODE (smode, unsignedp, type); 9952 *punsignedp = unsignedp; 9953 return smode; 9954 } 9955 9956 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */ 9957 9958 static void 9959 riscv_reorg (void) 9960 { 9961 /* Do nothing unless we have -msave-restore */ 9962 if (TARGET_SAVE_RESTORE) 9963 riscv_remove_unneeded_save_restore_calls (); 9964 } 9965 9966 /* Return nonzero if register FROM_REGNO can be renamed to register 9967 TO_REGNO. */ 9968 9969 bool 9970 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED, 9971 unsigned to_regno) 9972 { 9973 /* Interrupt functions can only use registers that have already been 9974 saved by the prologue, even if they would normally be 9975 call-clobbered. 
*/ 9976 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno); 9977 } 9978 9979 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */ 9980 9981 bool 9982 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr) 9983 { 9984 /* Prefer old address if it is less expensive. */ 9985 addr_space_t as = MEM_ADDR_SPACE (memref); 9986 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); 9987 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed); 9988 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed); 9989 return new_cost <= old_cost; 9990 } 9991 9992 /* Helper function for generating gpr_save pattern. */ 9993 9994 rtx 9995 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame) 9996 { 9997 unsigned count = riscv_save_libcall_count (frame->mask); 9998 /* 1 for unspec 2 for clobber t0/t1 and 1 for ra. */ 9999 unsigned veclen = 1 + 2 + 1 + count; 10000 rtvec vec = rtvec_alloc (veclen); 10001 10002 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order)); 10003 10004 RTVEC_ELT (vec, 0) = 10005 gen_rtx_UNSPEC_VOLATILE (VOIDmode, 10006 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE); 10007 10008 for (unsigned i = 1; i < veclen; ++i) 10009 { 10010 unsigned regno = gpr_save_reg_order[i]; 10011 rtx reg = gen_rtx_REG (Pmode, regno); 10012 rtx elt; 10013 10014 /* t0 and t1 are CLOBBERs, others are USEs. */ 10015 if (i < 3) 10016 elt = gen_rtx_CLOBBER (Pmode, reg); 10017 else 10018 elt = gen_rtx_USE (Pmode, reg); 10019 10020 RTVEC_ELT (vec, i) = elt; 10021 } 10022 10023 /* Largest number of caller-save register must set in mask if we are 10024 not using __riscv_save_0. 
*/ 10025 gcc_assert ((count == 0) || 10026 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1])); 10027 10028 return gen_rtx_PARALLEL (VOIDmode, vec); 10029 } 10030 10031 static HOST_WIDE_INT 10032 zcmp_base_adj (int regs_num) 10033 { 10034 return riscv_16bytes_align ((regs_num) *GET_MODE_SIZE (word_mode)); 10035 } 10036 10037 static HOST_WIDE_INT 10038 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num) 10039 { 10040 return total - zcmp_base_adj (regs_num); 10041 } 10042 10043 bool 10044 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num) 10045 { 10046 HOST_WIDE_INT additioanl_bytes = zcmp_additional_adj (total, regs_num); 10047 return additioanl_bytes == 0 || additioanl_bytes == 1 * ZCMP_SP_INC_STEP 10048 || additioanl_bytes == 2 * ZCMP_SP_INC_STEP 10049 || additioanl_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP; 10050 } 10051 10052 /* Return true if it's valid gpr_save pattern. */ 10053 10054 bool 10055 riscv_gpr_save_operation_p (rtx op) 10056 { 10057 unsigned len = XVECLEN (op, 0); 10058 10059 if (len > ARRAY_SIZE (gpr_save_reg_order)) 10060 return false; 10061 10062 for (unsigned i = 0; i < len; i++) 10063 { 10064 rtx elt = XVECEXP (op, 0, i); 10065 if (i == 0) 10066 { 10067 /* First element in parallel is unspec. */ 10068 if (GET_CODE (elt) != UNSPEC_VOLATILE 10069 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT 10070 || XINT (elt, 1) != UNSPECV_GPR_SAVE) 10071 return false; 10072 } 10073 else 10074 { 10075 /* Two CLOBBER and USEs, must check the order. */ 10076 unsigned expect_code = i < 3 ? CLOBBER : USE; 10077 if (GET_CODE (elt) != expect_code 10078 || !REG_P (XEXP (elt, 1)) 10079 || (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i])) 10080 return false; 10081 } 10082 break; 10083 } 10084 return true; 10085 } 10086 10087 /* Implement TARGET_ASAN_SHADOW_OFFSET. */ 10088 10089 static unsigned HOST_WIDE_INT 10090 riscv_asan_shadow_offset (void) 10091 { 10092 /* We only have libsanitizer support for RV64 at present. 
10093 10094 This number must match ASAN_SHADOW_OFFSET_CONST in the file 10095 libsanitizer/asan/asan_mapping.h. */ 10096 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0; 10097 } 10098 10099 /* Implement TARGET_MANGLE_TYPE. */ 10100 10101 static const char * 10102 riscv_mangle_type (const_tree type) 10103 { 10104 /* Half-precision float, _Float16 is "DF16_". */ 10105 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16) 10106 return "DF16_"; 10107 10108 /* Mangle all vector type for vector extension. */ 10109 /* The mangle name follows the rule of RVV LLVM 10110 that is "u" + length of (abi_name) + abi_name. */ 10111 if (TYPE_NAME (type) != NULL) 10112 { 10113 const char *res = riscv_vector::mangle_builtin_type (type); 10114 if (res) 10115 return res; 10116 } 10117 10118 /* Use the default mangling. */ 10119 return NULL; 10120 } 10121 10122 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */ 10123 10124 static bool 10125 riscv_scalar_mode_supported_p (scalar_mode mode) 10126 { 10127 if (mode == HFmode) 10128 return true; 10129 else 10130 return default_scalar_mode_supported_p (mode); 10131 } 10132 10133 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE 10134 if MODE is HFmode, and punt to the generic implementation otherwise. */ 10135 10136 static bool 10137 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode) 10138 { 10139 if (mode == HFmode) 10140 return true; 10141 else 10142 return default_libgcc_floating_mode_supported_p (mode); 10143 } 10144 10145 /* Set the value of FLT_EVAL_METHOD. 
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the zfh/zhinx/zvfh extensions then we support _Float16
   in native precision, so we should set this to 16.  */
static enum flt_eval_method
riscv_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      /* Promote to _Float16 only when one of the half-precision
         extensions is available, otherwise promote to float.  */
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
        return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

/* Implement TARGET_FLOATN_MODE.  */
static opt_scalar_float_mode
riscv_floatn_mode (int n, bool extended)
{
  /* _Float16 maps to HFmode; everything else uses the default mapping.  */
  if (!extended && n == 16)
    return HFmode;

  return default_floatn_mode (n, extended);
}

/* Clear the HFmode libfuncs for arithmetic and comparisons.  */
static void
riscv_init_libfuncs (void)
{
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */

  /* Arithmetic.  */
  set_optab_libfunc (add_optab, HFmode, NULL);
  set_optab_libfunc (sdiv_optab, HFmode, NULL);
  set_optab_libfunc (smul_optab, HFmode, NULL);
  set_optab_libfunc (neg_optab, HFmode, NULL);
  set_optab_libfunc (sub_optab, HFmode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, HFmode, NULL);
  set_optab_libfunc (ne_optab, HFmode, NULL);
  set_optab_libfunc (lt_optab, HFmode, NULL);
  set_optab_libfunc (le_optab, HFmode, NULL);
  set_optab_libfunc (ge_optab, HFmode, NULL);
  set_optab_libfunc (gt_optab, HFmode, NULL);
  set_optab_libfunc (unord_optab, HFmode, NULL);
}

#if CHECKING_P
/* Re-run option processing and rebuild the mode, register and optab state
   that depends on it.  Only built in checking-enabled compilers.  */
void
riscv_reinit (void)
{
  riscv_option_override ();
  init_adjust_machine_modes ();
  init_derived_machine_modes ();
  reinit_regs ();
  init_optabs ();
}
#endif

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
#endif /* #if CHECKING_P */

/* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  */

static bool
riscv_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_VECTOR)
    return riscv_v_ext_mode_p (mode);

  return false;
}

/* Implement TARGET_VERIFY_TYPE_CONTEXT.  */

static bool
riscv_verify_type_context (location_t loc, type_context_kind context,
                           const_tree type, bool silent_p)
{
  return riscv_vector::verify_type_context (loc, context, type, silent_p);
}

/* Implement TARGET_VECTOR_ALIGNMENT.  */

static HOST_WIDE_INT
riscv_vector_alignment (const_tree type)
{
  /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
     be set for non-predicate vectors of booleans.  Modes are the most
     direct way we have of identifying real RVV predicate types.  */
  /* FIXME: RVV didn't mention the alignment of bool, we use
     one byte align.  */
  if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
    return 8;

  /* Otherwise use the minimum size of the type, capped at 128.  */
  widest_int min_size
    = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
  return wi::umin (min_size, 128).to_uhwi ();
}

/* Implement REGMODE_NATURAL_SIZE.  */

poly_uint64
riscv_regmode_natural_size (machine_mode mode)
{
  /* The natural size for RVV data modes is one RVV data vector,
     and similarly for predicates.  We can't independently modify
     anything smaller than that.  */
  /* ??? For now, only do this for variable-width RVV registers.
     Doing it for constant-sized registers breaks lower-subreg.c.  */

  if (riscv_v_ext_mode_p (mode))
    {
      poly_uint64 size = GET_MODE_SIZE (mode);
      if (riscv_v_ext_tuple_mode_p (mode))
        {
          /* For tuple modes look at the size of one subpart.  */
          size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
          if (known_lt (size, BYTES_PER_RISCV_VECTOR))
            return size;
        }
      else if (riscv_v_ext_vector_mode_p (mode))
        {
          /* RVV mask modes always consume a single register.  */
          if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
            return BYTES_PER_RISCV_VECTOR;
        }
      if (!size.is_constant ())
        return BYTES_PER_RISCV_VECTOR;
      else if (!riscv_v_ext_vls_mode_p (mode))
        /* For -march=rv64gc_zve32f, the natural vector register size
           is 32bits which is smaller than scalar register size, so we
           return minimum size between vector register size and scalar
           register size.  */
        return MIN (size.to_constant (), UNITS_PER_WORD);
    }
  return UNITS_PER_WORD;
}

/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook.
*/ 10307 10308 static unsigned int 10309 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, 10310 int *offset) 10311 { 10312 /* Polynomial invariant 1 == (VLENB / BYTES_PER_RISCV_VECTOR) - 1. 10313 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1. 10314 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1. 10315 */ 10316 gcc_assert (i == 1); 10317 *factor = BYTES_PER_RISCV_VECTOR.coeffs[1]; 10318 *offset = 1; 10319 return RISCV_DWARF_VLENB; 10320 } 10321 10322 /* Implement TARGET_ESTIMATED_POLY_VALUE. */ 10323 10324 static HOST_WIDE_INT 10325 riscv_estimated_poly_value (poly_int64 val, 10326 poly_value_estimate_kind kind = POLY_VALUE_LIKELY) 10327 { 10328 if (TARGET_VECTOR) 10329 return riscv_vector::estimated_poly_value (val, kind); 10330 return default_estimated_poly_value (val, kind); 10331 } 10332 10333 /* Return true if the vector misalignment factor is supported by the 10334 target. */ 10335 bool 10336 riscv_support_vector_misalignment (machine_mode mode, 10337 const_tree type ATTRIBUTE_UNUSED, 10338 int misalignment, 10339 bool is_packed ATTRIBUTE_UNUSED) 10340 { 10341 /* Depend on movmisalign pattern. */ 10342 return default_builtin_support_vector_misalignment (mode, type, misalignment, 10343 is_packed); 10344 } 10345 10346 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */ 10347 10348 static opt_machine_mode 10349 riscv_get_mask_mode (machine_mode mode) 10350 { 10351 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode)) 10352 return riscv_vector::get_mask_mode (mode); 10353 10354 return default_get_mask_mode (mode); 10355 } 10356 10357 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that 10358 it isn't worth branching around empty masked ops (including masked 10359 stores). */ 10360 10361 static bool 10362 riscv_empty_mask_is_expensive (unsigned) 10363 { 10364 return false; 10365 } 10366 10367 /* Return true if a shift-amount matches the trailing cleared bits on 10368 a bitmask. 
*/ 10369 10370 bool 10371 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask) 10372 { 10373 return shamt == ctz_hwi (mask); 10374 } 10375 10376 static HARD_REG_SET 10377 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs) 10378 { 10379 HARD_REG_SET zeroed_hardregs; 10380 CLEAR_HARD_REG_SET (zeroed_hardregs); 10381 10382 /* Find a register to hold vl. */ 10383 unsigned vl_regno = INVALID_REGNUM; 10384 /* Skip the first GPR, otherwise the existing vl is kept due to the same 10385 between vl and avl. */ 10386 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++) 10387 { 10388 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno)) 10389 { 10390 vl_regno = regno; 10391 break; 10392 } 10393 } 10394 10395 if (vl_regno > GP_REG_LAST) 10396 sorry ("cannot allocate vl register for %qs on this target", 10397 "-fzero-call-used-regs"); 10398 10399 /* Vector configurations need not be saved and restored here. The 10400 -fzero-call-used-regs=* option will zero all vector registers and 10401 return. So there's no vector operations between them. */ 10402 10403 bool emitted_vlmax_vsetvl = false; 10404 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */ 10405 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno) 10406 { 10407 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno)) 10408 { 10409 rtx target = regno_reg_rtx[regno]; 10410 machine_mode mode = GET_MODE (target); 10411 10412 if (!emitted_vlmax_vsetvl) 10413 { 10414 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl); 10415 emitted_vlmax_vsetvl = true; 10416 } 10417 10418 rtx ops[] = {target, CONST0_RTX (mode)}; 10419 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode), 10420 riscv_vector::UNARY_OP, ops, vl); 10421 10422 SET_HARD_REG_BIT (zeroed_hardregs, regno); 10423 } 10424 } 10425 10426 return zeroed_hardregs; 10427 } 10428 10429 /* Generate a sequence of instructions that zero registers specified by 10430 NEED_ZEROED_HARDREGS. 
Return the ZEROED_HARDREGS that are actually 10431 zeroed. */ 10432 HARD_REG_SET 10433 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs) 10434 { 10435 HARD_REG_SET zeroed_hardregs; 10436 CLEAR_HARD_REG_SET (zeroed_hardregs); 10437 10438 if (TARGET_VECTOR) 10439 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs); 10440 10441 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs 10442 & ~zeroed_hardregs); 10443 } 10444 10445 /* Implement target hook TARGET_ARRAY_MODE. */ 10446 10447 static opt_machine_mode 10448 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) 10449 { 10450 machine_mode vmode; 10451 if (TARGET_VECTOR 10452 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode)) 10453 return vmode; 10454 10455 return opt_machine_mode (); 10456 } 10457 10458 /* Given memory reference MEM, expand code to compute the aligned 10459 memory address, shift and mask values and store them into 10460 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */ 10461 10462 void 10463 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, 10464 rtx *not_mask) 10465 { 10466 /* Align the memory address to a word. */ 10467 rtx addr = force_reg (Pmode, XEXP (mem, 0)); 10468 10469 rtx addr_mask = gen_int_mode (-4, Pmode); 10470 10471 rtx aligned_addr = gen_reg_rtx (Pmode); 10472 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask)); 10473 10474 *aligned_mem = change_address (mem, SImode, aligned_addr); 10475 10476 /* Calculate the shift amount. */ 10477 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), 10478 gen_int_mode (3, SImode))); 10479 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift, 10480 gen_int_mode (3, SImode))); 10481 10482 /* Calculate the mask. 
*/ 10483 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem)); 10484 10485 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode)); 10486 10487 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask, 10488 gen_lowpart (QImode, *shift))); 10489 10490 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask)); 10491 } 10492 10493 /* Leftshift a subword within an SImode register. */ 10494 10495 void 10496 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift, 10497 rtx *shifted_value) 10498 { 10499 rtx value_reg = gen_reg_rtx (SImode); 10500 emit_move_insn (value_reg, gen_lowpart (SImode, value)); 10501 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg, 10502 gen_lowpart (QImode, shift))); 10503 } 10504 10505 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This 10506 allows the behavior to be tuned for specific implementations as well as 10507 when optimizing for size. */ 10508 10509 bool 10510 riscv_use_divmod_expander (void) 10511 { 10512 return tune_param->use_divmod_expansion; 10513 } 10514 10515 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ 10516 10517 static machine_mode 10518 riscv_preferred_simd_mode (scalar_mode mode) 10519 { 10520 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR) 10521 return riscv_vector::preferred_simd_mode (mode); 10522 10523 return word_mode; 10524 } 10525 10526 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */ 10527 10528 static poly_uint64 10529 riscv_vectorize_preferred_vector_alignment (const_tree type) 10530 { 10531 if (riscv_v_ext_mode_p (TYPE_MODE (type))) 10532 return TYPE_ALIGN (TREE_TYPE (type)); 10533 return TYPE_ALIGN (type); 10534 } 10535 10536 /* Return true if it is static FRM rounding mode. 
*/ 10537 10538 static bool 10539 riscv_static_frm_mode_p (int mode) 10540 { 10541 switch (mode) 10542 { 10543 case riscv_vector::FRM_RDN: 10544 case riscv_vector::FRM_RUP: 10545 case riscv_vector::FRM_RTZ: 10546 case riscv_vector::FRM_RMM: 10547 case riscv_vector::FRM_RNE: 10548 return true; 10549 default: 10550 return false; 10551 } 10552 10553 gcc_unreachable (); 10554 } 10555 10556 /* Implement the floating-point Mode Switching. */ 10557 10558 static void 10559 riscv_emit_frm_mode_set (int mode, int prev_mode) 10560 { 10561 rtx backup_reg = DYNAMIC_FRM_RTL (cfun); 10562 10563 if (prev_mode == riscv_vector::FRM_DYN_CALL) 10564 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */ 10565 10566 if (mode != prev_mode) 10567 { 10568 rtx frm = gen_int_mode (mode, SImode); 10569 10570 if (mode == riscv_vector::FRM_DYN_CALL 10571 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun)) 10572 /* No need to emit when prev mode is DYN already. */ 10573 emit_insn (gen_fsrmsi_restore_volatile (backup_reg)); 10574 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun) 10575 && prev_mode != riscv_vector::FRM_DYN 10576 && prev_mode != riscv_vector::FRM_DYN_CALL) 10577 /* No need to emit when prev mode is DYN or DYN_CALL already. */ 10578 emit_insn (gen_fsrmsi_restore_volatile (backup_reg)); 10579 else if (mode == riscv_vector::FRM_DYN 10580 && prev_mode != riscv_vector::FRM_DYN_CALL) 10581 /* Restore frm value from backup when switch to DYN mode. */ 10582 emit_insn (gen_fsrmsi_restore (backup_reg)); 10583 else if (riscv_static_frm_mode_p (mode)) 10584 /* Set frm value when switch to static mode. */ 10585 emit_insn (gen_fsrmsi_restore (frm)); 10586 } 10587 } 10588 10589 /* Implement Mode switching. 
*/ 10590 10591 static void 10592 riscv_emit_mode_set (int entity, int mode, int prev_mode, 10593 HARD_REG_SET regs_live ATTRIBUTE_UNUSED) 10594 { 10595 switch (entity) 10596 { 10597 case RISCV_VXRM: 10598 if (mode != VXRM_MODE_NONE && mode != prev_mode) 10599 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode))); 10600 break; 10601 case RISCV_FRM: 10602 riscv_emit_frm_mode_set (mode, prev_mode); 10603 break; 10604 default: 10605 gcc_unreachable (); 10606 } 10607 } 10608 10609 /* Adjust the FRM_NONE insn after a call to FRM_DYN for the 10610 underlying emit. */ 10611 10612 static int 10613 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode) 10614 { 10615 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn); 10616 10617 if (insn && CALL_P (insn)) 10618 return riscv_vector::FRM_DYN; 10619 10620 return mode; 10621 } 10622 10623 /* Insert the backup frm insn to the end of the bb if and only if the call 10624 is the last insn of this bb. */ 10625 10626 static void 10627 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn) 10628 { 10629 edge eg; 10630 bool abnormal_edge_p = false; 10631 edge_iterator eg_iterator; 10632 basic_block bb = BLOCK_FOR_INSN (cur_insn); 10633 10634 FOR_EACH_EDGE (eg, eg_iterator, bb->succs) 10635 { 10636 if (eg->flags & EDGE_ABNORMAL) 10637 abnormal_edge_p = true; 10638 else 10639 { 10640 start_sequence (); 10641 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); 10642 rtx_insn *backup_insn = get_insns (); 10643 end_sequence (); 10644 10645 insert_insn_on_edge (backup_insn, eg); 10646 } 10647 } 10648 10649 if (abnormal_edge_p) 10650 { 10651 start_sequence (); 10652 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); 10653 rtx_insn *backup_insn = get_insns (); 10654 end_sequence (); 10655 10656 insert_insn_end_basic_block (backup_insn, bb); 10657 } 10658 10659 commit_edge_insertions (); 10660 } 10661 10662 /* Return mode that frm must be switched into 10663 prior to the execution of insn. 
*/ 10664 10665 static int 10666 riscv_frm_mode_needed (rtx_insn *cur_insn, int code) 10667 { 10668 if (!DYNAMIC_FRM_RTL(cfun)) 10669 { 10670 /* The dynamic frm will be initialized only onece during cfun. */ 10671 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode); 10672 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); 10673 } 10674 10675 if (CALL_P (cur_insn)) 10676 { 10677 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn); 10678 10679 if (!insn) 10680 riscv_frm_emit_after_bb_end (cur_insn); 10681 10682 return riscv_vector::FRM_DYN_CALL; 10683 } 10684 10685 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE; 10686 10687 if (mode == riscv_vector::FRM_NONE) 10688 /* After meet a call, we need to backup the frm because it may be 10689 updated during the call. Here, for each insn, we will check if 10690 the previous insn is a call or not. When previous insn is call, 10691 there will be 2 cases for the emit mode set. 10692 10693 1. Current insn is not MODE_NONE, then the mode switch framework 10694 will do the mode switch from MODE_CALL to MODE_NONE natively. 10695 2. Current insn is MODE_NONE, we need to adjust the MODE_NONE to 10696 the MODE_DYN, and leave the mode switch itself to perform 10697 the emit mode set. 10698 */ 10699 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode); 10700 10701 return mode; 10702 } 10703 10704 /* Return mode that entity must be switched into 10705 prior to the execution of insn. */ 10706 10707 static int 10708 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) 10709 { 10710 int code = recog_memoized (insn); 10711 10712 switch (entity) 10713 { 10714 case RISCV_VXRM: 10715 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE; 10716 case RISCV_FRM: 10717 return riscv_frm_mode_needed (insn, code); 10718 default: 10719 gcc_unreachable (); 10720 } 10721 } 10722 10723 /* Return TRUE that an insn is asm. 
*/ 10724 10725 static bool 10726 asm_insn_p (rtx_insn *insn) 10727 { 10728 extract_insn (insn); 10729 10730 return recog_data.is_asm; 10731 } 10732 10733 /* Return TRUE that an insn is unknown for VXRM. */ 10734 10735 static bool 10736 vxrm_unknown_p (rtx_insn *insn) 10737 { 10738 /* Return true if there is a definition of VXRM. */ 10739 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn)) 10740 return true; 10741 10742 /* A CALL function may contain an instruction that modifies the VXRM, 10743 return true in this situation. */ 10744 if (CALL_P (insn)) 10745 return true; 10746 10747 /* Return true for all assembly since users may hardcode a assembly 10748 like this: asm volatile ("csrwi vxrm, 0"). */ 10749 if (asm_insn_p (insn)) 10750 return true; 10751 10752 return false; 10753 } 10754 10755 /* Return TRUE that an insn is unknown dynamic for FRM. */ 10756 10757 static bool 10758 frm_unknown_dynamic_p (rtx_insn *insn) 10759 { 10760 /* Return true if there is a definition of FRM. */ 10761 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn)) 10762 return true; 10763 10764 return false; 10765 } 10766 10767 /* Return the mode that an insn results in for VXRM. */ 10768 10769 static int 10770 riscv_vxrm_mode_after (rtx_insn *insn, int mode) 10771 { 10772 if (vxrm_unknown_p (insn)) 10773 return VXRM_MODE_NONE; 10774 10775 if (recog_memoized (insn) < 0) 10776 return mode; 10777 10778 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn))) 10779 return get_attr_vxrm_mode (insn); 10780 else 10781 return mode; 10782 } 10783 10784 /* Return the mode that an insn results in for FRM. 
*/ 10785 10786 static int 10787 riscv_frm_mode_after (rtx_insn *insn, int mode) 10788 { 10789 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode); 10790 10791 if (CALL_P (insn)) 10792 return mode; 10793 10794 if (frm_unknown_dynamic_p (insn)) 10795 return riscv_vector::FRM_DYN; 10796 10797 if (recog_memoized (insn) < 0) 10798 return mode; 10799 10800 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn))) 10801 return get_attr_frm_mode (insn); 10802 else 10803 return mode; 10804 } 10805 10806 /* Return the mode that an insn results in. */ 10807 10808 static int 10809 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) 10810 { 10811 switch (entity) 10812 { 10813 case RISCV_VXRM: 10814 return riscv_vxrm_mode_after (insn, mode); 10815 case RISCV_FRM: 10816 return riscv_frm_mode_after (insn, mode); 10817 default: 10818 gcc_unreachable (); 10819 } 10820 } 10821 10822 /* Return a mode that ENTITY is assumed to be 10823 switched to at function entry. */ 10824 10825 static int 10826 riscv_mode_entry (int entity) 10827 { 10828 switch (entity) 10829 { 10830 case RISCV_VXRM: 10831 return VXRM_MODE_NONE; 10832 case RISCV_FRM: 10833 { 10834 /* According to RVV 1.0 spec, all vector floating-point operations use 10835 the dynamic rounding mode in the frm register. Likewise in other 10836 similar places. */ 10837 return riscv_vector::FRM_DYN; 10838 } 10839 default: 10840 gcc_unreachable (); 10841 } 10842 } 10843 10844 /* Return a mode that ENTITY is assumed to be 10845 switched to at function exit. 
*/ 10846 10847 static int 10848 riscv_mode_exit (int entity) 10849 { 10850 switch (entity) 10851 { 10852 case RISCV_VXRM: 10853 return VXRM_MODE_NONE; 10854 case RISCV_FRM: 10855 return riscv_vector::FRM_DYN_EXIT; 10856 default: 10857 gcc_unreachable (); 10858 } 10859 } 10860 10861 static int 10862 riscv_mode_priority (int, int n) 10863 { 10864 return n; 10865 } 10866 10867 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */ 10868 unsigned int 10869 riscv_autovectorize_vector_modes (vector_modes *modes, bool all) 10870 { 10871 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR) 10872 return riscv_vector::autovectorize_vector_modes (modes, all); 10873 10874 return default_autovectorize_vector_modes (modes, all); 10875 } 10876 10877 /* Implement TARGET_VECTORIZE_RELATED_MODE. */ 10878 opt_machine_mode 10879 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode, 10880 poly_uint64 nunits) 10881 { 10882 if (TARGET_VECTOR) 10883 return riscv_vector::vectorize_related_mode (vector_mode, element_mode, 10884 nunits); 10885 return default_vectorize_related_mode (vector_mode, element_mode, nunits); 10886 } 10887 10888 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ 10889 10890 static bool 10891 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, 10892 rtx target, rtx op0, rtx op1, 10893 const vec_perm_indices &sel) 10894 { 10895 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode)) 10896 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0, 10897 op1, sel); 10898 10899 return false; 10900 } 10901 10902 static bool 10903 riscv_frame_pointer_required (void) 10904 { 10905 return riscv_save_frame_pointer && !crtl->is_leaf; 10906 } 10907 10908 /* Return the appropriate common costs according to VECTYPE from COSTS. 
*/ 10909 static const common_vector_cost * 10910 get_common_costs (const cpu_vector_cost *costs, tree vectype) 10911 { 10912 gcc_assert (costs); 10913 10914 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype))) 10915 return costs->vls; 10916 return costs->vla; 10917 } 10918 10919 /* Return the CPU vector costs according to -mtune if tune info has non-NULL 10920 vector cost. Otherwide, return the default generic vector costs. */ 10921 const cpu_vector_cost * 10922 get_vector_costs () 10923 { 10924 const cpu_vector_cost *costs = tune_param->vec_costs; 10925 if (!costs) 10926 return &generic_vector_cost; 10927 return costs; 10928 } 10929 10930 /* Implement targetm.vectorize.builtin_vectorization_cost. */ 10931 10932 static int 10933 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 10934 tree vectype, int misalign ATTRIBUTE_UNUSED) 10935 { 10936 const cpu_vector_cost *costs = get_vector_costs (); 10937 bool fp = false; 10938 10939 if (vectype != NULL) 10940 fp = FLOAT_TYPE_P (vectype); 10941 10942 const common_vector_cost *common_costs = get_common_costs (costs, vectype); 10943 gcc_assert (common_costs != NULL); 10944 switch (type_of_cost) 10945 { 10946 case scalar_stmt: 10947 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost; 10948 10949 case scalar_load: 10950 return costs->scalar_load_cost; 10951 10952 case scalar_store: 10953 return costs->scalar_store_cost; 10954 10955 case vector_stmt: 10956 return fp ? 
common_costs->fp_stmt_cost : common_costs->int_stmt_cost; 10957 10958 case vector_load: 10959 return common_costs->align_load_cost; 10960 10961 case vector_store: 10962 return common_costs->align_store_cost; 10963 10964 case vec_to_scalar: 10965 return common_costs->vec_to_scalar_cost; 10966 10967 case scalar_to_vec: 10968 return common_costs->scalar_to_vec_cost; 10969 10970 case unaligned_load: 10971 return common_costs->unalign_load_cost; 10972 case vector_gather_load: 10973 return common_costs->gather_load_cost; 10974 10975 case unaligned_store: 10976 return common_costs->unalign_store_cost; 10977 case vector_scatter_store: 10978 return common_costs->scatter_store_cost; 10979 10980 case cond_branch_taken: 10981 return costs->cond_taken_branch_cost; 10982 10983 case cond_branch_not_taken: 10984 return costs->cond_not_taken_branch_cost; 10985 10986 case vec_perm: 10987 return common_costs->permute_cost; 10988 10989 case vec_promote_demote: 10990 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost; 10991 10992 case vec_construct: 10993 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); 10994 10995 default: 10996 gcc_unreachable (); 10997 } 10998 10999 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign); 11000 } 11001 11002 /* Implement targetm.vectorize.create_costs. */ 11003 11004 static vector_costs * 11005 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) 11006 { 11007 if (TARGET_VECTOR) 11008 return new riscv_vector::costs (vinfo, costing_for_scalar); 11009 /* Default vector costs. */ 11010 return new vector_costs (vinfo, costing_for_scalar); 11011 } 11012 11013 /* Implement TARGET_PREFERRED_ELSE_VALUE. 
*/ 11014 11015 static tree 11016 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops, 11017 tree *ops) 11018 { 11019 if (riscv_v_ext_mode_p (TYPE_MODE (vectype))) 11020 { 11021 tree tmp_var = create_tmp_var (vectype); 11022 TREE_NO_WARNING (tmp_var) = 1; 11023 return get_or_create_ssa_default_def (cfun, tmp_var); 11024 } 11025 11026 return default_preferred_else_value (ifn, vectype, nops, ops); 11027 } 11028 11029 /* If MEM is in the form of "base+offset", extract the two parts 11030 of address and set to BASE and OFFSET, otherwise return false 11031 after clearing BASE and OFFSET. */ 11032 11033 bool 11034 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) 11035 { 11036 rtx addr; 11037 11038 gcc_assert (MEM_P (mem)); 11039 11040 addr = XEXP (mem, 0); 11041 11042 if (REG_P (addr)) 11043 { 11044 *base = addr; 11045 *offset = const0_rtx; 11046 return true; 11047 } 11048 11049 if (GET_CODE (addr) == PLUS 11050 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1))) 11051 { 11052 *base = XEXP (addr, 0); 11053 *offset = XEXP (addr, 1); 11054 return true; 11055 } 11056 11057 *base = NULL_RTX; 11058 *offset = NULL_RTX; 11059 11060 return false; 11061 } 11062 11063 /* Implements target hook vector_mode_supported_any_target_p. */ 11064 11065 static bool 11066 riscv_vector_mode_supported_any_target_p (machine_mode) 11067 { 11068 if (TARGET_XTHEADVECTOR) 11069 return false; 11070 return true; 11071 } 11072 11073 /* Implements hook TARGET_FUNCTION_VALUE_REGNO_P. */ 11074 11075 static bool 11076 riscv_function_value_regno_p (const unsigned regno) 11077 { 11078 if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST) 11079 return true; 11080 11081 if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST) 11082 return true; 11083 11084 if (TARGET_VECTOR && regno == V_RETURN) 11085 return true; 11086 11087 return false; 11088 } 11089 11090 /* Implements hook TARGET_GET_RAW_RESULT_MODE. 
*/ 11091 11092 static fixed_size_mode 11093 riscv_get_raw_result_mode (int regno) 11094 { 11095 if (!is_a <fixed_size_mode> (reg_raw_mode[regno])) 11096 return as_a <fixed_size_mode> (VOIDmode); 11097 11098 return default_get_reg_raw_mode (regno); 11099 } 11100 11101 /* Initialize the GCC target structure. */ 11102 #undef TARGET_ASM_ALIGNED_HI_OP 11103 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" 11104 #undef TARGET_ASM_ALIGNED_SI_OP 11105 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" 11106 #undef TARGET_ASM_ALIGNED_DI_OP 11107 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" 11108 11109 #undef TARGET_OPTION_OVERRIDE 11110 #define TARGET_OPTION_OVERRIDE riscv_option_override 11111 11112 #undef TARGET_OPTION_RESTORE 11113 #define TARGET_OPTION_RESTORE riscv_option_restore 11114 11115 #undef TARGET_OPTION_VALID_ATTRIBUTE_P 11116 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p 11117 11118 #undef TARGET_LEGITIMIZE_ADDRESS 11119 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address 11120 11121 #undef TARGET_SCHED_ISSUE_RATE 11122 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate 11123 #undef TARGET_SCHED_MACRO_FUSION_P 11124 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p 11125 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P 11126 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p 11127 11128 #undef TARGET_SCHED_VARIABLE_ISSUE 11129 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue 11130 11131 #undef TARGET_SCHED_ADJUST_COST 11132 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost 11133 11134 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 11135 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall 11136 11137 #undef TARGET_SET_CURRENT_FUNCTION 11138 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function 11139 11140 #undef TARGET_REGISTER_MOVE_COST 11141 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost 11142 #undef TARGET_MEMORY_MOVE_COST 11143 #define TARGET_MEMORY_MOVE_COST 
riscv_memory_move_cost 11144 #undef TARGET_RTX_COSTS 11145 #define TARGET_RTX_COSTS riscv_rtx_costs 11146 #undef TARGET_ADDRESS_COST 11147 #define TARGET_ADDRESS_COST riscv_address_cost 11148 #undef TARGET_INSN_COST 11149 #define TARGET_INSN_COST riscv_insn_cost 11150 11151 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST 11152 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost 11153 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P 11154 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p 11155 11156 #undef TARGET_ASM_FILE_START 11157 #define TARGET_ASM_FILE_START riscv_file_start 11158 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE 11159 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true 11160 #undef TARGET_ASM_FILE_END 11161 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack 11162 11163 #undef TARGET_EXPAND_BUILTIN_VA_START 11164 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start 11165 11166 #undef TARGET_PROMOTE_FUNCTION_MODE 11167 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode 11168 11169 #undef TARGET_RETURN_IN_MEMORY 11170 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory 11171 11172 #undef TARGET_ASM_OUTPUT_MI_THUNK 11173 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk 11174 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 11175 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 11176 11177 #undef TARGET_PRINT_OPERAND 11178 #define TARGET_PRINT_OPERAND riscv_print_operand 11179 #undef TARGET_PRINT_OPERAND_ADDRESS 11180 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address 11181 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 11182 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p 11183 11184 #undef TARGET_SETUP_INCOMING_VARARGS 11185 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs 11186 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS 11187 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args 11188 
#undef TARGET_STRICT_ARGUMENT_NAMING 11189 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 11190 #undef TARGET_MUST_PASS_IN_STACK 11191 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 11192 #undef TARGET_PASS_BY_REFERENCE 11193 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference 11194 #undef TARGET_ARG_PARTIAL_BYTES 11195 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes 11196 #undef TARGET_FUNCTION_ARG 11197 #define TARGET_FUNCTION_ARG riscv_function_arg 11198 #undef TARGET_FUNCTION_ARG_ADVANCE 11199 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance 11200 #undef TARGET_FUNCTION_ARG_BOUNDARY 11201 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary 11202 #undef TARGET_FNTYPE_ABI 11203 #define TARGET_FNTYPE_ABI riscv_fntype_abi 11204 #undef TARGET_INSN_CALLEE_ABI 11205 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi 11206 11207 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS 11208 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \ 11209 riscv_get_separate_components 11210 11211 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB 11212 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \ 11213 riscv_components_for_bb 11214 11215 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS 11216 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \ 11217 riscv_disqualify_components 11218 11219 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS 11220 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \ 11221 riscv_emit_prologue_components 11222 11223 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS 11224 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \ 11225 riscv_emit_epilogue_components 11226 11227 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS 11228 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \ 11229 riscv_set_handled_components 11230 11231 /* The generic ELF target does not always have TLS support. 
*/ 11232 #ifdef HAVE_AS_TLS 11233 #undef TARGET_HAVE_TLS 11234 #define TARGET_HAVE_TLS true 11235 #endif 11236 11237 #undef TARGET_CANNOT_FORCE_CONST_MEM 11238 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem 11239 11240 #undef TARGET_LEGITIMATE_CONSTANT_P 11241 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p 11242 11243 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 11244 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p 11245 11246 #undef TARGET_LEGITIMATE_ADDRESS_P 11247 #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p 11248 11249 #undef TARGET_CAN_ELIMINATE 11250 #define TARGET_CAN_ELIMINATE riscv_can_eliminate 11251 11252 #undef TARGET_CONDITIONAL_REGISTER_USAGE 11253 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage 11254 11255 #undef TARGET_CLASS_MAX_NREGS 11256 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs 11257 11258 #undef TARGET_TRAMPOLINE_INIT 11259 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init 11260 11261 #undef TARGET_IN_SMALL_DATA_P 11262 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p 11263 11264 #undef TARGET_HAVE_SRODATA_SECTION 11265 #define TARGET_HAVE_SRODATA_SECTION true 11266 11267 #undef TARGET_ASM_SELECT_SECTION 11268 #define TARGET_ASM_SELECT_SECTION riscv_select_section 11269 11270 #undef TARGET_ASM_UNIQUE_SECTION 11271 #define TARGET_ASM_UNIQUE_SECTION riscv_unique_section 11272 11273 #undef TARGET_ASM_SELECT_RTX_SECTION 11274 #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section 11275 11276 #undef TARGET_MIN_ANCHOR_OFFSET 11277 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2) 11278 11279 #undef TARGET_MAX_ANCHOR_OFFSET 11280 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1) 11281 11282 #undef TARGET_REGISTER_PRIORITY 11283 #define TARGET_REGISTER_PRIORITY riscv_register_priority 11284 11285 #undef TARGET_CANNOT_COPY_INSN_P 11286 #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p 11287 11288 #undef 
TARGET_ATOMIC_ASSIGN_EXPAND_FENV 11289 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv 11290 11291 #undef TARGET_INIT_BUILTINS 11292 #define TARGET_INIT_BUILTINS riscv_init_builtins 11293 11294 #undef TARGET_BUILTIN_DECL 11295 #define TARGET_BUILTIN_DECL riscv_builtin_decl 11296 11297 #undef TARGET_GIMPLE_FOLD_BUILTIN 11298 #define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin 11299 11300 #undef TARGET_EXPAND_BUILTIN 11301 #define TARGET_EXPAND_BUILTIN riscv_expand_builtin 11302 11303 #undef TARGET_HARD_REGNO_NREGS 11304 #define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs 11305 #undef TARGET_HARD_REGNO_MODE_OK 11306 #define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok 11307 11308 #undef TARGET_MODES_TIEABLE_P 11309 #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p 11310 11311 #undef TARGET_SLOW_UNALIGNED_ACCESS 11312 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access 11313 11314 #undef TARGET_SECONDARY_MEMORY_NEEDED 11315 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed 11316 11317 #undef TARGET_CAN_CHANGE_MODE_CLASS 11318 #define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class 11319 11320 #undef TARGET_CONSTANT_ALIGNMENT 11321 #define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment 11322 11323 #undef TARGET_MERGE_DECL_ATTRIBUTES 11324 #define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes 11325 11326 #undef TARGET_ATTRIBUTE_TABLE 11327 #define TARGET_ATTRIBUTE_TABLE riscv_attribute_table 11328 11329 #undef TARGET_WARN_FUNC_RETURN 11330 #define TARGET_WARN_FUNC_RETURN riscv_warn_func_return 11331 11332 /* The low bit is ignored by jump instructions so is safe to use. 
*/ 11333 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 11334 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 11335 11336 #undef TARGET_MACHINE_DEPENDENT_REORG 11337 #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg 11338 11339 #undef TARGET_NEW_ADDRESS_PROFITABLE_P 11340 #define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p 11341 11342 #undef TARGET_MANGLE_TYPE 11343 #define TARGET_MANGLE_TYPE riscv_mangle_type 11344 11345 #undef TARGET_SCALAR_MODE_SUPPORTED_P 11346 #define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p 11347 11348 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P 11349 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ 11350 riscv_libgcc_floating_mode_supported_p 11351 11352 #undef TARGET_INIT_LIBFUNCS 11353 #define TARGET_INIT_LIBFUNCS riscv_init_libfuncs 11354 11355 #undef TARGET_C_EXCESS_PRECISION 11356 #define TARGET_C_EXCESS_PRECISION riscv_excess_precision 11357 11358 #undef TARGET_FLOATN_MODE 11359 #define TARGET_FLOATN_MODE riscv_floatn_mode 11360 11361 #undef TARGET_ASAN_SHADOW_OFFSET 11362 #define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset 11363 11364 #ifdef TARGET_BIG_ENDIAN_DEFAULT 11365 #undef TARGET_DEFAULT_TARGET_FLAGS 11366 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN) 11367 #endif 11368 11369 #undef TARGET_VECTOR_MODE_SUPPORTED_P 11370 #define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p 11371 11372 #undef TARGET_VERIFY_TYPE_CONTEXT 11373 #define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context 11374 11375 #undef TARGET_ESTIMATED_POLY_VALUE 11376 #define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value 11377 11378 #undef TARGET_VECTORIZE_GET_MASK_MODE 11379 #define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode 11380 11381 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE 11382 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive 11383 11384 #undef TARGET_VECTOR_ALIGNMENT 11385 #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment 11386 
11387 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT 11388 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment 11389 11390 #undef TARGET_DWARF_POLY_INDETERMINATE_VALUE 11391 #define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value 11392 11393 #undef TARGET_ZERO_CALL_USED_REGS 11394 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs 11395 11396 #undef TARGET_ARRAY_MODE 11397 #define TARGET_ARRAY_MODE riscv_array_mode 11398 11399 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 11400 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode 11401 11402 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT 11403 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \ 11404 riscv_vectorize_preferred_vector_alignment 11405 11406 /* Mode switching hooks. */ 11407 11408 #undef TARGET_MODE_EMIT 11409 #define TARGET_MODE_EMIT riscv_emit_mode_set 11410 #undef TARGET_MODE_NEEDED 11411 #define TARGET_MODE_NEEDED riscv_mode_needed 11412 #undef TARGET_MODE_AFTER 11413 #define TARGET_MODE_AFTER riscv_mode_after 11414 #undef TARGET_MODE_ENTRY 11415 #define TARGET_MODE_ENTRY riscv_mode_entry 11416 #undef TARGET_MODE_EXIT 11417 #define TARGET_MODE_EXIT riscv_mode_exit 11418 #undef TARGET_MODE_PRIORITY 11419 #define TARGET_MODE_PRIORITY riscv_mode_priority 11420 11421 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES 11422 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ 11423 riscv_autovectorize_vector_modes 11424 11425 #undef TARGET_VECTORIZE_RELATED_MODE 11426 #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode 11427 11428 #undef TARGET_VECTORIZE_VEC_PERM_CONST 11429 #define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const 11430 11431 #undef TARGET_FRAME_POINTER_REQUIRED 11432 #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required 11433 11434 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 11435 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ 11436 
riscv_builtin_vectorization_cost 11437 11438 #undef TARGET_VECTORIZE_CREATE_COSTS 11439 #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs 11440 11441 #undef TARGET_PREFERRED_ELSE_VALUE 11442 #define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value 11443 11444 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P 11445 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p 11446 11447 #undef TARGET_FUNCTION_VALUE_REGNO_P 11448 #define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p 11449 11450 #undef TARGET_GET_RAW_RESULT_MODE 11451 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode 11452 11453 struct gcc_target targetm = TARGET_INITIALIZER; 11454 11455 #include "gt-riscv.h" 11456