1 /* Dependency checks for instruction scheduling, shared between ARM and 2 AARCH64. 3 4 Copyright (C) 1991-2022 Free Software Foundation, Inc. 5 Contributed by ARM Ltd. 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it 10 under the terms of the GNU General Public License as published 11 by the Free Software Foundation; either version 3, or (at your 12 option) any later version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 17 License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 24 #define IN_TARGET_CODE 1 25 26 #include "config.h" 27 #include "system.h" 28 #include "coretypes.h" 29 #include "insn-modes.h" 30 #include "tm.h" 31 #include "rtl.h" 32 #include "rtl-iter.h" 33 #include "memmodel.h" 34 #include "diagnostic.h" 35 #include "tree.h" 36 #include "expr.h" 37 #include "function.h" 38 #include "emit-rtl.h" 39 40 /* Return TRUE if X is either an arithmetic shift left, or 41 is a multiplication by a power of two. */ 42 bool 43 arm_rtx_shift_left_p (rtx x) 44 { 45 enum rtx_code code = GET_CODE (x); 46 47 if (code == MULT && CONST_INT_P (XEXP (x, 1)) 48 && exact_log2 (INTVAL (XEXP (x, 1))) > 0) 49 return true; 50 51 if (code == ASHIFT) 52 return true; 53 54 return false; 55 } 56 57 static rtx_code shift_rtx_codes[] = 58 { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, 59 ROTATERT, ZERO_EXTEND, SIGN_EXTEND }; 60 61 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE. 62 If FIND_ANY_SHIFT then we are interested in anything which can 63 reasonably be described as a SHIFT RTX. 
*/ 64 static rtx 65 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift) 66 { 67 subrtx_var_iterator::array_type array; 68 FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST) 69 { 70 rtx x = *iter; 71 if (find_any_shift) 72 { 73 /* Left shifts might have been canonicalized to a MULT of some 74 power of two. Make sure we catch them. */ 75 if (arm_rtx_shift_left_p (x)) 76 return x; 77 else 78 for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++) 79 if (GET_CODE (x) == shift_rtx_codes[i]) 80 return x; 81 } 82 83 if (GET_CODE (x) == code) 84 return x; 85 } 86 return NULL_RTX; 87 } 88 89 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */ 90 static rtx 91 arm_find_shift_sub_rtx (rtx pattern) 92 { 93 return arm_find_sub_rtx_with_code (pattern, ASHIFT, true); 94 } 95 96 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER 97 (possibly) contains a SET which will provide a result we can access 98 using the SET_DEST macro. We will place the RTX which would be 99 written by PRODUCER in SET_SOURCE. 100 Similarly, CONSUMER (possibly) contains a SET which has an operand 101 we can access using SET_SRC. We place this operand in 102 SET_DESTINATION. 103 104 Return nonzero if we found the SET RTX we expected. 
*/ 105 static int 106 arm_get_set_operands (rtx producer, rtx consumer, 107 rtx *set_source, rtx *set_destination) 108 { 109 rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer), 110 SET, false); 111 rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer), 112 SET, false); 113 114 if (set_producer && set_consumer) 115 { 116 *set_source = SET_DEST (set_producer); 117 *set_destination = SET_SRC (set_consumer); 118 return 1; 119 } 120 return 0; 121 } 122 123 bool 124 aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode) 125 { 126 return CONST_INT_P (val) 127 && INTVAL (val) 128 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), 129 mode); 130 } 131 132 bool 133 aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode) 134 { 135 return CONST_INT_P (val) 136 && INTVAL (val) 137 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), 138 mode); 139 } 140 141 142 static bool 143 aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode) 144 { 145 if (GET_CODE (lhs) == AND 146 && GET_CODE (XEXP (lhs, 0)) == ASHIFT 147 && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) 148 && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 149 && REG_P (XEXP (XEXP (lhs, 0), 0)) 150 && CONST_INT_P (XEXP (lhs, 1)) 151 && GET_CODE (rhs) == AND 152 && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT 153 && REG_P (XEXP (XEXP (rhs, 0), 0)) 154 && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) 155 && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 156 && CONST_INT_P (XEXP (rhs, 1)) 157 && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) 158 159 { 160 rtx lhs_mask = XEXP (lhs, 1); 161 rtx rhs_mask = XEXP (rhs, 1); 162 163 return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) 164 && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); 165 } 166 167 return false; 168 } 169 170 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation. 
171 These will be of the form: 172 ((x >> 8) & 0x00ff00ff) 173 | ((x << 8) & 0xff00ff00) 174 for SImode and with similar but wider bitmasks for DImode. 175 The two sub-expressions of the IOR can appear on either side so check both 176 permutations with the help of aarch_rev16_p_1 above. */ 177 178 bool 179 aarch_rev16_p (rtx x) 180 { 181 rtx left_sub_rtx, right_sub_rtx; 182 bool is_rev = false; 183 184 if (GET_CODE (x) != IOR) 185 return false; 186 187 left_sub_rtx = XEXP (x, 0); 188 right_sub_rtx = XEXP (x, 1); 189 190 /* There are no canonicalisation rules for the position of the two shifts 191 involved in a rev, so try both permutations. */ 192 is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); 193 194 if (!is_rev) 195 is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); 196 197 return is_rev; 198 } 199 200 /* Return non-zero if the RTX representing a memory model is a memory model 201 that needs acquire semantics. */ 202 bool 203 aarch_mm_needs_acquire (rtx const_int) 204 { 205 enum memmodel model = memmodel_from_int (INTVAL (const_int)); 206 return !(is_mm_relaxed (model) 207 || is_mm_consume (model) 208 || is_mm_release (model)); 209 } 210 211 /* Return non-zero if the RTX representing a memory model is a memory model 212 that needs release semantics. */ 213 bool 214 aarch_mm_needs_release (rtx const_int) 215 { 216 enum memmodel model = memmodel_from_int (INTVAL (const_int)); 217 return !(is_mm_relaxed (model) 218 || is_mm_consume (model) 219 || is_mm_acquire (model)); 220 } 221 222 /* Return nonzero if the CONSUMER instruction (a load) does need 223 PRODUCER's value to calculate the address. 
*/
int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value, addr;

  if (!arm_get_set_operands (producer, consumer, &value, &addr))
    return 0;

  /* Dependency exists iff the consumer's source (the address side of
     the load) mentions the register the producer writes.  */
  return reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   a Pmode PRODUCER's value to calculate the address.  */

int
arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  /* Both insns must contain a SET, and the producer's SET must itself
     read from memory (i.e. the forwarded value is a loaded pointer).  */
  if (!value || !addr || !MEM_P (SET_SRC (value)))
    return 0;

  value = SET_DEST (value);
  addr = SET_SRC (addr);

  /* Only a pointer-sized (Pmode) result counts; then check it feeds
     the consumer's address computation.  */
  return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value, op;
  rtx early_op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  /* If the consumer contains a shift, the producer's result must not
     overlap the whole shift rtx (shifted value OR shift amount).  */
  if ((early_op = arm_find_shift_sub_rtx (op)))
    return !reg_overlap_mentioned_p (value, early_op);

  return 0;
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value, op;
  rtx early_op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  if ((early_op = arm_find_shift_sub_rtx (op)))
    /* We want to check the value being shifted.  */
    if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
      return 1;

  return 0;
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value, op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      /* The MULT may sit on either side of the PLUS/MINUS; check the
	 side that holds it for overlap with the producer's result.  */
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}

/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (value)
    value = SET_DEST (value);

  /* For a store the address lives on the destination (MEM) side of the
     consumer's SET, hence SET_DEST here, unlike the load variants.  */
  if (addr)
    addr = SET_DEST (addr);

  if (!value || !addr)
    return 0;

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  /* Simply the complement of the "no dependency" check above.  */
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   a Pmode PRODUCER's value to calculate the address.
*/

int
arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  /* Both insns must contain a SET, and the producer's SET must itself
     read from memory (the forwarded value is a loaded pointer).  */
  if (!value || !addr || !MEM_P (SET_SRC (value)))
    return 0;

  value = SET_DEST (value);
  /* For a store the address is on the destination (MEM) side.  */
  addr = SET_DEST (addr);

  return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
}

/* Return non-zero iff the consumer (a multiply-accumulate or a
   multiple-subtract instruction) has an accumulator dependency on the
   result of the producer and no other dependency on that result.  It
   does not check if the producer is multiply-accumulate instruction.  */
int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
  rtx result;
  rtx op0, op1, acc;

  producer = PATTERN (producer);
  consumer = PATTERN (consumer);

  /* Look through conditional-execution wrappers to the real bodies.  */
  if (GET_CODE (producer) == COND_EXEC)
    producer = COND_EXEC_CODE (producer);
  if (GET_CODE (consumer) == COND_EXEC)
    consumer = COND_EXEC_CODE (consumer);

  if (GET_CODE (producer) != SET)
    return 0;

  result = XEXP (producer, 0);

  if (GET_CODE (consumer) != SET)
    return 0;

  /* Check that the consumer is of the form
     (set (...) (plus (mult ...) (...)))
     or
     (set (...) (minus (...) (mult ...))).  */
  if (GET_CODE (XEXP (consumer, 1)) == PLUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
	return 0;

      op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
      acc = XEXP (XEXP (consumer, 1), 1);
    }
  else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
	return 0;

      op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
      acc = XEXP (XEXP (consumer, 1), 0);
    }
  else
    return 0;

  /* The producer's result must feed the accumulator and ONLY the
     accumulator — not either multiplicand.  */
  return (reg_overlap_mentioned_p (result, acc)
	  && !reg_overlap_mentioned_p (result, op0)
	  && !reg_overlap_mentioned_p (result, op1));
}

/* Return non-zero if the destination of PRODUCER feeds the accumulator
   operand of an MLA-like operation.  */

int
aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
{
  rtx producer_set = single_set (producer);
  rtx consumer_set = single_set (consumer);

  /* We are looking for a SET feeding a SET.  */
  if (!producer_set || !consumer_set)
    return 0;

  rtx dest = SET_DEST (producer_set);
  rtx mla = SET_SRC (consumer_set);

  /* We're looking for a register SET.  */
  if (!REG_P (dest))
    return 0;

  rtx accumulator;

  /* Strip a zero_extend.  */
  if (GET_CODE (mla) == ZERO_EXTEND)
    mla = XEXP (mla, 0);

  switch (GET_CODE (mla))
    {
    case PLUS:
      /* Possibly an MADD.  */
      if (GET_CODE (XEXP (mla, 0)) == MULT)
	accumulator = XEXP (mla, 1);
      else
	return 0;
      break;
    case MINUS:
      /* Possibly an MSUB.  */
      if (GET_CODE (XEXP (mla, 1)) == MULT)
	accumulator = XEXP (mla, 0);
      else
	return 0;
      break;
    case FMA:
      {
	/* Possibly an FMADD/FMSUB/FNMADD/FNMSUB.  */
	if (REG_P (XEXP (mla, 1))
	    && REG_P (XEXP (mla, 2))
	    && (REG_P (XEXP (mla, 0))
		|| GET_CODE (XEXP (mla, 0)) == NEG))

	  {
	    /* FMADD/FMSUB.  */
	    accumulator = XEXP (mla, 2);
	  }
	else if (REG_P (XEXP (mla, 1))
		 && GET_CODE (XEXP (mla, 2)) == NEG
		 && (REG_P (XEXP (mla, 0))
		     || GET_CODE (XEXP (mla, 0)) == NEG))
	  {
	    /* FNMADD/FNMSUB: the accumulator is wrapped in a NEG.  */
	    accumulator = XEXP (XEXP (mla, 2), 0);
	  }
	else
	  return 0;
	break;
      }
    default:
      /* Not an MLA-like operation.  */
      return 0;
    }

  /* Look through a SUBREG so a partial-register accumulator still
     matches on register number.  */
  if (SUBREG_P (accumulator))
    accumulator = SUBREG_REG (accumulator);

  if (!REG_P (accumulator))
    return 0;

  return (REGNO (dest) == REGNO (accumulator));
}

/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  /* Look through conditional-execution wrappers.  */
  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  /* The multiply's result must feed the accumulator and neither
     multiplicand of the MAC.  */
  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}

/* Worker function for TARGET_MD_ASM_ADJUST.
   We implement asm flag outputs.  */

rtx_insn *
arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		   vec<machine_mode> & /*input_modes*/,
		   vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
		   HARD_REG_SET & /*clobbered_regs*/, location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only "=@cc<cond>" flag-output constraints are handled here.  */
      if (!startswith (con, "=@cc"))
	continue;
      /* Skip past "=@cc" to the condition-code suffix.  */
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      machine_mode mode;
      rtx_code code;
      int con01 = 0;

/* Pack two condition characters into one switchable integer.  */
#define C(X, Y)  (unsigned char)(X) * 256 + (unsigned char)(Y)

      /* All of the condition codes are two characters.  */
      if (con[0] != 0 && con[1] != 0 && con[2] == 0)
	con01 = C(con[0], con[1]);

      /* NOTE(review): the mapping of cc/lo to GEU and cs/hs to LTU
	 depends on how CC_Cmode encodes the carry flag on this target —
	 confirm against the backend's CC-mode definitions before
	 changing.  */
      switch (con01)
	{
	case C('c', 'c'):
	case C('l', 'o'):
	  mode = CC_Cmode, code = GEU;
	  break;
	case C('c', 's'):
	case C('h', 's'):
	  mode = CC_Cmode, code = LTU;
	  break;
	case C('e', 'q'):
	  mode = CC_NZmode, code = EQ;
	  break;
	case C('g', 'e'):
	  mode = CCmode, code = GE;
	  break;
	case C('g', 't'):
	  mode = CCmode, code = GT;
	  break;
	case C('h', 'i'):
	  mode = CCmode, code = GTU;
	  break;
	case C('l', 'e'):
	  mode = CCmode, code = LE;
	  break;
	case C('l', 's'):
	  mode = CCmode, code = LEU;
	  break;
	case C('l', 't'):
	  mode = CCmode, code = LT;
	  break;
	case C('m', 'i'):
	  mode = CC_NZmode, code = LT;
	  break;
	case C('n', 'e'):
	  mode = CC_NZmode, code = NE;
	  break;
	case C('p', 'l'):
	  mode = CC_NZmode, code = GE;
	  break;
	case C('v', 'c'):
	  mode = CC_Vmode, code = EQ;
	  break;
	case C('v', 's'):
	  mode = CC_Vmode, code = NE;
	  break;
	default:
	  error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}

#undef C

      rtx dest = outputs[i];
      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output");
	  continue;
	}

      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=c";
	  outputs[i] = gen_rtx_REG (CCmode, CC_REGNUM);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (word_mode);
	}

      /* Materialize the flag: compare CC_REGNUM (in the selected CC
	 mode) against zero with the selected comparison code.  */
      rtx x = gen_rtx_REG (mode, CC_REGNUM);
      x = gen_rtx_fmt_ee (code, word_mode, x, const0_rtx);

      if (dest_mode == word_mode && REG_P (dest))
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  /* Destination is narrower/wider than word_mode or not a plain
	     register: compute into a temp and convert (unsigned).  */
	  rtx tmp = gen_reg_rtx (word_mode);
	  emit_insn (gen_rtx_SET (tmp, x));

	  tmp = convert_modes (dest_mode, word_mode, tmp, true);
	  emit_move_insn (dest, tmp);
	}
    }
  rtx_insn *seq = get_insns ();
  end_sequence ();

  /* Return the emitted sequence only if we actually handled at least
     one flag output; otherwise signal "nothing to do" with NULL.  */
  return saw_asm_flag ? seq : NULL;
}