;; Scheduling description for the IBM POWER10 processor.
;; Copyright (C) 2020-2022 Free Software Foundation, Inc.
;;
;; Contributed by Pat Haugen (pthaugen@us.ibm.com).

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

; For Power10 we model (and try to pack) the in-order decode/dispatch groups
; which consist of 8 instructions max. We do not try to model the details of
; the out-of-order issue queues and how insns flow to the various execution
; units except for the simple representation of the issue limitation of at
; most 4 insns to the execution units/2 insns to the load units/2 insns to
; the store units.
;
; Two automata are declared: "power10dispatch" holds the eight dispatch
; slots and "power10issue" holds the execution, load and store units.
(define_automaton "power10dispatch,power10issue")

; Decode/dispatch slots
(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
                  du4_power10,du5_power10,du6_power10,du7_power10" "power10dispatch")

; Four execution units
(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
                 "power10issue")
; Two load units and two store units
(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")


; Dispatch slots are allocated in order conforming to program order.
; Each absence_set below pairs one dispatch slot with every higher-numbered
; slot; du7_power10, the last slot, needs no entry of its own.
(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
              du5_power10,du6_power10,du7_power10")
(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
              du6_power10,du7_power10")
(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
              du7_power10")
(absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
(absence_set "du5_power10" "du6_power10,du7_power10")
(absence_set "du6_power10" "du7_power10")


; Dispatch port reservations
;
; Power10 can dispatch a maximum of 8 iops per cycle. With a maximum of
; 4 VSU/2 Load/2 Store per cycle.

; Any dispatch slot
(define_reservation "DU_any_power10"
                    "du0_power10|du1_power10|du2_power10|du3_power10|
                     du4_power10|du5_power10|du6_power10|du7_power10")

; Even slot, actually takes even/odd slots
; (the possible pairs are du0+du1, du2+du3, du4+du5 and du6+du7)
(define_reservation "DU_even_power10"
                    "du0_power10+du1_power10|du2_power10+du3_power10|
                     du4_power10+du5_power10|du6_power10+du7_power10")

; 4-way cracked (consumes whole decode/dispatch cycle)
; Reserves all eight dispatch slots together.
(define_reservation "DU_all_power10"
                    "du0_power10+du1_power10+du2_power10+du3_power10+
                     du4_power10+du5_power10+du6_power10+du7_power10")


; Execution unit reservations

; Either of the two load units
(define_reservation "LU_power10"
                    "lu0_power10|lu1_power10")

; Either of the two store units
(define_reservation "STU_power10"
                    "stu0_power10|stu1_power10")

; Certain simple fixed-point insns can execute in the Store-agen pipe
; (same unit alternatives as STU_power10)
(define_reservation "SXU_power10"
                    "stu0_power10|stu1_power10")

; Any one of the four execution units
(define_reservation "EXU_power10"
                    "exu0_power10|exu1_power10|exu2_power10|exu3_power10")

; A pair of execution units taken together: exu0+exu1 or exu2+exu3
(define_reservation "EXU_super_power10"
                    "exu0_power10+exu1_power10|exu2_power10+exu3_power10")


; Load Unit
;
; In the reservations that follow, the integer after the reservation name
; is the latency in cycles, and the final string names the dispatch-slot
; reservation followed by the issue-unit reservation.

; Non-prefixed, non-update loads whose size is not 128
(define_insn_reservation "power10-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LU_power10")

; Fused load types take an even/odd dispatch slot pair
(define_insn_reservation "power10-fused-load" 4
  (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10")

; Prefixed loads take an even/odd dispatch slot pair
(define_insn_reservation "power10-prefixed-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10")

; Update forms reserve SXU_power10 together with a load unit
(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10+SXU_power10")

; fpload with size 64, split by the prefixed and update attributes
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LU_power10")

(define_insn_reservation "power10-prefixed-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10")

(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10+SXU_power10")

; SFmode loads are cracked and have additional 3 cycles over DFmode
; Prefixed forms behave the same
; (hence no test of the prefixed attribute in the two size-32 entries)
(define_insn_reservation "power10-fpload-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10")

(define_insn_reservation "power10-fpload-update-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10+SXU_power10")

; Vector loads whose size is not 256
(define_insn_reservation "power10-vecload" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LU_power10")

; lxvp
(define_insn_reservation "power10-vecload-pair" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10+SXU_power10")

; Store Unit

; Non-prefixed, non-update stores whose size is neither 128 nor 256
(define_insn_reservation "power10-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,STU_power10")

(define_insn_reservation "power10-fused-store" 0
  (and (eq_attr "type" "fused_store_store")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,STU_power10")

; Prefixed stores take an even/odd dispatch slot pair
(define_insn_reservation "power10-prefixed-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,STU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,STU_power10")

; stxvp
; Reserves both store units at once.
(define_insn_reservation "power10-vecstore-pair" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,stu0_power10+stu1_power10")

; load_l with size other than 128 (size 128 is covered by power10-lq)
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LU_power10")

; All load quad forms
(define_insn_reservation "power10-lq" 4
  (and (eq_attr "type" "load,load_l")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,LU_power10+SXU_power10")

; store_c with size other than 128 (size 128 is covered by power10-stq)
(define_insn_reservation "power10-stcx" 0
  (and (eq_attr "type" "store_c")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,STU_power10")

; All store quad forms
; Reserves both store units at once.
(define_insn_reservation "power10-stq" 0
  (and (eq_attr "type" "store,store_c")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,stu0_power10+stu1_power10")

; sync and isync are modelled on a store unit
(define_insn_reservation "power10-sync" 1
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,STU_power10")


; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")
; 4 cycle CR latency
(define_bypass 4 "power10-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

(define_insn_reservation "power10-fused_alu" 2
  (and (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

; paddi
(define_insn_reservation "power10-paddi" 2
  (and (eq_attr "type" "add")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

; Rotate/shift (non-record form)
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Record form rotate/shift
(define_insn_reservation "power10-rot-compare" 3
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")
; 5 cycle CR latency
(define_bypass 5 "power10-rot-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; cntlz, popcnt and trap types: 3 cycle latency
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")
; 5 cycle CR latency
(define_bypass 5 "power10-alu2"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Treat 'two' and 'three' types as 2 or 3 way cracked
; ('two' takes an even/odd slot pair, 'three' takes every dispatch slot)
(define_insn_reservation "power10-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power10"))
  "DU_all_power10,EXU_power10")

; Multiply (dot "no")
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")
; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul"
                 "power10-mul,power10-mul-compare")

; Multiply (dot "yes"); takes an even/odd dispatch slot pair
(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")
; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul-compare"
                 "power10-mul,power10-mul-compare")
; 7 cycle CR latency
(define_bypass 7 "power10-mul-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Divide (dot "no")
(define_insn_reservation "power10-div" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Divide (dot "yes"); takes an even/odd dispatch slot pair
(define_insn_reservation "power10-div-compare" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")
; 14 cycle CR latency
(define_bypass 14 "power10-div-compare"
                  "power10-crlogical,power10-mfcr,power10-mfcrf")

; power10-crlogical, power10-mfcrf and power10-mfcr are the consumers named
; by the "CR latency" bypasses in this file.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfcr" 3
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
(define_insn_reservation "power10-mtcr" 3
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mtjmpr" 3
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfjmpr" 2
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")


; Floating point/Vector ops

(define_insn_reservation "power10-fpsimple" 3
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Divide and square-root types are modelled only by their latency; each
; reserves a single execution unit like the other entries in this section.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Note: this reservation is named "power10-sqrt" but matches type "ssqrt".
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Vector types with 2 cycle latency
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fused-vec" 2
  (and (eq_attr "type" "fused_vector")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecsimple" 2
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; vecfloat/vecdouble are split on the size attribute: size "128" is
; handled by power10-qp, everything else by power10-vecnormal.
(define_insn_reservation "power10-vecnormal" 5
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; vecperm is split three ways by the prefixed and dot attributes; only the
; form with both set to "no" can use a single dispatch slot.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "no")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecperm-compare" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-prefixed-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-veccomplex" 6
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; vecdiv is split on the size attribute: size "128" is handled by
; power10-qpdiv, everything else by power10-vecdiv.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

; Only qmul with size "128" is matched here.
(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mtvsr" 2
  (and (eq_attr "type" "mtvsr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfvsr" 2
  (and (eq_attr "type" "mfvsr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")


; Branch
; Branch is 2 cycles, grouped with STU for issue
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,STU_power10")

; fused_mtbc: 3 cycles, takes an even/odd dispatch slot pair
(define_insn_reservation "power10-fused-branch" 3
  (and (eq_attr "type" "fused_mtbc")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,STU_power10")


; Crypto
(define_insn_reservation "power10-crypto" 4
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")


; HTM
(define_insn_reservation "power10-htm" 2
  (and (eq_attr "type" "htmsimple,htm")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")


; DFP
; Use the minimum 12 cycle latency for all DFP insns
; The size "128" form differs only in taking an even/odd dispatch slot pair.
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-dfpq" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_power10")

; MMA
; Both MMA reservations use EXU_super_power10 (two execution units taken
; together); the prefixed form additionally needs an even/odd dispatch
; slot pair.
(define_insn_reservation "power10-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,EXU_super_power10")

(define_insn_reservation "power10-prefixed-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,EXU_super_power10")
; 4 cycle MMA->MMA latency
(define_bypass 4 "power10-mma,power10-prefixed-mma"
                 "power10-mma,power10-prefixed-mma")

