;; Scheduling description for IBM POWER9 processor.
;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
;;
;; Contributed by Pat Haugen (pthaugen@us.ibm.com).

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

; Five automata are declared; every cpu unit below is assigned to exactly
; one of them via the second operand of its define_cpu_unit.
(define_automaton "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc")

; Four LSU units and four VSU units, each group in its own automaton.
(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
; Two vector permute units, part of vsu
(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
; Two fixed point divide units, not pipelined
(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
; Branch, crypto and DFU units share the power9misc automaton.
(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
; Create a false unit for use by non-pipelined FP div/sqrt
(define_cpu_unit "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9"
                 "power9fpdiv")


; Dispatch units: x0-x3, the xa0/xa1 and xb0/xb1 third-slot units, and the
; two branch ports br0/br1.  They are only used through the DU_* reservations.
(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9, x2_power9,x3_power9,xb0_power9,xb1_power9, br0_power9,br1_power9"
                 "power9dsp")


; Dispatch port reservations
;
; Power9 can dispatch a maximum of 6 iops per cycle with the following
; general restrictions (other restrictions also apply):
;   1) At most 2 iops per execution slice
;   2) At most 2 iops to the branch unit
; Note that insn position in a dispatch group of 6 insns does not imply which
; execution slice the insn is routed to.  The units are used to model the
; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
; with 2 insns with 'superslice' requirement).
;
; Reservation syntax reminder: ',' advances to the next cycle, '|' separates
; alternatives, '+' reserves units in the same cycle and '*N' repeats a
; reservation for N consecutive cycles.

; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
; are listed as separate units to allow those insns that preclude its use to
; still be scheduled two to a superslice while reserving the 3rd slot.  The
; same applies for xb0/xb1.
(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")

; Any execution slice dispatch
(define_reservation "DU_any_power9"
                    "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9| DU_xb_power9")

; Even slice, actually takes even/odd slots
(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")

; Slice plus 3rd slot
(define_reservation "DU_slice_3_power9"
                    "x0_power9+xa0_power9|x1_power9+xa1_power9| x2_power9+xb0_power9|x3_power9+xb1_power9")

; Superslice (same unit pairs as DU_even_power9)
(define_reservation "DU_super_power9"
                    "x0_power9+x1_power9|x2_power9+x3_power9")

; 2-way cracked: two adjacent dispatch slots
(define_reservation "DU_C2_power9"
                    "x0_power9+x1_power9| x1_power9+DU_xa_power9| x1_power9+x2_power9| DU_xa_power9+x2_power9| x2_power9+x3_power9| x3_power9+DU_xb_power9")

; 2-way cracked plus 3rd slot
(define_reservation "DU_C2_3_power9"
                    "x0_power9+x1_power9+xa0_power9| x1_power9+x2_power9+xa1_power9| x2_power9+x3_power9+xb0_power9")

; 3-way cracked (consumes whole decode/dispatch cycle): every dispatch unit,
; including both branch ports, is reserved.
(define_reservation "DU_C3_power9"
                    "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+ x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")

; Branch ports
(define_reservation "DU_branch_power9" "br0_power9|br1_power9")


; Execution unit reservations

; Any one of the four LSUs
(define_reservation "LSU_power9"
                    "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")

; Two neighbouring LSUs (the pairing wraps around from lsu3 to lsu0)
(define_reservation "LSU_pair_power9"
                    "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9| lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")

; Any one of the four VSUs
(define_reservation "VSU_power9"
                    "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")

; A VSU pair: vsu0+vsu1 or vsu2+vsu3
(define_reservation "VSU_super_power9"
                    "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")

; Either vector permute unit
(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")

; Define the reservation to be used by FP div/sqrt which allows other insns
; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
; Note that the number of cycles blocked varies depending on insn, but we
; just use the same number for all in order to keep the number of DFA states
; reasonable.
(define_reservation "FP_DIV_power9"
                    "fp_div0_power9*8|fp_div1_power9*8|fp_div2_power9*8| fp_div3_power9*8")
; Vector form holds a pair of the false div units for the same 8 cycles.
(define_reservation "VEC_DIV_power9"
                    "fp_div0_power9*8+fp_div1_power9*8| fp_div2_power9*8+fp_div3_power9*8")


; LS Unit
;
; Each define_insn_reservation gives: name, default latency, the insn
; attribute condition it applies to, and the units it reserves.

; Plain loads (no sign extension, no update)
(define_insn_reservation "power9-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

; Update-form loads are 2-way cracked and also reserve a VSU
(define_insn_reservation "power9-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9+VSU_power9")

; Sign-extending loads are 2-way cracked, 6 cycle latency
(define_insn_reservation "power9-load-ext" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

; Sign-extending update-form loads are 3-way cracked
(define_insn_reservation "power9-load-ext-update" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")

; 64-bit FP loads
(define_insn_reservation "power9-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; 64-bit FP loads, update form
(define_insn_reservation "power9-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; SFmode loads are cracked and have additional 2 cycles over DFmode
(define_insn_reservation "power9-fpload-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9")

; 32-bit FP loads, update form: 3-way cracked
(define_insn_reservation "power9-fpload-update-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")

; Vector loads reserve an LSU pair
(define_insn_reservation "power9-vecload" 5
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_pair_power9")

; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
(define_insn_reservation "power9-store" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Indexed stores currently use the same reservation as non-indexed stores
(define_insn_reservation "power9-store-indexed" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update-indexed" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; FP stores, non-update form
(define_insn_reservation "power9-fpstore" 0
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-fpstore-update" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; Vector stores take a superslice dispatch and an LSU pair
(define_insn_reservation "power9-vecstore" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,LSU_pair_power9")

; Store forwarding latency is 6.  The glob patterns match every reservation
; whose name contains "store" (producer) or "load" (consumer).
(define_bypass 6 "power9-*store*" "power9-*load*")

; Load-and-reserve
(define_insn_reservation "power9-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

; Store-conditional
(define_insn_reservation "power9-stcx" 2
  (and (eq_attr "type" "store_c")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; sync/isync are modelled as occupying an LSU
(define_insn_reservation "power9-sync" 4
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")


; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power9-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; 5 cycle CR latency
(define_bypass 5 "power9-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

; Rotate/shift prevent use of third slot
(define_insn_reservation "power9-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; Record form rotate/shift are cracked
; Record-form (dot) insert/shift: 2-way cracked plus 3rd slot dispatch
(define_insn_reservation "power9-cracked-alu" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,VSU_power9")
; 7 cycle CR latency
(define_bypass 7 "power9-cracked-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

; 3 cycle ALU ops: count leading zeros, popcount, trap
(define_insn_reservation "power9-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; 6 cycle CR latency
(define_bypass 6 "power9-alu2"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

; Compares
(define_insn_reservation "power9-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")


; Treat 'two' and 'three' types as 2 or 3 way cracked
(define_insn_reservation "power9-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,VSU_power9")

(define_insn_reservation "power9-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")

; Multiply, non-record form
(define_insn_reservation "power9-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; Multiply, record form: cracked
(define_insn_reservation "power9-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,VSU_power9")
; 10 cycle CR latency
(define_bypass 10 "power9-mul-compare"
                  "power9-crlogical,power9-mfcr,power9-mfcrf")

; Fixed point divides reserve the divide units for a minimum of 8 cycles.
; In the reservation string ',' binds loosest, so after the dispatch cycle
; either fx_div0 or fx_div1 is held for 8 cycles.
(define_insn_reservation "power9-idiv" 16
  (and (eq_attr "type" "div")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

; 64-bit divide: same unit usage as the 32-bit form, longer latency
(define_insn_reservation "power9-ldiv" 24
  (and (eq_attr "type" "div")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

; CR logical ops.  This reservation is also named as a consumer in the
; CR-latency bypasses of the fixed point reservations.
(define_insn_reservation "power9-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Move from a single CR field
(define_insn_reservation "power9-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Move from the whole CR: 3-way cracked dispatch, 6 cycle latency
(define_insn_reservation "power9-mfcr" 6
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
(define_insn_reservation "power9-mtcr" 2
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Move to LR/CTR are executed in VSU
(define_insn_reservation "power9-mtjmpr" 5
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Floating point/Vector ops

; Simple FP ops: 2 cycles
(define_insn_reservation "power9-fpsimple" 2
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; General FP ops and dmul: 5 cycles
(define_insn_reservation "power9-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; FP compares: 3 cycles
(define_insn_reservation "power9-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
; div/sqrt insns, but for the most part do not block pipelined ops.
; Scalar FP divide/sqrt: one dispatch cycle, one cycle on a VSU, then a
; false fp_div unit is held for 8 cycles (FP_DIV_power9) so that other
; div/sqrt insns are blocked while pipelined VSU ops can still issue.
(define_insn_reservation "power9-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

(define_insn_reservation "power9-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; Note: the reservation is named "power9-sqrt" but matches type "ssqrt".
(define_insn_reservation "power9-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

(define_insn_reservation "power9-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; Vector ops below take a superslice dispatch and (unless noted) a VSU pair.

; 2 cycle vector ops
(define_insn_reservation "power9-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; 3 cycle vector ops
(define_insn_reservation "power9-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

(define_insn_reservation "power9-vecsimple" 3
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Vector float/double ops of any size other than 128
(define_insn_reservation "power9-vecnormal" 7
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Quad-precision FP ops, execute in DFU
(define_insn_reservation "power9-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9")

; Permutes use one of the two permute units instead of a VSU pair
(define_insn_reservation "power9-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_PRM_power9")

(define_insn_reservation "power9-veccomplex" 7
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Vector divides additionally hold a pair of false fp_div units for 8 cycles
(define_insn_reservation "power9-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")

(define_insn_reservation "power9-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")

; Use 8 for DFU reservation on QP div/mul to limit DFA state size
(define_insn_reservation "power9-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9*8")

(define_insn_reservation "power9-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9*8")

; Moves to and from VSRs
(define_insn_reservation "power9-mtvsr" 2
  (and (eq_attr "type" "mtvsr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-mfvsr" 2
  (and (eq_attr "type" "mfvsr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")


; Branch Unit
; Move from LR/CTR are executed in BRU but consume a writeback port from an
; execution slice.
; mfjmpr dispatches through a branch port and reserves the BRU together
; with one VSU in the same cycle.
(define_insn_reservation "power9-mfjmpr" 6
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9+VSU_power9")

; Branch is 2 cycles
(define_insn_reservation "power9-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9")


; Crypto Unit
(define_insn_reservation "power9-crypto" 6
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,cryptu_power9")


; HTM Unit
; No dedicated HTM cpu unit is declared: "htm" insns reserve an LSU and
; "htmsimple" insns reserve a VSU.
(define_insn_reservation "power9-htm" 4
  (and (eq_attr "type" "htm")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

(define_insn_reservation "power9-htm-simple" 2
  (and (eq_attr "type" "htmsimple")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")


; DFP Unit
(define_insn_reservation "power9-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,dfu_power9")
