Home | History | Annotate | Line # | Download | only in rs6000
      1 ;; Scheduling description for IBM POWER9 processor.
      2 ;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
      3 ;;
      4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
      5 
      6 ;; This file is part of GCC.
      7 ;;
      8 ;; GCC is free software; you can redistribute it and/or modify it
      9 ;; under the terms of the GNU General Public License as published
     10 ;; by the Free Software Foundation; either version 3, or (at your
     11 ;; option) any later version.
     12 ;;
     13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
     14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     16 ;; License for more details.
     17 ;;
     18 ;; You should have received a copy of the GNU General Public License
     19 ;; along with GCC; see the file COPYING3.  If not see
     20 ;; <http://www.gnu.org/licenses/>.
     21 
     22 (define_automaton "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc")
        ; Unit-to-automaton assignment made by the declarations that follow:
        ;   power9lsu   - lsu0..lsu3
        ;   power9vsu   - vsu0..vsu3 plus the permute units prm0/prm1
        ;   power9misc  - fx_div0/1, bru, cryptu, dfu
        ;   power9fpdiv - fp_div0..fp_div3
        ;   power9dsp   - x0..x3, xa0/xa1, xb0/xb1, br0/br1 (the units used by
        ;                 the DU_* dispatch reservations)
     23 
     24 (define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
     25 (define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
     26 ; Two vector permute units, part of vsu
     27 (define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
     28 ; Two fixed point divide units, not pipelined
     29 (define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
     30 (define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
     31 ; Create a false unit for use by non-pipelined FP div/sqrt
        ; (these four units are only reserved through FP_DIV_power9 and
        ; VEC_DIV_power9)
     32 (define_cpu_unit "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9"
     33 		 "power9fpdiv")
     34 
     35 
        ; Dispatch units, all placed in the power9dsp automaton.
     36 (define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
     37 		  x2_power9,x3_power9,xb0_power9,xb1_power9,
     38 		  br0_power9,br1_power9" "power9dsp")
     39 
     40 
     41 ; Dispatch port reservations
     42 ;
     43 ; Power9 can dispatch a maximum of 6 iops per cycle with the following
     44 ; general restrictions (other restrictions also apply):
     45 ;   1) At most 2 iops per execution slice
     46 ;   2) At most 2 iops to the branch unit
     47 ; Note that insn position in a dispatch group of 6 insns does not imply which
     48 ; execution slice the insn is routed to.  The units are used to infer the
     49 ; conflicts that exist (i.e. an 'even' requirement will preclude dispatch
     50 ; with 2 insns with 'superslice' requirement).
     51 
     52 ; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
     53 ; are listed as separate units to allow those insns that preclude its use to
     54 ; still be scheduled two to a superslice while reserving the 3rd slot.  The
     55 ; same applies for xb0/xb1.
        ; '+' reserves the named units in the same cycle, so DU_xa_power9 and
        ; DU_xb_power9 each take their whole pair (xa0 and xa1, or xb0 and xb1).
     56 (define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
     57 (define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")
     58 
     59 ; Any execution slice dispatch
        ; Six alternatives: one of the four x units on its own, or a whole
        ; DU_xa_power9 / DU_xb_power9 pair.
     60 (define_reservation "DU_any_power9"
     61 		    "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|
     62 		     DU_xb_power9")
     63 
     64 ; Even slice, actually takes even/odd slots
        ; NOTE(review): the reservation string is identical to the one used by
        ; DU_super_power9 further down; only the name differs.
     65 (define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")
     66 
     67 ; Slice plus 3rd slot
        ; Pairs one x unit with one third-slot unit: x0+xa0, x1+xa1, x2+xb0 or
        ; x3+xb1.
     68 (define_reservation "DU_slice_3_power9"
     69 		    "x0_power9+xa0_power9|x1_power9+xa1_power9|
     70 		     x2_power9+xb0_power9|x3_power9+xb1_power9")
     71 
     72 ; Superslice
        ; Reserves x0+x1 or x2+x3 in the same cycle.
     73 (define_reservation "DU_super_power9"
     74 		    "x0_power9+x1_power9|x2_power9+x3_power9")
     75 
     76 ; 2-way cracked
        ; Six alternatives are listed.  Where DU_xa_power9 or DU_xb_power9
        ; appears, both units of that pair are reserved along with the x unit.
     77 (define_reservation "DU_C2_power9" "x0_power9+x1_power9|
     78 				    x1_power9+DU_xa_power9|
     79 				    x1_power9+x2_power9|
     80 				    DU_xa_power9+x2_power9|
     81 				    x2_power9+x3_power9|
     82 				    x3_power9+DU_xb_power9")
     83 
     84 ; 2-way cracked plus 3rd slot
        ; Three alternatives, each two x units plus a single third-slot unit:
        ; x0+x1+xa0, x1+x2+xa1 or x2+x3+xb0.
     85 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
     86 				      x1_power9+x2_power9+xa1_power9|
     87 				      x2_power9+x3_power9+xb0_power9")
     88 
     89 ; 3-way cracked (consumes whole decode/dispatch cycle)
        ; Reserves all ten units of the power9dsp automaton at once, including
        ; both branch ports.
     90 (define_reservation "DU_C3_power9"
     91 		    "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+
     92 		     x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")
     93 
     94 ; Branch ports
        ; Either one of the two br units.
     95 (define_reservation "DU_branch_power9" "br0_power9|br1_power9")
     96 
     97 
     98 ; Execution unit reservations
        ;   LSU_power9       - any one of the four LSUs
        ;   LSU_pair_power9  - two neighbouring LSUs (0+1, 1+2, 2+3 or 3+0)
        ;   VSU_power9       - any one of the four VSUs
        ;   VSU_super_power9 - vsu0+vsu1 or vsu2+vsu3
        ;   VSU_PRM_power9   - either permute unit
     99 (define_reservation "LSU_power9"
    100 		    "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")
    101 
    102 (define_reservation "LSU_pair_power9"
    103 		    "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|
    104 		     lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")
    105 
    106 (define_reservation "VSU_power9"
    107 		    "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")
    108 
    109 (define_reservation "VSU_super_power9"
    110 		    "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")
    111 
    112 (define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")
    113 
    114 ; Define the reservation to be used by FP div/sqrt which allows other insns
    115 ; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
    116 ; Note that the number of cycles blocked varies depending on insn, but we
    117 ; just use the same number for all in order to keep the number of DFA states
    118 ; reasonable.
        ; FP_DIV_power9 holds one of the four fp_div units for 8 cycles ("*8");
        ; VEC_DIV_power9 holds a pair of them (0+1 or 2+3) for 8 cycles.
    119 (define_reservation "FP_DIV_power9"
    120 		    "fp_div0_power9*8|fp_div1_power9*8|fp_div2_power9*8|
    121 		     fp_div3_power9*8")
    122 (define_reservation "VEC_DIV_power9"
    123 		    "fp_div0_power9*8+fp_div1_power9*8|
    124 		     fp_div2_power9*8+fp_div3_power9*8")
    125 
    126 
    127 ; LS Unit
        ; Reading a define_insn_reservation: the number after the name is the
        ; default latency, the (and ...) form selects the insns, and the final
        ; string is the unit reservation, where ',' moves on to the next cycle
        ; (dispatch reservation first, execution unit(s) second).
        ;
        ; Insns of type "load", split on the sign_extend and update attributes:
        ;   plain            - latency 4, any dispatch slot, one LSU
        ;   update           - latency 4, 2-way cracked, LSU and VSU together
        ;   sign-ext         - latency 6, 2-way cracked, one LSU
        ;   sign-ext, update - latency 6, 3-way cracked, LSU and VSU together
    128 (define_insn_reservation "power9-load" 4
    129   (and (eq_attr "type" "load")
    130        (eq_attr "sign_extend" "no")
    131        (eq_attr "update" "no")
    132        (eq_attr "cpu" "power9"))
    133   "DU_any_power9,LSU_power9")
    134 
    135 (define_insn_reservation "power9-load-update" 4
    136   (and (eq_attr "type" "load")
    137        (eq_attr "sign_extend" "no")
    138        (eq_attr "update" "yes")
    139        (eq_attr "cpu" "power9"))
    140   "DU_C2_power9,LSU_power9+VSU_power9")
    141 
    142 (define_insn_reservation "power9-load-ext" 6
    143   (and (eq_attr "type" "load")
    144        (eq_attr "sign_extend" "yes")
    145        (eq_attr "update" "no")
    146        (eq_attr "cpu" "power9"))
    147   "DU_C2_power9,LSU_power9")
    148 
    149 (define_insn_reservation "power9-load-ext-update" 6
    150   (and (eq_attr "type" "load")
    151        (eq_attr "sign_extend" "yes")
    152        (eq_attr "update" "yes")
    153        (eq_attr "cpu" "power9"))
    154   "DU_C3_power9,LSU_power9+VSU_power9")
    155 
        ; Insns of type "fpload", split on size (64 or 32) and update, followed
        ; by type "vecload".  Only sizes 64 and 32 are matched here.
        ;   size 64          - latency 4, slice plus 3rd slot, one LSU
        ;   size 64, update  - latency 4, 2-way cracked plus 3rd slot, LSU+VSU
        ;   size 32          - latency 6, 2-way cracked plus 3rd slot, one LSU
        ;   size 32, update  - latency 6, 3-way cracked, LSU+VSU
        ;   vecload          - latency 5, any dispatch slot, an LSU pair
    156 (define_insn_reservation "power9-fpload-double" 4
    157   (and (eq_attr "type" "fpload")
    158        (eq_attr "update" "no")
    159        (eq_attr "size" "64")
    160        (eq_attr "cpu" "power9"))
    161   "DU_slice_3_power9,LSU_power9")
    162 
    163 (define_insn_reservation "power9-fpload-update-double" 4
    164   (and (eq_attr "type" "fpload")
    165        (eq_attr "update" "yes")
    166        (eq_attr "size" "64")
    167        (eq_attr "cpu" "power9"))
    168   "DU_C2_3_power9,LSU_power9+VSU_power9")
    169 
    170 ; SFmode loads are cracked and have additional 2 cycles over DFmode
    171 (define_insn_reservation "power9-fpload-single" 6
    172   (and (eq_attr "type" "fpload")
    173        (eq_attr "update" "no")
    174        (eq_attr "size" "32")
    175        (eq_attr "cpu" "power9"))
    176   "DU_C2_3_power9,LSU_power9")
    177 
    178 (define_insn_reservation "power9-fpload-update-single" 6
    179   (and (eq_attr "type" "fpload")
    180        (eq_attr "update" "yes")
    181        (eq_attr "size" "32")
    182        (eq_attr "cpu" "power9"))
    183   "DU_C3_power9,LSU_power9+VSU_power9")
    184 
    185 (define_insn_reservation "power9-vecload" 5
    186   (and (eq_attr "type" "vecload")
    187        (eq_attr "cpu" "power9"))
    188   "DU_any_power9,LSU_pair_power9")
    189 
    190 ; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
        ; The non-update store reservations below carry a default latency of 0;
        ; the update forms carry 2 and reserve a VSU together with the LSU.
        ; NOTE(review): power9-store and power9-store-indexed differ only in
        ; the "indexed" test (same latency and reservation string); the same
        ; holds for the two update variants.
    191 (define_insn_reservation "power9-store" 0
    192   (and (eq_attr "type" "store")
    193        (eq_attr "update" "no")
    194        (eq_attr "indexed" "no")
    195        (eq_attr "cpu" "power9"))
    196   "DU_slice_3_power9,LSU_power9")
    197 
    198 (define_insn_reservation "power9-store-indexed" 0
    199   (and (eq_attr "type" "store")
    200        (eq_attr "update" "no")
    201        (eq_attr "indexed" "yes")
    202        (eq_attr "cpu" "power9"))
    203   "DU_slice_3_power9,LSU_power9")
    204 
    205 ; Update forms have 2 cycle latency for updated addr reg
    206 (define_insn_reservation "power9-store-update" 2
    207   (and (eq_attr "type" "store")
    208        (eq_attr "update" "yes")
    209        (eq_attr "indexed" "no")
    210        (eq_attr "cpu" "power9"))
    211   "DU_C2_3_power9,LSU_power9+VSU_power9")
    212 
    213 ; Update forms have 2 cycle latency for updated addr reg
    214 (define_insn_reservation "power9-store-update-indexed" 2
    215   (and (eq_attr "type" "store")
    216        (eq_attr "update" "yes")
    217        (eq_attr "indexed" "yes")
    218        (eq_attr "cpu" "power9"))
    219   "DU_C2_3_power9,LSU_power9+VSU_power9")
    220 
    221 (define_insn_reservation "power9-fpstore" 0
    222   (and (eq_attr "type" "fpstore")
    223        (eq_attr "update" "no")
    224        (eq_attr "cpu" "power9"))
    225   "DU_slice_3_power9,LSU_power9")
    226 
    227 ; Update forms have 2 cycle latency for updated addr reg
    228 (define_insn_reservation "power9-fpstore-update" 2
    229   (and (eq_attr "type" "fpstore")
    230        (eq_attr "update" "yes")
    231        (eq_attr "cpu" "power9"))
    232   "DU_C2_3_power9,LSU_power9+VSU_power9")
    233 
        ; Vector stores dispatch as a superslice and reserve an LSU pair.
    234 (define_insn_reservation "power9-vecstore" 0
    235   (and (eq_attr "type" "vecstore")
    236        (eq_attr "cpu" "power9"))
    237   "DU_super_power9,LSU_pair_power9")
    238 
    239 ; Store forwarding latency is 6
        ; Both name lists are glob patterns, so this covers every reservation
        ; whose name matches power9-*store* feeding every one matching
        ; power9-*load*.  The names power9-larx and power9-stcx match neither
        ; pattern.
    240 (define_bypass 6 "power9-*store*" "power9-*load*")
    241 
        ; Reservations for the load_l, store_c and sync/isync insn types.
        ;   load_l      - latency 4, any dispatch slot, one LSU
        ;   store_c     - latency 2, 2-way cracked plus 3rd slot, LSU and VSU
        ;   sync, isync - latency 4, any dispatch slot, one LSU
    242 (define_insn_reservation "power9-larx" 4
    243   (and (eq_attr "type" "load_l")
    244        (eq_attr "cpu" "power9"))
    245   "DU_any_power9,LSU_power9")
    246 
    247 (define_insn_reservation "power9-stcx" 2
    248   (and (eq_attr "type" "store_c")
    249        (eq_attr "cpu" "power9"))
    250   "DU_C2_3_power9,LSU_power9+VSU_power9")
    251 
    252 (define_insn_reservation "power9-sync" 4
    253   (and (eq_attr "type" "sync,isync")
    254        (eq_attr "cpu" "power9"))
    255   "DU_any_power9,LSU_power9")
    256 
    257 
    258 ; VSU Execution Unit
    259 
    260 ; Fixed point ops
    261 
    262 ; Most ALU insns are simple 2 cycle, including record form
        ; Covers types add, exts, integer, logical and isel: any dispatch slot,
        ; then one VSU.
    263 (define_insn_reservation "power9-alu" 2
    264   (and (eq_attr "type" "add,exts,integer,logical,isel")
    265        (eq_attr "cpu" "power9"))
    266   "DU_any_power9,VSU_power9")
    267 ; 5 cycle CR latency
        ; The bypass replaces the default latency of 2 with 5 when the consumer
        ; is one of power9-crlogical, power9-mfcr or power9-mfcrf.
    268 (define_bypass 5 "power9-alu"
    269 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    270 
    271 ; Rotate/shift prevent use of third slot
        ; Types insert and shift with dot = no: latency 2, slice plus 3rd slot,
        ; then one VSU.
    272 (define_insn_reservation "power9-rot" 2
    273   (and (eq_attr "type" "insert,shift")
    274        (eq_attr "dot" "no")
    275        (eq_attr "cpu" "power9"))
    276   "DU_slice_3_power9,VSU_power9")
    277 
    278 ; Record form rotate/shift are cracked
        ; Same types with dot = yes: 2-way cracked plus 3rd slot at dispatch.
    279 (define_insn_reservation "power9-cracked-alu" 2
    280   (and (eq_attr "type" "insert,shift")
    281        (eq_attr "dot" "yes")
    282        (eq_attr "cpu" "power9"))
    283   "DU_C2_3_power9,VSU_power9")
    284 ; 7 cycle CR latency
        ; Applies when the consumer is power9-crlogical, power9-mfcr or
        ; power9-mfcrf.
    285 (define_bypass 7 "power9-cracked-alu"
    286 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    287 
        ; Types cntlz, popcnt and trap: latency 3, any dispatch slot, one VSU.
    288 (define_insn_reservation "power9-alu2" 3
    289   (and (eq_attr "type" "cntlz,popcnt,trap")
    290        (eq_attr "cpu" "power9"))
    291   "DU_any_power9,VSU_power9")
    292 ; 6 cycle CR latency
    293 (define_bypass 6 "power9-alu2"
    294 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    295 
        ; Type cmp: latency 2, any dispatch slot, one VSU.  No define_bypass
        ; names power9-cmp as producer in this part of the file.
    296 (define_insn_reservation "power9-cmp" 2
    297   (and (eq_attr "type" "cmp")
    298        (eq_attr "cpu" "power9"))
    299   "DU_any_power9,VSU_power9")
    300 
    301 
    302 ; Treat 'two' and 'three' types as 2 or 3 way cracked
        ; Latencies are 4 and 6 respectively; each reserves a single VSU after
        ; dispatch, and 'three' takes the whole dispatch cycle via DU_C3_power9.
    303 (define_insn_reservation "power9-two" 4
    304   (and (eq_attr "type" "two")
    305        (eq_attr "cpu" "power9"))
    306   "DU_C2_power9,VSU_power9")
    307 
    308 (define_insn_reservation "power9-three" 6
    309   (and (eq_attr "type" "three")
    310        (eq_attr "cpu" "power9"))
    311   "DU_C3_power9,VSU_power9")
    312 
        ; Type mul, split on the dot attribute.  Both forms have latency 5 and
        ; reserve one VSU; the dot = yes form dispatches as 2-way cracked plus
        ; 3rd slot instead of slice plus 3rd slot.
    313 (define_insn_reservation "power9-mul" 5
    314   (and (eq_attr "type" "mul")
    315        (eq_attr "dot" "no")
    316        (eq_attr "cpu" "power9"))
    317   "DU_slice_3_power9,VSU_power9")
    318 
    319 (define_insn_reservation "power9-mul-compare" 5
    320   (and (eq_attr "type" "mul")
    321        (eq_attr "dot" "yes")
    322        (eq_attr "cpu" "power9"))
    323   "DU_C2_3_power9,VSU_power9")
    324 ; 10 cycle CR latency
        ; Applies when the consumer is power9-crlogical, power9-mfcr or
        ; power9-mfcrf.
    325 (define_bypass 10 "power9-mul-compare"
    326 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    327 
    328 ; Fixed point divides reserve the divide units for a minimum of 8 cycles
        ; Type div, split on size: latency 16 for size 32 and 24 for size 64.
        ; Both dispatch via DU_even_power9 and then hold either fx_div unit for
        ; 8 cycles; no VSU is reserved by these two reservations.
    329 (define_insn_reservation "power9-idiv" 16
    330   (and (eq_attr "type" "div")
    331        (eq_attr "size" "32")
    332        (eq_attr "cpu" "power9"))
    333   "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
    334 
    335 (define_insn_reservation "power9-ldiv" 24
    336   (and (eq_attr "type" "div")
    337        (eq_attr "size" "64")
    338        (eq_attr "cpu" "power9"))
    339   "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
    340 
        ; Condition-register and LR/CTR move reservations.  All reserve one VSU
        ; after dispatch.  power9-crlogical, power9-mfcr and power9-mfcrf are
        ; the consumer names listed in the CR-latency bypasses of this file.
        ;   cr_logical - latency 2, any dispatch slot
        ;   mfcrf      - latency 2, any dispatch slot
        ;   mfcr       - latency 6, 3-way cracked
        ;   mtcr       - latency 2, any dispatch slot
        ;   mtjmpr     - latency 5, any dispatch slot
    341 (define_insn_reservation "power9-crlogical" 2
    342   (and (eq_attr "type" "cr_logical")
    343        (eq_attr "cpu" "power9"))
    344   "DU_any_power9,VSU_power9")
    345 
    346 (define_insn_reservation "power9-mfcrf" 2
    347   (and (eq_attr "type" "mfcrf")
    348        (eq_attr "cpu" "power9"))
    349   "DU_any_power9,VSU_power9")
    350 
    351 (define_insn_reservation "power9-mfcr" 6
    352   (and (eq_attr "type" "mfcr")
    353        (eq_attr "cpu" "power9"))
    354   "DU_C3_power9,VSU_power9")
    355 
    356 ; Should differentiate between 1 cr field and > 1 since target of > 1 cr
    357 ; is cracked
    358 (define_insn_reservation "power9-mtcr" 2
    359   (and (eq_attr "type" "mtcr")
    360        (eq_attr "cpu" "power9"))
    361   "DU_any_power9,VSU_power9")
    362 
    363 ; Move to LR/CTR are executed in VSU
    364 (define_insn_reservation "power9-mtjmpr" 5
    365   (and (eq_attr "type" "mtjmpr")
    366        (eq_attr "cpu" "power9"))
    367   "DU_any_power9,VSU_power9")
    368 
    369 ; Floating point/Vector ops
        ; The three reservations below all dispatch as slice plus 3rd slot and
        ; reserve one VSU; they differ only in matched types and latency:
        ; fpsimple 2, fp and dmul 5, fpcompare 3.
    370 (define_insn_reservation "power9-fpsimple" 2
    371   (and (eq_attr "type" "fpsimple")
    372        (eq_attr "cpu" "power9"))
    373   "DU_slice_3_power9,VSU_power9")
    374 
    375 (define_insn_reservation "power9-fp" 5
    376   (and (eq_attr "type" "fp,dmul")
    377        (eq_attr "cpu" "power9"))
    378   "DU_slice_3_power9,VSU_power9")
    379 
    380 (define_insn_reservation "power9-fpcompare" 3
    381   (and (eq_attr "type" "fpcompare")
    382        (eq_attr "cpu" "power9"))
    383   "DU_slice_3_power9,VSU_power9")
    384 
    385 ; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
    386 ; div/sqrt insns, but for the most part do not block pipelined ops.
        ; Each reservation has three stages: dispatch, one cycle on a VSU, then
        ; FP_DIV_power9 (one fp_div unit held for 8 cycles).  Latencies: sdiv
        ; 22, ddiv 27, ssqrt 26, dsqrt 36.
        ; NOTE(review): the ssqrt reservation is named power9-sqrt, not
        ; power9-ssqrt.
    387 (define_insn_reservation "power9-sdiv" 22
    388   (and (eq_attr "type" "sdiv")
    389        (eq_attr "cpu" "power9"))
    390   "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
    391 
    392 (define_insn_reservation "power9-ddiv" 27
    393   (and (eq_attr "type" "ddiv")
    394        (eq_attr "cpu" "power9"))
    395   "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
    396 
    397 (define_insn_reservation "power9-sqrt" 26
    398   (and (eq_attr "type" "ssqrt")
    399        (eq_attr "cpu" "power9"))
    400   "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
    401 
    402 (define_insn_reservation "power9-dsqrt" 36
    403   (and (eq_attr "type" "dsqrt")
    404        (eq_attr "cpu" "power9"))
    405   "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
    406 
        ; Vector reservations that dispatch as a superslice and reserve a VSU
        ; pair (VSU_super_power9).  Latencies: 2 for vecmove, veclogical,
        ; vecexts and veccmpfx; 3 for veccmp and vecsimple; 7 for vecfloat and
        ; vecdouble when size is not 128.
    407 (define_insn_reservation "power9-vec-2cyc" 2
    408   (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
    409        (eq_attr "cpu" "power9"))
    410   "DU_super_power9,VSU_super_power9")
    411 
    412 (define_insn_reservation "power9-veccmp" 3
    413   (and (eq_attr "type" "veccmp")
    414        (eq_attr "cpu" "power9"))
    415   "DU_super_power9,VSU_super_power9")
    416 
    417 (define_insn_reservation "power9-vecsimple" 3
    418   (and (eq_attr "type" "vecsimple")
    419        (eq_attr "cpu" "power9"))
    420   "DU_super_power9,VSU_super_power9")
    421 
    422 (define_insn_reservation "power9-vecnormal" 7
    423   (and (eq_attr "type" "vecfloat,vecdouble")
    424        (eq_attr "size" "!128")
    425        (eq_attr "cpu" "power9"))
    426   "DU_super_power9,VSU_super_power9")
    427 
    428 ; Quad-precision FP ops, execute in DFU
        ; Matches the same types as power9-vecnormal (vecfloat, vecdouble) but
        ; with size 128; latency 12, superslice dispatch, then dfu_power9 for
        ; one cycle.
    429 (define_insn_reservation "power9-qp" 12
    430   (and (eq_attr "type" "vecfloat,vecdouble")
    431        (eq_attr "size" "128")
    432        (eq_attr "cpu" "power9"))
    433   "DU_super_power9,dfu_power9")
    434 
        ; Remaining vector reservations; all dispatch as a superslice.
        ;   vecperm    - latency 3, one permute unit (VSU_PRM_power9)
        ;   veccomplex - latency 7, a VSU pair
        ;   vecfdiv    - latency 24, a VSU pair, then VEC_DIV_power9
        ;   vecdiv     - latency 27 when size is not 128, a VSU pair, then
        ;                VEC_DIV_power9
    435 (define_insn_reservation "power9-vecperm" 3
    436   (and (eq_attr "type" "vecperm")
    437        (eq_attr "cpu" "power9"))
    438   "DU_super_power9,VSU_PRM_power9")
    439 
    440 (define_insn_reservation "power9-veccomplex" 7
    441   (and (eq_attr "type" "veccomplex")
    442        (eq_attr "cpu" "power9"))
    443   "DU_super_power9,VSU_super_power9")
    444 
    445 (define_insn_reservation "power9-vecfdiv" 24
    446   (and (eq_attr "type" "vecfdiv")
    447        (eq_attr "cpu" "power9"))
    448   "DU_super_power9,VSU_super_power9,VEC_DIV_power9")
    449 
    450 (define_insn_reservation "power9-vecdiv" 27
    451   (and (eq_attr "type" "vecdiv")
    452        (eq_attr "size" "!128")
    453        (eq_attr "cpu" "power9"))
    454   "DU_super_power9,VSU_super_power9,VEC_DIV_power9")
    455 
    456 ; Use 8 for DFU reservation on QP div/mul to limit DFA state size
        ; Latencies are 56 (vecdiv with size 128) and 24 (qmul with size 128);
        ; both hold dfu_power9 for 8 cycles after a superslice dispatch.
    457 (define_insn_reservation "power9-qpdiv" 56
    458   (and (eq_attr "type" "vecdiv")
    459        (eq_attr "size" "128")
    460        (eq_attr "cpu" "power9"))
    461   "DU_super_power9,dfu_power9*8")
    462 
    463 (define_insn_reservation "power9-qpmul" 24
    464   (and (eq_attr "type" "qmul")
    465        (eq_attr "size" "128")
    466        (eq_attr "cpu" "power9"))
    467   "DU_super_power9,dfu_power9*8")
    468 
        ; Types mtvsr and mfvsr: both have latency 2, dispatch as slice plus
        ; 3rd slot and reserve one VSU.
    469 (define_insn_reservation "power9-mtvsr" 2
    470   (and (eq_attr "type" "mtvsr")
    471        (eq_attr "cpu" "power9"))
    472   "DU_slice_3_power9,VSU_power9")
    473 
    474 (define_insn_reservation "power9-mfvsr" 2
    475   (and (eq_attr "type" "mfvsr")
    476        (eq_attr "cpu" "power9"))
    477   "DU_slice_3_power9,VSU_power9")
    478 
    479 
    480 ; Branch Unit
    481 ; Move from LR/CTR are executed in BRU but consume a writeback port from an
    482 ; execution slice.
        ; Hence a VSU is reserved together with bru_power9 in the cycle after
        ; dispatch.  Dispatch goes through a branch port; latency is 6.
    483 (define_insn_reservation "power9-mfjmpr" 6
    484   (and (eq_attr "type" "mfjmpr")
    485        (eq_attr "cpu" "power9"))
    486   "DU_branch_power9,bru_power9+VSU_power9")
    487 
    488 ; Branch is 2 cycles
        ; Types jmpreg and branch: a branch port at dispatch, then bru_power9.
    489 (define_insn_reservation "power9-branch" 2
    490   (and (eq_attr "type" "jmpreg,branch")
    491        (eq_attr "cpu" "power9"))
    492   "DU_branch_power9,bru_power9")
    493 
    494 
    495 ; Crypto Unit
        ; Type crypto: latency 6, superslice dispatch, then cryptu_power9 for
        ; one cycle.
    496 (define_insn_reservation "power9-crypto" 6
    497   (and (eq_attr "type" "crypto")
    498        (eq_attr "cpu" "power9"))
    499   "DU_super_power9,cryptu_power9")
    500 
    501 
    502 ; HTM Unit
        ; No dedicated HTM unit is declared in this file: type htm reserves an
        ; LSU (latency 4, 2-way cracked dispatch) and type htmsimple reserves a
        ; VSU (latency 2, any dispatch slot).
    503 (define_insn_reservation "power9-htm" 4
    504   (and (eq_attr "type" "htm")
    505        (eq_attr "cpu" "power9"))
    506   "DU_C2_power9,LSU_power9")
    507 
    508 (define_insn_reservation "power9-htm-simple" 2
    509   (and (eq_attr "type" "htmsimple")
    510        (eq_attr "cpu" "power9"))
    511   "DU_any_power9,VSU_power9")
    512 
    513 
    514 ; DFP Unit
        ; Type dfp: latency 12, even-slice dispatch, then dfu_power9 for one
        ; cycle.  This is the same dfu_power9 unit that the quad-precision
        ; reservations (power9-qp, power9-qpdiv, power9-qpmul) reserve.
    515 (define_insn_reservation "power9-dfp" 12
    516   (and (eq_attr "type" "dfp")
    517        (eq_attr "cpu" "power9"))
    518   "DU_even_power9,dfu_power9")
    519 
    520