Home | History | Annotate | Line # | Download | only in rs6000
power9.md revision 1.3
      1 ;; Scheduling description for IBM POWER9 processor.
      2 ;; Copyright (C) 2016-2017 Free Software Foundation, Inc.
      3 ;;
      4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
      5 
      6 ;; This file is part of GCC.
      7 ;;
      8 ;; GCC is free software; you can redistribute it and/or modify it
      9 ;; under the terms of the GNU General Public License as published
     10 ;; by the Free Software Foundation; either version 3, or (at your
     11 ;; option) any later version.
     12 ;;
     13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
     14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     16 ;; License for more details.
     17 ;;
     18 ;; You should have received a copy of the GNU General Public License
     19 ;; along with GCC; see the file COPYING3.  If not see
     20 ;; <http://www.gnu.org/licenses/>.
     21 
     22 (define_automaton "power9dsp,power9lsu,power9vsu,power9misc")  ; automata that the cpu units declared below are assigned to
     23 
     24 (define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")  ; four lsu units
     25 (define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")  ; four vsu units
     26 ; Two vector permute units, part of vsu (declared in the same power9vsu automaton)
     27 (define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
     28 ; Two fixed point divide units, not pipelined
     29 (define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
     30 (define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")  ; one unit each: bru, cryptu, dfu
     31 
     32 (define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
     33 		  x2_power9,x3_power9,xb0_power9,xb1_power9,
     34 		  br0_power9,br1_power9" "power9dsp")  ; the ten dispatch units used by the DU_* reservations
     35 
     36 
     37 ; Dispatch port reservations
     38 ;
     39 ; Power9 can dispatch a maximum of 6 iops per cycle with the following
     40 ; general restrictions (other restrictions also apply):
     41 ;   1) At most 2 iops per execution slice
     42 ;   2) At most 2 iops to the branch unit
     43 ; Note that insn position in a dispatch group of 6 insns does not imply which
     44 ; execution slice the insn is routed to.  The units are used to model the
     45 ; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
     46 ; with 2 insns with 'superslice' requirement).
     47 
     48 ; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
     49 ; are listed as separate units to allow those insns that preclude its use to
     50 ; still be scheduled two to a superslice while reserving the 3rd slot.  The
     51 ; same applies for xb0/xb1.
     52 (define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")  ; both xa units in the same cycle
     53 (define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")  ; both xb units in the same cycle
     54 
     55 ; Any execution slice dispatch: one of x0..x3, or a whole 3rd port (DU_xa/DU_xb)
     56 (define_reservation "DU_any_power9"
     57 		    "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|
     58 		     DU_xb_power9")
     59 
     60 ; Even slice, actually takes even/odd slots (x0+x1 or x2+x3); same regexp as DU_super_power9
     61 (define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")
     62 
     63 ; Slice plus 3rd slot: one x unit together with one xa/xb unit, in the pairings listed
     64 (define_reservation "DU_slice_3_power9"
     65 		    "x0_power9+xa0_power9|x1_power9+xa1_power9|
     66 		     x2_power9+xb0_power9|x3_power9+xb1_power9")
     67 
     68 ; Superslice: both x units of a pair in the same cycle, x0+x1 or x2+x3
     69 (define_reservation "DU_super_power9"
     70 		    "x0_power9+x1_power9|x2_power9+x3_power9")
     71 
     72 ; 2-way cracked: two dispatch resources in the same cycle, one of the six pairings listed
     73 (define_reservation "DU_C2_power9" "x0_power9+x1_power9|
     74 				    x1_power9+DU_xa_power9|
     75 				    x1_power9+x2_power9|
     76 				    DU_xa_power9+x2_power9|
     77 				    x2_power9+x3_power9|
     78 				    x3_power9+DU_xb_power9")
     79 
     80 ; 2-way cracked plus 3rd slot: two x units plus xa0 or xb0, one of the four combinations listed
     81 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
     82 				      x1_power9+x2_power9+xa0_power9|
     83 				      x1_power9+x2_power9+xb0_power9|
     84 				      x2_power9+x3_power9+xb0_power9")
     85 
     86 ; 3-way cracked (consumes whole decode/dispatch cycle): reserves all ten dispatch units at once
     87 (define_reservation "DU_C3_power9"
     88 		    "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+
     89 		     x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")
     90 
     91 ; Branch ports: either br0 or br1
     92 (define_reservation "DU_branch_power9" "br0_power9|br1_power9")
     93 
     94 
     95 ; Execution unit reservations
     96 (define_reservation "LSU_power9"
     97 		    "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")  ; any one of the four lsu units
     98 
     99 (define_reservation "LSU_pair_power9"
    100 		    "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|
    101 		     lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")  ; two lsu units at once: 0+1, 1+2, 2+3 or 3+0
    102 
    103 (define_reservation "VSU_power9"
    104 		    "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")  ; any one of the four vsu units
    105 
    106 (define_reservation "VSU_super_power9"
    107 		    "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")  ; a vsu pair at once: 0+1 or 2+3
    108 
    109 (define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")  ; either of the two permute units
    110 
    111 
    112 ; LS Unit
    113 (define_insn_reservation "power9-load" 4  ; type load, no sign-extend, no update: default latency 4
    114   (and (eq_attr "type" "load")
    115        (eq_attr "sign_extend" "no")
    116        (eq_attr "update" "no")
    117        (eq_attr "cpu" "power9"))
    118   "DU_any_power9,LSU_power9")  ; any dispatch slot, then one lsu unit on the next cycle
    119 
    120 (define_insn_reservation "power9-load-update" 4  ; type load with update, no sign-extend: default latency 4
    121   (and (eq_attr "type" "load")
    122        (eq_attr "sign_extend" "no")
    123        (eq_attr "update" "yes")
    124        (eq_attr "cpu" "power9"))
    125   "DU_C2_power9,LSU_power9+VSU_power9")  ; 2-way cracked dispatch, then an lsu and a vsu unit together
    126 
    127 (define_insn_reservation "power9-load-ext" 6  ; sign-extending load, no update: default latency 6
    128   (and (eq_attr "type" "load")
    129        (eq_attr "sign_extend" "yes")
    130        (eq_attr "update" "no")
    131        (eq_attr "cpu" "power9"))
    132   "DU_C2_power9,LSU_power9")  ; 2-way cracked dispatch, then one lsu unit
    133 
    134 (define_insn_reservation "power9-load-ext-update" 6  ; sign-extending load with update: default latency 6
    135   (and (eq_attr "type" "load")
    136        (eq_attr "sign_extend" "yes")
    137        (eq_attr "update" "yes")
    138        (eq_attr "cpu" "power9"))
    139   "DU_C3_power9,LSU_power9+VSU_power9")  ; 3-way cracked dispatch (all dispatch units), then an lsu and a vsu unit together
    140 
    141 (define_insn_reservation "power9-fpload-double" 4  ; type fpload of size 64, no update: default latency 4
    142   (and (eq_attr "type" "fpload")
    143        (eq_attr "update" "no")
    144        (eq_attr "size" "64")
    145        (eq_attr "cpu" "power9"))
    146   "DU_slice_3_power9,LSU_power9")  ; slice plus 3rd slot, then one lsu unit
    147 
    148 (define_insn_reservation "power9-fpload-update-double" 4  ; type fpload of size 64 with update: default latency 4
    149   (and (eq_attr "type" "fpload")
    150        (eq_attr "update" "yes")
    151        (eq_attr "size" "64")
    152        (eq_attr "cpu" "power9"))
    153   "DU_C2_3_power9,LSU_power9+VSU_power9")  ; 2-way cracked plus 3rd slot, then an lsu and a vsu unit together
    154 
    155 ; SFmode loads are cracked and have additional 2 cycles over DFmode (latency 6 here vs. 4 for size 64)
    156 (define_insn_reservation "power9-fpload-single" 6
    157   (and (eq_attr "type" "fpload")
    158        (eq_attr "update" "no")
    159        (eq_attr "size" "32")
    160        (eq_attr "cpu" "power9"))
    161   "DU_C2_3_power9,LSU_power9")  ; 2-way cracked plus 3rd slot, then one lsu unit
    162 
    163 (define_insn_reservation "power9-fpload-update-single" 6  ; type fpload of size 32 with update: default latency 6
    164   (and (eq_attr "type" "fpload")
    165        (eq_attr "update" "yes")
    166        (eq_attr "size" "32")
    167        (eq_attr "cpu" "power9"))
    168   "DU_C3_power9,LSU_power9+VSU_power9")  ; 3-way cracked dispatch, then an lsu and a vsu unit together
    169 
    170 (define_insn_reservation "power9-vecload" 5  ; type vecload: default latency 5
    171   (and (eq_attr "type" "vecload")
    172        (eq_attr "cpu" "power9"))
    173   "DU_any_power9,LSU_pair_power9")  ; any dispatch slot, then a pair of lsu units
    174 
    175 ; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
    176 (define_insn_reservation "power9-store" 0  ; type store, no update, not indexed: default latency 0
    177   (and (eq_attr "type" "store")
    178        (eq_attr "update" "no")
    179        (eq_attr "indexed" "no")
    180        (eq_attr "cpu" "power9"))
    181   "DU_slice_3_power9,LSU_power9")  ; slice plus 3rd slot, then one lsu unit
    182 
    183 (define_insn_reservation "power9-store-indexed" 0  ; indexed store, no update; same latency and regexp as power9-store
    184   (and (eq_attr "type" "store")
    185        (eq_attr "update" "no")
    186        (eq_attr "indexed" "yes")
    187        (eq_attr "cpu" "power9"))
    188   "DU_slice_3_power9,LSU_power9")  ; slice plus 3rd slot, then one lsu unit
    189 
    190 ; Update forms have 2 cycle latency for updated addr reg
    191 (define_insn_reservation "power9-store-update" 2  ; type store with update, not indexed
    192   (and (eq_attr "type" "store")
    193        (eq_attr "update" "yes")
    194        (eq_attr "indexed" "no")
    195        (eq_attr "cpu" "power9"))
    196   "DU_C2_3_power9,LSU_power9+VSU_power9")  ; 2-way cracked plus 3rd slot, then an lsu and a vsu unit together
    197 
    198 ; Update forms have 2 cycle latency for updated addr reg
    199 (define_insn_reservation "power9-store-update-indexed" 2  ; indexed store with update; same latency and regexp as power9-store-update
    200   (and (eq_attr "type" "store")
    201        (eq_attr "update" "yes")
    202        (eq_attr "indexed" "yes")
    203        (eq_attr "cpu" "power9"))
    204   "DU_C2_3_power9,LSU_power9+VSU_power9")  ; 2-way cracked plus 3rd slot, then an lsu and a vsu unit together
    205 
    206 (define_insn_reservation "power9-fpstore" 0  ; type fpstore, no update: default latency 0
    207   (and (eq_attr "type" "fpstore")
    208        (eq_attr "update" "no")
    209        (eq_attr "cpu" "power9"))
    210   "DU_slice_3_power9,LSU_power9")  ; slice plus 3rd slot, then one lsu unit
    211 
    212 ; Update forms have 2 cycle latency for updated addr reg
    213 (define_insn_reservation "power9-fpstore-update" 2  ; type fpstore with update
    214   (and (eq_attr "type" "fpstore")
    215        (eq_attr "update" "yes")
    216        (eq_attr "cpu" "power9"))
    217   "DU_C2_3_power9,LSU_power9+VSU_power9")  ; 2-way cracked plus 3rd slot, then an lsu and a vsu unit together
    218 
    219 (define_insn_reservation "power9-vecstore" 0  ; type vecstore: default latency 0
    220   (and (eq_attr "type" "vecstore")
    221        (eq_attr "cpu" "power9"))
    222   "DU_super_power9,LSU_pair_power9")  ; superslice dispatch, then a pair of lsu units
    223 
    224 (define_insn_reservation "power9-larx" 4  ; type load_l: default latency 4
    225   (and (eq_attr "type" "load_l")
    226        (eq_attr "cpu" "power9"))
    227   "DU_any_power9,LSU_power9")  ; any dispatch slot, then one lsu unit
    228 
    229 (define_insn_reservation "power9-stcx" 2  ; type store_c: default latency 2
    230   (and (eq_attr "type" "store_c")
    231        (eq_attr "cpu" "power9"))
    232   "DU_C2_3_power9,LSU_power9+VSU_power9")  ; 2-way cracked plus 3rd slot, then an lsu and a vsu unit together
    233 
    234 (define_insn_reservation "power9-sync" 4  ; types sync and isync: default latency 4
    235   (and (eq_attr "type" "sync,isync")
    236        (eq_attr "cpu" "power9"))
    237   "DU_any_power9,LSU_power9")  ; any dispatch slot, then one lsu unit
    238 
    239 
    240 ; VSU Execution Unit
    241 
    242 ; Fixed point ops
    243 
    244 ; Most ALU insns are simple 2 cycle, including record form
    245 (define_insn_reservation "power9-alu" 2
    246   (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
    247 	    (and (eq_attr "type" "insert,shift")  ; insert/shift only when dot is "no"; dot "yes" is power9-cracked-alu
    248 		 (eq_attr "dot" "no")))
    249        (eq_attr "cpu" "power9"))
    250   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    251 ; 5 cycle CR latency: replaces the default 2 when the consumer is crlogical, mfcr or mfcrf
    252 (define_bypass 5 "power9-alu"
    253 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    254 
    255 ; Record form rotate/shift are cracked (types insert,shift with dot "yes")
    256 (define_insn_reservation "power9-cracked-alu" 2
    257   (and (eq_attr "type" "insert,shift")
    258        (eq_attr "dot" "yes")
    259        (eq_attr "cpu" "power9"))
    260   "DU_C2_power9,VSU_power9")  ; 2-way cracked dispatch, then one vsu unit
    261 ; 7 cycle CR latency: replaces the default 2 when the consumer is crlogical, mfcr or mfcrf
    262 (define_bypass 7 "power9-cracked-alu"
    263 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    264 
    265 (define_insn_reservation "power9-alu2" 3  ; types cntlz, popcnt, trap: default latency 3
    266   (and (eq_attr "type" "cntlz,popcnt,trap")
    267        (eq_attr "cpu" "power9"))
    268   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    269 ; 6 cycle CR latency: replaces the default 3 when the consumer is crlogical, mfcr or mfcrf
    270 (define_bypass 6 "power9-alu2"
    271 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    272 
    273 (define_insn_reservation "power9-cmp" 2  ; type cmp: default latency 2
    274   (and (eq_attr "type" "cmp")
    275        (eq_attr "cpu" "power9"))
    276   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    277 
    278 
    279 ; Treat 'two' and 'three' types as 2 or 3 way cracked
    280 (define_insn_reservation "power9-two" 4  ; type two: default latency 4
    281   (and (eq_attr "type" "two")
    282        (eq_attr "cpu" "power9"))
    283   "DU_C2_power9,VSU_power9")  ; 2-way cracked dispatch, then one vsu unit
    284 
    285 (define_insn_reservation "power9-three" 6  ; type three: default latency 6
    286   (and (eq_attr "type" "three")
    287        (eq_attr "cpu" "power9"))
    288   "DU_C3_power9,VSU_power9")  ; 3-way cracked dispatch (all dispatch units), then one vsu unit
    289 
    290 (define_insn_reservation "power9-mul" 5  ; type mul with dot "no": default latency 5
    291   (and (eq_attr "type" "mul")
    292        (eq_attr "dot" "no")
    293        (eq_attr "cpu" "power9"))
    294   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    295 
    296 (define_insn_reservation "power9-mul-compare" 5  ; type mul with dot "yes": default latency 5
    297   (and (eq_attr "type" "mul")
    298        (eq_attr "dot" "yes")
    299        (eq_attr "cpu" "power9"))
    300   "DU_C2_power9,VSU_power9")  ; 2-way cracked dispatch, then one vsu unit
    301 ; 10 cycle CR latency: replaces the default 5 when the consumer is crlogical, mfcr or mfcrf
    302 (define_bypass 10 "power9-mul-compare"
    303 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
    304 
    305 ; Fixed point divides reserve the divide units for a minimum of 8 cycles
    306 (define_insn_reservation "power9-idiv" 16  ; type div of size 32: default latency 16
    307   (and (eq_attr "type" "div")
    308        (eq_attr "size" "32")
    309        (eq_attr "cpu" "power9"))
    310   "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")  ; even-slice dispatch, then either divide unit held for 8 cycles
    311 
    312 (define_insn_reservation "power9-ldiv" 24  ; type div of size 64: default latency 24
    313   (and (eq_attr "type" "div")
    314        (eq_attr "size" "64")
    315        (eq_attr "cpu" "power9"))
    316   "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")  ; even-slice dispatch, then either divide unit held for 8 cycles
    317 
    318 (define_insn_reservation "power9-crlogical" 2  ; types cr_logical, delayed_cr: default latency 2
    319   (and (eq_attr "type" "cr_logical,delayed_cr")
    320        (eq_attr "cpu" "power9"))
    321   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    322 
    323 (define_insn_reservation "power9-mfcrf" 2  ; type mfcrf: default latency 2
    324   (and (eq_attr "type" "mfcrf")
    325        (eq_attr "cpu" "power9"))
    326   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    327 
    328 (define_insn_reservation "power9-mfcr" 6  ; type mfcr: default latency 6
    329   (and (eq_attr "type" "mfcr")
    330        (eq_attr "cpu" "power9"))
    331   "DU_C3_power9,VSU_power9")  ; 3-way cracked dispatch (all dispatch units), then one vsu unit
    332 
    333 ; TODO: should differentiate between 1 cr field and > 1 since target of > 1 cr
    334 ; is cracked; currently every mtcr is modelled as uncracked (DU_any_power9)
    335 (define_insn_reservation "power9-mtcr" 2
    336   (and (eq_attr "type" "mtcr")
    337        (eq_attr "cpu" "power9"))
    338   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    339 
    340 ; Move to LR/CTR are executed in VSU
    341 (define_insn_reservation "power9-mtjmpr" 5  ; type mtjmpr: default latency 5
    342   (and (eq_attr "type" "mtjmpr")
    343        (eq_attr "cpu" "power9"))
    344   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    345 
    346 ; Floating point/Vector ops
    347 (define_insn_reservation "power9-fpsimple" 2  ; type fpsimple: default latency 2
    348   (and (eq_attr "type" "fpsimple")
    349        (eq_attr "cpu" "power9"))
    350   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit
    351 
    352 (define_insn_reservation "power9-fp" 7  ; types fp, dmul: default latency 7
    353   (and (eq_attr "type" "fp,dmul")
    354        (eq_attr "cpu" "power9"))
    355   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit
    356 
    357 (define_insn_reservation "power9-fpcompare" 3  ; type fpcompare: default latency 3
    358   (and (eq_attr "type" "fpcompare")
    359        (eq_attr "cpu" "power9"))
    360   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit
    361 
    362 ; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
    363 ; divide insns, but for the most part do not block pipelined ops.
    364 (define_insn_reservation "power9-sdiv" 22  ; type sdiv: default latency 22
    365   (and (eq_attr "type" "sdiv")
    366        (eq_attr "cpu" "power9"))
    367   "DU_slice_3_power9,VSU_power9")  ; vsu unit is held for one cycle only, so divide-vs-divide blocking is not modelled
    368 
    369 (define_insn_reservation "power9-ddiv" 33  ; type ddiv: default latency 33
    370   (and (eq_attr "type" "ddiv")
    371        (eq_attr "cpu" "power9"))
    372   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit for one cycle
    373 
    374 (define_insn_reservation "power9-sqrt" 26  ; type ssqrt: default latency 26
    375   (and (eq_attr "type" "ssqrt")
    376        (eq_attr "cpu" "power9"))
    377   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit for one cycle
    378 
    379 (define_insn_reservation "power9-dsqrt" 36  ; type dsqrt: default latency 36
    380   (and (eq_attr "type" "dsqrt")
    381        (eq_attr "cpu" "power9"))
    382   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit for one cycle
    383 
    384 (define_insn_reservation "power9-vec-2cyc" 2  ; types vecmove, veclogical, vecexts, veccmpfx: default latency 2
    385   (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
    386        (eq_attr "cpu" "power9"))
    387   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair
    388 
    389 (define_insn_reservation "power9-veccmp" 3  ; type veccmp: default latency 3
    390   (and (eq_attr "type" "veccmp")
    391        (eq_attr "cpu" "power9"))
    392   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair
    393 
    394 (define_insn_reservation "power9-vecsimple" 3  ; type vecsimple: default latency 3
    395   (and (eq_attr "type" "vecsimple")
    396        (eq_attr "cpu" "power9"))
    397   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair
    398 
    399 (define_insn_reservation "power9-vecnormal" 7  ; types vecfloat, vecdouble whose size is not 128: default latency 7
    400   (and (eq_attr "type" "vecfloat,vecdouble")
    401        (eq_attr "size" "!128")  ; size 128 is matched by power9-qp instead
    402        (eq_attr "cpu" "power9"))
    403   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair
    404 
    405 ; Quad-precision FP ops, execute in DFU (types vecfloat, vecdouble with size 128)
    406 (define_insn_reservation "power9-qp" 12
    407   (and (eq_attr "type" "vecfloat,vecdouble")
    408        (eq_attr "size" "128")
    409        (eq_attr "cpu" "power9"))
    410   "DU_super_power9,dfu_power9")  ; superslice dispatch, then the single dfu unit
    411 
    412 (define_insn_reservation "power9-vecperm" 3  ; type vecperm: default latency 3
    413   (and (eq_attr "type" "vecperm")
    414        (eq_attr "cpu" "power9"))
    415   "DU_super_power9,VSU_PRM_power9")  ; superslice dispatch, then either permute unit
    416 
    417 (define_insn_reservation "power9-veccomplex" 7  ; type veccomplex: default latency 7
    418   (and (eq_attr "type" "veccomplex")
    419        (eq_attr "cpu" "power9"))
    420   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair
    421 
    422 (define_insn_reservation "power9-vecfdiv" 28  ; type vecfdiv: default latency 28
    423   (and (eq_attr "type" "vecfdiv")
    424        (eq_attr "cpu" "power9"))
    425   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair for one cycle
    426 
    427 (define_insn_reservation "power9-vecdiv" 32  ; type vecdiv whose size is not 128: default latency 32
    428   (and (eq_attr "type" "vecdiv")
    429        (eq_attr "size" "!128")  ; size 128 is matched by power9-qpdiv instead
    430        (eq_attr "cpu" "power9"))
    431   "DU_super_power9,VSU_super_power9")  ; superslice dispatch, then a vsu pair for one cycle
    432 
    433 (define_insn_reservation "power9-qpdiv" 56  ; type vecdiv of size 128: default latency 56
    434   (and (eq_attr "type" "vecdiv")
    435        (eq_attr "size" "128")
    436        (eq_attr "cpu" "power9"))
    437   "DU_super_power9,dfu_power9")  ; superslice dispatch, then the single dfu unit for one cycle
    438 
    439 (define_insn_reservation "power9-mffgpr" 2  ; type mffgpr: default latency 2
    440   (and (eq_attr "type" "mffgpr")
    441        (eq_attr "cpu" "power9"))
    442   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit
    443 
    444 (define_insn_reservation "power9-mftgpr" 2  ; type mftgpr: default latency 2
    445   (and (eq_attr "type" "mftgpr")
    446        (eq_attr "cpu" "power9"))
    447   "DU_slice_3_power9,VSU_power9")  ; slice plus 3rd slot, then one vsu unit
    448 
    449 
    450 ; Branch Unit
    451 ; Move from LR/CTR are executed in BRU but consume a writeback port from an
    452 ; execution slice.
    453 (define_insn_reservation "power9-mfjmpr" 6  ; type mfjmpr: default latency 6
    454   (and (eq_attr "type" "mfjmpr")
    455        (eq_attr "cpu" "power9"))
    456   "DU_branch_power9,bru_power9+VSU_power9")  ; branch port dispatch, then bru together with one vsu unit
    457 
    458 ; Branch is 2 cycles
    459 (define_insn_reservation "power9-branch" 2  ; types jmpreg, branch
    460   (and (eq_attr "type" "jmpreg,branch")
    461        (eq_attr "cpu" "power9"))
    462   "DU_branch_power9,bru_power9")  ; branch port dispatch, then the bru unit
    463 
    464 
    465 ; Crypto Unit
    466 (define_insn_reservation "power9-crypto" 6  ; type crypto: default latency 6
    467   (and (eq_attr "type" "crypto")
    468        (eq_attr "cpu" "power9"))
    469   "DU_super_power9,cryptu_power9")  ; superslice dispatch, then the single cryptu unit
    470 
    471 
    472 ; HTM Unit
    473 (define_insn_reservation "power9-htm" 4  ; type htm: default latency 4
    474   (and (eq_attr "type" "htm")
    475        (eq_attr "cpu" "power9"))
    476   "DU_C2_power9,LSU_power9")  ; 2-way cracked dispatch, then one lsu unit
    477 
    478 (define_insn_reservation "power9-htm-simple" 2  ; type htmsimple: default latency 2
    479   (and (eq_attr "type" "htmsimple")
    480        (eq_attr "cpu" "power9"))
    481   "DU_any_power9,VSU_power9")  ; any dispatch slot, then one vsu unit
    482 
    483 
    484 ; DFP Unit
    485 (define_insn_reservation "power9-dfp" 12  ; type dfp: default latency 12
    486   (and (eq_attr "type" "dfp")
    487        (eq_attr "cpu" "power9"))
    488   "DU_even_power9,dfu_power9")  ; even-slice dispatch, then the single dfu unit
    489 
    490