Home | History | Annotate | Line # | Download | only in rs6000
power10.md revision 1.1.1.1
      1 ;; Scheduling description for IBM POWER10 processor.
      2 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
      3 ;;
      4 ;; This is a clone of power9.md.  It is intended to be a placeholder until a
      5 ;; real scheduler model can be contributed.
      6 ;; The original power9.md was contributed by Pat Haugen (pthaugen@us.ibm.com).
      7 
      8 ;; This file is part of GCC.
      9 ;;
     10 ;; GCC is free software; you can redistribute it and/or modify it
     11 ;; under the terms of the GNU General Public License as published
     12 ;; by the Free Software Foundation; either version 3, or (at your
     13 ;; option) any later version.
     14 ;;
     15 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
     16 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     17 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     18 ;; License for more details.
     19 ;;
     20 ;; You should have received a copy of the GNU General Public License
     21 ;; along with GCC; see the file COPYING3.  If not see
     22 ;; <http://www.gnu.org/licenses/>.
     23 
     24 ;; This file was cloned from power9.md; it does not (yet) describe the actual
     25 ;; POWER10 processor.
     26 
     27 (define_automaton "power10dsp,power10lsu,power10vsu,power10fpdiv,power10misc")
        ; The line above declares five automata.  Every define_cpu_unit names, as
        ; its second operand, the automaton its units belong to:
        ;   power10dsp   - dispatch ports (declared further down)
        ;   power10lsu   - the four load/store units
        ;   power10vsu   - the four VSU units and the two permute units
        ;   power10fpdiv - pseudo-units that serialize FP div/sqrt
        ;   power10misc  - fixed-point divide, branch, crypto and DFU units
     28 
     29 (define_cpu_unit "lsu0_power10,lsu1_power10,lsu2_power10,lsu3_power10" "power10lsu")
     30 (define_cpu_unit "vsu0_power10,vsu1_power10,vsu2_power10,vsu3_power10" "power10vsu")
     31 ; Two vector permute units, part of vsu
     32 (define_cpu_unit "prm0_power10,prm1_power10" "power10vsu")
     33 ; Two fixed point divide units, not pipelined
     34 (define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10misc")
        ; One branch unit, one crypto unit and one DFU
     35 (define_cpu_unit "bru_power10,cryptu_power10,dfu_power10" "power10misc")
     36 ; Create false (pseudo) units for use by non-pipelined FP div/sqrt
     37 (define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,fp_div3_power10"
     38 		 "power10fpdiv")
     39 
     40 
     41 (define_cpu_unit "x0_power10,x1_power10,xa0_power10,xa1_power10,
     42 		  x2_power10,x3_power10,xb0_power10,xb1_power10,
     43 		  br0_power10,br1_power10" "power10dsp")
        ; The units above are the dispatch ports, all in the power10dsp automaton:
        ; x0-x3 are the execution-slice slots, xa0/xa1 and xb0/xb1 model the third
        ; slot of each superslice, and br0/br1 are the two branch dispatch ports.
     44 
     45 
     46 ; Dispatch port reservations
     47 ;
     48 ; The processor can dispatch a maximum of 6 iops per cycle with the following
     49 ; general restrictions (other restrictions also apply):
     50 ;   1) At most 2 iops per execution slice
     51 ;   2) At most 2 iops to the branch unit
     52 ; Note that insn position in a dispatch group of 6 insns does not imply which
     53 ; execution slice the insn is routed to.  The units are used to model the
     54 ; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
     55 ; with 2 insns with 'superslice' requirement).
     56 
     57 ; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
     58 ; are listed as separate units to allow those insns that preclude its use to
     59 ; still be scheduled two to a superslice while reserving the 3rd slot.  The
     60 ; same applies for xb0/xb1.
        ; In a reservation regexp "+" reserves all named units in the same cycle, so
        ; DU_xa/DU_xb each claim both halves of a third slot at once.
     61 (define_reservation "DU_xa_power10" "xa0_power10+xa1_power10")
     62 (define_reservation "DU_xb_power10" "xb0_power10+xb1_power10")
     63 
     64 ; Any execution slice dispatch
        ; "|" separates alternatives: exactly one of x0, x1, the xa pair, x2, x3 or
        ; the xb pair is reserved.
     65 (define_reservation "DU_any_power10"
     66 		    "x0_power10|x1_power10|DU_xa_power10|x2_power10|x3_power10|
     67 		     DU_xb_power10")
     68 
     69 ; Even slice, actually takes even/odd slots
        ; Reserves the pair x0+x1 or the pair x2+x3; the regexp is the same as the
        ; one given for DU_super_power10.
     70 (define_reservation "DU_even_power10" "x0_power10+x1_power10|x2_power10+x3_power10")
     71 
     72 ; Slice plus 3rd slot
        ; One x slot together with one half of the matching third slot: x0/x1 pair
        ; with xa0/xa1, x2/x3 pair with xb0/xb1.
     73 (define_reservation "DU_slice_3_power10"
     74 		    "x0_power10+xa0_power10|x1_power10+xa1_power10|
     75 		     x2_power10+xb0_power10|x3_power10+xb1_power10")
     76 
     77 ; Superslice
        ; Both x slots of one superslice (x0+x1 or x2+x3); the xa/xb third slot is
        ; left free.
     78 (define_reservation "DU_super_power10"
     79 		    "x0_power10+x1_power10|x2_power10+x3_power10")
     80 
     81 ; 2-way cracked
        ; Each alternative reserves two of the slots x0, x1, DU_xa, x2, x3, DU_xb in
        ; the same cycle.  Only the six pairings listed are allowed.
     82 (define_reservation "DU_C2_power10" "x0_power10+x1_power10|
     83 				    x1_power10+DU_xa_power10|
     84 				    x1_power10+x2_power10|
     85 				    DU_xa_power10+x2_power10|
     86 				    x2_power10+x3_power10|
     87 				    x3_power10+DU_xb_power10")
     88 
     89 ; 2-way cracked plus 3rd slot
        ; Each of the three alternatives reserves two x slots plus one third-slot
        ; half (xa0, xa1 or xb0) in the same cycle.
     90 (define_reservation "DU_C2_3_power10" "x0_power10+x1_power10+xa0_power10|
     91 				      x1_power10+x2_power10+xa1_power10|
     92 				      x2_power10+x3_power10+xb0_power10")
     93 
     94 ; 3-way cracked (consumes whole decode/dispatch cycle)
        ; Reserves all ten dispatch units (x0-x3, xa0/xa1, xb0/xb1, br0/br1) at once,
        ; so nothing else can dispatch in the same cycle.
     95 (define_reservation "DU_C3_power10"
     96 		    "x0_power10+x1_power10+xa0_power10+xa1_power10+x2_power10+
     97 		     x3_power10+xb0_power10+xb1_power10+br0_power10+br1_power10")
     98 
     99 ; Branch ports
        ; Either of the two branch dispatch ports.
    100 (define_reservation "DU_branch_power10" "br0_power10|br1_power10")
    101 
    102 
    103 ; Execution unit reservations
        ; Any one of the four load/store units.
    104 (define_reservation "LSU_power10"
    105 		    "lsu0_power10|lsu1_power10|lsu2_power10|lsu3_power10")
    106 
        ; Two load/store units in the same cycle.  The allowed pairs are
        ; 0+1, 1+2, 2+3 and 3+0.
    107 (define_reservation "LSU_pair_power10"
    108 		    "lsu0_power10+lsu1_power10|lsu1_power10+lsu2_power10|
    109 		     lsu2_power10+lsu3_power10|lsu3_power10+lsu0_power10")
    110 
        ; Any one of the four VSU units.
    111 (define_reservation "VSU_power10"
    112 		    "vsu0_power10|vsu1_power10|vsu2_power10|vsu3_power10")
    113 
        ; Both VSU units of one pair: vsu0+vsu1 or vsu2+vsu3.
    114 (define_reservation "VSU_super_power10"
    115 		    "vsu0_power10+vsu1_power10|vsu2_power10+vsu3_power10")
    116 
        ; Either of the two vector permute units.
    117 (define_reservation "VSU_PRM_power10" "prm0_power10|prm1_power10")
    118 
    119 ; Define the reservation to be used by FP div/sqrt which allows other insns
    120 ; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
    121 ; Note that the number of cycles blocked varies depending on insn, but we
    122 ; just use the same number for all in order to keep the number of DFA states
    123 ; reasonable.
        ; "unit*8" keeps the unit reserved for 8 consecutive cycles.  FP_DIV holds
        ; one of the four pseudo-units; VEC_DIV holds a pair (0+1 or 2+3).
    124 (define_reservation "FP_DIV_power10"
    125 		    "fp_div0_power10*8|fp_div1_power10*8|fp_div2_power10*8|
    126 		     fp_div3_power10*8")
    127 (define_reservation "VEC_DIV_power10"
    128 		    "fp_div0_power10*8+fp_div1_power10*8|
    129 		     fp_div2_power10*8+fp_div3_power10*8")
    130 
    131 
    132 ; LS Unit
        ; In each define_insn_reservation the number is the default latency, the
        ; (and ...) expression selects insns by attribute, and the final string is
        ; the reservation: "," moves to the next cycle and "+" reserves units in
        ; the same cycle.  The four integer-load entries below cover the
        ; combinations of the "sign_extend" and "update" attributes.

        ; Plain load: one dispatch slot, then one LSU.
    133 (define_insn_reservation "power10-load" 4
    134   (and (eq_attr "type" "load")
    135        (eq_attr "sign_extend" "no")
    136        (eq_attr "update" "no")
    137        (eq_attr "cpu" "power10"))
    138   "DU_any_power10,LSU_power10")
    139 
        ; Load with update: 2-way cracked dispatch, then an LSU and a VSU together.
    140 (define_insn_reservation "power10-load-update" 4
    141   (and (eq_attr "type" "load")
    142        (eq_attr "sign_extend" "no")
    143        (eq_attr "update" "yes")
    144        (eq_attr "cpu" "power10"))
    145   "DU_C2_power10,LSU_power10+VSU_power10")
    146 
        ; Sign-extending load: 2-way cracked dispatch, latency 6.
    147 (define_insn_reservation "power10-load-ext" 6
    148   (and (eq_attr "type" "load")
    149        (eq_attr "sign_extend" "yes")
    150        (eq_attr "update" "no")
    151        (eq_attr "cpu" "power10"))
    152   "DU_C2_power10,LSU_power10")
    153 
        ; Sign-extending load with update: takes the whole dispatch cycle (DU_C3),
        ; then an LSU and a VSU together.
    154 (define_insn_reservation "power10-load-ext-update" 6
    155   (and (eq_attr "type" "load")
    156        (eq_attr "sign_extend" "yes")
    157        (eq_attr "update" "yes")
    158        (eq_attr "cpu" "power10"))
    159   "DU_C3_power10,LSU_power10+VSU_power10")
    160 
    161 (define_insn_reservation "power10-fpload-double" 4
    162   (and (eq_attr "type" "fpload")
    163        (eq_attr "update" "no")
    164        (eq_attr "size" "64")
    165        (eq_attr "cpu" "power10"))
    166   "DU_slice_3_power10,LSU_power10")
        ; The entry above is the 64-bit FP load without update: slice plus third
        ; slot at dispatch, then one LSU.
    167 
        ; 64-bit FP load with update: cracked dispatch, then an LSU and a VSU
        ; together.
    168 (define_insn_reservation "power10-fpload-update-double" 4
    169   (and (eq_attr "type" "fpload")
    170        (eq_attr "update" "yes")
    171        (eq_attr "size" "64")
    172        (eq_attr "cpu" "power10"))
    173   "DU_C2_3_power10,LSU_power10+VSU_power10")
    174 
    175 ; SFmode loads are cracked and have additional 2 cycles over DFmode
        ; (latency 6 here versus 4 for the size-64 entries above)
    176 (define_insn_reservation "power10-fpload-single" 6
    177   (and (eq_attr "type" "fpload")
    178        (eq_attr "update" "no")
    179        (eq_attr "size" "32")
    180        (eq_attr "cpu" "power10"))
    181   "DU_C2_3_power10,LSU_power10")
    182 
        ; 32-bit FP load with update: takes the whole dispatch cycle (DU_C3).
    183 (define_insn_reservation "power10-fpload-update-single" 6
    184   (and (eq_attr "type" "fpload")
    185        (eq_attr "update" "yes")
    186        (eq_attr "size" "32")
    187        (eq_attr "cpu" "power10"))
    188   "DU_C3_power10,LSU_power10+VSU_power10")
    189 
        ; Vector load: any dispatch slot, then a pair of LSUs, latency 5.
    190 (define_insn_reservation "power10-vecload" 5
    191   (and (eq_attr "type" "vecload")
    192        (eq_attr "cpu" "power10"))
    193   "DU_any_power10,LSU_pair_power10")
    194 
    195 ; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
        ; Non-update stores are given a default latency of 0.  The indexed and
        ; non-indexed entries are kept separate even though their reservations are
        ; currently identical.
    196 (define_insn_reservation "power10-store" 0
    197   (and (eq_attr "type" "store")
    198        (eq_attr "update" "no")
    199        (eq_attr "indexed" "no")
    200        (eq_attr "cpu" "power10"))
    201   "DU_slice_3_power10,LSU_power10")
    202 
    203 (define_insn_reservation "power10-store-indexed" 0
    204   (and (eq_attr "type" "store")
    205        (eq_attr "update" "no")
    206        (eq_attr "indexed" "yes")
    207        (eq_attr "cpu" "power10"))
    208   "DU_slice_3_power10,LSU_power10")
    209 
    210 ; Update forms have 2 cycle latency for updated addr reg
    211 (define_insn_reservation "power10-store-update" 2
    212   (and (eq_attr "type" "store")
    213        (eq_attr "update" "yes")
    214        (eq_attr "indexed" "no")
    215        (eq_attr "cpu" "power10"))
    216   "DU_C2_3_power10,LSU_power10+VSU_power10")
    217 
    218 ; Update forms have 2 cycle latency for updated addr reg
    219 (define_insn_reservation "power10-store-update-indexed" 2
    220   (and (eq_attr "type" "store")
    221        (eq_attr "update" "yes")
    222        (eq_attr "indexed" "yes")
    223        (eq_attr "cpu" "power10"))
    224   "DU_C2_3_power10,LSU_power10+VSU_power10")
    225 
        ; FP store without update: same reservation as the integer store.
    226 (define_insn_reservation "power10-fpstore" 0
    227   (and (eq_attr "type" "fpstore")
    228        (eq_attr "update" "no")
    229        (eq_attr "cpu" "power10"))
    230   "DU_slice_3_power10,LSU_power10")
    231 
    232 ; Update forms have 2 cycle latency for updated addr reg
    233 (define_insn_reservation "power10-fpstore-update" 2
    234   (and (eq_attr "type" "fpstore")
    235        (eq_attr "update" "yes")
    236        (eq_attr "cpu" "power10"))
    237   "DU_C2_3_power10,LSU_power10+VSU_power10")
    238 
        ; Vector store: a whole superslice at dispatch, then a pair of LSUs.
    239 (define_insn_reservation "power10-vecstore" 0
    240   (and (eq_attr "type" "vecstore")
    241        (eq_attr "cpu" "power10"))
    242   "DU_super_power10,LSU_pair_power10")
    243 
    244 (define_insn_reservation "power10-larx" 4
    245   (and (eq_attr "type" "load_l")
    246        (eq_attr "cpu" "power10"))
    247   "DU_any_power10,LSU_power10")
        ; The entry above (type "load_l") uses the same latency and reservation as
        ; power10-load.
    248 
        ; Type "store_c": same latency and reservation as the update-form stores.
    249 (define_insn_reservation "power10-stcx" 2
    250   (and (eq_attr "type" "store_c")
    251        (eq_attr "cpu" "power10"))
    252   "DU_C2_3_power10,LSU_power10+VSU_power10")
    253 
        ; sync and isync are modelled as a latency-4 LSU operation.
    254 (define_insn_reservation "power10-sync" 4
    255   (and (eq_attr "type" "sync,isync")
    256        (eq_attr "cpu" "power10"))
    257   "DU_any_power10,LSU_power10")
    258 
    259 
    260 ; VSU Execution Unit
    261 
    262 ; Fixed point ops
    263 
    264 ; Most ALU insns are simple 2 cycle, including record form
    265 (define_insn_reservation "power10-alu" 2
    266   (and (eq_attr "type" "add,exts,integer,logical,isel")
    267        (eq_attr "cpu" "power10"))
    268   "DU_any_power10,VSU_power10")
    269 ; 5 cycle CR latency
        ; define_bypass replaces the default latency when the consumer is one of
        ; the listed reservations (here the CR readers crlogical, mfcr and mfcrf).
    270 (define_bypass 5 "power10-alu"
    271 		 "power10-crlogical,power10-mfcr,power10-mfcrf")
    272 
    273 ; Rotate/shift prevent use of third slot
    274 (define_insn_reservation "power10-rot" 2
    275   (and (eq_attr "type" "insert,shift")
    276        (eq_attr "dot" "no")
    277        (eq_attr "cpu" "power10"))
    278   "DU_slice_3_power10,VSU_power10")
    279 
    280 ; Record form rotate/shift are cracked
        ; (selected by "dot" = "yes"; dispatch uses the cracked DU_C2_3 form)
    281 (define_insn_reservation "power10-cracked-alu" 2
    282   (and (eq_attr "type" "insert,shift")
    283        (eq_attr "dot" "yes")
    284        (eq_attr "cpu" "power10"))
    285   "DU_C2_3_power10,VSU_power10")
    286 ; 7 cycle CR latency
    287 (define_bypass 7 "power10-cracked-alu"
    288 		 "power10-crlogical,power10-mfcr,power10-mfcrf")
    289 
        ; Count-leading-zeros, popcount and trap: 3 cycle latency.
    290 (define_insn_reservation "power10-alu2" 3
    291   (and (eq_attr "type" "cntlz,popcnt,trap")
    292        (eq_attr "cpu" "power10"))
    293   "DU_any_power10,VSU_power10")
    294 ; 6 cycle CR latency
    295 (define_bypass 6 "power10-alu2"
    296 		 "power10-crlogical,power10-mfcr,power10-mfcrf")
    297 
        ; Compares: 2 cycle latency; no CR bypass is defined for them here.
    298 (define_insn_reservation "power10-cmp" 2
    299   (and (eq_attr "type" "cmp")
    300        (eq_attr "cpu" "power10"))
    301   "DU_any_power10,VSU_power10")
    302 
    303 
    304 ; Treat 'two' and 'three' types as 2 or 3 way cracked
        ; 'two' uses the 2-way cracked dispatch (DU_C2) with latency 4; 'three'
        ; takes the whole dispatch cycle (DU_C3) with latency 6.
    305 (define_insn_reservation "power10-two" 4
    306   (and (eq_attr "type" "two")
    307        (eq_attr "cpu" "power10"))
    308   "DU_C2_power10,VSU_power10")
    309 
    310 (define_insn_reservation "power10-three" 6
    311   (and (eq_attr "type" "three")
    312        (eq_attr "cpu" "power10"))
    313   "DU_C3_power10,VSU_power10")
    314 
    315 (define_insn_reservation "power10-mul" 5
    316   (and (eq_attr "type" "mul")
    317        (eq_attr "dot" "no")
    318        (eq_attr "cpu" "power10"))
    319   "DU_slice_3_power10,VSU_power10")
        ; The entry above is the non-record multiply: 5 cycle latency, slice plus
        ; third slot at dispatch.
    320 
        ; Record-form multiply ("dot" = "yes") uses the cracked DU_C2_3 dispatch.
    321 (define_insn_reservation "power10-mul-compare" 5
    322   (and (eq_attr "type" "mul")
    323        (eq_attr "dot" "yes")
    324        (eq_attr "cpu" "power10"))
    325   "DU_C2_3_power10,VSU_power10")
    326 ; 10 cycle CR latency
    327 (define_bypass 10 "power10-mul-compare"
    328 		 "power10-crlogical,power10-mfcr,power10-mfcrf")
    329 
    330 ; Fixed point divides reserve the divide units for a minimum of 8 cycles
        ; Both sizes hold one of the two fx_div units for 8 cycles; only the
        ; latency differs (16 for size 32, 24 for size 64).
    331 (define_insn_reservation "power10-idiv" 16
    332   (and (eq_attr "type" "div")
    333        (eq_attr "size" "32")
    334        (eq_attr "cpu" "power10"))
    335   "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
    336 
    337 (define_insn_reservation "power10-ldiv" 24
    338   (and (eq_attr "type" "div")
    339        (eq_attr "size" "64")
    340        (eq_attr "cpu" "power10"))
    341   "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
    342 
    343 (define_insn_reservation "power10-crlogical" 2
    344   (and (eq_attr "type" "cr_logical")
    345        (eq_attr "cpu" "power10"))
    346   "DU_any_power10,VSU_power10")
        ; power10-crlogical (above), power10-mfcrf and power10-mfcr (below) are the
        ; consumer names used by the define_bypass entries for CR latency.
    347 
    348 (define_insn_reservation "power10-mfcrf" 2
    349   (and (eq_attr "type" "mfcrf")
    350        (eq_attr "cpu" "power10"))
    351   "DU_any_power10,VSU_power10")
    352 
        ; mfcr takes the whole dispatch cycle (DU_C3) and has latency 6.
    353 (define_insn_reservation "power10-mfcr" 6
    354   (and (eq_attr "type" "mfcr")
    355        (eq_attr "cpu" "power10"))
    356   "DU_C3_power10,VSU_power10")
    357 
    358 ; Should differentiate between 1 cr field and > 1 since target of > 1 cr
    359 ; is cracked
        ; (not done yet: every mtcr is treated as a single uncracked op here)
    360 (define_insn_reservation "power10-mtcr" 2
    361   (and (eq_attr "type" "mtcr")
    362        (eq_attr "cpu" "power10"))
    363   "DU_any_power10,VSU_power10")
    364 
    365 ; Moves to LR/CTR are executed in VSU
    366 (define_insn_reservation "power10-mtjmpr" 5
    367   (and (eq_attr "type" "mtjmpr")
    368        (eq_attr "cpu" "power10"))
    369   "DU_any_power10,VSU_power10")
    370 
    371 ; Floating point/Vector ops
        ; The three scalar FP entries below share one reservation (slice plus third
        ; slot, then one VSU) and differ only in latency: 2, 5 and 3 cycles.
    372 (define_insn_reservation "power10-fpsimple" 2
    373   (and (eq_attr "type" "fpsimple")
    374        (eq_attr "cpu" "power10"))
    375   "DU_slice_3_power10,VSU_power10")
    376 
    377 (define_insn_reservation "power10-fp" 5
    378   (and (eq_attr "type" "fp,dmul")
    379        (eq_attr "cpu" "power10"))
    380   "DU_slice_3_power10,VSU_power10")
    381 
    382 (define_insn_reservation "power10-fpcompare" 3
    383   (and (eq_attr "type" "fpcompare")
    384        (eq_attr "cpu" "power10"))
    385   "DU_slice_3_power10,VSU_power10")
    386 
    387 ; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
    388 ; div/sqrt insns, but for the most part do not block pipelined ops.
        ; Each entry takes a VSU for one cycle and then holds an fp_div pseudo-unit
        ; via FP_DIV_power10.  Only the latency differs between the four entries.
    389 (define_insn_reservation "power10-sdiv" 22
    390   (and (eq_attr "type" "sdiv")
    391        (eq_attr "cpu" "power10"))
    392   "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
    393 
    394 (define_insn_reservation "power10-ddiv" 27
    395   (and (eq_attr "type" "ddiv")
    396        (eq_attr "cpu" "power10"))
    397   "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
    398 
        ; Note: this reservation is named "power10-sqrt" but matches type "ssqrt".
    399 (define_insn_reservation "power10-sqrt" 26
    400   (and (eq_attr "type" "ssqrt")
    401        (eq_attr "cpu" "power10"))
    402   "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
    403 
    404 (define_insn_reservation "power10-dsqrt" 36
    405   (and (eq_attr "type" "dsqrt")
    406        (eq_attr "cpu" "power10"))
    407   "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
    408 
    409 (define_insn_reservation "power10-vec-2cyc" 2
    410   (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
    411        (eq_attr "cpu" "power10"))
    412   "DU_super_power10,VSU_super_power10")
        ; Vector ops, starting with the entry above, dispatch to a whole superslice
        ; (DU_super).  Most then take a VSU pair; exceptions are noted below.
    413 
    414 (define_insn_reservation "power10-veccmp" 3
    415   (and (eq_attr "type" "veccmp")
    416        (eq_attr "cpu" "power10"))
    417   "DU_super_power10,VSU_super_power10")
    418 
    419 (define_insn_reservation "power10-vecsimple" 3
    420   (and (eq_attr "type" "vecsimple")
    421        (eq_attr "cpu" "power10"))
    422   "DU_super_power10,VSU_super_power10")
    423 
        ; vecfloat/vecdouble of any size other than 128; size 128 is handled by
        ; power10-qp.
    424 (define_insn_reservation "power10-vecnormal" 7
    425   (and (eq_attr "type" "vecfloat,vecdouble")
    426        (eq_attr "size" "!128")
    427        (eq_attr "cpu" "power10"))
    428   "DU_super_power10,VSU_super_power10")
    429 
    430 ; Quad-precision FP ops, execute in DFU
    431 (define_insn_reservation "power10-qp" 12
    432   (and (eq_attr "type" "vecfloat,vecdouble")
    433        (eq_attr "size" "128")
    434        (eq_attr "cpu" "power10"))
    435   "DU_super_power10,dfu_power10")
    436 
        ; Permutes use one of the two permute units instead of a VSU pair.
    437 (define_insn_reservation "power10-vecperm" 3
    438   (and (eq_attr "type" "vecperm")
    439        (eq_attr "cpu" "power10"))
    440   "DU_super_power10,VSU_PRM_power10")
    441 
    442 (define_insn_reservation "power10-veccomplex" 7
    443   (and (eq_attr "type" "veccomplex")
    444        (eq_attr "cpu" "power10"))
    445   "DU_super_power10,VSU_super_power10")
    446 
    447 (define_insn_reservation "power10-vecfdiv" 24
    448   (and (eq_attr "type" "vecfdiv")
    449        (eq_attr "cpu" "power10"))
    450   "DU_super_power10,VSU_super_power10,VEC_DIV_power10")
        ; The entry above and power10-vecdiv below take a VSU pair for one cycle
        ; and then hold a pair of fp_div pseudo-units via VEC_DIV_power10.
    451 
        ; Vector divide of any size other than 128; size 128 is power10-qpdiv.
    452 (define_insn_reservation "power10-vecdiv" 27
    453   (and (eq_attr "type" "vecdiv")
    454        (eq_attr "size" "!128")
    455        (eq_attr "cpu" "power10"))
    456   "DU_super_power10,VSU_super_power10,VEC_DIV_power10")
    457 
    458 ; Use 8 for DFU reservation on QP div/mul to limit DFA state size
        ; (the DFU is held for 8 cycles although the latencies are 56 and 24)
    459 (define_insn_reservation "power10-qpdiv" 56
    460   (and (eq_attr "type" "vecdiv")
    461        (eq_attr "size" "128")
    462        (eq_attr "cpu" "power10"))
    463   "DU_super_power10,dfu_power10*8")
    464 
    465 (define_insn_reservation "power10-qpmul" 24
    466   (and (eq_attr "type" "qmul")
    467        (eq_attr "size" "128")
    468        (eq_attr "cpu" "power10"))
    469   "DU_super_power10,dfu_power10*8")
    470 
    471 (define_insn_reservation "power10-mffgpr" 2
    472   (and (eq_attr "type" "mffgpr")
    473        (eq_attr "cpu" "power10"))
    474   "DU_slice_3_power10,VSU_power10")
        ; Types "mffgpr" (above) and "mftgpr" (below) share the same latency (2) and
        ; reservation: slice plus third slot, then one VSU.
    475 
    476 (define_insn_reservation "power10-mftgpr" 2
    477   (and (eq_attr "type" "mftgpr")
    478        (eq_attr "cpu" "power10"))
    479   "DU_slice_3_power10,VSU_power10")
    480 
    481 
    482 ; Branch Unit
    483 ; Moves from LR/CTR are executed in BRU but consume a writeback port from an
    484 ; execution slice.
        ; That is why the reservation takes bru_power10 and a VSU in the same cycle.
    485 (define_insn_reservation "power10-mfjmpr" 6
    486   (and (eq_attr "type" "mfjmpr")
    487        (eq_attr "cpu" "power10"))
    488   "DU_branch_power10,bru_power10+VSU_power10")
    489 
    490 ; Branch is 2 cycles
        ; Covers types "jmpreg" and "branch": a branch dispatch port, then the BRU.
    491 (define_insn_reservation "power10-branch" 2
    492   (and (eq_attr "type" "jmpreg,branch")
    493        (eq_attr "cpu" "power10"))
    494   "DU_branch_power10,bru_power10")
    495 
    496 
    497 ; Crypto Unit
        ; Type "crypto": dispatches to a whole superslice, then uses the single
        ; crypto unit; latency 6.
    498 (define_insn_reservation "power10-crypto" 6
    499   (and (eq_attr "type" "crypto")
    500        (eq_attr "cpu" "power10"))
    501   "DU_super_power10,cryptu_power10")
    502 
    503 
    504 ; HTM Unit
        ; No dedicated HTM cpu unit is declared: type "htm" is modelled as a cracked
        ; op on an LSU and type "htmsimple" as a simple op on a VSU.
    505 (define_insn_reservation "power10-htm" 4
    506   (and (eq_attr "type" "htm")
    507        (eq_attr "cpu" "power10"))
    508   "DU_C2_power10,LSU_power10")
    509 
    510 (define_insn_reservation "power10-htm-simple" 2
    511   (and (eq_attr "type" "htmsimple")
    512        (eq_attr "cpu" "power10"))
    513   "DU_any_power10,VSU_power10")
    514 
    515 
    516 ; DFP Unit
        ; Type "dfp": dispatches with DU_even, then uses the single DFU (the same
        ; unit the quad-precision entries reserve); latency 12.
    517 (define_insn_reservation "power10-dfp" 12
    518   (and (eq_attr "type" "dfp")
    519        (eq_attr "cpu" "power10"))
    520   "DU_even_power10,dfu_power10")
    521 
    522