;; Scheduling description for IBM POWER10 processor.
;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
;;
;; This is a clone of power9.md.  It is intended to be a placeholder until a
;; real scheduler model can be contributed.
;; The original power9.md was contributed by Pat Haugen (pthaugen@us.ibm.com).

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; This file was cloned from power9.md; it does not (yet) describe the actual
;; POWER10 processor.

; Automata used by this model.  Every CPU unit declared in this file is
; assigned to one of them: dispatch ports (power10dsp), LS units
; (power10lsu), VSU and permute units (power10vsu), the placeholder FP
; div/sqrt units (power10fpdiv), and the fixed point divide, branch,
; crypto and DFU units (power10misc).
(define_automaton "power10dsp,power10lsu,power10vsu,power10fpdiv,power10misc")

; Execution units.
; Four LS units
(define_cpu_unit "lsu0_power10,lsu1_power10,lsu2_power10,lsu3_power10" "power10lsu")
; Four VSU units
(define_cpu_unit "vsu0_power10,vsu1_power10,vsu2_power10,vsu3_power10" "power10vsu")
; Two vector permute units, part of vsu
(define_cpu_unit "prm0_power10,prm1_power10" "power10vsu")
; Two fixed point divide units, not pipelined
(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10misc")
; Branch, crypto and DFU units (one of each)
(define_cpu_unit "bru_power10,cryptu_power10,dfu_power10" "power10misc")
; Create a false unit for use by non-pipelined FP div/sqrt
(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,fp_div3_power10"
                 "power10fpdiv")


; Dispatch port units: slice slots x0-x3, the 3rd-slot units xa0/xa1 and
; xb0/xb1, and the two branch ports br0/br1.
(define_cpu_unit "x0_power10,x1_power10,xa0_power10,xa1_power10,
                  x2_power10,x3_power10,xb0_power10,xb1_power10,
                  br0_power10,br1_power10" "power10dsp")


; Dispatch port reservations
;
; The processor can dispatch a maximum of 6 iops per cycle with the following
; general restrictions (other restrictions also apply):
;   1) At most 2 iops per execution slice
;   2) At most 2 iops to the branch unit
; Note that insn position in a dispatch group of 6 insns does not imply which
; execution slice the insn is routed to.  The units are used to infer the
; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
; with 2 insns with 'superslice' requirement).

; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
; are listed as separate units to allow those insns that preclude its use to
; still be scheduled two to a superslice while reserving the 3rd slot.  The
; same applies for xb0/xb1.
; Each of these reservations takes both halves of its pair in the same cycle.
(define_reservation "DU_xa_power10" "xa0_power10+xa1_power10")
(define_reservation "DU_xb_power10" "xb0_power10+xb1_power10")

; Any execution slice dispatch: one of the four slice slots, or a complete
; 3rd-slot pair (DU_xa_power10 or DU_xb_power10).
(define_reservation "DU_any_power10"
                    "x0_power10|x1_power10|DU_xa_power10|x2_power10|x3_power10|
                     DU_xb_power10")

; Even slice, actually takes even/odd slots (x0+x1 or x2+x3).
(define_reservation "DU_even_power10" "x0_power10+x1_power10|x2_power10+x3_power10")

; Slice plus 3rd slot: one slice slot together with one half of the
; matching 3rd-slot pair (xa* with x0/x1, xb* with x2/x3).
(define_reservation "DU_slice_3_power10"
                    "x0_power10+xa0_power10|x1_power10+xa1_power10|
                     x2_power10+xb0_power10|x3_power10+xb1_power10")

; Superslice: both slice slots of one pair (x0+x1 or x2+x3).
(define_reservation "DU_super_power10"
                    "x0_power10+x1_power10|x2_power10+x3_power10")

; 2-way cracked: two neighbouring entries from the sequence
; x0, x1, DU_xa, x2, x3, DU_xb (plus the x1+x2 pairing).
(define_reservation "DU_C2_power10" "x0_power10+x1_power10|
                                     x1_power10+DU_xa_power10|
                                     x1_power10+x2_power10|
                                     DU_xa_power10+x2_power10|
                                     x2_power10+x3_power10|
                                     x3_power10+DU_xb_power10")

; 2-way cracked plus 3rd slot: two adjacent slice slots together with one
; 3rd-slot unit.
(define_reservation "DU_C2_3_power10" "x0_power10+x1_power10+xa0_power10|
                                       x1_power10+x2_power10+xa1_power10|
                                       x2_power10+x3_power10+xb0_power10")

; 3-way cracked (consumes whole decode/dispatch cycle).  Reserves every
; dispatch unit at once, including both branch ports.
(define_reservation "DU_C3_power10"
                    "x0_power10+x1_power10+xa0_power10+xa1_power10+x2_power10+
                     x3_power10+xb0_power10+xb1_power10+br0_power10+br1_power10")

; Branch ports: either of the two branch dispatch units.
(define_reservation "DU_branch_power10" "br0_power10|br1_power10")


; Execution unit reservations

; Any one of the four LS units.
(define_reservation "LSU_power10"
                    "lsu0_power10|lsu1_power10|lsu2_power10|lsu3_power10")

; Two LS units at once; the pairs wrap around (lsu3 pairs with lsu0).
(define_reservation "LSU_pair_power10"
                    "lsu0_power10+lsu1_power10|lsu1_power10+lsu2_power10|
                     lsu2_power10+lsu3_power10|lsu3_power10+lsu0_power10")

; Any one of the four VSU units.
(define_reservation "VSU_power10"
                    "vsu0_power10|vsu1_power10|vsu2_power10|vsu3_power10")

; Two VSU units at once: vsu0+vsu1 or vsu2+vsu3.
(define_reservation "VSU_super_power10"
                    "vsu0_power10+vsu1_power10|vsu2_power10+vsu3_power10")

; Either of the two vector permute units.
(define_reservation "VSU_PRM_power10" "prm0_power10|prm1_power10")

; Define the reservation to be used by FP div/sqrt which allows other insns
; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
; Note that the number of cycles blocked varies depending on insn, but we
; just use the same number for all in order to keep the number of DFA states
; reasonable.
; FP_DIV_power10 holds one fp_div unit for 8 cycles.
(define_reservation "FP_DIV_power10"
                    "fp_div0_power10*8|fp_div1_power10*8|fp_div2_power10*8|
                     fp_div3_power10*8")
; VEC_DIV_power10 holds a pair of fp_div units (0+1 or 2+3) for 8 cycles.
(define_reservation "VEC_DIV_power10"
                    "fp_div0_power10*8+fp_div1_power10*8|
                     fp_div2_power10*8+fp_div3_power10*8")


; LS Unit

; Integer loads, split on the "sign_extend" and "update" attributes.
; Non-extending loads have latency 4, sign-extending loads latency 6.
; Update forms reserve a VSU together with the LSU.  Dispatch goes from
; DU_any (plain) through 2-way cracked (update or extend) to 3-way cracked
; (update and extend).
(define_insn_reservation "power10-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")

(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-load-ext" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10")

(define_insn_reservation "power10-load-ext-update" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,LSU_power10+VSU_power10")

; FP loads, split on "size" (64 or 32) and "update".  Update forms reserve
; a VSU together with the LSU.
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; SFmode loads are cracked and have additional 2 cycles over DFmode
(define_insn_reservation "power10-fpload-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10")

(define_insn_reservation "power10-fpload-update-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,LSU_power10+VSU_power10")

; Vector loads: latency 5, any dispatch slot, then a pair of LS units.
(define_insn_reservation "power10-vecload" 5
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_pair_power10")

; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
; Non-update stores are given latency 0.  The indexed and non-indexed forms
; currently use the same reservation.
(define_insn_reservation "power10-store" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

(define_insn_reservation "power10-store-indexed" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

; Update forms have 2 cycle latency for updated addr reg
; They dispatch as 2-way cracked plus 3rd slot and reserve a VSU together
; with the LSU.
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-store-update-indexed" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; FP stores: latency 0, slice plus 3rd slot dispatch, one LSU.
(define_insn_reservation "power10-fpstore" 0
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-fpstore-update" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; Vector stores: latency 0, superslice dispatch, then a pair of LS units.
(define_insn_reservation "power10-vecstore" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,LSU_pair_power10")

; Insn type "load_l": scheduled like a plain integer load.
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")

; Insn type "store_c": scheduled like an update-form store.
(define_insn_reservation "power10-stcx" 2
  (and (eq_attr "type" "store_c")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; Insn types "sync" and "isync" share one reservation on the LSU.
(define_insn_reservation "power10-sync" 4
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")


; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")
; 5 cycle CR latency
; (applies when the consumer is a crlogical, mfcr or mfcrf reservation)
(define_bypass 5 "power10-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Rotate/shift prevent use of third slot
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

; Record form rotate/shift are cracked
(define_insn_reservation "power10-cracked-alu" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,VSU_power10")
; 7 cycle CR latency
; (applies when the consumer is a crlogical, mfcr or mfcrf reservation)
(define_bypass 7 "power10-cracked-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; 3 cycle ALU insns: insn types "cntlz", "popcnt" and "trap".
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")
; 6 cycle CR latency
; (applies when the consumer is a crlogical, mfcr or mfcrf reservation)
(define_bypass 6 "power10-alu2"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Compares: latency 2, any dispatch slot, one VSU.  No CR bypass is
; defined for this reservation.
(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")


; Treat 'two' and 'three' types as 2 or 3 way cracked
(define_insn_reservation "power10-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,VSU_power10")

(define_insn_reservation "power10-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,VSU_power10")

; Multiplies: latency 5.  The record ("dot") form dispatches as 2-way
; cracked plus 3rd slot.
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,VSU_power10")
; 10 cycle CR latency
; (applies when the consumer is a crlogical, mfcr or mfcrf reservation)
(define_bypass 10 "power10-mul-compare"
                  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Fixed point divides reserve the divide units for a minimum of 8 cycles
; 32-bit divides have latency 16, 64-bit divides latency 24; both hold one
; of the two fx_div units for 8 cycles.
(define_insn_reservation "power10-idiv" 16
  (and (eq_attr "type" "div")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")

(define_insn_reservation "power10-ldiv" 24
  (and (eq_attr "type" "div")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")

; CR insns.  The crlogical, mfcr and mfcrf reservation names are the
; consumers listed in the CR-latency define_bypass forms of this file.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

; mfcr dispatches as 3-way cracked.
(define_insn_reservation "power10-mfcr" 6
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,VSU_power10")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
(define_insn_reservation "power10-mtcr" 2
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

; Move to LR/CTR are executed in VSU
(define_insn_reservation "power10-mtjmpr" 5
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

; Floating point/Vector ops
; Scalar FP insns dispatch as slice plus 3rd slot and use one VSU.
(define_insn_reservation "power10-fpsimple" 2
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
; div/sqrt insns, but for the most part do not block pipelined ops.
; Each reserves a VSU for one cycle and then FP_DIV_power10.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

; Vector insns: superslice dispatch, then a VSU pair (VSU_super_power10).
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-vecsimple" 3
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

; vecfloat/vecdouble insns whose "size" attribute is not 128.
(define_insn_reservation "power10-vecnormal" 7
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

; Quad-precision FP ops, execute in DFU
; (vecfloat/vecdouble insns whose "size" attribute is 128).
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10")

; Vector permutes use one of the two permute units.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_PRM_power10")

; Insn type "veccomplex": latency 7 on a VSU pair.
(define_insn_reservation "power10-veccomplex" 7
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

; Vector divides reserve a VSU pair for one cycle and then
; VEC_DIV_power10.
(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")

; vecdiv insns whose "size" attribute is not 128.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")

; Use 8 for DFU reservation on QP div/mul to limit DFA state size
; Both reservations hold dfu_power10 for 8 cycles; the latencies are 56
; (div) and 24 (mul).
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10*8")

(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10*8")

; Insn types "mffgpr" and "mftgpr": latency 2, slice plus 3rd slot
; dispatch, one VSU.
(define_insn_reservation "power10-mffgpr" 2
  (and (eq_attr "type" "mffgpr")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-mftgpr" 2
  (and (eq_attr "type" "mftgpr")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")


; Branch Unit
; Move from LR/CTR are executed in BRU but consume a writeback port from an
; execution slice.
; (modelled by reserving a VSU in the same cycle as the BRU)
(define_insn_reservation "power10-mfjmpr" 6
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power10"))
  "DU_branch_power10,bru_power10+VSU_power10")

; Branch is 2 cycles
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power10"))
  "DU_branch_power10,bru_power10")


; Crypto Unit
; Latency 6, superslice dispatch, then the single crypto unit.
(define_insn_reservation "power10-crypto" 6
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,cryptu_power10")


; HTM Unit
; Insn type "htm" is 2-way cracked and reserves an LSU; "htmsimple" takes
; any dispatch slot and reserves a VSU.
(define_insn_reservation "power10-htm" 4
  (and (eq_attr "type" "htm")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10")

(define_insn_reservation "power10-htm-simple" 2
  (and (eq_attr "type" "htmsimple")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")


; DFP Unit
; Latency 12, even-slice dispatch, then the DFU for one cycle.
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,dfu_power10")

