1 1.1 mrg ;; DFA scheduling description for SH4. 2 1.12 mrg ;; Copyright (C) 2004-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg ;; This file is part of GCC. 5 1.1 mrg 6 1.1 mrg ;; GCC is free software; you can redistribute it and/or modify 7 1.1 mrg ;; it under the terms of the GNU General Public License as published by 8 1.1 mrg ;; the Free Software Foundation; either version 3, or (at your option) 9 1.1 mrg ;; any later version. 10 1.1 mrg 11 1.1 mrg ;; GCC is distributed in the hope that it will be useful, 12 1.1 mrg ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 mrg ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 1.1 mrg ;; GNU General Public License for more details. 15 1.1 mrg 16 1.1 mrg ;; You should have received a copy of the GNU General Public License 17 1.1 mrg ;; along with GCC; see the file COPYING3. If not see 18 1.1 mrg ;; <http://www.gnu.org/licenses/>. 19 1.1 mrg 20 1.1 mrg ;; Load and store instructions save a cycle if they are aligned on a 21 1.1 mrg ;; four byte boundary. Using a function unit for stores encourages 22 1.1 mrg ;; gcc to separate load and store instructions by one instruction, 23 1.1 mrg ;; which makes it more likely that the linker will be able to word 24 1.1 mrg ;; align them when relaxing. 25 1.1 mrg 26 1.1 mrg ;; The following description models the SH4 pipeline using the DFA based 27 1.1 mrg ;; scheduler. The DFA based description is better way to model a 28 1.1 mrg ;; superscalar pipeline as compared to function unit reservation model. 29 1.1 mrg ;; 1. The function unit based model is oriented to describe at most one 30 1.1 mrg ;; unit reservation by each insn. It is difficult to model unit reservations 31 1.1 mrg ;; in multiple pipeline units by same insn. This can be done using DFA 32 1.1 mrg ;; based description. 33 1.1 mrg ;; 2. The execution performance of DFA based scheduler does not depend on 34 1.1 mrg ;; processor complexity. 35 1.1 mrg ;; 3. 
;;    Writing all unit reservations for an instruction class is a more natural
;;    description of the pipeline and makes the interface to the hazard
;;    recognizer simpler than the old function unit based model.
;; 4. The DFA model is richer and is a part of the greater overall framework
;;    of RCSP.


;; Two automata are defined; this keeps the number of states below what
;; a single large automaton would have.  (Factoring)
(define_automaton "inst_pipeline,fpu_pipe")

;; This unit is basically the decode unit of the processor.
;; Since SH4 is a dual issue machine, it is modelled as if there were two
;; decoding units, so that any insn can be processed by either one
;; of them.
(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")


;; The fixed point arithmetic calculator (?? EX Unit).
(define_cpu_unit "int" "inst_pipeline")

;; f1_1 and f1_2 are floating point units.  Actually there is
;; an f1 unit which can overlap with another f1 unit but
;; not with another F1 unit.  It is as though there were two
;; f1 units.
(define_cpu_unit "f1_1,f1_2" "fpu_pipe")

;; The floating point units (except FS - F2 always precedes it.)
(define_cpu_unit "F0,F1,F2,F3" "fpu_pipe")

;; This is basically the MA unit of SH4,
;; used in the LOAD/STORE pipeline.
(define_cpu_unit "memory" "inst_pipeline")

;; However, there are LS group insns that don't use it, even ones that
;; complete in 0 cycles.  So an extra unit is used for the issue of LS insns.
(define_cpu_unit "load_store" "inst_pipeline")

;; The address calculator used for branch instructions.
;; This will be reserved after "issue" of branch instructions;
;; the purpose is to make sure that no two branch instructions
;; can be issued in parallel.

(define_cpu_unit "pcr_addrcalc" "inst_pipeline")

;; ----------------------------------------------------
;; This reservation simplifies the dual issue description.
(define_reservation "issue" "pipe_01|pipe_02")

;; This expresses the locking of the D stage.
;; Note that the issue of a CO group insn also effectively locks the D stage.
(define_reservation "d_lock" "pipe_01+pipe_02")

;; Every FE instruction but fipr / ftrv starts with issue and this.
(define_reservation "F01" "F0+F1")

;; This simplifies the description where F1,F2,FS
;; are used simultaneously.
(define_reservation "fpu" "F1+F2")

;; This highlights the fact that f1
;; cannot overlap with F1.
(exclusion_set "f1_1,f1_2" "F1")

(define_insn_reservation "nil" 0
  (eq_attr "type" "nil")
  "nothing")

;; Although reg moves have a latency of zero,
;; we need to highlight that they use the D stage
;; for one cycle.

;; Group:	MT
(define_insn_reservation "reg_mov" 0
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "move"))
  "issue")

;; Group:	LS
(define_insn_reservation "freg_mov" 0
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fmove"))
  "issue+load_store")

;; Not all pipeline stages are modelled; the issue ('D') stage is modelled
;; inasmuch as only two instructions are allowed to issue simultaneously,
;; and CO instructions prevent any simultaneous issue of another instruction.
;; (This uses pipe_01 and pipe_02).
;; Double issue of EX insns is prevented by using the int unit in the EX stage.
;; Double issue of EX / BR insns is prevented by using the int unit /
;; pcr_addrcalc unit in the EX stage.
;; Double issue of BR / LS instructions is prevented by using the
;; pcr_addrcalc / load_store unit in the issue cycle.
;; Double issue of FE instructions is prevented by using F0 in the first
;; pipeline stage after the first D stage.
;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage
;; (except in the cases outlined above), nor to describe the FS stage after
;; the F2 stage.

;; Other MT group instructions (1 step operations)
;; Group:	MT
;; Latency: 	1
;; Issue Rate: 	1
(define_insn_reservation "mt" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "mt_group"))
  "issue")

;; Fixed Point Arithmetic Instructions (1 step operations)
;; Group:	EX
;; Latency: 	1
;; Issue Rate: 	1
(define_insn_reservation "sh4_simple_arith" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "insn_class" "ex_group"))
  "issue,int")

;; Load and store instructions have no alignment peculiarities for the SH4,
;; but they use the load-store unit, which they share with the fmove type
;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg).
;; Loads have a latency of two.
;; However, call insns can only be paired with a preceding insn, and have
;; a delay slot, so that we want two more insns to be scheduled between the
;; load of the function address and the call.  This is equivalent to a
;; latency of three.
;; ADJUST_COST can only properly handle reductions of the cost, so we
;; use a latency of three here, which gets multiplied by 10 to yield 30.
;; We only do this for SImode loads of general registers, to make the work
;; for ADJUST_COST easier.

;; Load Store instructions. (MOV.[BWL]@(d,GBR))
;; Group:	LS
;; Latency: 	2
;; Issue Rate: 	1
(define_insn_reservation "sh4_load" 2
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "load,pcload"))
  "issue+load_store,nothing,memory")

;; calls / sfuncs need an extra instruction for their delay slot.
;; Moreover, estimating the latency for SImode loads as 3 will also allow
;; adjust_cost to meaningfully bump it back up to 3 if they load the shift
;; count of a dynamic shift.
(define_insn_reservation "sh4_load_si" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "load_si,pcload_si"))
  "issue+load_store,nothing,memory")

;; (define_bypass 2 "sh4_load_si" "!sh4_call")

;; The load latency is upped to three higher if the dependent insn does
;; double precision computation.  The 'default' latency should reflect
;; that increased latency, because otherwise the insn priorities won't
;; allow proper scheduling.
(define_insn_reservation "sh4_fload" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fload,pcfload"))
  "issue+load_store,nothing,memory")

;; (define_bypass 2 "sh4_fload" "!")

(define_insn_reservation "sh4_store" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "store,fstore"))
  "issue+load_store,nothing,memory")

(define_insn_reservation "mac_mem" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "mac_mem"))
  "d_lock,nothing,memory")

;; Load Store instructions.
;; Group:	LS
;; Latency: 	1
;; Issue Rate: 	1
(define_insn_reservation "sh4_gp_fpul" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "gp_fpul"))
  "issue+load_store")

;; Load Store instructions.
;; Group:	LS
;; Latency: 	3
;; Issue Rate: 	1
(define_insn_reservation "sh4_fpul_gp" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fpul_gp"))
  "issue+load_store")

;; Branch (BF,BF/S,BT,BT/S,BRA)
;; Group:	BR
;; Latency when taken: 	2 (or 1)
;; Issue Rate: 	1
;; The latency is 1 when displacement is 0.
;; Not much can be done with the latency, even if it could be expressed,
;; but the pairing restrictions are useful to take into account.
;; ??? If the branch is likely, we might want to fill the delay slot;
;; if the branch is likely, but not very likely, should we pretend to use
;; a resource that CO instructions use, to get a pairable delay slot insn?
(define_insn_reservation "sh4_branch" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "cbranch,jump"))
  "issue+pcr_addrcalc")

;; Branch Far (JMP,RTS,BRAF)
;; Group:	CO
;; Latency: 	3
;; Issue Rate: 	2
;; ??? Scheduling happens before branch shortening, and hence jmp and braf
;; can't be distinguished from bra for the "jump" pattern.
(define_insn_reservation "sh4_return" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "return,jump_ind"))
  "d_lock*2")

;; RTE
;; Group:	CO
;; Latency: 	5
;; Issue Rate: 	5
;; This instruction can be executed in any of the pipelines
;; and blocks the pipeline for the next 4 stages.
(define_insn_reservation "sh4_return_from_exp" 5
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "rte"))
  "d_lock*5")

;; OCBP, OCBWB
;; Group:	CO
;; Latency: 	1-5
;; Issue Rate: 	1
;; cwb is used for the sequence
;;	ocbwb  @%0
;;	extu.w %0,%2
;;	or     %1,%2
;;	mov.l  %0,@%2
;; ocbwb on its own would be "d_lock,nothing,memory*5"
(define_insn_reservation "ocbwb" 6
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "cwb"))
  "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2")

;; LDS to PR,JSR
;; Group:	CO
;; Latency: 	3
;; Issue Rate: 	2
;; The SX stage is blocked for the last 2 cycles.
;; OTOH, the only time that has an effect for insns generated by the compiler
;; is when lds to PR is followed by sts from PR - and that is highly unlikely -
;; or when we are doing a function call - and we don't do inter-function
;; scheduling.  For the function call case, it's really best that we end with
;; something that models an rts.
(define_insn_reservation "sh4_lds_to_pr" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "prset"))
  "d_lock*2")

;; Calls introduce a longish delay that is likely to flush the pipelines
;; of the caller's instructions.  Ordinary functions tend to end with a
;; load to restore a register (in the delay slot of rts), while sfuncs
;; tend to end with an EX or MT insn.  But that is not actually relevant,
;; since there are no instructions that contend for memory access early.
;; We could, of course, provide exact scheduling information for specific
;; sfuncs, if that should prove useful.
(define_insn_reservation "sh4_call" 16
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "call,sfunc"))
  "d_lock*16")

;; LDS.L to PR
;; Group:	CO
;; Latency: 	3
;; Issue Rate: 	2
;; The SX unit is blocked for the last 2 cycles.
(define_insn_reservation "ldsmem_to_pr" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "pload"))
  "d_lock*2")

;; STS from PR
;; Group:	CO
;; Latency: 	2
;; Issue Rate: 	2
;; The SX unit in second and third cycles.
(define_insn_reservation "sts_from_pr" 2
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "prget"))
  "d_lock*2")

;; STS.L from PR
;; Group:	CO
;; Latency: 	2
;; Issue Rate: 	2
(define_insn_reservation "sh4_prstore_mem" 2
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "pstore"))
  "d_lock*2,nothing,memory")

;; LDS to FPSCR
;; Group:	CO
;; Latency: 	4
;; Issue Rate: 	1
;; F1 is blocked for the last three cycles.
(define_insn_reservation "fpscr_load" 4
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "gp_fpscr"))
  "d_lock,nothing,F1*3")

;; LDS.L to FPSCR
;; Group:	CO
;; Latency: 	1 / 4
;; Latency to update Rn is 1 and latency to update FPSCR is 4
;; Issue Rate: 	1
;; F1 is blocked for the last three cycles.
(define_insn_reservation "fpscr_load_mem" 4
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "mem_fpscr"))
  "d_lock,nothing,(F1+memory),F1*2")


;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
;; Group:	CO
;; Latency: 	4 / 4
;; Issue Rate: 	2
(define_insn_reservation "multi" 4
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "smpy,dmpy"))
  "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")

;; Fixed STS from, and LDS to MACL / MACH
;; Group:	CO
;; Latency: 	3
;; Issue Rate: 	1
(define_insn_reservation "sh4_mac_gp" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "mac_gp,gp_mac,mem_mac"))
  "d_lock")


;; Single precision floating point computation FCMP/EQ,
;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
;; Group:	FE
;; Latency: 	3/4
;; Issue Rate: 	1
(define_insn_reservation "fp_arith" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fp,fp_cmp"))
  "issue,F01,F2")

;; The resource usage of this is not modelled exactly, because that would
;; introduce a bogus latency.
(define_insn_reservation "sh4_fpscr_toggle" 1
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fpscr_toggle"))
  "issue")

(define_insn_reservation "fp_arith_ftrc" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "ftrc_s"))
  "issue,F01,F2")

(define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp")

;; Single Precision FDIV/SQRT
;; Group:	FE
;; Latency: 	12/13 (FDIV); 11/12 (FSQRT)
;; Issue Rate: 	1
;; fdiv is what is described here; fsqrt is actually one cycle faster.
(define_insn_reservation "fp_div" 12
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "fdiv"))
  "issue,F01+F3,F2+F3,F3*7,F1+F3,F2")

;; Double Precision floating point computation
;; (FCNVDS, FCNVSD, FLOAT, FTRC)
;; Group:	FE
;; Latency: 	(3,4)/5
;; Issue Rate: 	1
(define_insn_reservation "dp_float" 4
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "dfp_conv"))
  "issue,F01,F1+F2,F2")

;; Double-precision floating-point (FADD,FMUL,FSUB)
;; Group:	FE
;; Latency: 	(7,8)/9
;; Issue Rate: 	1
(define_insn_reservation "fp_double_arith" 8
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "dfp_arith,dfp_mul"))
  "issue,F01,F1+F2,fpu*4,F2")

;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
;; Group:	CO
;; Latency: 	3/5
;; Issue Rate: 	2
(define_insn_reservation "fp_double_cmp" 3
  (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "dfp_cmp"))
  "d_lock,(d_lock+F01),F1+F2,F2")

;; Double precision FDIV/SQRT
;; Group:	FE
;; Latency: 	(24,25)/26
;; Issue Rate: 	1
(define_insn_reservation "dp_div" 25
  (and (eq_attr "pipe_model" "sh4")
       (eq_attr "type" "dfdiv"))
  "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2")


;; Use the branch-not-taken case to model arith3 insns.  For the branch taken
;; case, we'd get a d_lock instead of issue at the end.
(define_insn_reservation "arith3" 3
  (and (eq_attr "pipe_model" "sh4")
       (eq_attr "type" "arith3"))
  "issue,d_lock+pcr_addrcalc,issue")

;; arith3b insns schedule the same no matter if the branch is taken or not.
;; The condition must select type "arith3b": testing "arith3" here would
;; duplicate the condition of the "arith3" reservation above and leave
;; arith3b insns without any reservation.
(define_insn_reservation "arith3b" 2
  (and (eq_attr "pipe_model" "sh4")
       (eq_attr "type" "arith3b"))
  "issue,d_lock+pcr_addrcalc")