1 ;; Scheduling for Core 2 and derived processors. 2 ;; Copyright (C) 2004-2022 Free Software Foundation, Inc. 3 ;; 4 ;; This file is part of GCC. 5 ;; 6 ;; GCC is free software; you can redistribute it and/or modify 7 ;; it under the terms of the GNU General Public License as published by 8 ;; the Free Software Foundation; either version 3, or (at your option) 9 ;; any later version. 10 ;; 11 ;; GCC is distributed in the hope that it will be useful, 12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 ;; GNU General Public License for more details. 15 ;; 16 ;; You should have received a copy of the GNU General Public License 17 ;; along with GCC; see the file COPYING3. If not see 18 ;; <http://www.gnu.org/licenses/>. */ 19 20 ;; The scheduling description in this file is based on the one in ppro.md, 21 ;; with additional information obtained from 22 ;; 23 ;; "How to optimize for the Pentium family of microprocessors", 24 ;; by Agner Fog, PhD. 25 ;; 26 ;; The major difference from the P6 pipeline is one extra decoder, and 27 ;; one extra execute unit. Due to micro-op fusion, many insns no longer 28 ;; need to be decoded in decoder 0, but can be handled by all of them. 29 30 ;; The core2_idiv, core2_fdiv and core2_ssediv automata are used to 31 ;; model issue latencies of idiv, fdiv and ssediv type insns. 
(define_automaton "core2_decoder,core2_core,core2_idiv,core2_fdiv,core2_ssediv,core2_load,core2_store")

;; The CPU domain of an insn, used for Core i7 bypass latencies.
;;  - x87 insn types are "float".
;;  - SSE insn types are classified by mode: the listed floating-point
;;    modes give "float", SImode gives "int", anything else "simd".
;;  - MMX insn types are "simd".
;;  - Every other insn is "int".
(define_attr "i7_domain" "int,float,simd"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "float")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
                          sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
                    (const_string "float")
                  (eq_attr "mode" "SI")
                    (const_string "int")]
                 (const_string "simd"))
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "simd")]
        (const_string "int")))

;; Decoding follows the Pentium Pro scheme, except that this model
;; declares four decoder units instead of three:
;;  - an instruction with 1 uop can be decoded by any of the decoders
;;    in one cycle.
;;  - an instruction with 1 to 4 uops can be decoded only by decoder 0
;;    but still in only one cycle.
;;  - a complex (microcode) instruction can also only be decoded by
;;    decoder 0, and this takes an unspecified number of cycles.
;;
;; The goal is to schedule such that we have a few-one-one uops sequence
;; in each cycle, to decode as many instructions per cycle as possible.
(define_cpu_unit "c2_decoder0" "core2_decoder")
(define_cpu_unit "c2_decoder1" "core2_decoder")
(define_cpu_unit "c2_decoder2" "core2_decoder")
(define_cpu_unit "c2_decoder3" "core2_decoder")

;; We want an instruction for c2_decoder0 to be found first, so
;; c2_decoder1, c2_decoder2 and c2_decoder3 may only be reserved once
;; c2_decoder0 is reserved.
(presence_set "c2_decoder1" "c2_decoder0")
(presence_set "c2_decoder2" "c2_decoder0")
(presence_set "c2_decoder3" "c2_decoder0")

;; Most instructions can be decoded on any of the four decoders.
(define_reservation "c2_decodern" "(c2_decoder0|c2_decoder1|c2_decoder2|c2_decoder3)")

;; The out-of-order core has six pipelines.  These are similar to the
;; Pentium Pro's five pipelines.  Port 2 is responsible for memory loads,
;; port 3 for store address calculations, port 4 for memory stores, and
;; ports 0, 1 and 5 for everything else.

(define_cpu_unit "c2_p0,c2_p1,c2_p5" "core2_core")
(define_cpu_unit "c2_p2" "core2_load")
(define_cpu_unit "c2_p3,c2_p4" "core2_store")
(define_cpu_unit "c2_idiv" "core2_idiv")
(define_cpu_unit "c2_fdiv" "core2_fdiv")
(define_cpu_unit "c2_ssediv" "core2_ssediv")

;; Only the irregular instructions have to be modeled here.  A load
;; increases the latency by 2 or 3, or by nothing if the manual gives
;; a latency already.  Store latencies are not accounted for.
;;
;; The simple instructions follow a very regular pattern of 1 uop per
;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
;; on port 4 and port 3.  These instructions are modelled at the bottom
;; of this file.
;;
;; For microcoded instructions we don't know how many uops are produced.
;; These instructions are the "complex" ones in the Intel manuals.  All
;; we _do_ know is that they typically produce four or more uops, so
;; they can only be decoded on c2_decoder0.  Modelling their latencies
;; doesn't make sense because we don't know how these instructions are
;; executed in the core.  So we just model that they can only be decoded
;; on decoder 0, and say that it takes a little while before the result
;; is available.
;; Insns of type other, multi and str: decoder 0 only, latency 6, no
;; execution port reserved.
(define_insn_reservation "c2_complex_insn" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "other,multi,str"))
  "c2_decoder0")

;; Calls are likewise restricted to decoder 0, with latency 1.
(define_insn_reservation "c2_call" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "call,callv"))
  "c2_decoder0")

;; imov with memory operands does not use the integer units.
;; imovx always decodes to one uop, and also doesn't use the integer
;; units if it has memory operands.
(define_insn_reservation "c2_imov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imov,imovx")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")

;; Load form: only the load port is reserved.
(define_insn_reservation "c2_imov_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "imov,imovx")))
  "c2_decodern,c2_p2")

;; Store form (imov only): both store ports are reserved in one cycle.
(define_insn_reservation "c2_imov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "imov")))
  "c2_decodern,c2_p4+c2_p3")

;; Conditional moves go through decoder 0 and hold one of ports 0, 1
;; or 5 for two cycles.
(define_insn_reservation "c2_icmov" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "icmov")))
  "c2_decoder0,(c2_p0|c2_p1|c2_p5)*2")

;; Same as above, preceded by a cycle on the load port.
(define_insn_reservation "c2_icmov_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "icmov")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5)*2")

;; Push with "store" memory attribute: any decoder, then both store
;; ports.
(define_insn_reservation "c2_push_reg" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "push")))
  "c2_decodern,c2_p4+c2_p3")

;; Push with "both" memory attribute: decoder 0, a load cycle, then
;; both store ports.
(define_insn_reservation "c2_push_mem" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "push")))
  "c2_decoder0,c2_p2,c2_p4+c2_p3")

;; lea executes on port 0 with latency one and throughput 1.
(define_insn_reservation "c2_lea" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "lea")))
  "c2_decodern,c2_p0")

;; Shift and rotate decode as two uops which can go to port 0 or 5.
;; The load and store units need to be reserved when memory operands
;; are involved.
(define_insn_reservation "c2_shift_rotate" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "c2_decodern,(c2_p0|c2_p5)")

;; Any memory operand ("!none"): decoder 0, a load cycle, the
;; operation on port 0 or 5, then both store ports.
(define_insn_reservation "c2_shift_rotate_mem" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p5),c2_p4+c2_p3")

;; See comments in ppro.md for the corresponding reservation.
(define_insn_reservation "c2_branch" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ibr")))
  "c2_decodern,c2_p5")

;; ??? Indirect branches probably have worse latency than this.
(define_insn_reservation "c2_indirect_branch" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ibr")))
  "c2_decoder0,c2_p2+c2_p5")

;; leave: decoder 0, then the load port together with port 0 or 1,
;; then one more cycle on port 0 or 1.
(define_insn_reservation "c2_leave" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "leave"))
  "c2_decoder0,c2_p2+(c2_p0|c2_p1),(c2_p0|c2_p1)")

;; mul and imul with two/three operands only execute on port 1 for HImode
;; and SImode, port 0 for DImode.
;; HImode/SImode multiply, register operands: any decoder, port 1.
(define_insn_reservation "c2_imul_hisi" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "HI,SI")
                 (eq_attr "type" "imul"))))
  "c2_decodern,c2_p1")

;; HImode/SImode multiply with a memory operand: decoder 0, load port
;; and port 1 in the same cycle.
(define_insn_reservation "c2_imul_hisi_mem" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "HI,SI")
                 (eq_attr "type" "imul"))))
  "c2_decoder0,c2_p2+c2_p1")

;; DImode multiply, register operands: any decoder, port 0.
(define_insn_reservation "c2_imul_di" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DI")
                 (eq_attr "type" "imul"))))
  "c2_decodern,c2_p0")

;; DImode multiply with a memory operand: decoder 0, load port and
;; port 0 in the same cycle.
(define_insn_reservation "c2_imul_di_mem" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "DI")
                 (eq_attr "type" "imul"))))
  "c2_decoder0,c2_p2+c2_p0")

;; div and idiv are very similar, so we model them the same.
;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
;; These issue latencies are modelled via the core2_idiv automaton
;; (unit c2_idiv).
;; In every reservation below the c2_idiv unit stays reserved from the
;; first execution cycle to the last one; the total number of cycles
;; it is held is 12 for QImode, 21 for HImode and 37 for SImode.

(define_insn_reservation "c2_idiv_QI" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "QI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*2,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")

(define_insn_reservation "c2_idiv_QI_load" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "QI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")

(define_insn_reservation "c2_idiv_HI" 23
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "HI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*17")

(define_insn_reservation "c2_idiv_HI_load" 23
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "HI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*18")

(define_insn_reservation "c2_idiv_SI" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*33")

(define_insn_reservation "c2_idiv_SI_load" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*34")

;; x87 floating point operations.

;; fxch only occupies a decoder and has latency 0.
(define_insn_reservation "c2_fxch" 0
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fxch"))
  "c2_decodern")

;; fop with no (or unknown) memory operand: any decoder, port 1.
(define_insn_reservation "c2_fop" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "fop")))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_fop_load" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p2+c2_p1,c2_p1")

(define_insn_reservation "c2_fop_store" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p0,c2_p0,c2_p0+c2_p4+c2_p3")

(define_insn_reservation "c2_fop_both" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p2+c2_p0,c2_p0+c2_p4+c2_p3")

(define_insn_reservation "c2_fsgn" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fsgn"))
  "c2_decodern,c2_p0")

;; fistp: decoder 0, two cycles on port 0, then both store ports.
(define_insn_reservation "c2_fistp" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fistp"))
  "c2_decoder0,c2_p0*2,c2_p4+c2_p3")

(define_insn_reservation "c2_fcmov" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fcmov"))
  "c2_decoder0,c2_p0*2")

(define_insn_reservation "c2_fcmp" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fcmp")))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_fcmp_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fcmp")))
  "c2_decoder0,c2_p2+c2_p1")

;; fmov is split by memory attribute and, for the memory forms, by
;; whether the mode is XFmode.
(define_insn_reservation "c2_fmov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmov")))
  "c2_decodern,c2_p0")

(define_insn_reservation "c2_fmov_load" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "c2_decodern,c2_p2")

(define_insn_reservation "c2_fmov_XF_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "c2_decoder0,(c2_p2+c2_p0)*2")

(define_insn_reservation "c2_fmov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "c2_decodern,c2_p3+c2_p4")

(define_insn_reservation "c2_fmov_XF_store" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "c2_decoder0,(c2_p3+c2_p4),(c2_p3+c2_p4)")

;; fmul executes on port 0 with latency 5.  The reservations below
;; keep c2_p0 busy for two consecutive cycles.
(define_insn_reservation "c2_fmul" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmul")))
  "c2_decoder0,c2_p0*2")

(define_insn_reservation "c2_fmul_load" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fmul")))
  "c2_decoder0,c2_p2+c2_p0,c2_p0")

;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model using the
;; core2_fdiv automaton (unit c2_fdiv).
;; Each reservation covers insn types fdiv and fpspc.  The c2_fdiv
;; unit is held for 17 (SF), 31 (DF) or 37 (XF) cycles; the load forms
;; additionally reserve c2_p2 in the first cycle, require decoder 0,
;; and have one extra cycle of latency.

(define_insn_reservation "c2_fdiv_SF" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*16")

(define_insn_reservation "c2_fdiv_SF_load" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*16")

(define_insn_reservation "c2_fdiv_DF" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*30")

(define_insn_reservation "c2_fdiv_DF_load" 33
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*30")

(define_insn_reservation "c2_fdiv_XF" 38
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*36")

(define_insn_reservation "c2_fdiv_XF_load" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*36")

;; MMX instructions.

;; MMX/SSE integer add, register operands: any decoder, port 0 or 5.
(define_insn_reservation "c2_mmx_add" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxadd,sseiadd")))
  "c2_decodern,(c2_p0|c2_p5)")

;; Load form.  In a reservation regexp '+' binds tighter than '|', so
;; the alternation must be parenthesized; otherwise the expression
;; reads as "(c2_p2+c2_p0) or c2_p5" and the load port can be skipped.
(define_insn_reservation "c2_mmx_add_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxadd,sseiadd")))
  "c2_decodern,c2_p2+(c2_p0|c2_p5)")

(define_insn_reservation "c2_mmx_shft" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxshft")))
  "c2_decodern,(c2_p0|c2_p5)")

(define_insn_reservation "c2_mmx_shft_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxshft")))
  "c2_decoder0,c2_p2+c2_p1")

;; sseishft is split on whether the insn carries an immediate
;; (length_immediate != 0) or not.
(define_insn_reservation "c2_mmx_sse_shft" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "c2_decodern,c2_p1")

;; The load forms reserve the load port c2_p2 alongside port 1, like
;; every other load reservation in this file.
(define_insn_reservation "c2_mmx_sse_shft_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "c2_decodern,c2_p2+c2_p1")

(define_insn_reservation "c2_mmx_sse_shft1" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_mmx_sse_shft1_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "c2_decodern,c2_p2+c2_p1")

(define_insn_reservation "c2_mmx_mul" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxmul,sseimul")))
  "c2_decodern,c2_p1")

;; The condition must test for a load; testing "none" would merely
;; duplicate the condition of c2_mmx_mul.
(define_insn_reservation "c2_mmx_mul_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxmul,sseimul")))
  "c2_decoder0,c2_p2+c2_p1")

(define_insn_reservation "c2_sse_mmxcvt" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "DI")
            (eq_attr "type" "mmxcvt")))
  "c2_decodern,c2_p1")

;; FIXME: These are Pentium III only, but we cannot tell here if
;; we're generating code for PentiumPro/Pentium II or Pentium III
;; (define_insn_reservation "c2_sse_mmxshft" 2
;;			 (and (eq_attr "cpu" "core2,nehalem")
;;			      (and (eq_attr "mode" "TI")
;;				   (eq_attr "type" "mmxshft")))
;;			 "c2_decodern,c2_p0")

;; The sfence instruction.
(define_insn_reservation "c2_sse_sfence" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "unknown")
            (eq_attr "type" "sse")))
  "c2_decoder0,c2_p4+c2_p3")

;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
(define_insn_reservation "c2_sse_SFDF" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "SF,DF")
            (eq_attr "type" "sse")))
  "c2_decodern,c2_p0")

(define_insn_reservation "c2_sse_V4SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "V4SF")
            (eq_attr "type" "sse")))
  "c2_decoder0,c2_p1*2")

(define_insn_reservation "c2_sse_addcmp" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_sse_addcmp_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
  "c2_decodern,c2_p2+c2_p1")

(define_insn_reservation "c2_sse_mul_SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p0")

(define_insn_reservation "c2_sse_mul_SF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p2+c2_p0")

(define_insn_reservation "c2_sse_mul_DF" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p0")

(define_insn_reservation "c2_sse_mul_DF_load" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p2+c2_p0")

;; SSE divides: one cycle on port 0, then the c2_ssediv unit is held
;; for 17 (SF/V4SF) or 31 (DF/V2DF) cycles.
(define_insn_reservation "c2_sse_div_SF" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,c2_p0,c2_ssediv*17")

;; Load form: the condition must test for a load, otherwise it
;; duplicates c2_sse_div_SF.
(define_insn_reservation "c2_sse_div_SF_load" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,(c2_p2+c2_p0),c2_ssediv*17")

(define_insn_reservation "c2_sse_div_DF" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,c2_p0,c2_ssediv*31")

;; Load form: the condition must test for a load, otherwise it
;; duplicates c2_sse_div_DF.
(define_insn_reservation "c2_sse_div_DF_load" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,(c2_p2+c2_p0),c2_ssediv*31")

;; FIXME: these have limited throughput
(define_insn_reservation "c2_sse_icvt_SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_sse_icvt_SF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p2+c2_p1")

(define_insn_reservation "c2_sse_icvt_DF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decoder0,c2_p0+c2_p1")

(define_insn_reservation "c2_sse_icvt_DF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decoder0,(c2_p2+c2_p1)")

(define_insn_reservation "c2_sse_icvt_SI" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p1")

(define_insn_reservation "c2_sse_icvt_SI_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,(c2_p2+c2_p1)")

(define_insn_reservation "c2_sse_mov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemov")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")

(define_insn_reservation "c2_sse_mov_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemov")))
  "c2_decodern,c2_p2")

(define_insn_reservation "c2_sse_mov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "ssemov")))
  "c2_decodern,c2_p4+c2_p3")

;; All other instructions are modelled as simple instructions.
;; We have already modelled all i387 floating point instructions, so all
;; other instructions execute on either port 0, 1 or 5.  This includes
;; the ALU units, and the MMX units.
;;
;; reg-reg instructions produce 1 uop so they can be decoded on any of
;; the four decoders.
;; Loads benefit from micro-op fusion and can be treated in the same
;; way.
(define_insn_reservation "c2_insn" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")

;; Load form (also covers pop): any decoder, a cycle on the load
;; port, then one of ports 0, 1 or 5.
(define_insn_reservation "c2_insn_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "c2_decodern,c2_p2,(c2_p0|c2_p1|c2_p5)")

;; register-memory instructions have three uops, so they have to be
;; decoded on c2_decoder0.
(define_insn_reservation "c2_insn_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "c2_decoder0,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")

;; read-modify-store instructions produce 4 uops so they have to be
;; decoded on c2_decoder0 as well.
(define_insn_reservation "c2_insn_both" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")