1/* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdio.h> 26#include <stdlib.h> 27#include <string.h> 28#include <unistd.h> 29#include "gen_device_info.h" 30#include "compiler/shader_enums.h" 31#include "util/bitscan.h" 32#include "util/macros.h" 33 34#include "drm-uapi/i915_drm.h" 35 36/** 37 * Get the PCI ID for the device name. 38 * 39 * Returns -1 if the device is not known. 
40 */ 41int 42gen_device_name_to_pci_device_id(const char *name) 43{ 44 static const struct { 45 const char *name; 46 int pci_id; 47 } name_map[] = { 48 { "brw", 0x2a02 }, 49 { "g4x", 0x2a42 }, 50 { "ilk", 0x0042 }, 51 { "snb", 0x0126 }, 52 { "ivb", 0x016a }, 53 { "hsw", 0x0d2e }, 54 { "byt", 0x0f33 }, 55 { "bdw", 0x162e }, 56 { "chv", 0x22B3 }, 57 { "skl", 0x1912 }, 58 { "bxt", 0x5A85 }, 59 { "kbl", 0x5912 }, 60 { "aml", 0x591C }, 61 { "glk", 0x3185 }, 62 { "cfl", 0x3E9B }, 63 { "whl", 0x3EA1 }, 64 { "cml", 0x9b41 }, 65 { "cnl", 0x5a52 }, 66 { "icl", 0x8a52 }, 67 }; 68 69 for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { 70 if (!strcmp(name_map[i].name, name)) 71 return name_map[i].pci_id; 72 } 73 74 return -1; 75} 76 77/** 78 * Get the overridden PCI ID for the device. This is set with the 79 * INTEL_DEVID_OVERRIDE environment variable. 80 * 81 * Returns -1 if the override is not set. 82 */ 83int 84gen_get_pci_device_id_override(void) 85{ 86 if (!issetugid()) { 87 const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); 88 if (devid_override) { 89 const int id = gen_device_name_to_pci_device_id(devid_override); 90 return id >= 0 ? 
id : strtol(devid_override, NULL, 0); 91 } 92 } 93 94 return -1; 95} 96 97static const struct gen_device_info gen_device_info_i965 = { 98 .gen = 4, 99 .has_negative_rhw_bug = true, 100 .num_slices = 1, 101 .num_subslices = { 1, }, 102 .num_eu_per_subslice = 8, 103 .num_thread_per_eu = 4, 104 .max_vs_threads = 16, 105 .max_gs_threads = 2, 106 .max_wm_threads = 8 * 4, 107 .urb = { 108 .size = 256, 109 }, 110 .timestamp_frequency = 12500000, 111 .simulator_id = -1, 112}; 113 114static const struct gen_device_info gen_device_info_g4x = { 115 .gen = 4, 116 .has_pln = true, 117 .has_compr4 = true, 118 .has_surface_tile_offset = true, 119 .is_g4x = true, 120 .num_slices = 1, 121 .num_subslices = { 1, }, 122 .num_eu_per_subslice = 10, 123 .num_thread_per_eu = 5, 124 .max_vs_threads = 32, 125 .max_gs_threads = 2, 126 .max_wm_threads = 10 * 5, 127 .urb = { 128 .size = 384, 129 }, 130 .timestamp_frequency = 12500000, 131 .simulator_id = -1, 132}; 133 134static const struct gen_device_info gen_device_info_ilk = { 135 .gen = 5, 136 .has_pln = true, 137 .has_compr4 = true, 138 .has_surface_tile_offset = true, 139 .num_slices = 1, 140 .num_subslices = { 1, }, 141 .num_eu_per_subslice = 12, 142 .num_thread_per_eu = 6, 143 .max_vs_threads = 72, 144 .max_gs_threads = 32, 145 .max_wm_threads = 12 * 6, 146 .urb = { 147 .size = 1024, 148 }, 149 .timestamp_frequency = 12500000, 150 .simulator_id = -1, 151}; 152 153static const struct gen_device_info gen_device_info_snb_gt1 = { 154 .gen = 6, 155 .gt = 1, 156 .has_hiz_and_separate_stencil = true, 157 .has_llc = true, 158 .has_pln = true, 159 .has_surface_tile_offset = true, 160 .needs_unlit_centroid_workaround = true, 161 .num_slices = 1, 162 .num_subslices = { 1, }, 163 .num_eu_per_subslice = 6, 164 .num_thread_per_eu = 6, /* Not confirmed */ 165 .max_vs_threads = 24, 166 .max_gs_threads = 21, /* conservative; 24 if rendering disabled. 
*/ 167 .max_wm_threads = 40, 168 .urb = { 169 .size = 32, 170 .min_entries = { 171 [MESA_SHADER_VERTEX] = 24, 172 }, 173 .max_entries = { 174 [MESA_SHADER_VERTEX] = 256, 175 [MESA_SHADER_GEOMETRY] = 256, 176 }, 177 }, 178 .timestamp_frequency = 12500000, 179 .simulator_id = -1, 180}; 181 182static const struct gen_device_info gen_device_info_snb_gt2 = { 183 .gen = 6, 184 .gt = 2, 185 .has_hiz_and_separate_stencil = true, 186 .has_llc = true, 187 .has_pln = true, 188 .has_surface_tile_offset = true, 189 .needs_unlit_centroid_workaround = true, 190 .num_slices = 1, 191 .num_subslices = { 1, }, 192 .num_eu_per_subslice = 12, 193 .num_thread_per_eu = 6, /* Not confirmed */ 194 .max_vs_threads = 60, 195 .max_gs_threads = 60, 196 .max_wm_threads = 80, 197 .urb = { 198 .size = 64, 199 .min_entries = { 200 [MESA_SHADER_VERTEX] = 24, 201 }, 202 .max_entries = { 203 [MESA_SHADER_VERTEX] = 256, 204 [MESA_SHADER_GEOMETRY] = 256, 205 }, 206 }, 207 .timestamp_frequency = 12500000, 208 .simulator_id = -1, 209}; 210 211#define GEN7_FEATURES \ 212 .gen = 7, \ 213 .has_hiz_and_separate_stencil = true, \ 214 .must_use_separate_stencil = true, \ 215 .has_llc = true, \ 216 .has_pln = true, \ 217 .has_64bit_types = true, \ 218 .has_surface_tile_offset = true, \ 219 .timestamp_frequency = 12500000 220 221static const struct gen_device_info gen_device_info_ivb_gt1 = { 222 GEN7_FEATURES, .is_ivybridge = true, .gt = 1, 223 .num_slices = 1, 224 .num_subslices = { 1, }, 225 .num_eu_per_subslice = 6, 226 .num_thread_per_eu = 6, 227 .l3_banks = 2, 228 .max_vs_threads = 36, 229 .max_tcs_threads = 36, 230 .max_tes_threads = 36, 231 .max_gs_threads = 36, 232 .max_wm_threads = 48, 233 .max_cs_threads = 36, 234 .urb = { 235 .size = 128, 236 .min_entries = { 237 [MESA_SHADER_VERTEX] = 32, 238 [MESA_SHADER_TESS_EVAL] = 10, 239 }, 240 .max_entries = { 241 [MESA_SHADER_VERTEX] = 512, 242 [MESA_SHADER_TESS_CTRL] = 32, 243 [MESA_SHADER_TESS_EVAL] = 288, 244 [MESA_SHADER_GEOMETRY] = 192, 245 }, 246 }, 247 
.simulator_id = 7, 248}; 249 250static const struct gen_device_info gen_device_info_ivb_gt2 = { 251 GEN7_FEATURES, .is_ivybridge = true, .gt = 2, 252 .num_slices = 1, 253 .num_subslices = { 1, }, 254 .num_eu_per_subslice = 12, 255 .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of 256 * @max_wm_threads ... */ 257 .l3_banks = 4, 258 .max_vs_threads = 128, 259 .max_tcs_threads = 128, 260 .max_tes_threads = 128, 261 .max_gs_threads = 128, 262 .max_wm_threads = 172, 263 .max_cs_threads = 64, 264 .urb = { 265 .size = 256, 266 .min_entries = { 267 [MESA_SHADER_VERTEX] = 32, 268 [MESA_SHADER_TESS_EVAL] = 10, 269 }, 270 .max_entries = { 271 [MESA_SHADER_VERTEX] = 704, 272 [MESA_SHADER_TESS_CTRL] = 64, 273 [MESA_SHADER_TESS_EVAL] = 448, 274 [MESA_SHADER_GEOMETRY] = 320, 275 }, 276 }, 277 .simulator_id = 7, 278}; 279 280static const struct gen_device_info gen_device_info_byt = { 281 GEN7_FEATURES, .is_baytrail = true, .gt = 1, 282 .num_slices = 1, 283 .num_subslices = { 1, }, 284 .num_eu_per_subslice = 4, 285 .num_thread_per_eu = 8, 286 .l3_banks = 1, 287 .has_llc = false, 288 .max_vs_threads = 36, 289 .max_tcs_threads = 36, 290 .max_tes_threads = 36, 291 .max_gs_threads = 36, 292 .max_wm_threads = 48, 293 .max_cs_threads = 32, 294 .urb = { 295 .size = 128, 296 .min_entries = { 297 [MESA_SHADER_VERTEX] = 32, 298 [MESA_SHADER_TESS_EVAL] = 10, 299 }, 300 .max_entries = { 301 [MESA_SHADER_VERTEX] = 512, 302 [MESA_SHADER_TESS_CTRL] = 32, 303 [MESA_SHADER_TESS_EVAL] = 288, 304 [MESA_SHADER_GEOMETRY] = 192, 305 }, 306 }, 307 .simulator_id = 10, 308}; 309 310#define HSW_FEATURES \ 311 GEN7_FEATURES, \ 312 .is_haswell = true, \ 313 .supports_simd16_3src = true, \ 314 .has_resource_streamer = true 315 316static const struct gen_device_info gen_device_info_hsw_gt1 = { 317 HSW_FEATURES, .gt = 1, 318 .num_slices = 1, 319 .num_subslices = { 1, }, 320 .num_eu_per_subslice = 10, 321 .num_thread_per_eu = 7, 322 .l3_banks = 2, 323 .max_vs_threads = 70, 324 .max_tcs_threads = 
70, 325 .max_tes_threads = 70, 326 .max_gs_threads = 70, 327 .max_wm_threads = 102, 328 .max_cs_threads = 70, 329 .urb = { 330 .size = 128, 331 .min_entries = { 332 [MESA_SHADER_VERTEX] = 32, 333 [MESA_SHADER_TESS_EVAL] = 10, 334 }, 335 .max_entries = { 336 [MESA_SHADER_VERTEX] = 640, 337 [MESA_SHADER_TESS_CTRL] = 64, 338 [MESA_SHADER_TESS_EVAL] = 384, 339 [MESA_SHADER_GEOMETRY] = 256, 340 }, 341 }, 342 .simulator_id = 9, 343}; 344 345static const struct gen_device_info gen_device_info_hsw_gt2 = { 346 HSW_FEATURES, .gt = 2, 347 .num_slices = 1, 348 .num_subslices = { 2, }, 349 .num_eu_per_subslice = 10, 350 .num_thread_per_eu = 7, 351 .l3_banks = 4, 352 .max_vs_threads = 280, 353 .max_tcs_threads = 256, 354 .max_tes_threads = 280, 355 .max_gs_threads = 256, 356 .max_wm_threads = 204, 357 .max_cs_threads = 70, 358 .urb = { 359 .size = 256, 360 .min_entries = { 361 [MESA_SHADER_VERTEX] = 64, 362 [MESA_SHADER_TESS_EVAL] = 10, 363 }, 364 .max_entries = { 365 [MESA_SHADER_VERTEX] = 1664, 366 [MESA_SHADER_TESS_CTRL] = 128, 367 [MESA_SHADER_TESS_EVAL] = 960, 368 [MESA_SHADER_GEOMETRY] = 640, 369 }, 370 }, 371 .simulator_id = 9, 372}; 373 374static const struct gen_device_info gen_device_info_hsw_gt3 = { 375 HSW_FEATURES, .gt = 3, 376 .num_slices = 2, 377 .num_subslices = { 2, }, 378 .num_eu_per_subslice = 10, 379 .num_thread_per_eu = 7, 380 .l3_banks = 8, 381 .max_vs_threads = 280, 382 .max_tcs_threads = 256, 383 .max_tes_threads = 280, 384 .max_gs_threads = 256, 385 .max_wm_threads = 408, 386 .max_cs_threads = 70, 387 .urb = { 388 .size = 512, 389 .min_entries = { 390 [MESA_SHADER_VERTEX] = 64, 391 [MESA_SHADER_TESS_EVAL] = 10, 392 }, 393 .max_entries = { 394 [MESA_SHADER_VERTEX] = 1664, 395 [MESA_SHADER_TESS_CTRL] = 128, 396 [MESA_SHADER_TESS_EVAL] = 960, 397 [MESA_SHADER_GEOMETRY] = 640, 398 }, 399 }, 400 .simulator_id = 9, 401}; 402 403/* It's unclear how well supported sampling from the hiz buffer is on GEN8, 404 * so keep things conservative for now and set 
has_sample_with_hiz = false.
 */
/* Members shared by every gen8 table. The gen9, gen10 and gen11 feature
 * macros further down also start from this list and then repeat some of
 * its members with different values; the last initializer for a member is
 * the one that takes effect, so the order of entries matters.
 */
#define GEN8_FEATURES                               \
   .gen = 8,                                        \
   .has_hiz_and_separate_stencil = true,            \
   .has_resource_streamer = true,                   \
   .must_use_separate_stencil = true,               \
   .has_llc = true,                                 \
   .has_sample_with_hiz = false,                    \
   .has_pln = true,                                 \
   .has_integer_dword_mul = true,                   \
   .has_64bit_types = true,                         \
   .supports_simd16_3src = true,                    \
   .has_surface_tile_offset = true,                 \
   .num_thread_per_eu = 7,                          \
   .max_vs_threads = 504,                           \
   .max_tcs_threads = 504,                          \
   .max_tes_threads = 504,                          \
   .max_gs_threads = 504,                           \
   .max_wm_threads = 384,                           \
   .timestamp_frequency = 12500000

static const struct gen_device_info gen_device_info_bdw_gt1 = {
   GEN8_FEATURES, .gt = 1,
   .is_broadwell = true,
   .num_slices = 1,
   .num_subslices = { 2, },
   .num_eu_per_subslice = 8,
   .l3_banks = 2,
   .max_cs_threads = 42,
   .urb = {
      .size = 192,
      .min_entries = {
         [MESA_SHADER_VERTEX]    = 64,
         [MESA_SHADER_TESS_EVAL] = 34,
      },
      .max_entries = {
         [MESA_SHADER_VERTEX]    = 2560,
         [MESA_SHADER_TESS_CTRL] = 504,
         [MESA_SHADER_TESS_EVAL] = 1536,
         [MESA_SHADER_GEOMETRY]  = 960,
      },
   },
   .simulator_id = 11,
};

static const struct gen_device_info gen_device_info_bdw_gt2 = {
   GEN8_FEATURES, .gt = 2,
   .is_broadwell = true,
   .num_slices = 1,
   .num_subslices = { 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 4,
   .max_cs_threads = 56,
   .urb = {
      .size = 384,
      .min_entries = {
         [MESA_SHADER_VERTEX]    = 64,
         [MESA_SHADER_TESS_EVAL] = 34,
      },
      .max_entries = {
         [MESA_SHADER_VERTEX]    = 2560,
         [MESA_SHADER_TESS_CTRL] = 504,
         [MESA_SHADER_TESS_EVAL] = 1536,
         [MESA_SHADER_GEOMETRY]  = 960,
      },
   },
   .simulator_id = 11,
};

static const struct gen_device_info gen_device_info_bdw_gt3 = {
   GEN8_FEATURES, .gt = 3,
   .is_broadwell = true,
   .num_slices = 2,
   .num_subslices = { 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 8,
   .max_cs_threads = 56,
   .urb = {
      .size = 384,
      .min_entries = {
         [MESA_SHADER_VERTEX]    = 64,
         [MESA_SHADER_TESS_EVAL] = 34,
      },
      .max_entries = {
         [MESA_SHADER_VERTEX]    = 2560,
         [MESA_SHADER_TESS_CTRL] = 504,
         [MESA_SHADER_TESS_EVAL] = 1536,
         [MESA_SHADER_GEOMETRY]  = 960,
      },
   },
   .simulator_id = 11,
};

/* Cherryview starts from GEN8_FEATURES and then replaces .has_llc,
 * .has_integer_dword_mul and the per-stage thread limits.
 */
static const struct gen_device_info gen_device_info_chv = {
   GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
   .has_llc = false,
   .has_integer_dword_mul = false,
   .num_slices = 1,
   .num_subslices = { 2, },
   .num_eu_per_subslice = 8,
   .l3_banks = 2,
   .max_vs_threads = 80,
   .max_tcs_threads = 80,
   .max_tes_threads = 80,
   .max_gs_threads = 80,
   .max_wm_threads = 128,
   .max_cs_threads = 6 * 7,
   .urb = {
      .size = 192,
      .min_entries = {
         [MESA_SHADER_VERTEX]    = 34,
         [MESA_SHADER_TESS_EVAL] = 34,
      },
      .max_entries = {
         [MESA_SHADER_VERTEX]    = 640,
         [MESA_SHADER_TESS_CTRL] = 80,
         [MESA_SHADER_TESS_EVAL] = 384,
         [MESA_SHADER_GEOMETRY]  = 256,
      },
   },
   .simulator_id = 13,
};

/* Gen9 thread limits, timestamp frequency and URB layout. Always listed
 * after GEN8_FEATURES so that these values replace the gen8 ones.
 */
#define GEN9_HW_INFO                                \
   .gen = 9,                                        \
   .max_vs_threads = 336,                           \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 336,                          \
   .max_cs_threads = 56,                            \
   .timestamp_frequency = 12000000,                 \
   .urb = {                                         \
      .size = 384,                                  \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 1856,            \
         [MESA_SHADER_TESS_CTRL] = 672,             \
         [MESA_SHADER_TESS_EVAL] = 1120,            \
         [MESA_SHADER_GEOMETRY]  = 640,             \
      },                                            \
   }

/* Gen9 LP variant (used by the Broxton and Geminilake tables): starts from
 * the gen8 and gen9 lists, then replaces the thread limits, timestamp
 * frequency and the whole .urb member.
 */
#define GEN9_LP_FEATURES                           \
   GEN8_FEATURES,                                  \
   GEN9_HW_INFO,                                   \
   .has_integer_dword_mul = false,                 \
   .gt = 1,                                        \
   .has_llc = false,                               \
   .has_sample_with_hiz = true,                    \
   .num_slices = 1,                                \
   .num_thread_per_eu = 6,                         \
   .max_vs_threads = 112,                          \
   .max_tcs_threads = 112,                         \
   .max_tes_threads = 112,                         \
   .max_gs_threads = 112,                          \
   .max_cs_threads = 6 * 6,                        \
   .timestamp_frequency = 19200000,                \
   .urb = {                                        \
      .size = 192,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 704,            \
         [MESA_SHADER_TESS_CTRL] = 256,            \
         [MESA_SHADER_TESS_EVAL] = 416,            \
         [MESA_SHADER_GEOMETRY]  = 256,            \
      },                                           \
   }

/* LP configuration with 3 subslices of 6 EUs. */
#define GEN9_LP_FEATURES_3X6                       \
   GEN9_LP_FEATURES,                               \
   .num_subslices = { 3, },                        \
   .num_eu_per_subslice = 6

/* LP configuration with 2 subslices of 6 EUs; replaces the thread limits
 * and .urb once more with smaller values.
 */
#define GEN9_LP_FEATURES_2X6                       \
   GEN9_LP_FEATURES,                               \
   .num_subslices = { 2, },                        \
   .num_eu_per_subslice = 6,                       \
   .max_vs_threads = 56,                           \
   .max_tcs_threads = 56,                          \
   .max_tes_threads = 56,                          \
   .max_gs_threads = 56,                           \
   .max_cs_threads = 6 * 6,                        \
   .urb = {                                        \
      .size = 128,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 352,            \
         [MESA_SHADER_TESS_CTRL] = 128,            \
         [MESA_SHADER_TESS_EVAL] = 208,            \
         [MESA_SHADER_GEOMETRY]  = 128,            \
      },                                           \
   }

/* Non-LP gen9 (Skylake, Kabylake, Coffeelake tables). */
#define GEN9_FEATURES                               \
   GEN8_FEATURES,                                   \
   GEN9_HW_INFO,                                    \
   .has_sample_with_hiz = true

static const struct gen_device_info gen_device_info_skl_gt1 = {
   GEN9_FEATURES, .gt = 1,
   .is_skylake = true,
   .num_slices = 1,
   .num_subslices = { 2, },
   .num_eu_per_subslice = 6,
   .l3_banks = 2,
   .urb.size = 192,
   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
    * leading to some vertices to go missing if we use too much URB.
    */
   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
   .simulator_id = 12,
};

static const struct gen_device_info gen_device_info_skl_gt2 = {
   GEN9_FEATURES, .gt = 2,
   .is_skylake = true,
   .num_slices = 1,
   .num_subslices = { 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 4,
   .simulator_id = 12,
};

static const struct gen_device_info gen_device_info_skl_gt3 = {
   GEN9_FEATURES, .gt = 3,
   .is_skylake = true,
   .num_slices = 2,
   .num_subslices = { 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 8,
   .simulator_id = 12,
};

static const struct gen_device_info gen_device_info_skl_gt4 = {
   GEN9_FEATURES, .gt = 4,
   .is_skylake = true,
   .num_slices = 3,
   .num_subslices = { 3, 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 12,
   /* From the "L3 Allocation and Programming" documentation:
    *
    * "URB is limited to 1008KB due to programming restrictions.  This is not a
    * restriction of the L3 implementation, but of the FF and other clients.
    * Therefore, in a GT4 implementation it is possible for the programmed
    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
    * only 1008KB of this will be used."
    */
   .urb.size = 1008 / 3,
   .simulator_id = 12,
};

static const struct gen_device_info gen_device_info_bxt = {
   GEN9_LP_FEATURES_3X6,
   .is_broxton = true,
   .l3_banks = 2,
   .simulator_id = 14,
};

static const struct gen_device_info gen_device_info_bxt_2x6 = {
   GEN9_LP_FEATURES_2X6,
   .is_broxton = true,
   .l3_banks = 1,
   .simulator_id = 14,
};
/*
 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
*/

static const struct gen_device_info gen_device_info_kbl_gt1 = {
   GEN9_FEATURES,
   .is_kabylake = true,
   .gt = 1,

   .max_cs_threads = 7 * 6,
   .urb.size = 192,
   .num_slices = 1,
   .num_subslices = { 2, },
   .num_eu_per_subslice = 6,
   .l3_banks = 2,
   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
    * leading to some vertices to go missing if we use too much URB.
    */
   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
   .simulator_id = 16,
};

static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
   GEN9_FEATURES,
   .is_kabylake = true,
   .gt = 1,

   .max_cs_threads = 7 * 6,
   .num_slices = 1,
   .num_subslices = { 3, },
   .num_eu_per_subslice = 6,
   .l3_banks = 4,
   .simulator_id = 16,
};

static const struct gen_device_info gen_device_info_kbl_gt2 = {
   GEN9_FEATURES,
   .is_kabylake = true,
   .gt = 2,

   .num_slices = 1,
   .num_subslices = { 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 4,
   .simulator_id = 16,
};

static const struct gen_device_info gen_device_info_kbl_gt3 = {
   GEN9_FEATURES,
   .is_kabylake = true,
   .gt = 3,

   .num_slices = 2,
   .num_subslices = { 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 8,
   .simulator_id = 16,
};

static const struct gen_device_info gen_device_info_kbl_gt4 = {
   GEN9_FEATURES,
   .is_kabylake = true,
   .gt = 4,

   /*
    * From the "L3 Allocation and Programming" documentation:
    *
    * "URB is limited to 1008KB due to programming restrictions.  This
    * is not a restriction of the L3 implementation, but of the FF and
    * other clients.  Therefore, in a GT4 implementation it is
    * possible for the programmed allocation of the L3 data array to
    * provide 3*384KB=1152KB for URB, but only 1008KB of this
    * will be used."
    */
   .urb.size = 1008 / 3,
   .num_slices = 3,
   .num_subslices = { 3, 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 12,
   .simulator_id = 16,
};

static const struct gen_device_info gen_device_info_glk = {
   GEN9_LP_FEATURES_3X6,
   .is_geminilake = true,
   .l3_banks = 2,
   .simulator_id = 17,
};

static const struct gen_device_info gen_device_info_glk_2x6 = {
   GEN9_LP_FEATURES_2X6,
   .is_geminilake = true,
   .l3_banks = 2,
   .simulator_id = 17,
};

static const struct gen_device_info gen_device_info_cfl_gt1 = {
   GEN9_FEATURES,
   .is_coffeelake = true,
   .gt = 1,

   .num_slices = 1,
   .num_subslices = { 2, },
   .num_eu_per_subslice = 6,
   .l3_banks = 2,
   .urb.size = 192,
   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
    * leading to some vertices to go missing if we use too much URB.
    */
   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
   .simulator_id = 24,
};
static const struct gen_device_info gen_device_info_cfl_gt2 = {
   GEN9_FEATURES,
   .is_coffeelake = true,
   .gt = 2,

   .num_slices = 1,
   .num_subslices = { 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 4,
   .simulator_id = 24,
};

static const struct gen_device_info gen_device_info_cfl_gt3 = {
   GEN9_FEATURES,
   .is_coffeelake = true,
   .gt = 3,

   .num_slices = 2,
   .num_subslices = { 3, 3, },
   .num_eu_per_subslice = 8,
   .l3_banks = 8,
   .simulator_id = 24,
};

/* Gen10 thread limits, timestamp frequency and URB layout. Listed after
 * GEN8_FEATURES in GEN10_FEATURES so these values replace the gen8 ones.
 */
#define GEN10_HW_INFO                               \
   .gen = 10,                                       \
   .num_thread_per_eu = 7,                          \
   .max_vs_threads = 728,                           \
   .max_gs_threads = 432,                           \
   .max_tcs_threads = 432,                          \
   .max_tes_threads = 624,                          \
   .max_cs_threads = 56,                            \
   .timestamp_frequency = 19200000,                 \
   .urb = {                                         \
      .size = 256,                                  \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
      [MESA_SHADER_VERTEX]       = 3936,            \
      [MESA_SHADER_TESS_CTRL]    = 896,             \
      [MESA_SHADER_TESS_EVAL]    = 2064,            \
      [MESA_SHADER_GEOMETRY]     = 832,             \
      },                                            \
   }

/* Wraps a comma-separated per-slice subslice count list in braces so it can
 * be passed as a single macro argument. Uses the GNU named-variadic form.
 */
#define subslices(args...) { args, }

/* Gen10 table body, parameterised on the GT level, slice count, per-slice
 * subslice counts (built with subslices()) and L3 bank count.
 */
#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \
   GEN8_FEATURES,                                   \
   GEN10_HW_INFO,                                   \
   .has_sample_with_hiz = true,                     \
   .gt = _gt,                                       \
   .num_slices = _slices,                           \
   .num_subslices = _subslices,                     \
   .num_eu_per_subslice = 8,                        \
   .l3_banks = _l3

static const struct gen_device_info gen_device_info_cnl_2x8 = {
   /* GT0.5 */
   GEN10_FEATURES(1, 1, subslices(2), 2),
   .is_cannonlake = true,
   .simulator_id = 15,
};

static const struct gen_device_info gen_device_info_cnl_3x8 = {
   /* GT1 */
   GEN10_FEATURES(1, 1, subslices(3), 3),
   .is_cannonlake = true,
   .simulator_id = 15,
};

static const struct gen_device_info gen_device_info_cnl_4x8 = {
   /* GT 1.5 */
   GEN10_FEATURES(1, 2, subslices(2, 2), 6),
   .is_cannonlake = true,
   .simulator_id = 15,
};

static const struct gen_device_info gen_device_info_cnl_5x8 = {
   /* GT2 */
   GEN10_FEATURES(2, 2, subslices(3, 2), 6),
   .is_cannonlake = true,
   .simulator_id = 15,
};

/* Gen11 thread limits. Listed after GEN8_FEATURES in GEN11_FEATURES so
 * these values (including .has_pln = false) replace the gen8 ones.
 */
#define GEN11_HW_INFO                               \
   .gen = 11,                                       \
   .has_pln = false,                                \
   .max_vs_threads = 364,                           \
   .max_gs_threads = 224,                           \
   .max_tcs_threads = 224,                          \
   .max_tes_threads = 364,                          \
   .max_cs_threads = 56

/* Gen11 table body; same parameters as GEN10_FEATURES. The .urb member is
 * not set here, each gen11 table supplies its own.
 */
#define GEN11_FEATURES(_gt, _slices, _subslices, _l3)  \
   GEN8_FEATURES,                                      \
   GEN11_HW_INFO,                                      \
   .has_64bit_types = false,                           \
   .has_integer_dword_mul = false,                     \
   .has_sample_with_hiz = false,                       \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,  \
   .num_subslices = _subslices,                        \
   .num_eu_per_subslice = 8

/* URB entry limits used by the Icelake tables; expands inside a .urb = { }
 * initializer.
 */
#define GEN11_URB_MIN_MAX_ENTRIES                     \
   .min_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 64,                   \
      [MESA_SHADER_TESS_EVAL] = 34,                   \
   },                                                 \
   .max_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 2384,                 \
      [MESA_SHADER_TESS_CTRL] = 1032,                 \
      [MESA_SHADER_TESS_EVAL] = 2384,                 \
      [MESA_SHADER_GEOMETRY]  = 1032,                 \
   }

static const struct gen_device_info gen_device_info_icl_8x8 = {
   GEN11_FEATURES(2, 1, subslices(8), 8),
   .urb = {
      .size = 1024,
      GEN11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};

static const struct gen_device_info gen_device_info_icl_6x8 = {
   GEN11_FEATURES(1, 1, subslices(6), 6),
   .urb = {
      .size = 768,
      GEN11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};

static const struct gen_device_info gen_device_info_icl_4x8 = {
   GEN11_FEATURES(1, 1, subslices(4), 6),
   .urb = {
      .size = 768,
      GEN11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};

static const struct gen_device_info gen_device_info_icl_1x8 = {
   GEN11_FEATURES(1, 1, subslices(1), 6),
   .urb = {
      .size = 768,
      GEN11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};

static const struct gen_device_info gen_device_info_ehl_4x8 = {
   GEN11_FEATURES(1, 1, subslices(4), 4),
   .urb = {
      .size = 512,
      .min_entries = {
         [MESA_SHADER_VERTEX]    = 64,
         [MESA_SHADER_TESS_EVAL] = 34,
      },
      .max_entries = {
         [MESA_SHADER_VERTEX]    = 2384,
         [MESA_SHADER_TESS_CTRL] = 1032,
         [MESA_SHADER_TESS_EVAL] = 2384,
         [MESA_SHADER_GEOMETRY]  = 1032,
      },
   },
   .simulator_id = 28,
};

/* FIXME: Verify below entries when more information is available for this SKU.
965 */ 966static const struct gen_device_info gen_device_info_ehl_4x4 = { 967 GEN11_FEATURES(1, 1, subslices(4), 4), 968 .urb = { 969 .size = 512, 970 .min_entries = { 971 [MESA_SHADER_VERTEX] = 64, 972 [MESA_SHADER_TESS_EVAL] = 34, 973 }, 974 .max_entries = { 975 [MESA_SHADER_VERTEX] = 2384, 976 [MESA_SHADER_TESS_CTRL] = 1032, 977 [MESA_SHADER_TESS_EVAL] = 2384, 978 [MESA_SHADER_GEOMETRY] = 1032, 979 }, 980 }, 981 .num_eu_per_subslice = 4, 982 .simulator_id = 28, 983}; 984 985/* FIXME: Verfiy below entries when more information is available for this SKU. 986 */ 987static const struct gen_device_info gen_device_info_ehl_2x4 = { 988 GEN11_FEATURES(1, 1, subslices(2), 4), 989 .urb = { 990 .size = 512, 991 .min_entries = { 992 [MESA_SHADER_VERTEX] = 64, 993 [MESA_SHADER_TESS_EVAL] = 34, 994 }, 995 .max_entries = { 996 [MESA_SHADER_VERTEX] = 2384, 997 [MESA_SHADER_TESS_CTRL] = 1032, 998 [MESA_SHADER_TESS_EVAL] = 2384, 999 [MESA_SHADER_GEOMETRY] = 1032, 1000 }, 1001 }, 1002 .num_eu_per_subslice =4, 1003 .simulator_id = 28, 1004}; 1005 1006static void 1007gen_device_info_set_eu_mask(struct gen_device_info *devinfo, 1008 unsigned slice, 1009 unsigned subslice, 1010 unsigned eu_mask) 1011{ 1012 unsigned subslice_offset = slice * devinfo->eu_slice_stride + 1013 subslice * devinfo->eu_subslice_stride; 1014 1015 for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) { 1016 devinfo->eu_masks[subslice_offset + b_eu] = 1017 (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff; 1018 } 1019} 1020 1021/* Generate slice/subslice/eu masks from number of 1022 * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info 1023 * structure. 1024 * 1025 * These can be overridden with values reported by the kernel either from 1026 * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+ 1027 * through the i915 query uapi. 
1028 */ 1029static void 1030fill_masks(struct gen_device_info *devinfo) 1031{ 1032 devinfo->slice_masks = (1U << devinfo->num_slices) - 1; 1033 1034 /* Subslice masks */ 1035 unsigned max_subslices = 0; 1036 for (int s = 0; s < devinfo->num_slices; s++) 1037 max_subslices = MAX2(devinfo->num_subslices[s], max_subslices); 1038 devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8); 1039 1040 for (int s = 0; s < devinfo->num_slices; s++) { 1041 devinfo->subslice_masks[s * devinfo->subslice_slice_stride] = 1042 (1U << devinfo->num_subslices[s]) - 1; 1043 } 1044 1045 /* EU masks */ 1046 devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8); 1047 devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride; 1048 1049 for (int s = 0; s < devinfo->num_slices; s++) { 1050 for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) { 1051 gen_device_info_set_eu_mask(devinfo, s, ss, 1052 (1U << devinfo->num_eu_per_subslice) - 1); 1053 } 1054 } 1055} 1056 1057void 1058gen_device_info_update_from_masks(struct gen_device_info *devinfo, 1059 uint32_t slice_mask, 1060 uint32_t subslice_mask, 1061 uint32_t n_eus) 1062{ 1063 struct { 1064 struct drm_i915_query_topology_info base; 1065 } topology; 1066 1067 assert((slice_mask & 0xff) == slice_mask); 1068 1069 memset(&topology, 0, sizeof(topology)); 1070 1071 topology.base.max_slices = util_last_bit(slice_mask); 1072 topology.base.max_subslices = util_last_bit(subslice_mask); 1073 1074 topology.base.subslice_offset = DIV_ROUND_UP(topology.base.max_slices, 8); 1075 topology.base.subslice_stride = DIV_ROUND_UP(topology.base.max_subslices, 8); 1076 1077 uint32_t n_subslices = __builtin_popcount(slice_mask) * 1078 __builtin_popcount(subslice_mask); 1079 uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 1080 uint32_t eu_mask = (1U << num_eu_per_subslice) - 1; 1081 1082 topology.base.eu_offset = topology.base.subslice_offset + 1083 DIV_ROUND_UP(topology.base.max_subslices, 8); 1084 
topology.base.eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8); 1085 1086 /* Set slice mask in topology */ 1087 for (int b = 0; b < topology.base.subslice_offset; b++) 1088 topology.base.data[b] = (slice_mask >> (b * 8)) & 0xff; 1089 1090 for (int s = 0; s < topology.base.max_slices; s++) { 1091 1092 /* Set subslice mask in topology */ 1093 for (int b = 0; b < topology.base.subslice_stride; b++) { 1094 int subslice_offset = topology.base.subslice_offset + 1095 s * topology.base.subslice_stride + b; 1096 1097 topology.base.data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; 1098 } 1099 1100 /* Set eu mask in topology */ 1101 for (int ss = 0; ss < topology.base.max_subslices; ss++) { 1102 for (int b = 0; b < topology.base.eu_stride; b++) { 1103 int eu_offset = topology.base.eu_offset + 1104 (s * topology.base.max_subslices + ss) * topology.base.eu_stride + b; 1105 1106 topology.base.data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; 1107 } 1108 } 1109 } 1110 1111 gen_device_info_update_from_topology(devinfo, &topology.base); 1112} 1113 1114static void 1115reset_masks(struct gen_device_info *devinfo) 1116{ 1117 devinfo->subslice_slice_stride = 0; 1118 devinfo->eu_subslice_stride = 0; 1119 devinfo->eu_slice_stride = 0; 1120 1121 devinfo->num_slices = 0; 1122 devinfo->num_eu_per_subslice = 0; 1123 memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); 1124 1125 memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); 1126 memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); 1127 memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); 1128} 1129 1130void 1131gen_device_info_update_from_topology(struct gen_device_info *devinfo, 1132 const struct drm_i915_query_topology_info *topology) 1133{ 1134 reset_masks(devinfo); 1135 1136 devinfo->subslice_slice_stride = topology->subslice_stride; 1137 1138 devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8); 1139 devinfo->eu_slice_stride = topology->max_subslices * 
devinfo->eu_subslice_stride; 1140 1141 assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); 1142 memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); 1143 devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); 1144 1145 uint32_t subslice_mask_len = 1146 topology->max_slices * topology->subslice_stride; 1147 assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); 1148 memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset], 1149 subslice_mask_len); 1150 1151 uint32_t n_subslices = 0; 1152 for (int s = 0; s < topology->max_slices; s++) { 1153 if ((devinfo->slice_masks & (1UL << s)) == 0) 1154 continue; 1155 1156 for (int b = 0; b < devinfo->subslice_slice_stride; b++) { 1157 devinfo->num_subslices[s] += 1158 __builtin_popcount(devinfo->subslice_masks[b]); 1159 } 1160 n_subslices += devinfo->num_subslices[s]; 1161 } 1162 assert(n_subslices > 0); 1163 1164 uint32_t eu_mask_len = 1165 topology->eu_stride * topology->max_subslices * topology->max_slices; 1166 assert(sizeof(devinfo->eu_masks) >= eu_mask_len); 1167 memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len); 1168 1169 uint32_t n_eus = 0; 1170 for (int b = 0; b < eu_mask_len; b++) 1171 n_eus += __builtin_popcount(devinfo->eu_masks[b]); 1172 1173 devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 1174} 1175 1176bool 1177gen_get_device_info(int devid, struct gen_device_info *devinfo) 1178{ 1179 switch (devid) { 1180#undef CHIPSET 1181#define CHIPSET(id, family, name) \ 1182 case id: *devinfo = gen_device_info_##family; break; 1183#include "pci_ids/i965_pci_ids.h" 1184 default: 1185 fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); 1186 return false; 1187 } 1188 1189 fill_masks(devinfo); 1190 1191 /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: 1192 * 1193 * "Scratch Space per slice is computed based on 4 sub-slices. 
SW must 1194 * allocate scratch space enough so that each slice has 4 slices allowed." 1195 * 1196 * The equivalent internal documentation says that this programming note 1197 * applies to all Gen9+ platforms. 1198 * 1199 * The hardware typically calculates the scratch space pointer by taking 1200 * the base address, and adding per-thread-scratch-space * thread ID. 1201 * Extra padding can be necessary depending how the thread IDs are 1202 * calculated for a particular shader stage. 1203 */ 1204 1205 switch(devinfo->gen) { 1206 case 9: 1207 case 10: 1208 devinfo->max_wm_threads = 64 /* threads-per-PSD */ 1209 * devinfo->num_slices 1210 * 4; /* effective subslices per slice */ 1211 break; 1212 case 11: 1213 devinfo->max_wm_threads = 128 /* threads-per-PSD */ 1214 * devinfo->num_slices 1215 * 8; /* subslices per slice */ 1216 break; 1217 default: 1218 break; 1219 } 1220 1221 assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); 1222 1223 return true; 1224} 1225 1226const char * 1227gen_get_device_name(int devid) 1228{ 1229 switch (devid) { 1230#undef CHIPSET 1231#define CHIPSET(id, family, name) case id: return name; 1232#include "pci_ids/i965_pci_ids.h" 1233 default: 1234 return NULL; 1235 } 1236} 1237