1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2013 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <assert.h> 25b8e80941Smrg#include <stdio.h> 26b8e80941Smrg#include <stdlib.h> 27b8e80941Smrg#include <string.h> 28b8e80941Smrg#include <unistd.h> 29b8e80941Smrg#include "gen_device_info.h" 30b8e80941Smrg#include "compiler/shader_enums.h" 31b8e80941Smrg#include "util/bitscan.h" 32b8e80941Smrg#include "util/macros.h" 33b8e80941Smrg 34b8e80941Smrg#include "drm-uapi/i915_drm.h" 35b8e80941Smrg 36b8e80941Smrg/** 37b8e80941Smrg * Get the PCI ID for the device name. 38b8e80941Smrg * 39b8e80941Smrg * Returns -1 if the device is not known. 40b8e80941Smrg */ 41b8e80941Smrgint 42b8e80941Smrggen_device_name_to_pci_device_id(const char *name) 43b8e80941Smrg{ 44b8e80941Smrg static const struct { 45b8e80941Smrg const char *name; 46b8e80941Smrg int pci_id; 47b8e80941Smrg } name_map[] = { 48b8e80941Smrg { "brw", 0x2a02 }, 49b8e80941Smrg { "g4x", 0x2a42 }, 50b8e80941Smrg { "ilk", 0x0042 }, 51b8e80941Smrg { "snb", 0x0126 }, 52b8e80941Smrg { "ivb", 0x016a }, 53b8e80941Smrg { "hsw", 0x0d2e }, 54b8e80941Smrg { "byt", 0x0f33 }, 55b8e80941Smrg { "bdw", 0x162e }, 56b8e80941Smrg { "chv", 0x22B3 }, 57b8e80941Smrg { "skl", 0x1912 }, 58b8e80941Smrg { "bxt", 0x5A85 }, 59b8e80941Smrg { "kbl", 0x5912 }, 60b8e80941Smrg { "aml", 0x591C }, 61b8e80941Smrg { "glk", 0x3185 }, 62b8e80941Smrg { "cfl", 0x3E9B }, 63b8e80941Smrg { "whl", 0x3EA1 }, 64b8e80941Smrg { "cml", 0x9b41 }, 65b8e80941Smrg { "cnl", 0x5a52 }, 66b8e80941Smrg { "icl", 0x8a52 }, 67b8e80941Smrg }; 68b8e80941Smrg 69b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { 70b8e80941Smrg if (!strcmp(name_map[i].name, name)) 71b8e80941Smrg return name_map[i].pci_id; 72b8e80941Smrg } 73b8e80941Smrg 74b8e80941Smrg return -1; 75b8e80941Smrg} 76b8e80941Smrg 77b8e80941Smrg/** 78b8e80941Smrg * Get the overridden PCI ID for the device. This is set with the 79b8e80941Smrg * INTEL_DEVID_OVERRIDE environment variable. 80b8e80941Smrg * 81b8e80941Smrg * Returns -1 if the override is not set. 82b8e80941Smrg */ 83b8e80941Smrgint 84b8e80941Smrggen_get_pci_device_id_override(void) 85b8e80941Smrg{ 86b8e80941Smrg if (!issetugid()) { 87b8e80941Smrg const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); 88b8e80941Smrg if (devid_override) { 89b8e80941Smrg const int id = gen_device_name_to_pci_device_id(devid_override); 90b8e80941Smrg return id >= 0 ? id : strtol(devid_override, NULL, 0); 91b8e80941Smrg } 92b8e80941Smrg } 93b8e80941Smrg 94b8e80941Smrg return -1; 95b8e80941Smrg} 96b8e80941Smrg 97b8e80941Smrgstatic const struct gen_device_info gen_device_info_i965 = { 98b8e80941Smrg .gen = 4, 99b8e80941Smrg .has_negative_rhw_bug = true, 100b8e80941Smrg .num_slices = 1, 101b8e80941Smrg .num_subslices = { 1, }, 102b8e80941Smrg .num_eu_per_subslice = 8, 103b8e80941Smrg .num_thread_per_eu = 4, 104b8e80941Smrg .max_vs_threads = 16, 105b8e80941Smrg .max_gs_threads = 2, 106b8e80941Smrg .max_wm_threads = 8 * 4, 107b8e80941Smrg .urb = { 108b8e80941Smrg .size = 256, 109b8e80941Smrg }, 110b8e80941Smrg .timestamp_frequency = 12500000, 111b8e80941Smrg .simulator_id = -1, 112b8e80941Smrg}; 113b8e80941Smrg 114b8e80941Smrgstatic const struct gen_device_info gen_device_info_g4x = { 115b8e80941Smrg .gen = 4, 116b8e80941Smrg .has_pln = true, 117b8e80941Smrg .has_compr4 = true, 118b8e80941Smrg .has_surface_tile_offset = true, 119b8e80941Smrg .is_g4x = true, 120b8e80941Smrg .num_slices = 1, 121b8e80941Smrg .num_subslices = { 1, }, 122b8e80941Smrg .num_eu_per_subslice = 10, 123b8e80941Smrg .num_thread_per_eu = 5, 124b8e80941Smrg .max_vs_threads = 32, 125b8e80941Smrg .max_gs_threads = 2, 126b8e80941Smrg .max_wm_threads = 10 * 5, 127b8e80941Smrg .urb = { 128b8e80941Smrg .size = 384, 129b8e80941Smrg }, 130b8e80941Smrg .timestamp_frequency = 12500000, 131b8e80941Smrg .simulator_id = -1, 132b8e80941Smrg}; 133b8e80941Smrg 134b8e80941Smrgstatic const struct gen_device_info gen_device_info_ilk = { 135b8e80941Smrg .gen = 5, 136b8e80941Smrg .has_pln = true, 137b8e80941Smrg .has_compr4 = true, 138b8e80941Smrg .has_surface_tile_offset = true, 139b8e80941Smrg .num_slices = 1, 140b8e80941Smrg .num_subslices = { 1, }, 141b8e80941Smrg .num_eu_per_subslice = 12, 142b8e80941Smrg .num_thread_per_eu = 6, 143b8e80941Smrg .max_vs_threads = 72, 144b8e80941Smrg .max_gs_threads = 32, 145b8e80941Smrg .max_wm_threads = 12 * 6, 146b8e80941Smrg .urb = { 147b8e80941Smrg .size = 1024, 148b8e80941Smrg }, 149b8e80941Smrg .timestamp_frequency = 12500000, 150b8e80941Smrg .simulator_id = -1, 151b8e80941Smrg}; 152b8e80941Smrg 153b8e80941Smrgstatic const struct gen_device_info gen_device_info_snb_gt1 = { 154b8e80941Smrg .gen = 6, 155b8e80941Smrg .gt = 1, 156b8e80941Smrg .has_hiz_and_separate_stencil = true, 157b8e80941Smrg .has_llc = true, 158b8e80941Smrg .has_pln = true, 159b8e80941Smrg .has_surface_tile_offset = true, 160b8e80941Smrg .needs_unlit_centroid_workaround = true, 161b8e80941Smrg .num_slices = 1, 162b8e80941Smrg .num_subslices = { 1, }, 163b8e80941Smrg .num_eu_per_subslice = 6, 164b8e80941Smrg .num_thread_per_eu = 6, /* Not confirmed */ 165b8e80941Smrg .max_vs_threads = 24, 166b8e80941Smrg .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */ 167b8e80941Smrg .max_wm_threads = 40, 168b8e80941Smrg .urb = { 169b8e80941Smrg .size = 32, 170b8e80941Smrg .min_entries = { 171b8e80941Smrg [MESA_SHADER_VERTEX] = 24, 172b8e80941Smrg }, 173b8e80941Smrg .max_entries = { 174b8e80941Smrg [MESA_SHADER_VERTEX] = 256, 175b8e80941Smrg [MESA_SHADER_GEOMETRY] = 256, 176b8e80941Smrg }, 177b8e80941Smrg }, 178b8e80941Smrg .timestamp_frequency = 12500000, 179b8e80941Smrg .simulator_id = -1, 180b8e80941Smrg}; 181b8e80941Smrg 182b8e80941Smrgstatic const struct gen_device_info gen_device_info_snb_gt2 = { 183b8e80941Smrg .gen = 6, 184b8e80941Smrg .gt = 2, 185b8e80941Smrg .has_hiz_and_separate_stencil = true, 186b8e80941Smrg .has_llc = true, 187b8e80941Smrg .has_pln = true, 188b8e80941Smrg .has_surface_tile_offset = true, 189b8e80941Smrg .needs_unlit_centroid_workaround = true, 190b8e80941Smrg .num_slices = 1, 191b8e80941Smrg .num_subslices = { 1, }, 192b8e80941Smrg .num_eu_per_subslice = 12, 193b8e80941Smrg .num_thread_per_eu = 6, /* Not confirmed */ 194b8e80941Smrg .max_vs_threads = 60, 195b8e80941Smrg .max_gs_threads = 60, 196b8e80941Smrg .max_wm_threads = 80, 197b8e80941Smrg .urb = { 198b8e80941Smrg .size = 64, 199b8e80941Smrg .min_entries = { 200b8e80941Smrg [MESA_SHADER_VERTEX] = 24, 201b8e80941Smrg }, 202b8e80941Smrg .max_entries = { 203b8e80941Smrg [MESA_SHADER_VERTEX] = 256, 204b8e80941Smrg [MESA_SHADER_GEOMETRY] = 256, 205b8e80941Smrg }, 206b8e80941Smrg }, 207b8e80941Smrg .timestamp_frequency = 12500000, 208b8e80941Smrg .simulator_id = -1, 209b8e80941Smrg}; 210b8e80941Smrg 211b8e80941Smrg#define GEN7_FEATURES \ 212b8e80941Smrg .gen = 7, \ 213b8e80941Smrg .has_hiz_and_separate_stencil = true, \ 214b8e80941Smrg .must_use_separate_stencil = true, \ 215b8e80941Smrg .has_llc = true, \ 216b8e80941Smrg .has_pln = true, \ 217b8e80941Smrg .has_64bit_types = true, \ 218b8e80941Smrg .has_surface_tile_offset = true, \ 219b8e80941Smrg .timestamp_frequency = 12500000 220b8e80941Smrg 221b8e80941Smrgstatic const struct gen_device_info gen_device_info_ivb_gt1 = { 222b8e80941Smrg GEN7_FEATURES, .is_ivybridge = true, .gt = 1, 223b8e80941Smrg .num_slices = 1, 224b8e80941Smrg .num_subslices = { 1, }, 225b8e80941Smrg .num_eu_per_subslice = 6, 226b8e80941Smrg .num_thread_per_eu = 6, 227b8e80941Smrg .l3_banks = 2, 228b8e80941Smrg .max_vs_threads = 36, 229b8e80941Smrg .max_tcs_threads = 36, 230b8e80941Smrg .max_tes_threads = 36, 231b8e80941Smrg .max_gs_threads = 36, 232b8e80941Smrg .max_wm_threads = 48, 233b8e80941Smrg .max_cs_threads = 36, 234b8e80941Smrg .urb = { 235b8e80941Smrg .size = 128, 236b8e80941Smrg .min_entries = { 237b8e80941Smrg [MESA_SHADER_VERTEX] = 32, 238b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 239b8e80941Smrg }, 240b8e80941Smrg .max_entries = { 241b8e80941Smrg [MESA_SHADER_VERTEX] = 512, 242b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 32, 243b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 288, 244b8e80941Smrg [MESA_SHADER_GEOMETRY] = 192, 245b8e80941Smrg }, 246b8e80941Smrg }, 247b8e80941Smrg .simulator_id = 7, 248b8e80941Smrg}; 249b8e80941Smrg 250b8e80941Smrgstatic const struct gen_device_info gen_device_info_ivb_gt2 = { 251b8e80941Smrg GEN7_FEATURES, .is_ivybridge = true, .gt = 2, 252b8e80941Smrg .num_slices = 1, 253b8e80941Smrg .num_subslices = { 1, }, 254b8e80941Smrg .num_eu_per_subslice = 12, 255b8e80941Smrg .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of 256b8e80941Smrg * @max_wm_threads ... */ 257b8e80941Smrg .l3_banks = 4, 258b8e80941Smrg .max_vs_threads = 128, 259b8e80941Smrg .max_tcs_threads = 128, 260b8e80941Smrg .max_tes_threads = 128, 261b8e80941Smrg .max_gs_threads = 128, 262b8e80941Smrg .max_wm_threads = 172, 263b8e80941Smrg .max_cs_threads = 64, 264b8e80941Smrg .urb = { 265b8e80941Smrg .size = 256, 266b8e80941Smrg .min_entries = { 267b8e80941Smrg [MESA_SHADER_VERTEX] = 32, 268b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 269b8e80941Smrg }, 270b8e80941Smrg .max_entries = { 271b8e80941Smrg [MESA_SHADER_VERTEX] = 704, 272b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 64, 273b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 448, 274b8e80941Smrg [MESA_SHADER_GEOMETRY] = 320, 275b8e80941Smrg }, 276b8e80941Smrg }, 277b8e80941Smrg .simulator_id = 7, 278b8e80941Smrg}; 279b8e80941Smrg 280b8e80941Smrgstatic const struct gen_device_info gen_device_info_byt = { 281b8e80941Smrg GEN7_FEATURES, .is_baytrail = true, .gt = 1, 282b8e80941Smrg .num_slices = 1, 283b8e80941Smrg .num_subslices = { 1, }, 284b8e80941Smrg .num_eu_per_subslice = 4, 285b8e80941Smrg .num_thread_per_eu = 8, 286b8e80941Smrg .l3_banks = 1, 287b8e80941Smrg .has_llc = false, 288b8e80941Smrg .max_vs_threads = 36, 289b8e80941Smrg .max_tcs_threads = 36, 290b8e80941Smrg .max_tes_threads = 36, 291b8e80941Smrg .max_gs_threads = 36, 292b8e80941Smrg .max_wm_threads = 48, 293b8e80941Smrg .max_cs_threads = 32, 294b8e80941Smrg .urb = { 295b8e80941Smrg .size = 128, 296b8e80941Smrg .min_entries = { 297b8e80941Smrg [MESA_SHADER_VERTEX] = 32, 298b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 299b8e80941Smrg }, 300b8e80941Smrg .max_entries = { 301b8e80941Smrg [MESA_SHADER_VERTEX] = 512, 302b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 32, 303b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 288, 304b8e80941Smrg [MESA_SHADER_GEOMETRY] = 192, 305b8e80941Smrg }, 306b8e80941Smrg }, 307b8e80941Smrg .simulator_id = 10, 308b8e80941Smrg}; 309b8e80941Smrg 310b8e80941Smrg#define HSW_FEATURES \ 311b8e80941Smrg GEN7_FEATURES, \ 312b8e80941Smrg .is_haswell = true, \ 313b8e80941Smrg .supports_simd16_3src = true, \ 314b8e80941Smrg .has_resource_streamer = true 315b8e80941Smrg 316b8e80941Smrgstatic const struct gen_device_info gen_device_info_hsw_gt1 = { 317b8e80941Smrg HSW_FEATURES, .gt = 1, 318b8e80941Smrg .num_slices = 1, 319b8e80941Smrg .num_subslices = { 1, }, 320b8e80941Smrg .num_eu_per_subslice = 10, 321b8e80941Smrg .num_thread_per_eu = 7, 322b8e80941Smrg .l3_banks = 2, 323b8e80941Smrg .max_vs_threads = 70, 324b8e80941Smrg .max_tcs_threads = 70, 325b8e80941Smrg .max_tes_threads = 70, 326b8e80941Smrg .max_gs_threads = 70, 327b8e80941Smrg .max_wm_threads = 102, 328b8e80941Smrg .max_cs_threads = 70, 329b8e80941Smrg .urb = { 330b8e80941Smrg .size = 128, 331b8e80941Smrg .min_entries = { 332b8e80941Smrg [MESA_SHADER_VERTEX] = 32, 333b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 334b8e80941Smrg }, 335b8e80941Smrg .max_entries = { 336b8e80941Smrg [MESA_SHADER_VERTEX] = 640, 337b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 64, 338b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 384, 339b8e80941Smrg [MESA_SHADER_GEOMETRY] = 256, 340b8e80941Smrg }, 341b8e80941Smrg }, 342b8e80941Smrg .simulator_id = 9, 343b8e80941Smrg}; 344b8e80941Smrg 345b8e80941Smrgstatic const struct gen_device_info gen_device_info_hsw_gt2 = { 346b8e80941Smrg HSW_FEATURES, .gt = 2, 347b8e80941Smrg .num_slices = 1, 348b8e80941Smrg .num_subslices = { 2, }, 349b8e80941Smrg .num_eu_per_subslice = 10, 350b8e80941Smrg .num_thread_per_eu = 7, 351b8e80941Smrg .l3_banks = 4, 352b8e80941Smrg .max_vs_threads = 280, 353b8e80941Smrg .max_tcs_threads = 256, 354b8e80941Smrg .max_tes_threads = 280, 355b8e80941Smrg .max_gs_threads = 256, 356b8e80941Smrg .max_wm_threads = 204, 357b8e80941Smrg .max_cs_threads = 70, 358b8e80941Smrg .urb = { 359b8e80941Smrg .size = 256, 360b8e80941Smrg .min_entries = { 361b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 362b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 363b8e80941Smrg }, 364b8e80941Smrg .max_entries = { 365b8e80941Smrg [MESA_SHADER_VERTEX] = 1664, 366b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 128, 367b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 960, 368b8e80941Smrg [MESA_SHADER_GEOMETRY] = 640, 369b8e80941Smrg }, 370b8e80941Smrg }, 371b8e80941Smrg .simulator_id = 9, 372b8e80941Smrg}; 373b8e80941Smrg 374b8e80941Smrgstatic const struct gen_device_info gen_device_info_hsw_gt3 = { 375b8e80941Smrg HSW_FEATURES, .gt = 3, 376b8e80941Smrg .num_slices = 2, 377b8e80941Smrg .num_subslices = { 2, }, 378b8e80941Smrg .num_eu_per_subslice = 10, 379b8e80941Smrg .num_thread_per_eu = 7, 380b8e80941Smrg .l3_banks = 8, 381b8e80941Smrg .max_vs_threads = 280, 382b8e80941Smrg .max_tcs_threads = 256, 383b8e80941Smrg .max_tes_threads = 280, 384b8e80941Smrg .max_gs_threads = 256, 385b8e80941Smrg .max_wm_threads = 408, 386b8e80941Smrg .max_cs_threads = 70, 387b8e80941Smrg .urb = { 388b8e80941Smrg .size = 512, 389b8e80941Smrg .min_entries = { 390b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 391b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 10, 392b8e80941Smrg }, 393b8e80941Smrg .max_entries = { 394b8e80941Smrg [MESA_SHADER_VERTEX] = 1664, 395b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 128, 396b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 960, 397b8e80941Smrg [MESA_SHADER_GEOMETRY] = 640, 398b8e80941Smrg }, 399b8e80941Smrg }, 400b8e80941Smrg .simulator_id = 9, 401b8e80941Smrg}; 402b8e80941Smrg 403b8e80941Smrg/* It's unclear how well supported sampling from the hiz buffer is on GEN8, 404b8e80941Smrg * so keep things conservative for now and set has_sample_with_hiz = false. 405b8e80941Smrg */ 406b8e80941Smrg#define GEN8_FEATURES \ 407b8e80941Smrg .gen = 8, \ 408b8e80941Smrg .has_hiz_and_separate_stencil = true, \ 409b8e80941Smrg .has_resource_streamer = true, \ 410b8e80941Smrg .must_use_separate_stencil = true, \ 411b8e80941Smrg .has_llc = true, \ 412b8e80941Smrg .has_sample_with_hiz = false, \ 413b8e80941Smrg .has_pln = true, \ 414b8e80941Smrg .has_integer_dword_mul = true, \ 415b8e80941Smrg .has_64bit_types = true, \ 416b8e80941Smrg .supports_simd16_3src = true, \ 417b8e80941Smrg .has_surface_tile_offset = true, \ 418b8e80941Smrg .num_thread_per_eu = 7, \ 419b8e80941Smrg .max_vs_threads = 504, \ 420b8e80941Smrg .max_tcs_threads = 504, \ 421b8e80941Smrg .max_tes_threads = 504, \ 422b8e80941Smrg .max_gs_threads = 504, \ 423b8e80941Smrg .max_wm_threads = 384, \ 424b8e80941Smrg .timestamp_frequency = 12500000 425b8e80941Smrg 426b8e80941Smrgstatic const struct gen_device_info gen_device_info_bdw_gt1 = { 427b8e80941Smrg GEN8_FEATURES, .gt = 1, 428b8e80941Smrg .is_broadwell = true, 429b8e80941Smrg .num_slices = 1, 430b8e80941Smrg .num_subslices = { 2, }, 431b8e80941Smrg .num_eu_per_subslice = 8, 432b8e80941Smrg .l3_banks = 2, 433b8e80941Smrg .max_cs_threads = 42, 434b8e80941Smrg .urb = { 435b8e80941Smrg .size = 192, 436b8e80941Smrg .min_entries = { 437b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 438b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 439b8e80941Smrg }, 440b8e80941Smrg .max_entries = { 441b8e80941Smrg [MESA_SHADER_VERTEX] = 2560, 442b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 504, 443b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 1536, 444b8e80941Smrg [MESA_SHADER_GEOMETRY] = 960, 445b8e80941Smrg }, 446b8e80941Smrg }, 447b8e80941Smrg .simulator_id = 11, 448b8e80941Smrg}; 449b8e80941Smrg 450b8e80941Smrgstatic const struct gen_device_info gen_device_info_bdw_gt2 = { 451b8e80941Smrg GEN8_FEATURES, .gt = 2, 452b8e80941Smrg .is_broadwell = true, 453b8e80941Smrg .num_slices = 1, 454b8e80941Smrg .num_subslices = { 3, }, 455b8e80941Smrg .num_eu_per_subslice = 8, 456b8e80941Smrg .l3_banks = 4, 457b8e80941Smrg .max_cs_threads = 56, 458b8e80941Smrg .urb = { 459b8e80941Smrg .size = 384, 460b8e80941Smrg .min_entries = { 461b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 462b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 463b8e80941Smrg }, 464b8e80941Smrg .max_entries = { 465b8e80941Smrg [MESA_SHADER_VERTEX] = 2560, 466b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 504, 467b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 1536, 468b8e80941Smrg [MESA_SHADER_GEOMETRY] = 960, 469b8e80941Smrg }, 470b8e80941Smrg }, 471b8e80941Smrg .simulator_id = 11, 472b8e80941Smrg}; 473b8e80941Smrg 474b8e80941Smrgstatic const struct gen_device_info gen_device_info_bdw_gt3 = { 475b8e80941Smrg GEN8_FEATURES, .gt = 3, 476b8e80941Smrg .is_broadwell = true, 477b8e80941Smrg .num_slices = 2, 478b8e80941Smrg .num_subslices = { 3, 3, }, 479b8e80941Smrg .num_eu_per_subslice = 8, 480b8e80941Smrg .l3_banks = 8, 481b8e80941Smrg .max_cs_threads = 56, 482b8e80941Smrg .urb = { 483b8e80941Smrg .size = 384, 484b8e80941Smrg .min_entries = { 485b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 486b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 487b8e80941Smrg }, 488b8e80941Smrg .max_entries = { 489b8e80941Smrg [MESA_SHADER_VERTEX] = 2560, 490b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 504, 491b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 1536, 492b8e80941Smrg [MESA_SHADER_GEOMETRY] = 960, 493b8e80941Smrg }, 494b8e80941Smrg }, 495b8e80941Smrg .simulator_id = 11, 496b8e80941Smrg}; 497b8e80941Smrg 498b8e80941Smrgstatic const struct gen_device_info gen_device_info_chv = { 499b8e80941Smrg GEN8_FEATURES, .is_cherryview = 1, .gt = 1, 500b8e80941Smrg .has_llc = false, 501b8e80941Smrg .has_integer_dword_mul = false, 502b8e80941Smrg .num_slices = 1, 503b8e80941Smrg .num_subslices = { 2, }, 504b8e80941Smrg .num_eu_per_subslice = 8, 505b8e80941Smrg .l3_banks = 2, 506b8e80941Smrg .max_vs_threads = 80, 507b8e80941Smrg .max_tcs_threads = 80, 508b8e80941Smrg .max_tes_threads = 80, 509b8e80941Smrg .max_gs_threads = 80, 510b8e80941Smrg .max_wm_threads = 128, 511b8e80941Smrg .max_cs_threads = 6 * 7, 512b8e80941Smrg .urb = { 513b8e80941Smrg .size = 192, 514b8e80941Smrg .min_entries = { 515b8e80941Smrg [MESA_SHADER_VERTEX] = 34, 516b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 517b8e80941Smrg }, 518b8e80941Smrg .max_entries = { 519b8e80941Smrg [MESA_SHADER_VERTEX] = 640, 520b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 80, 521b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 384, 522b8e80941Smrg [MESA_SHADER_GEOMETRY] = 256, 523b8e80941Smrg }, 524b8e80941Smrg }, 525b8e80941Smrg .simulator_id = 13, 526b8e80941Smrg}; 527b8e80941Smrg 528b8e80941Smrg#define GEN9_HW_INFO \ 529b8e80941Smrg .gen = 9, \ 530b8e80941Smrg .max_vs_threads = 336, \ 531b8e80941Smrg .max_gs_threads = 336, \ 532b8e80941Smrg .max_tcs_threads = 336, \ 533b8e80941Smrg .max_tes_threads = 336, \ 534b8e80941Smrg .max_cs_threads = 56, \ 535b8e80941Smrg .timestamp_frequency = 12000000, \ 536b8e80941Smrg .urb = { \ 537b8e80941Smrg .size = 384, \ 538b8e80941Smrg .min_entries = { \ 539b8e80941Smrg [MESA_SHADER_VERTEX] = 64, \ 540b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 541b8e80941Smrg }, \ 542b8e80941Smrg .max_entries = { \ 543b8e80941Smrg [MESA_SHADER_VERTEX] = 1856, \ 544b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 672, \ 545b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 1120, \ 546b8e80941Smrg [MESA_SHADER_GEOMETRY] = 640, \ 547b8e80941Smrg }, \ 548b8e80941Smrg } 549b8e80941Smrg 550b8e80941Smrg#define GEN9_LP_FEATURES \ 551b8e80941Smrg GEN8_FEATURES, \ 552b8e80941Smrg GEN9_HW_INFO, \ 553b8e80941Smrg .has_integer_dword_mul = false, \ 554b8e80941Smrg .gt = 1, \ 555b8e80941Smrg .has_llc = false, \ 556b8e80941Smrg .has_sample_with_hiz = true, \ 557b8e80941Smrg .num_slices = 1, \ 558b8e80941Smrg .num_thread_per_eu = 6, \ 559b8e80941Smrg .max_vs_threads = 112, \ 560b8e80941Smrg .max_tcs_threads = 112, \ 561b8e80941Smrg .max_tes_threads = 112, \ 562b8e80941Smrg .max_gs_threads = 112, \ 563b8e80941Smrg .max_cs_threads = 6 * 6, \ 564b8e80941Smrg .timestamp_frequency = 19200000, \ 565b8e80941Smrg .urb = { \ 566b8e80941Smrg .size = 192, \ 567b8e80941Smrg .min_entries = { \ 568b8e80941Smrg [MESA_SHADER_VERTEX] = 34, \ 569b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 570b8e80941Smrg }, \ 571b8e80941Smrg .max_entries = { \ 572b8e80941Smrg [MESA_SHADER_VERTEX] = 704, \ 573b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 256, \ 574b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 416, \ 575b8e80941Smrg [MESA_SHADER_GEOMETRY] = 256, \ 576b8e80941Smrg }, \ 577b8e80941Smrg } 578b8e80941Smrg 579b8e80941Smrg#define GEN9_LP_FEATURES_3X6 \ 580b8e80941Smrg GEN9_LP_FEATURES, \ 581b8e80941Smrg .num_subslices = { 3, }, \ 582b8e80941Smrg .num_eu_per_subslice = 6 583b8e80941Smrg 584b8e80941Smrg#define GEN9_LP_FEATURES_2X6 \ 585b8e80941Smrg GEN9_LP_FEATURES, \ 586b8e80941Smrg .num_subslices = { 2, }, \ 587b8e80941Smrg .num_eu_per_subslice = 6, \ 588b8e80941Smrg .max_vs_threads = 56, \ 589b8e80941Smrg .max_tcs_threads = 56, \ 590b8e80941Smrg .max_tes_threads = 56, \ 591b8e80941Smrg .max_gs_threads = 56, \ 592b8e80941Smrg .max_cs_threads = 6 * 6, \ 593b8e80941Smrg .urb = { \ 594b8e80941Smrg .size = 128, \ 595b8e80941Smrg .min_entries = { \ 596b8e80941Smrg [MESA_SHADER_VERTEX] = 34, \ 597b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 598b8e80941Smrg }, \ 599b8e80941Smrg .max_entries = { \ 600b8e80941Smrg [MESA_SHADER_VERTEX] = 352, \ 601b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 128, \ 602b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 208, \ 603b8e80941Smrg [MESA_SHADER_GEOMETRY] = 128, \ 604b8e80941Smrg }, \ 605b8e80941Smrg } 606b8e80941Smrg 607b8e80941Smrg#define GEN9_FEATURES \ 608b8e80941Smrg GEN8_FEATURES, \ 609b8e80941Smrg GEN9_HW_INFO, \ 610b8e80941Smrg .has_sample_with_hiz = true 611b8e80941Smrg 612b8e80941Smrgstatic const struct gen_device_info gen_device_info_skl_gt1 = { 613b8e80941Smrg GEN9_FEATURES, .gt = 1, 614b8e80941Smrg .is_skylake = true, 615b8e80941Smrg .num_slices = 1, 616b8e80941Smrg .num_subslices = { 2, }, 617b8e80941Smrg .num_eu_per_subslice = 6, 618b8e80941Smrg .l3_banks = 2, 619b8e80941Smrg .urb.size = 192, 620b8e80941Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 621b8e80941Smrg * leading to some vertices to go missing if we use too much URB. 622b8e80941Smrg */ 623b8e80941Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 624b8e80941Smrg .simulator_id = 12, 625b8e80941Smrg}; 626b8e80941Smrg 627b8e80941Smrgstatic const struct gen_device_info gen_device_info_skl_gt2 = { 628b8e80941Smrg GEN9_FEATURES, .gt = 2, 629b8e80941Smrg .is_skylake = true, 630b8e80941Smrg .num_slices = 1, 631b8e80941Smrg .num_subslices = { 3, }, 632b8e80941Smrg .num_eu_per_subslice = 8, 633b8e80941Smrg .l3_banks = 4, 634b8e80941Smrg .simulator_id = 12, 635b8e80941Smrg}; 636b8e80941Smrg 637b8e80941Smrgstatic const struct gen_device_info gen_device_info_skl_gt3 = { 638b8e80941Smrg GEN9_FEATURES, .gt = 3, 639b8e80941Smrg .is_skylake = true, 640b8e80941Smrg .num_slices = 2, 641b8e80941Smrg .num_subslices = { 3, 3, }, 642b8e80941Smrg .num_eu_per_subslice = 8, 643b8e80941Smrg .l3_banks = 8, 644b8e80941Smrg .simulator_id = 12, 645b8e80941Smrg}; 646b8e80941Smrg 647b8e80941Smrgstatic const struct gen_device_info gen_device_info_skl_gt4 = { 648b8e80941Smrg GEN9_FEATURES, .gt = 4, 649b8e80941Smrg .is_skylake = true, 650b8e80941Smrg .num_slices = 3, 651b8e80941Smrg .num_subslices = { 3, 3, 3, }, 652b8e80941Smrg .num_eu_per_subslice = 8, 653b8e80941Smrg .l3_banks = 12, 654b8e80941Smrg /* From the "L3 Allocation and Programming" documentation: 655b8e80941Smrg * 656b8e80941Smrg * "URB is limited to 1008KB due to programming restrictions. This is not a 657b8e80941Smrg * restriction of the L3 implementation, but of the FF and other clients. 658b8e80941Smrg * Therefore, in a GT4 implementation it is possible for the programmed 659b8e80941Smrg * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but 660b8e80941Smrg * only 1008KB of this will be used." 661b8e80941Smrg */ 662b8e80941Smrg .urb.size = 1008 / 3, 663b8e80941Smrg .simulator_id = 12, 664b8e80941Smrg}; 665b8e80941Smrg 666b8e80941Smrgstatic const struct gen_device_info gen_device_info_bxt = { 667b8e80941Smrg GEN9_LP_FEATURES_3X6, 668b8e80941Smrg .is_broxton = true, 669b8e80941Smrg .l3_banks = 2, 670b8e80941Smrg .simulator_id = 14, 671b8e80941Smrg}; 672b8e80941Smrg 673b8e80941Smrgstatic const struct gen_device_info gen_device_info_bxt_2x6 = { 674b8e80941Smrg GEN9_LP_FEATURES_2X6, 675b8e80941Smrg .is_broxton = true, 676b8e80941Smrg .l3_banks = 1, 677b8e80941Smrg .simulator_id = 14, 678b8e80941Smrg}; 679b8e80941Smrg/* 680b8e80941Smrg * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. 681b8e80941Smrg * There's no KBL entry. Using the default SKL (GEN9) GS entries value. 682b8e80941Smrg */ 683b8e80941Smrg 684b8e80941Smrgstatic const struct gen_device_info gen_device_info_kbl_gt1 = { 685b8e80941Smrg GEN9_FEATURES, 686b8e80941Smrg .is_kabylake = true, 687b8e80941Smrg .gt = 1, 688b8e80941Smrg 689b8e80941Smrg .max_cs_threads = 7 * 6, 690b8e80941Smrg .urb.size = 192, 691b8e80941Smrg .num_slices = 1, 692b8e80941Smrg .num_subslices = { 2, }, 693b8e80941Smrg .num_eu_per_subslice = 6, 694b8e80941Smrg .l3_banks = 2, 695b8e80941Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 696b8e80941Smrg * leading to some vertices to go missing if we use too much URB. 697b8e80941Smrg */ 698b8e80941Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 699b8e80941Smrg .simulator_id = 16, 700b8e80941Smrg}; 701b8e80941Smrg 702b8e80941Smrgstatic const struct gen_device_info gen_device_info_kbl_gt1_5 = { 703b8e80941Smrg GEN9_FEATURES, 704b8e80941Smrg .is_kabylake = true, 705b8e80941Smrg .gt = 1, 706b8e80941Smrg 707b8e80941Smrg .max_cs_threads = 7 * 6, 708b8e80941Smrg .num_slices = 1, 709b8e80941Smrg .num_subslices = { 3, }, 710b8e80941Smrg .num_eu_per_subslice = 6, 711b8e80941Smrg .l3_banks = 4, 712b8e80941Smrg .simulator_id = 16, 713b8e80941Smrg}; 714b8e80941Smrg 715b8e80941Smrgstatic const struct gen_device_info gen_device_info_kbl_gt2 = { 716b8e80941Smrg GEN9_FEATURES, 717b8e80941Smrg .is_kabylake = true, 718b8e80941Smrg .gt = 2, 719b8e80941Smrg 720b8e80941Smrg .num_slices = 1, 721b8e80941Smrg .num_subslices = { 3, }, 722b8e80941Smrg .num_eu_per_subslice = 8, 723b8e80941Smrg .l3_banks = 4, 724b8e80941Smrg .simulator_id = 16, 725b8e80941Smrg}; 726b8e80941Smrg 727b8e80941Smrgstatic const struct gen_device_info gen_device_info_kbl_gt3 = { 728b8e80941Smrg GEN9_FEATURES, 729b8e80941Smrg .is_kabylake = true, 730b8e80941Smrg .gt = 3, 731b8e80941Smrg 732b8e80941Smrg .num_slices = 2, 733b8e80941Smrg .num_subslices = { 3, 3, }, 734b8e80941Smrg .num_eu_per_subslice = 8, 735b8e80941Smrg .l3_banks = 8, 736b8e80941Smrg .simulator_id = 16, 737b8e80941Smrg}; 738b8e80941Smrg 739b8e80941Smrgstatic const struct gen_device_info gen_device_info_kbl_gt4 = { 740b8e80941Smrg GEN9_FEATURES, 741b8e80941Smrg .is_kabylake = true, 742b8e80941Smrg .gt = 4, 743b8e80941Smrg 744b8e80941Smrg /* 745b8e80941Smrg * From the "L3 Allocation and Programming" documentation: 746b8e80941Smrg * 747b8e80941Smrg * "URB is limited to 1008KB due to programming restrictions. This 748b8e80941Smrg * is not a restriction of the L3 implementation, but of the FF and 749b8e80941Smrg * other clients. Therefore, in a GT4 implementation it is 750b8e80941Smrg * possible for the programmed allocation of the L3 data array to 751b8e80941Smrg * provide 3*384KB=1152KB for URB, but only 1008KB of this 752b8e80941Smrg * will be used." 753b8e80941Smrg */ 754b8e80941Smrg .urb.size = 1008 / 3, 755b8e80941Smrg .num_slices = 3, 756b8e80941Smrg .num_subslices = { 3, 3, 3, }, 757b8e80941Smrg .num_eu_per_subslice = 8, 758b8e80941Smrg .l3_banks = 12, 759b8e80941Smrg .simulator_id = 16, 760b8e80941Smrg}; 761b8e80941Smrg 762b8e80941Smrgstatic const struct gen_device_info gen_device_info_glk = { 763b8e80941Smrg GEN9_LP_FEATURES_3X6, 764b8e80941Smrg .is_geminilake = true, 765b8e80941Smrg .l3_banks = 2, 766b8e80941Smrg .simulator_id = 17, 767b8e80941Smrg}; 768b8e80941Smrg 769b8e80941Smrgstatic const struct gen_device_info gen_device_info_glk_2x6 = { 770b8e80941Smrg GEN9_LP_FEATURES_2X6, 771b8e80941Smrg .is_geminilake = true, 772b8e80941Smrg .l3_banks = 2, 773b8e80941Smrg .simulator_id = 17, 774b8e80941Smrg}; 775b8e80941Smrg 776b8e80941Smrgstatic const struct gen_device_info gen_device_info_cfl_gt1 = { 777b8e80941Smrg GEN9_FEATURES, 778b8e80941Smrg .is_coffeelake = true, 779b8e80941Smrg .gt = 1, 780b8e80941Smrg 781b8e80941Smrg .num_slices = 1, 782b8e80941Smrg .num_subslices = { 2, }, 783b8e80941Smrg .num_eu_per_subslice = 6, 784b8e80941Smrg .l3_banks = 2, 785b8e80941Smrg .urb.size = 192, 786b8e80941Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 787b8e80941Smrg * leading to some vertices to go missing if we use too much URB. 788b8e80941Smrg */ 789b8e80941Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 790b8e80941Smrg .simulator_id = 24, 791b8e80941Smrg}; 792b8e80941Smrgstatic const struct gen_device_info gen_device_info_cfl_gt2 = { 793b8e80941Smrg GEN9_FEATURES, 794b8e80941Smrg .is_coffeelake = true, 795b8e80941Smrg .gt = 2, 796b8e80941Smrg 797b8e80941Smrg .num_slices = 1, 798b8e80941Smrg .num_subslices = { 3, }, 799b8e80941Smrg .num_eu_per_subslice = 8, 800b8e80941Smrg .l3_banks = 4, 801b8e80941Smrg .simulator_id = 24, 802b8e80941Smrg}; 803b8e80941Smrg 804b8e80941Smrgstatic const struct gen_device_info gen_device_info_cfl_gt3 = { 805b8e80941Smrg GEN9_FEATURES, 806b8e80941Smrg .is_coffeelake = true, 807b8e80941Smrg .gt = 3, 808b8e80941Smrg 809b8e80941Smrg .num_slices = 2, 810b8e80941Smrg .num_subslices = { 3, 3, }, 811b8e80941Smrg .num_eu_per_subslice = 8, 812b8e80941Smrg .l3_banks = 8, 813b8e80941Smrg .simulator_id = 24, 814b8e80941Smrg}; 815b8e80941Smrg 816b8e80941Smrg#define GEN10_HW_INFO \ 817b8e80941Smrg .gen = 10, \ 818b8e80941Smrg .num_thread_per_eu = 7, \ 819b8e80941Smrg .max_vs_threads = 728, \ 820b8e80941Smrg .max_gs_threads = 432, \ 821b8e80941Smrg .max_tcs_threads = 432, \ 822b8e80941Smrg .max_tes_threads = 624, \ 823b8e80941Smrg .max_cs_threads = 56, \ 824b8e80941Smrg .timestamp_frequency = 19200000, \ 825b8e80941Smrg .urb = { \ 826b8e80941Smrg .size = 256, \ 827b8e80941Smrg .min_entries = { \ 828b8e80941Smrg [MESA_SHADER_VERTEX] = 64, \ 829b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 830b8e80941Smrg }, \ 831b8e80941Smrg .max_entries = { \ 832b8e80941Smrg [MESA_SHADER_VERTEX] = 3936, \ 833b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 896, \ 834b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 2064, \ 835b8e80941Smrg [MESA_SHADER_GEOMETRY] = 832, \ 836b8e80941Smrg }, \ 837b8e80941Smrg } 838b8e80941Smrg 839b8e80941Smrg#define subslices(args...) { args, } 840b8e80941Smrg 841b8e80941Smrg#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \ 842b8e80941Smrg GEN8_FEATURES, \ 843b8e80941Smrg GEN10_HW_INFO, \ 844b8e80941Smrg .has_sample_with_hiz = true, \ 845b8e80941Smrg .gt = _gt, \ 846b8e80941Smrg .num_slices = _slices, \ 847b8e80941Smrg .num_subslices = _subslices, \ 848b8e80941Smrg .num_eu_per_subslice = 8, \ 849b8e80941Smrg .l3_banks = _l3 850b8e80941Smrg 851b8e80941Smrgstatic const struct gen_device_info gen_device_info_cnl_2x8 = { 852b8e80941Smrg /* GT0.5 */ 853b8e80941Smrg GEN10_FEATURES(1, 1, subslices(2), 2), 854b8e80941Smrg .is_cannonlake = true, 855b8e80941Smrg .simulator_id = 15, 856b8e80941Smrg}; 857b8e80941Smrg 858b8e80941Smrgstatic const struct gen_device_info gen_device_info_cnl_3x8 = { 859b8e80941Smrg /* GT1 */ 860b8e80941Smrg GEN10_FEATURES(1, 1, subslices(3), 3), 861b8e80941Smrg .is_cannonlake = true, 862b8e80941Smrg .simulator_id = 15, 863b8e80941Smrg}; 864b8e80941Smrg 865b8e80941Smrgstatic const struct gen_device_info gen_device_info_cnl_4x8 = { 866b8e80941Smrg /* GT 1.5 */ 867b8e80941Smrg GEN10_FEATURES(1, 2, subslices(2, 2), 6), 868b8e80941Smrg .is_cannonlake = true, 869b8e80941Smrg .simulator_id = 15, 870b8e80941Smrg}; 871b8e80941Smrg 872b8e80941Smrgstatic const struct gen_device_info gen_device_info_cnl_5x8 = { 873b8e80941Smrg /* GT2 */ 874b8e80941Smrg GEN10_FEATURES(2, 2, subslices(3, 2), 6), 875b8e80941Smrg .is_cannonlake = true, 876b8e80941Smrg .simulator_id = 15, 877b8e80941Smrg}; 878b8e80941Smrg 879b8e80941Smrg#define GEN11_HW_INFO \ 880b8e80941Smrg .gen = 11, \ 881b8e80941Smrg .has_pln = false, \ 882b8e80941Smrg .max_vs_threads = 364, \ 883b8e80941Smrg .max_gs_threads = 224, \ 884b8e80941Smrg .max_tcs_threads = 224, \ 885b8e80941Smrg .max_tes_threads = 364, \ 886b8e80941Smrg .max_cs_threads = 56 887b8e80941Smrg 888b8e80941Smrg#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \ 889b8e80941Smrg GEN8_FEATURES, \ 890b8e80941Smrg GEN11_HW_INFO, \ 891b8e80941Smrg .has_64bit_types = false, \ 892b8e80941Smrg .has_integer_dword_mul = false, \ 893b8e80941Smrg .has_sample_with_hiz = false, \ 894b8e80941Smrg .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ 895b8e80941Smrg .num_subslices = _subslices, \ 896b8e80941Smrg .num_eu_per_subslice = 8 897b8e80941Smrg 898b8e80941Smrg#define GEN11_URB_MIN_MAX_ENTRIES \ 899b8e80941Smrg .min_entries = { \ 900b8e80941Smrg [MESA_SHADER_VERTEX] = 64, \ 901b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 902b8e80941Smrg }, \ 903b8e80941Smrg .max_entries = { \ 904b8e80941Smrg [MESA_SHADER_VERTEX] = 2384, \ 905b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 1032, \ 906b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 2384, \ 907b8e80941Smrg [MESA_SHADER_GEOMETRY] = 1032, \ 908b8e80941Smrg } 909b8e80941Smrg 910b8e80941Smrgstatic const struct gen_device_info gen_device_info_icl_8x8 = { 911b8e80941Smrg GEN11_FEATURES(2, 1, subslices(8), 8), 912b8e80941Smrg .urb = { 913b8e80941Smrg .size = 1024, 914b8e80941Smrg GEN11_URB_MIN_MAX_ENTRIES, 915b8e80941Smrg }, 916b8e80941Smrg .simulator_id = 19, 917b8e80941Smrg}; 918b8e80941Smrg 919b8e80941Smrgstatic const struct gen_device_info gen_device_info_icl_6x8 = { 920b8e80941Smrg GEN11_FEATURES(1, 1, subslices(6), 6), 921b8e80941Smrg .urb = { 922b8e80941Smrg .size = 768, 923b8e80941Smrg GEN11_URB_MIN_MAX_ENTRIES, 924b8e80941Smrg }, 925b8e80941Smrg .simulator_id = 19, 926b8e80941Smrg}; 927b8e80941Smrg 928b8e80941Smrgstatic const struct gen_device_info gen_device_info_icl_4x8 = { 929b8e80941Smrg GEN11_FEATURES(1, 1, subslices(4), 6), 930b8e80941Smrg .urb = { 931b8e80941Smrg .size = 768, 932b8e80941Smrg GEN11_URB_MIN_MAX_ENTRIES, 933b8e80941Smrg }, 934b8e80941Smrg .simulator_id = 19, 935b8e80941Smrg}; 936b8e80941Smrg 937b8e80941Smrgstatic const struct gen_device_info gen_device_info_icl_1x8 = { 938b8e80941Smrg GEN11_FEATURES(1, 1, subslices(1), 6), 939b8e80941Smrg .urb = { 940b8e80941Smrg .size = 768, 941b8e80941Smrg GEN11_URB_MIN_MAX_ENTRIES, 942b8e80941Smrg }, 943b8e80941Smrg .simulator_id = 19, 944b8e80941Smrg}; 945b8e80941Smrg 946b8e80941Smrgstatic const struct gen_device_info gen_device_info_ehl_4x8 = { 947b8e80941Smrg GEN11_FEATURES(1, 1, subslices(4), 4), 948b8e80941Smrg .urb = { 949b8e80941Smrg .size = 512, 950b8e80941Smrg .min_entries = { 951b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 952b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 953b8e80941Smrg }, 954b8e80941Smrg .max_entries = { 955b8e80941Smrg [MESA_SHADER_VERTEX] = 2384, 956b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 1032, 957b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 2384, 958b8e80941Smrg [MESA_SHADER_GEOMETRY] = 1032, 959b8e80941Smrg }, 960b8e80941Smrg }, 961b8e80941Smrg .simulator_id = 28, 962b8e80941Smrg}; 963b8e80941Smrg 964b8e80941Smrg/* FIXME: Verfiy below entries when more information is available for this SKU. 965b8e80941Smrg */ 966b8e80941Smrgstatic const struct gen_device_info gen_device_info_ehl_4x4 = { 967b8e80941Smrg GEN11_FEATURES(1, 1, subslices(4), 4), 968b8e80941Smrg .urb = { 969b8e80941Smrg .size = 512, 970b8e80941Smrg .min_entries = { 971b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 972b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 973b8e80941Smrg }, 974b8e80941Smrg .max_entries = { 975b8e80941Smrg [MESA_SHADER_VERTEX] = 2384, 976b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 1032, 977b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 2384, 978b8e80941Smrg [MESA_SHADER_GEOMETRY] = 1032, 979b8e80941Smrg }, 980b8e80941Smrg }, 981b8e80941Smrg .num_eu_per_subslice = 4, 982b8e80941Smrg .simulator_id = 28, 983b8e80941Smrg}; 984b8e80941Smrg 985b8e80941Smrg/* FIXME: Verfiy below entries when more information is available for this SKU. 986b8e80941Smrg */ 987b8e80941Smrgstatic const struct gen_device_info gen_device_info_ehl_2x4 = { 988b8e80941Smrg GEN11_FEATURES(1, 1, subslices(2), 4), 989b8e80941Smrg .urb = { 990b8e80941Smrg .size = 512, 991b8e80941Smrg .min_entries = { 992b8e80941Smrg [MESA_SHADER_VERTEX] = 64, 993b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 34, 994b8e80941Smrg }, 995b8e80941Smrg .max_entries = { 996b8e80941Smrg [MESA_SHADER_VERTEX] = 2384, 997b8e80941Smrg [MESA_SHADER_TESS_CTRL] = 1032, 998b8e80941Smrg [MESA_SHADER_TESS_EVAL] = 2384, 999b8e80941Smrg [MESA_SHADER_GEOMETRY] = 1032, 1000b8e80941Smrg }, 1001b8e80941Smrg }, 1002b8e80941Smrg .num_eu_per_subslice =4, 1003b8e80941Smrg .simulator_id = 28, 1004b8e80941Smrg}; 1005b8e80941Smrg 1006b8e80941Smrgstatic void 1007b8e80941Smrggen_device_info_set_eu_mask(struct gen_device_info *devinfo, 1008b8e80941Smrg unsigned slice, 1009b8e80941Smrg unsigned subslice, 1010b8e80941Smrg unsigned eu_mask) 1011b8e80941Smrg{ 1012b8e80941Smrg unsigned subslice_offset = slice * devinfo->eu_slice_stride + 1013b8e80941Smrg subslice * devinfo->eu_subslice_stride; 1014b8e80941Smrg 1015b8e80941Smrg for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) { 1016b8e80941Smrg devinfo->eu_masks[subslice_offset + b_eu] = 1017b8e80941Smrg (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff; 1018b8e80941Smrg } 1019b8e80941Smrg} 1020b8e80941Smrg 1021b8e80941Smrg/* Generate slice/subslice/eu masks from number of 1022b8e80941Smrg * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info 1023b8e80941Smrg * structure. 1024b8e80941Smrg * 1025b8e80941Smrg * These can be overridden with values reported by the kernel either from 1026b8e80941Smrg * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+ 1027b8e80941Smrg * through the i915 query uapi. 1028b8e80941Smrg */ 1029b8e80941Smrgstatic void 1030b8e80941Smrgfill_masks(struct gen_device_info *devinfo) 1031b8e80941Smrg{ 1032b8e80941Smrg devinfo->slice_masks = (1U << devinfo->num_slices) - 1; 1033b8e80941Smrg 1034b8e80941Smrg /* Subslice masks */ 1035b8e80941Smrg unsigned max_subslices = 0; 1036b8e80941Smrg for (int s = 0; s < devinfo->num_slices; s++) 1037b8e80941Smrg max_subslices = MAX2(devinfo->num_subslices[s], max_subslices); 1038b8e80941Smrg devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8); 1039b8e80941Smrg 1040b8e80941Smrg for (int s = 0; s < devinfo->num_slices; s++) { 1041b8e80941Smrg devinfo->subslice_masks[s * devinfo->subslice_slice_stride] = 1042b8e80941Smrg (1U << devinfo->num_subslices[s]) - 1; 1043b8e80941Smrg } 1044b8e80941Smrg 1045b8e80941Smrg /* EU masks */ 1046b8e80941Smrg devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8); 1047b8e80941Smrg devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride; 1048b8e80941Smrg 1049b8e80941Smrg for (int s = 0; s < devinfo->num_slices; s++) { 1050b8e80941Smrg for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) { 1051b8e80941Smrg gen_device_info_set_eu_mask(devinfo, s, ss, 1052b8e80941Smrg (1U << devinfo->num_eu_per_subslice) - 1); 1053b8e80941Smrg } 1054b8e80941Smrg } 1055b8e80941Smrg} 1056b8e80941Smrg 1057b8e80941Smrgvoid 1058b8e80941Smrggen_device_info_update_from_masks(struct gen_device_info *devinfo, 1059b8e80941Smrg uint32_t slice_mask, 1060b8e80941Smrg uint32_t subslice_mask, 1061b8e80941Smrg uint32_t n_eus) 1062b8e80941Smrg{ 1063b8e80941Smrg struct { 1064b8e80941Smrg struct drm_i915_query_topology_info base; 1065b8e80941Smrg } topology; 1066b8e80941Smrg 1067b8e80941Smrg assert((slice_mask & 0xff) == slice_mask); 1068b8e80941Smrg 1069b8e80941Smrg memset(&topology, 0, sizeof(topology)); 1070b8e80941Smrg 1071b8e80941Smrg topology.base.max_slices = util_last_bit(slice_mask); 1072b8e80941Smrg topology.base.max_subslices = util_last_bit(subslice_mask); 1073b8e80941Smrg 1074b8e80941Smrg topology.base.subslice_offset = DIV_ROUND_UP(topology.base.max_slices, 8); 1075b8e80941Smrg topology.base.subslice_stride = DIV_ROUND_UP(topology.base.max_subslices, 8); 1076b8e80941Smrg 1077b8e80941Smrg uint32_t n_subslices = __builtin_popcount(slice_mask) * 1078b8e80941Smrg __builtin_popcount(subslice_mask); 1079b8e80941Smrg uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 1080b8e80941Smrg uint32_t eu_mask = (1U << num_eu_per_subslice) - 1; 1081b8e80941Smrg 1082b8e80941Smrg topology.base.eu_offset = topology.base.subslice_offset + 1083b8e80941Smrg DIV_ROUND_UP(topology.base.max_subslices, 8); 1084b8e80941Smrg topology.base.eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8); 1085b8e80941Smrg 1086b8e80941Smrg /* Set slice mask in topology */ 1087b8e80941Smrg for (int b = 0; b < topology.base.subslice_offset; b++) 1088b8e80941Smrg topology.base.data[b] = (slice_mask >> (b * 8)) & 0xff; 1089b8e80941Smrg 1090b8e80941Smrg for (int s = 0; s < topology.base.max_slices; s++) { 1091b8e80941Smrg 1092b8e80941Smrg /* Set subslice mask in topology */ 1093b8e80941Smrg for (int b = 0; b < topology.base.subslice_stride; b++) { 1094b8e80941Smrg int subslice_offset = topology.base.subslice_offset + 1095b8e80941Smrg s * topology.base.subslice_stride + b; 1096b8e80941Smrg 1097b8e80941Smrg topology.base.data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; 1098b8e80941Smrg } 1099b8e80941Smrg 1100b8e80941Smrg /* Set eu mask in topology */ 1101b8e80941Smrg for (int ss = 0; ss < topology.base.max_subslices; ss++) { 1102b8e80941Smrg for (int b = 0; b < topology.base.eu_stride; b++) { 1103b8e80941Smrg int eu_offset = topology.base.eu_offset + 1104b8e80941Smrg (s * topology.base.max_subslices + ss) * topology.base.eu_stride + b; 1105b8e80941Smrg 1106b8e80941Smrg topology.base.data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; 1107b8e80941Smrg } 1108b8e80941Smrg } 1109b8e80941Smrg } 1110b8e80941Smrg 1111b8e80941Smrg gen_device_info_update_from_topology(devinfo, &topology.base); 1112b8e80941Smrg} 1113b8e80941Smrg 1114b8e80941Smrgstatic void 1115b8e80941Smrgreset_masks(struct gen_device_info *devinfo) 1116b8e80941Smrg{ 1117b8e80941Smrg devinfo->subslice_slice_stride = 0; 1118b8e80941Smrg devinfo->eu_subslice_stride = 0; 1119b8e80941Smrg devinfo->eu_slice_stride = 0; 1120b8e80941Smrg 1121b8e80941Smrg devinfo->num_slices = 0; 1122b8e80941Smrg devinfo->num_eu_per_subslice = 0; 1123b8e80941Smrg memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); 1124b8e80941Smrg 1125b8e80941Smrg memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); 1126b8e80941Smrg memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); 1127b8e80941Smrg memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); 1128b8e80941Smrg} 1129b8e80941Smrg 1130b8e80941Smrgvoid 1131b8e80941Smrggen_device_info_update_from_topology(struct gen_device_info *devinfo, 1132b8e80941Smrg const struct drm_i915_query_topology_info *topology) 1133b8e80941Smrg{ 1134b8e80941Smrg reset_masks(devinfo); 1135b8e80941Smrg 1136b8e80941Smrg devinfo->subslice_slice_stride = topology->subslice_stride; 1137b8e80941Smrg 1138b8e80941Smrg devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8); 1139b8e80941Smrg devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride; 1140b8e80941Smrg 1141b8e80941Smrg assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); 1142b8e80941Smrg memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); 1143b8e80941Smrg devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); 1144b8e80941Smrg 1145b8e80941Smrg uint32_t subslice_mask_len = 1146b8e80941Smrg topology->max_slices * topology->subslice_stride; 1147b8e80941Smrg assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); 1148b8e80941Smrg memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset], 1149b8e80941Smrg subslice_mask_len); 1150b8e80941Smrg 1151b8e80941Smrg uint32_t n_subslices = 0; 1152b8e80941Smrg for (int s = 0; s < topology->max_slices; s++) { 1153b8e80941Smrg if ((devinfo->slice_masks & (1UL << s)) == 0) 1154b8e80941Smrg continue; 1155b8e80941Smrg 1156b8e80941Smrg for (int b = 0; b < devinfo->subslice_slice_stride; b++) { 1157b8e80941Smrg devinfo->num_subslices[s] += 1158b8e80941Smrg __builtin_popcount(devinfo->subslice_masks[b]); 1159b8e80941Smrg } 1160b8e80941Smrg n_subslices += devinfo->num_subslices[s]; 1161b8e80941Smrg } 1162b8e80941Smrg assert(n_subslices > 0); 1163b8e80941Smrg 1164b8e80941Smrg uint32_t eu_mask_len = 1165b8e80941Smrg topology->eu_stride * topology->max_subslices * topology->max_slices; 1166b8e80941Smrg assert(sizeof(devinfo->eu_masks) >= eu_mask_len); 1167b8e80941Smrg memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len); 1168b8e80941Smrg 1169b8e80941Smrg uint32_t n_eus = 0; 1170b8e80941Smrg for (int b = 0; b < eu_mask_len; b++) 1171b8e80941Smrg n_eus += __builtin_popcount(devinfo->eu_masks[b]); 1172b8e80941Smrg 1173b8e80941Smrg devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 1174b8e80941Smrg} 1175b8e80941Smrg 1176b8e80941Smrgbool 1177b8e80941Smrggen_get_device_info(int devid, struct gen_device_info *devinfo) 1178b8e80941Smrg{ 1179b8e80941Smrg switch (devid) { 1180b8e80941Smrg#undef CHIPSET 1181b8e80941Smrg#define CHIPSET(id, family, name) \ 1182b8e80941Smrg case id: *devinfo = gen_device_info_##family; break; 1183b8e80941Smrg#include "pci_ids/i965_pci_ids.h" 1184b8e80941Smrg default: 1185b8e80941Smrg fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); 1186b8e80941Smrg return false; 1187b8e80941Smrg } 1188b8e80941Smrg 1189b8e80941Smrg fill_masks(devinfo); 1190b8e80941Smrg 1191b8e80941Smrg /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: 1192b8e80941Smrg * 1193b8e80941Smrg * "Scratch Space per slice is computed based on 4 sub-slices. SW must 1194b8e80941Smrg * allocate scratch space enough so that each slice has 4 slices allowed." 1195b8e80941Smrg * 1196b8e80941Smrg * The equivalent internal documentation says that this programming note 1197b8e80941Smrg * applies to all Gen9+ platforms. 1198b8e80941Smrg * 1199b8e80941Smrg * The hardware typically calculates the scratch space pointer by taking 1200b8e80941Smrg * the base address, and adding per-thread-scratch-space * thread ID. 1201b8e80941Smrg * Extra padding can be necessary depending how the thread IDs are 1202b8e80941Smrg * calculated for a particular shader stage. 1203b8e80941Smrg */ 1204b8e80941Smrg 1205b8e80941Smrg switch(devinfo->gen) { 1206b8e80941Smrg case 9: 1207b8e80941Smrg case 10: 1208b8e80941Smrg devinfo->max_wm_threads = 64 /* threads-per-PSD */ 1209b8e80941Smrg * devinfo->num_slices 1210b8e80941Smrg * 4; /* effective subslices per slice */ 1211b8e80941Smrg break; 1212b8e80941Smrg case 11: 1213b8e80941Smrg devinfo->max_wm_threads = 128 /* threads-per-PSD */ 1214b8e80941Smrg * devinfo->num_slices 1215b8e80941Smrg * 8; /* subslices per slice */ 1216b8e80941Smrg break; 1217b8e80941Smrg default: 1218b8e80941Smrg break; 1219b8e80941Smrg } 1220b8e80941Smrg 1221b8e80941Smrg assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); 1222b8e80941Smrg 1223b8e80941Smrg return true; 1224b8e80941Smrg} 1225b8e80941Smrg 1226b8e80941Smrgconst char * 1227b8e80941Smrggen_get_device_name(int devid) 1228b8e80941Smrg{ 1229b8e80941Smrg switch (devid) { 1230b8e80941Smrg#undef CHIPSET 1231b8e80941Smrg#define CHIPSET(id, family, name) case id: return name; 1232b8e80941Smrg#include "pci_ids/i965_pci_ids.h" 1233b8e80941Smrg default: 1234b8e80941Smrg return NULL; 1235b8e80941Smrg } 1236b8e80941Smrg} 1237