17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2013 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include <assert.h> 257ec681f3Smrg#include <stdbool.h> 267ec681f3Smrg#include <stdio.h> 277ec681f3Smrg#include <stdlib.h> 287ec681f3Smrg#include <string.h> 297ec681f3Smrg#include <unistd.h> 307ec681f3Smrg#include "intel_device_info.h" 317ec681f3Smrg#include "intel/common/intel_gem.h" 327ec681f3Smrg#include "util/bitscan.h" 337ec681f3Smrg#include "util/debug.h" 347ec681f3Smrg#include "util/log.h" 357ec681f3Smrg#include "util/macros.h" 367ec681f3Smrg 377ec681f3Smrg#include "drm-uapi/i915_drm.h" 387ec681f3Smrg 397ec681f3Smrgstatic const struct { 407ec681f3Smrg const char *name; 417ec681f3Smrg int pci_id; 427ec681f3Smrg} name_map[] = { 437ec681f3Smrg { "lpt", 0x27a2 }, 447ec681f3Smrg { "brw", 0x2a02 }, 457ec681f3Smrg { "g4x", 0x2a42 }, 467ec681f3Smrg { "ilk", 0x0042 }, 477ec681f3Smrg { "snb", 0x0126 }, 487ec681f3Smrg { "ivb", 0x016a }, 497ec681f3Smrg { "hsw", 0x0d2e }, 507ec681f3Smrg { "byt", 0x0f33 }, 517ec681f3Smrg { "bdw", 0x162e }, 527ec681f3Smrg { "chv", 0x22B3 }, 537ec681f3Smrg { "skl", 0x1912 }, 547ec681f3Smrg { "bxt", 0x5A85 }, 557ec681f3Smrg { "kbl", 0x5912 }, 567ec681f3Smrg { "aml", 0x591C }, 577ec681f3Smrg { "glk", 0x3185 }, 587ec681f3Smrg { "cfl", 0x3E9B }, 597ec681f3Smrg { "whl", 0x3EA1 }, 607ec681f3Smrg { "cml", 0x9b41 }, 617ec681f3Smrg { "icl", 0x8a52 }, 627ec681f3Smrg { "ehl", 0x4500 }, 637ec681f3Smrg { "jsl", 0x4E71 }, 647ec681f3Smrg { "tgl", 0x9a49 }, 657ec681f3Smrg { "rkl", 0x4c8a }, 667ec681f3Smrg { "dg1", 0x4905 }, 677ec681f3Smrg { "adl", 0x4680 }, 687ec681f3Smrg { "sg1", 0x4907 }, 697ec681f3Smrg}; 707ec681f3Smrg 717ec681f3Smrg/** 727ec681f3Smrg * Get the PCI ID for the device name. 737ec681f3Smrg * 747ec681f3Smrg * Returns -1 if the device is not known. 757ec681f3Smrg */ 767ec681f3Smrgint 777ec681f3Smrgintel_device_name_to_pci_device_id(const char *name) 787ec681f3Smrg{ 797ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { 807ec681f3Smrg if (!strcmp(name_map[i].name, name)) 817ec681f3Smrg return name_map[i].pci_id; 827ec681f3Smrg } 837ec681f3Smrg 847ec681f3Smrg return -1; 857ec681f3Smrg} 867ec681f3Smrg 877ec681f3Smrgstatic const struct intel_device_info intel_device_info_gfx3 = { 887ec681f3Smrg .ver = 3, 897ec681f3Smrg .simulator_id = -1, 907ec681f3Smrg .cs_prefetch_size = 512, 917ec681f3Smrg}; 927ec681f3Smrg 937ec681f3Smrgstatic const struct intel_device_info intel_device_info_i965 = { 947ec681f3Smrg .ver = 4, 957ec681f3Smrg .has_negative_rhw_bug = true, 967ec681f3Smrg .num_slices = 1, 977ec681f3Smrg .num_subslices = { 1, }, 987ec681f3Smrg .num_eu_per_subslice = 8, 997ec681f3Smrg .num_thread_per_eu = 4, 1007ec681f3Smrg .max_vs_threads = 16, 1017ec681f3Smrg .max_gs_threads = 2, 1027ec681f3Smrg .max_wm_threads = 8 * 4, 1037ec681f3Smrg .urb = { 1047ec681f3Smrg .size = 256, 1057ec681f3Smrg }, 1067ec681f3Smrg .timestamp_frequency = 12500000, 1077ec681f3Smrg .simulator_id = -1, 1087ec681f3Smrg .cs_prefetch_size = 512, 1097ec681f3Smrg}; 1107ec681f3Smrg 1117ec681f3Smrgstatic const struct intel_device_info intel_device_info_g4x = { 1127ec681f3Smrg .ver = 4, 1137ec681f3Smrg .verx10 = 45, 1147ec681f3Smrg .has_pln = true, 1157ec681f3Smrg .has_compr4 = true, 1167ec681f3Smrg .has_surface_tile_offset = true, 1177ec681f3Smrg .is_g4x = true, 1187ec681f3Smrg .num_slices = 1, 1197ec681f3Smrg .num_subslices = { 1, }, 1207ec681f3Smrg .num_eu_per_subslice = 10, 1217ec681f3Smrg .num_thread_per_eu = 5, 1227ec681f3Smrg .max_vs_threads = 32, 1237ec681f3Smrg .max_gs_threads = 2, 1247ec681f3Smrg .max_wm_threads = 10 * 5, 1257ec681f3Smrg .urb = { 1267ec681f3Smrg .size = 384, 1277ec681f3Smrg }, 1287ec681f3Smrg .timestamp_frequency = 12500000, 1297ec681f3Smrg .simulator_id = -1, 1307ec681f3Smrg .cs_prefetch_size = 512, 1317ec681f3Smrg}; 1327ec681f3Smrg 1337ec681f3Smrgstatic const struct intel_device_info intel_device_info_ilk = { 1347ec681f3Smrg .ver = 5, 1357ec681f3Smrg .has_pln = true, 1367ec681f3Smrg .has_compr4 = true, 1377ec681f3Smrg .has_surface_tile_offset = true, 1387ec681f3Smrg .num_slices = 1, 1397ec681f3Smrg .num_subslices = { 1, }, 1407ec681f3Smrg .num_eu_per_subslice = 12, 1417ec681f3Smrg .num_thread_per_eu = 6, 1427ec681f3Smrg .max_vs_threads = 72, 1437ec681f3Smrg .max_gs_threads = 32, 1447ec681f3Smrg .max_wm_threads = 12 * 6, 1457ec681f3Smrg .urb = { 1467ec681f3Smrg .size = 1024, 1477ec681f3Smrg }, 1487ec681f3Smrg .timestamp_frequency = 12500000, 1497ec681f3Smrg .simulator_id = -1, 1507ec681f3Smrg .cs_prefetch_size = 512, 1517ec681f3Smrg}; 1527ec681f3Smrg 1537ec681f3Smrgstatic const struct intel_device_info intel_device_info_snb_gt1 = { 1547ec681f3Smrg .ver = 6, 1557ec681f3Smrg .gt = 1, 1567ec681f3Smrg .has_hiz_and_separate_stencil = true, 1577ec681f3Smrg .has_llc = true, 1587ec681f3Smrg .has_pln = true, 1597ec681f3Smrg .has_surface_tile_offset = true, 1607ec681f3Smrg .needs_unlit_centroid_workaround = true, 1617ec681f3Smrg .num_slices = 1, 1627ec681f3Smrg .num_subslices = { 1, }, 1637ec681f3Smrg .num_eu_per_subslice = 6, 1647ec681f3Smrg .num_thread_per_eu = 6, /* Not confirmed */ 1657ec681f3Smrg .max_vs_threads = 24, 1667ec681f3Smrg .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */ 1677ec681f3Smrg .max_wm_threads = 40, 1687ec681f3Smrg .urb = { 1697ec681f3Smrg .size = 32, 1707ec681f3Smrg .min_entries = { 1717ec681f3Smrg [MESA_SHADER_VERTEX] = 24, 1727ec681f3Smrg }, 1737ec681f3Smrg .max_entries = { 1747ec681f3Smrg [MESA_SHADER_VERTEX] = 256, 1757ec681f3Smrg [MESA_SHADER_GEOMETRY] = 256, 1767ec681f3Smrg }, 1777ec681f3Smrg }, 1787ec681f3Smrg .timestamp_frequency = 12500000, 1797ec681f3Smrg .simulator_id = -1, 1807ec681f3Smrg .cs_prefetch_size = 512, 1817ec681f3Smrg}; 1827ec681f3Smrg 1837ec681f3Smrgstatic const struct intel_device_info intel_device_info_snb_gt2 = { 1847ec681f3Smrg .ver = 6, 1857ec681f3Smrg .gt = 2, 1867ec681f3Smrg .has_hiz_and_separate_stencil = true, 1877ec681f3Smrg .has_llc = true, 1887ec681f3Smrg .has_pln = true, 1897ec681f3Smrg .has_surface_tile_offset = true, 1907ec681f3Smrg .needs_unlit_centroid_workaround = true, 1917ec681f3Smrg .num_slices = 1, 1927ec681f3Smrg .num_subslices = { 1, }, 1937ec681f3Smrg .num_eu_per_subslice = 12, 1947ec681f3Smrg .num_thread_per_eu = 6, /* Not confirmed */ 1957ec681f3Smrg .max_vs_threads = 60, 1967ec681f3Smrg .max_gs_threads = 60, 1977ec681f3Smrg .max_wm_threads = 80, 1987ec681f3Smrg .urb = { 1997ec681f3Smrg .size = 64, 2007ec681f3Smrg .min_entries = { 2017ec681f3Smrg [MESA_SHADER_VERTEX] = 24, 2027ec681f3Smrg }, 2037ec681f3Smrg .max_entries = { 2047ec681f3Smrg [MESA_SHADER_VERTEX] = 256, 2057ec681f3Smrg [MESA_SHADER_GEOMETRY] = 256, 2067ec681f3Smrg }, 2077ec681f3Smrg }, 2087ec681f3Smrg .timestamp_frequency = 12500000, 2097ec681f3Smrg .simulator_id = -1, 2107ec681f3Smrg .cs_prefetch_size = 512, 2117ec681f3Smrg}; 2127ec681f3Smrg 2137ec681f3Smrg#define GFX7_FEATURES \ 2147ec681f3Smrg .ver = 7, \ 2157ec681f3Smrg .has_hiz_and_separate_stencil = true, \ 2167ec681f3Smrg .must_use_separate_stencil = true, \ 2177ec681f3Smrg .has_llc = true, \ 2187ec681f3Smrg .has_pln = true, \ 2197ec681f3Smrg .has_64bit_float = true, \ 2207ec681f3Smrg .has_surface_tile_offset = true, \ 2217ec681f3Smrg .timestamp_frequency = 12500000, \ 2227ec681f3Smrg .max_constant_urb_size_kb = 16, \ 2237ec681f3Smrg .cs_prefetch_size = 512 2247ec681f3Smrg 2257ec681f3Smrgstatic const struct intel_device_info intel_device_info_ivb_gt1 = { 2267ec681f3Smrg GFX7_FEATURES, .is_ivybridge = true, .gt = 1, 2277ec681f3Smrg .num_slices = 1, 2287ec681f3Smrg .num_subslices = { 1, }, 2297ec681f3Smrg .num_eu_per_subslice = 6, 2307ec681f3Smrg .num_thread_per_eu = 6, 2317ec681f3Smrg .l3_banks = 2, 2327ec681f3Smrg .max_vs_threads = 36, 2337ec681f3Smrg .max_tcs_threads = 36, 2347ec681f3Smrg .max_tes_threads = 36, 2357ec681f3Smrg .max_gs_threads = 36, 2367ec681f3Smrg .max_wm_threads = 48, 2377ec681f3Smrg .max_cs_threads = 36, 2387ec681f3Smrg .urb = { 2397ec681f3Smrg .min_entries = { 2407ec681f3Smrg [MESA_SHADER_VERTEX] = 32, 2417ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 2427ec681f3Smrg }, 2437ec681f3Smrg .max_entries = { 2447ec681f3Smrg [MESA_SHADER_VERTEX] = 512, 2457ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 32, 2467ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 288, 2477ec681f3Smrg [MESA_SHADER_GEOMETRY] = 192, 2487ec681f3Smrg }, 2497ec681f3Smrg }, 2507ec681f3Smrg .simulator_id = 7, 2517ec681f3Smrg}; 2527ec681f3Smrg 2537ec681f3Smrgstatic const struct intel_device_info intel_device_info_ivb_gt2 = { 2547ec681f3Smrg GFX7_FEATURES, .is_ivybridge = true, .gt = 2, 2557ec681f3Smrg .num_slices = 1, 2567ec681f3Smrg .num_subslices = { 1, }, 2577ec681f3Smrg .num_eu_per_subslice = 12, 2587ec681f3Smrg .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of 2597ec681f3Smrg * @max_wm_threads ... */ 2607ec681f3Smrg .l3_banks = 4, 2617ec681f3Smrg .max_vs_threads = 128, 2627ec681f3Smrg .max_tcs_threads = 128, 2637ec681f3Smrg .max_tes_threads = 128, 2647ec681f3Smrg .max_gs_threads = 128, 2657ec681f3Smrg .max_wm_threads = 172, 2667ec681f3Smrg .max_cs_threads = 64, 2677ec681f3Smrg .urb = { 2687ec681f3Smrg .min_entries = { 2697ec681f3Smrg [MESA_SHADER_VERTEX] = 32, 2707ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 2717ec681f3Smrg }, 2727ec681f3Smrg .max_entries = { 2737ec681f3Smrg [MESA_SHADER_VERTEX] = 704, 2747ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 64, 2757ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 448, 2767ec681f3Smrg [MESA_SHADER_GEOMETRY] = 320, 2777ec681f3Smrg }, 2787ec681f3Smrg }, 2797ec681f3Smrg .simulator_id = 7, 2807ec681f3Smrg}; 2817ec681f3Smrg 2827ec681f3Smrgstatic const struct intel_device_info intel_device_info_byt = { 2837ec681f3Smrg GFX7_FEATURES, .is_baytrail = true, .gt = 1, 2847ec681f3Smrg .num_slices = 1, 2857ec681f3Smrg .num_subslices = { 1, }, 2867ec681f3Smrg .num_eu_per_subslice = 4, 2877ec681f3Smrg .num_thread_per_eu = 8, 2887ec681f3Smrg .l3_banks = 1, 2897ec681f3Smrg .has_llc = false, 2907ec681f3Smrg .max_vs_threads = 36, 2917ec681f3Smrg .max_tcs_threads = 36, 2927ec681f3Smrg .max_tes_threads = 36, 2937ec681f3Smrg .max_gs_threads = 36, 2947ec681f3Smrg .max_wm_threads = 48, 2957ec681f3Smrg .max_cs_threads = 32, 2967ec681f3Smrg .urb = { 2977ec681f3Smrg .min_entries = { 2987ec681f3Smrg [MESA_SHADER_VERTEX] = 32, 2997ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 3007ec681f3Smrg }, 3017ec681f3Smrg .max_entries = { 3027ec681f3Smrg [MESA_SHADER_VERTEX] = 512, 3037ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 32, 3047ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 288, 3057ec681f3Smrg [MESA_SHADER_GEOMETRY] = 192, 3067ec681f3Smrg }, 3077ec681f3Smrg }, 3087ec681f3Smrg .simulator_id = 10, 3097ec681f3Smrg}; 3107ec681f3Smrg 3117ec681f3Smrg#define HSW_FEATURES \ 3127ec681f3Smrg GFX7_FEATURES, \ 3137ec681f3Smrg .is_haswell = true, \ 3147ec681f3Smrg .verx10 = 75, \ 3157ec681f3Smrg .supports_simd16_3src = true 3167ec681f3Smrg 3177ec681f3Smrgstatic const struct intel_device_info intel_device_info_hsw_gt1 = { 3187ec681f3Smrg HSW_FEATURES, .gt = 1, 3197ec681f3Smrg .num_slices = 1, 3207ec681f3Smrg .num_subslices = { 1, }, 3217ec681f3Smrg .num_eu_per_subslice = 10, 3227ec681f3Smrg .num_thread_per_eu = 7, 3237ec681f3Smrg .l3_banks = 2, 3247ec681f3Smrg .max_vs_threads = 70, 3257ec681f3Smrg .max_tcs_threads = 70, 3267ec681f3Smrg .max_tes_threads = 70, 3277ec681f3Smrg .max_gs_threads = 70, 3287ec681f3Smrg .max_wm_threads = 102, 3297ec681f3Smrg .max_cs_threads = 70, 3307ec681f3Smrg .urb = { 3317ec681f3Smrg .min_entries = { 3327ec681f3Smrg [MESA_SHADER_VERTEX] = 32, 3337ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 3347ec681f3Smrg }, 3357ec681f3Smrg .max_entries = { 3367ec681f3Smrg [MESA_SHADER_VERTEX] = 640, 3377ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 64, 3387ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 384, 3397ec681f3Smrg [MESA_SHADER_GEOMETRY] = 256, 3407ec681f3Smrg }, 3417ec681f3Smrg }, 3427ec681f3Smrg .simulator_id = 9, 3437ec681f3Smrg}; 3447ec681f3Smrg 3457ec681f3Smrgstatic const struct intel_device_info intel_device_info_hsw_gt2 = { 3467ec681f3Smrg HSW_FEATURES, .gt = 2, 3477ec681f3Smrg .num_slices = 1, 3487ec681f3Smrg .num_subslices = { 2, }, 3497ec681f3Smrg .num_eu_per_subslice = 10, 3507ec681f3Smrg .num_thread_per_eu = 7, 3517ec681f3Smrg .l3_banks = 4, 3527ec681f3Smrg .max_vs_threads = 280, 3537ec681f3Smrg .max_tcs_threads = 256, 3547ec681f3Smrg .max_tes_threads = 280, 3557ec681f3Smrg .max_gs_threads = 256, 3567ec681f3Smrg .max_wm_threads = 204, 3577ec681f3Smrg .max_cs_threads = 70, 3587ec681f3Smrg .urb = { 3597ec681f3Smrg .min_entries = { 3607ec681f3Smrg [MESA_SHADER_VERTEX] = 64, 3617ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 3627ec681f3Smrg }, 3637ec681f3Smrg .max_entries = { 3647ec681f3Smrg [MESA_SHADER_VERTEX] = 1664, 3657ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 128, 3667ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 960, 3677ec681f3Smrg [MESA_SHADER_GEOMETRY] = 640, 3687ec681f3Smrg }, 3697ec681f3Smrg }, 3707ec681f3Smrg .simulator_id = 9, 3717ec681f3Smrg}; 3727ec681f3Smrg 3737ec681f3Smrgstatic const struct intel_device_info intel_device_info_hsw_gt3 = { 3747ec681f3Smrg HSW_FEATURES, .gt = 3, 3757ec681f3Smrg .num_slices = 2, 3767ec681f3Smrg .num_subslices = { 2, 2, }, 3777ec681f3Smrg .num_eu_per_subslice = 10, 3787ec681f3Smrg .num_thread_per_eu = 7, 3797ec681f3Smrg .l3_banks = 8, 3807ec681f3Smrg .max_vs_threads = 280, 3817ec681f3Smrg .max_tcs_threads = 256, 3827ec681f3Smrg .max_tes_threads = 280, 3837ec681f3Smrg .max_gs_threads = 256, 3847ec681f3Smrg .max_wm_threads = 408, 3857ec681f3Smrg .max_cs_threads = 70, 3867ec681f3Smrg .urb = { 3877ec681f3Smrg .min_entries = { 3887ec681f3Smrg [MESA_SHADER_VERTEX] = 64, 3897ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 10, 3907ec681f3Smrg }, 3917ec681f3Smrg .max_entries = { 3927ec681f3Smrg [MESA_SHADER_VERTEX] = 1664, 3937ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 128, 3947ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 960, 3957ec681f3Smrg [MESA_SHADER_GEOMETRY] = 640, 3967ec681f3Smrg }, 3977ec681f3Smrg }, 3987ec681f3Smrg .max_constant_urb_size_kb = 32, 3997ec681f3Smrg .simulator_id = 9, 4007ec681f3Smrg}; 4017ec681f3Smrg 4027ec681f3Smrg/* It's unclear how well supported sampling from the hiz buffer is on GFX8, 4037ec681f3Smrg * so keep things conservative for now and set has_sample_with_hiz = false. 4047ec681f3Smrg */ 4057ec681f3Smrg#define GFX8_FEATURES \ 4067ec681f3Smrg .ver = 8, \ 4077ec681f3Smrg .has_hiz_and_separate_stencil = true, \ 4087ec681f3Smrg .must_use_separate_stencil = true, \ 4097ec681f3Smrg .has_llc = true, \ 4107ec681f3Smrg .has_sample_with_hiz = false, \ 4117ec681f3Smrg .has_pln = true, \ 4127ec681f3Smrg .has_integer_dword_mul = true, \ 4137ec681f3Smrg .has_64bit_float = true, \ 4147ec681f3Smrg .has_64bit_int = true, \ 4157ec681f3Smrg .supports_simd16_3src = true, \ 4167ec681f3Smrg .has_surface_tile_offset = true, \ 4177ec681f3Smrg .num_thread_per_eu = 7, \ 4187ec681f3Smrg .max_vs_threads = 504, \ 4197ec681f3Smrg .max_tcs_threads = 504, \ 4207ec681f3Smrg .max_tes_threads = 504, \ 4217ec681f3Smrg .max_gs_threads = 504, \ 4227ec681f3Smrg .max_wm_threads = 384, \ 4237ec681f3Smrg .timestamp_frequency = 12500000, \ 4247ec681f3Smrg .max_constant_urb_size_kb = 32, \ 4257ec681f3Smrg .cs_prefetch_size = 512 4267ec681f3Smrg 4277ec681f3Smrgstatic const struct intel_device_info intel_device_info_bdw_gt1 = { 4287ec681f3Smrg GFX8_FEATURES, .gt = 1, 4297ec681f3Smrg .is_broadwell = true, 4307ec681f3Smrg .num_slices = 1, 4317ec681f3Smrg .num_subslices = { 2, }, 4327ec681f3Smrg .num_eu_per_subslice = 6, 4337ec681f3Smrg .l3_banks = 2, 4347ec681f3Smrg .max_cs_threads = 42, 4357ec681f3Smrg .urb = { 4367ec681f3Smrg .min_entries = { 4377ec681f3Smrg [MESA_SHADER_VERTEX] = 64, 4387ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, 4397ec681f3Smrg }, 4407ec681f3Smrg .max_entries = { 4417ec681f3Smrg [MESA_SHADER_VERTEX] = 2560, 4427ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 504, 4437ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 1536, 4447ec681f3Smrg /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */ 4457ec681f3Smrg [MESA_SHADER_GEOMETRY] = 690, 4467ec681f3Smrg }, 4477ec681f3Smrg }, 4487ec681f3Smrg .simulator_id = 11, 4497ec681f3Smrg}; 4507ec681f3Smrg 4517ec681f3Smrgstatic const struct intel_device_info intel_device_info_bdw_gt2 = { 4527ec681f3Smrg GFX8_FEATURES, .gt = 2, 4537ec681f3Smrg .is_broadwell = true, 4547ec681f3Smrg .num_slices = 1, 4557ec681f3Smrg .num_subslices = { 3, }, 4567ec681f3Smrg .num_eu_per_subslice = 8, 4577ec681f3Smrg .l3_banks = 4, 4587ec681f3Smrg .max_cs_threads = 56, 4597ec681f3Smrg .urb = { 4607ec681f3Smrg .min_entries = { 4617ec681f3Smrg [MESA_SHADER_VERTEX] = 64, 4627ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, 4637ec681f3Smrg }, 4647ec681f3Smrg .max_entries = { 4657ec681f3Smrg [MESA_SHADER_VERTEX] = 2560, 4667ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 504, 4677ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 1536, 4687ec681f3Smrg [MESA_SHADER_GEOMETRY] = 960, 4697ec681f3Smrg }, 4707ec681f3Smrg }, 4717ec681f3Smrg .simulator_id = 11, 4727ec681f3Smrg}; 4737ec681f3Smrg 4747ec681f3Smrgstatic const struct intel_device_info intel_device_info_bdw_gt3 = { 4757ec681f3Smrg GFX8_FEATURES, .gt = 3, 4767ec681f3Smrg .is_broadwell = true, 4777ec681f3Smrg .num_slices = 2, 4787ec681f3Smrg .num_subslices = { 3, 3, }, 4797ec681f3Smrg .num_eu_per_subslice = 8, 4807ec681f3Smrg .l3_banks = 8, 4817ec681f3Smrg .max_cs_threads = 56, 4827ec681f3Smrg .urb = { 4837ec681f3Smrg .min_entries = { 4847ec681f3Smrg [MESA_SHADER_VERTEX] = 64, 4857ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, 4867ec681f3Smrg }, 4877ec681f3Smrg .max_entries = { 4887ec681f3Smrg [MESA_SHADER_VERTEX] = 2560, 4897ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 504, 4907ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 1536, 4917ec681f3Smrg [MESA_SHADER_GEOMETRY] = 960, 4927ec681f3Smrg }, 4937ec681f3Smrg }, 4947ec681f3Smrg .simulator_id = 11, 4957ec681f3Smrg}; 4967ec681f3Smrg 4977ec681f3Smrgstatic const struct intel_device_info intel_device_info_chv = { 4987ec681f3Smrg GFX8_FEATURES, .is_cherryview = 1, .gt = 1, 4997ec681f3Smrg .has_llc = false, 5007ec681f3Smrg .has_integer_dword_mul = false, 5017ec681f3Smrg .num_slices = 1, 5027ec681f3Smrg .num_subslices = { 2, }, 5037ec681f3Smrg .num_eu_per_subslice = 8, 5047ec681f3Smrg .l3_banks = 2, 5057ec681f3Smrg .max_vs_threads = 80, 5067ec681f3Smrg .max_tcs_threads = 80, 5077ec681f3Smrg .max_tes_threads = 80, 5087ec681f3Smrg .max_gs_threads = 80, 5097ec681f3Smrg .max_wm_threads = 128, 5107ec681f3Smrg .max_cs_threads = 6 * 7, 5117ec681f3Smrg .urb = { 5127ec681f3Smrg .min_entries = { 5137ec681f3Smrg [MESA_SHADER_VERTEX] = 34, 5147ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, 5157ec681f3Smrg }, 5167ec681f3Smrg .max_entries = { 5177ec681f3Smrg [MESA_SHADER_VERTEX] = 640, 5187ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 80, 5197ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 384, 5207ec681f3Smrg [MESA_SHADER_GEOMETRY] = 256, 5217ec681f3Smrg }, 5227ec681f3Smrg }, 5237ec681f3Smrg .simulator_id = 13, 5247ec681f3Smrg}; 5257ec681f3Smrg 5267ec681f3Smrg#define GFX9_HW_INFO \ 5277ec681f3Smrg .ver = 9, \ 5287ec681f3Smrg .max_vs_threads = 336, \ 5297ec681f3Smrg .max_gs_threads = 336, \ 5307ec681f3Smrg .max_tcs_threads = 336, \ 5317ec681f3Smrg .max_tes_threads = 336, \ 5327ec681f3Smrg .max_cs_threads = 56, \ 5337ec681f3Smrg .timestamp_frequency = 12000000, \ 5347ec681f3Smrg .cs_prefetch_size = 512, \ 5357ec681f3Smrg .urb = { \ 5367ec681f3Smrg .min_entries = { \ 5377ec681f3Smrg [MESA_SHADER_VERTEX] = 64, \ 5387ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 5397ec681f3Smrg }, \ 5407ec681f3Smrg .max_entries = { \ 5417ec681f3Smrg [MESA_SHADER_VERTEX] = 1856, \ 5427ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 672, \ 5437ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 1120, \ 5447ec681f3Smrg [MESA_SHADER_GEOMETRY] = 640, \ 5457ec681f3Smrg }, \ 5467ec681f3Smrg } 5477ec681f3Smrg 5487ec681f3Smrg#define GFX9_LP_FEATURES \ 5497ec681f3Smrg GFX8_FEATURES, \ 5507ec681f3Smrg GFX9_HW_INFO, \ 5517ec681f3Smrg .has_integer_dword_mul = false, \ 5527ec681f3Smrg .gt = 1, \ 5537ec681f3Smrg .has_llc = false, \ 5547ec681f3Smrg .has_sample_with_hiz = true, \ 5557ec681f3Smrg .num_slices = 1, \ 5567ec681f3Smrg .num_thread_per_eu = 6, \ 5577ec681f3Smrg .max_vs_threads = 112, \ 5587ec681f3Smrg .max_tcs_threads = 112, \ 5597ec681f3Smrg .max_tes_threads = 112, \ 5607ec681f3Smrg .max_gs_threads = 112, \ 5617ec681f3Smrg .max_cs_threads = 6 * 6, \ 5627ec681f3Smrg .timestamp_frequency = 19200000, \ 5637ec681f3Smrg .urb = { \ 5647ec681f3Smrg .min_entries = { \ 5657ec681f3Smrg [MESA_SHADER_VERTEX] = 34, \ 5667ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 5677ec681f3Smrg }, \ 5687ec681f3Smrg .max_entries = { \ 5697ec681f3Smrg [MESA_SHADER_VERTEX] = 704, \ 5707ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 256, \ 5717ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 416, \ 5727ec681f3Smrg [MESA_SHADER_GEOMETRY] = 256, \ 5737ec681f3Smrg }, \ 5747ec681f3Smrg } 5757ec681f3Smrg 5767ec681f3Smrg#define GFX9_LP_FEATURES_3X6 \ 5777ec681f3Smrg GFX9_LP_FEATURES, \ 5787ec681f3Smrg .num_subslices = { 3, }, \ 5797ec681f3Smrg .num_eu_per_subslice = 6 5807ec681f3Smrg 5817ec681f3Smrg#define GFX9_LP_FEATURES_2X6 \ 5827ec681f3Smrg GFX9_LP_FEATURES, \ 5837ec681f3Smrg .num_subslices = { 2, }, \ 5847ec681f3Smrg .num_eu_per_subslice = 6, \ 5857ec681f3Smrg .max_vs_threads = 56, \ 5867ec681f3Smrg .max_tcs_threads = 56, \ 5877ec681f3Smrg .max_tes_threads = 56, \ 5887ec681f3Smrg .max_gs_threads = 56, \ 5897ec681f3Smrg .max_cs_threads = 6 * 6, \ 5907ec681f3Smrg .urb = { \ 5917ec681f3Smrg .min_entries = { \ 5927ec681f3Smrg [MESA_SHADER_VERTEX] = 34, \ 5937ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 5947ec681f3Smrg }, \ 5957ec681f3Smrg .max_entries = { \ 5967ec681f3Smrg [MESA_SHADER_VERTEX] = 352, \ 5977ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 128, \ 5987ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 208, \ 5997ec681f3Smrg [MESA_SHADER_GEOMETRY] = 128, \ 6007ec681f3Smrg }, \ 6017ec681f3Smrg } 6027ec681f3Smrg 6037ec681f3Smrg#define GFX9_FEATURES \ 6047ec681f3Smrg GFX8_FEATURES, \ 6057ec681f3Smrg GFX9_HW_INFO, \ 6067ec681f3Smrg .has_sample_with_hiz = true 6077ec681f3Smrg 6087ec681f3Smrgstatic const struct intel_device_info intel_device_info_skl_gt1 = { 6097ec681f3Smrg GFX9_FEATURES, .gt = 1, 6107ec681f3Smrg .is_skylake = true, 6117ec681f3Smrg .num_slices = 1, 6127ec681f3Smrg .num_subslices = { 2, }, 6137ec681f3Smrg .num_eu_per_subslice = 6, 6147ec681f3Smrg .l3_banks = 2, 6157ec681f3Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 6167ec681f3Smrg * leading to some vertices to go missing if we use too much URB. 6177ec681f3Smrg */ 6187ec681f3Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 6197ec681f3Smrg .simulator_id = 12, 6207ec681f3Smrg}; 6217ec681f3Smrg 6227ec681f3Smrgstatic const struct intel_device_info intel_device_info_skl_gt2 = { 6237ec681f3Smrg GFX9_FEATURES, .gt = 2, 6247ec681f3Smrg .is_skylake = true, 6257ec681f3Smrg .num_slices = 1, 6267ec681f3Smrg .num_subslices = { 3, }, 6277ec681f3Smrg .num_eu_per_subslice = 8, 6287ec681f3Smrg .l3_banks = 4, 6297ec681f3Smrg .simulator_id = 12, 6307ec681f3Smrg}; 6317ec681f3Smrg 6327ec681f3Smrgstatic const struct intel_device_info intel_device_info_skl_gt3 = { 6337ec681f3Smrg GFX9_FEATURES, .gt = 3, 6347ec681f3Smrg .is_skylake = true, 6357ec681f3Smrg .num_slices = 2, 6367ec681f3Smrg .num_subslices = { 3, 3, }, 6377ec681f3Smrg .num_eu_per_subslice = 8, 6387ec681f3Smrg .l3_banks = 8, 6397ec681f3Smrg .simulator_id = 12, 6407ec681f3Smrg}; 6417ec681f3Smrg 6427ec681f3Smrgstatic const struct intel_device_info intel_device_info_skl_gt4 = { 6437ec681f3Smrg GFX9_FEATURES, .gt = 4, 6447ec681f3Smrg .is_skylake = true, 6457ec681f3Smrg .num_slices = 3, 6467ec681f3Smrg .num_subslices = { 3, 3, 3, }, 6477ec681f3Smrg .num_eu_per_subslice = 8, 6487ec681f3Smrg .l3_banks = 12, 6497ec681f3Smrg /* From the "L3 Allocation and Programming" documentation: 6507ec681f3Smrg * 6517ec681f3Smrg * "URB is limited to 1008KB due to programming restrictions. This is not a 6527ec681f3Smrg * restriction of the L3 implementation, but of the FF and other clients. 6537ec681f3Smrg * Therefore, in a GT4 implementation it is possible for the programmed 6547ec681f3Smrg * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but 6557ec681f3Smrg * only 1008KB of this will be used." 6567ec681f3Smrg */ 6577ec681f3Smrg .simulator_id = 12, 6587ec681f3Smrg}; 6597ec681f3Smrg 6607ec681f3Smrgstatic const struct intel_device_info intel_device_info_bxt = { 6617ec681f3Smrg GFX9_LP_FEATURES_3X6, 6627ec681f3Smrg .is_broxton = true, 6637ec681f3Smrg .l3_banks = 2, 6647ec681f3Smrg .simulator_id = 14, 6657ec681f3Smrg}; 6667ec681f3Smrg 6677ec681f3Smrgstatic const struct intel_device_info intel_device_info_bxt_2x6 = { 6687ec681f3Smrg GFX9_LP_FEATURES_2X6, 6697ec681f3Smrg .is_broxton = true, 6707ec681f3Smrg .l3_banks = 1, 6717ec681f3Smrg .simulator_id = 14, 6727ec681f3Smrg}; 6737ec681f3Smrg/* 6747ec681f3Smrg * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. 6757ec681f3Smrg * There's no KBL entry. Using the default SKL (GFX9) GS entries value. 6767ec681f3Smrg */ 6777ec681f3Smrg 6787ec681f3Smrgstatic const struct intel_device_info intel_device_info_kbl_gt1 = { 6797ec681f3Smrg GFX9_FEATURES, 6807ec681f3Smrg .is_kabylake = true, 6817ec681f3Smrg .gt = 1, 6827ec681f3Smrg 6837ec681f3Smrg .max_cs_threads = 7 * 6, 6847ec681f3Smrg .num_slices = 1, 6857ec681f3Smrg .num_subslices = { 2, }, 6867ec681f3Smrg .num_eu_per_subslice = 6, 6877ec681f3Smrg .l3_banks = 2, 6887ec681f3Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 6897ec681f3Smrg * leading to some vertices to go missing if we use too much URB. 6907ec681f3Smrg */ 6917ec681f3Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 6927ec681f3Smrg .urb.max_entries[MESA_SHADER_GEOMETRY] = 256, 6937ec681f3Smrg .simulator_id = 16, 6947ec681f3Smrg}; 6957ec681f3Smrg 6967ec681f3Smrgstatic const struct intel_device_info intel_device_info_kbl_gt1_5 = { 6977ec681f3Smrg GFX9_FEATURES, 6987ec681f3Smrg .is_kabylake = true, 6997ec681f3Smrg .gt = 1, 7007ec681f3Smrg 7017ec681f3Smrg .max_cs_threads = 7 * 6, 7027ec681f3Smrg .num_slices = 1, 7037ec681f3Smrg .num_subslices = { 3, }, 7047ec681f3Smrg .num_eu_per_subslice = 6, 7057ec681f3Smrg .l3_banks = 4, 7067ec681f3Smrg .simulator_id = 16, 7077ec681f3Smrg}; 7087ec681f3Smrg 7097ec681f3Smrgstatic const struct intel_device_info intel_device_info_kbl_gt2 = { 7107ec681f3Smrg GFX9_FEATURES, 7117ec681f3Smrg .is_kabylake = true, 7127ec681f3Smrg .gt = 2, 7137ec681f3Smrg 7147ec681f3Smrg .num_slices = 1, 7157ec681f3Smrg .num_subslices = { 3, }, 7167ec681f3Smrg .num_eu_per_subslice = 8, 7177ec681f3Smrg .l3_banks = 4, 7187ec681f3Smrg .simulator_id = 16, 7197ec681f3Smrg}; 7207ec681f3Smrg 7217ec681f3Smrgstatic const struct intel_device_info intel_device_info_kbl_gt3 = { 7227ec681f3Smrg GFX9_FEATURES, 7237ec681f3Smrg .is_kabylake = true, 7247ec681f3Smrg .gt = 3, 7257ec681f3Smrg 7267ec681f3Smrg .num_slices = 2, 7277ec681f3Smrg .num_subslices = { 3, 3, }, 7287ec681f3Smrg .num_eu_per_subslice = 8, 7297ec681f3Smrg .l3_banks = 8, 7307ec681f3Smrg .simulator_id = 16, 7317ec681f3Smrg}; 7327ec681f3Smrg 7337ec681f3Smrgstatic const struct intel_device_info intel_device_info_kbl_gt4 = { 7347ec681f3Smrg GFX9_FEATURES, 7357ec681f3Smrg .is_kabylake = true, 7367ec681f3Smrg .gt = 4, 7377ec681f3Smrg 7387ec681f3Smrg /* 7397ec681f3Smrg * From the "L3 Allocation and Programming" documentation: 7407ec681f3Smrg * 7417ec681f3Smrg * "URB is limited to 1008KB due to programming restrictions. This 7427ec681f3Smrg * is not a restriction of the L3 implementation, but of the FF and 7437ec681f3Smrg * other clients. Therefore, in a GT4 implementation it is 7447ec681f3Smrg * possible for the programmed allocation of the L3 data array to 7457ec681f3Smrg * provide 3*384KB=1152KB for URB, but only 1008KB of this 7467ec681f3Smrg * will be used." 7477ec681f3Smrg */ 7487ec681f3Smrg .num_slices = 3, 7497ec681f3Smrg .num_subslices = { 3, 3, 3, }, 7507ec681f3Smrg .num_eu_per_subslice = 8, 7517ec681f3Smrg .l3_banks = 12, 7527ec681f3Smrg .simulator_id = 16, 7537ec681f3Smrg}; 7547ec681f3Smrg 7557ec681f3Smrgstatic const struct intel_device_info intel_device_info_glk = { 7567ec681f3Smrg GFX9_LP_FEATURES_3X6, 7577ec681f3Smrg .is_geminilake = true, 7587ec681f3Smrg .l3_banks = 2, 7597ec681f3Smrg .simulator_id = 17, 7607ec681f3Smrg}; 7617ec681f3Smrg 7627ec681f3Smrgstatic const struct intel_device_info intel_device_info_glk_2x6 = { 7637ec681f3Smrg GFX9_LP_FEATURES_2X6, 7647ec681f3Smrg .is_geminilake = true, 7657ec681f3Smrg .l3_banks = 2, 7667ec681f3Smrg .simulator_id = 17, 7677ec681f3Smrg}; 7687ec681f3Smrg 7697ec681f3Smrgstatic const struct intel_device_info intel_device_info_cfl_gt1 = { 7707ec681f3Smrg GFX9_FEATURES, 7717ec681f3Smrg .is_coffeelake = true, 7727ec681f3Smrg .gt = 1, 7737ec681f3Smrg 7747ec681f3Smrg .num_slices = 1, 7757ec681f3Smrg .num_subslices = { 2, }, 7767ec681f3Smrg .num_eu_per_subslice = 6, 7777ec681f3Smrg .l3_banks = 2, 7787ec681f3Smrg /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions 7797ec681f3Smrg * leading to some vertices to go missing if we use too much URB. 7807ec681f3Smrg */ 7817ec681f3Smrg .urb.max_entries[MESA_SHADER_VERTEX] = 928, 7827ec681f3Smrg .urb.max_entries[MESA_SHADER_GEOMETRY] = 256, 7837ec681f3Smrg .simulator_id = 24, 7847ec681f3Smrg}; 7857ec681f3Smrgstatic const struct intel_device_info intel_device_info_cfl_gt2 = { 7867ec681f3Smrg GFX9_FEATURES, 7877ec681f3Smrg .is_coffeelake = true, 7887ec681f3Smrg .gt = 2, 7897ec681f3Smrg 7907ec681f3Smrg .num_slices = 1, 7917ec681f3Smrg .num_subslices = { 3, }, 7927ec681f3Smrg .num_eu_per_subslice = 8, 7937ec681f3Smrg .l3_banks = 4, 7947ec681f3Smrg .simulator_id = 24, 7957ec681f3Smrg}; 7967ec681f3Smrg 7977ec681f3Smrgstatic const struct intel_device_info intel_device_info_cfl_gt3 = { 7987ec681f3Smrg GFX9_FEATURES, 7997ec681f3Smrg .is_coffeelake = true, 8007ec681f3Smrg .gt = 3, 8017ec681f3Smrg 8027ec681f3Smrg .num_slices = 2, 8037ec681f3Smrg .num_subslices = { 3, 3, }, 8047ec681f3Smrg .num_eu_per_subslice = 8, 8057ec681f3Smrg .l3_banks = 8, 8067ec681f3Smrg .simulator_id = 24, 8077ec681f3Smrg}; 8087ec681f3Smrg 8097ec681f3Smrg#define subslices(args...) { args, } 8107ec681f3Smrg 8117ec681f3Smrg#define GFX11_HW_INFO \ 8127ec681f3Smrg .ver = 11, \ 8137ec681f3Smrg .has_pln = false, \ 8147ec681f3Smrg .max_vs_threads = 364, \ 8157ec681f3Smrg .max_gs_threads = 224, \ 8167ec681f3Smrg .max_tcs_threads = 224, \ 8177ec681f3Smrg .max_tes_threads = 364, \ 8187ec681f3Smrg .max_cs_threads = 56, \ 8197ec681f3Smrg .cs_prefetch_size = 512 8207ec681f3Smrg 8217ec681f3Smrg#define GFX11_FEATURES(_gt, _slices, _subslices, _l3) \ 8227ec681f3Smrg GFX8_FEATURES, \ 8237ec681f3Smrg GFX11_HW_INFO, \ 8247ec681f3Smrg .has_64bit_float = false, \ 8257ec681f3Smrg .has_64bit_int = false, \ 8267ec681f3Smrg .has_integer_dword_mul = false, \ 8277ec681f3Smrg .has_sample_with_hiz = false, \ 8287ec681f3Smrg .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ 8297ec681f3Smrg .num_subslices = _subslices, \ 8307ec681f3Smrg .num_eu_per_subslice = 8 8317ec681f3Smrg 8327ec681f3Smrg#define GFX11_URB_MIN_MAX_ENTRIES \ 8337ec681f3Smrg .min_entries = { \ 8347ec681f3Smrg [MESA_SHADER_VERTEX] = 64, \ 8357ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 8367ec681f3Smrg }, \ 8377ec681f3Smrg .max_entries = { \ 8387ec681f3Smrg [MESA_SHADER_VERTEX] = 2384, \ 8397ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 1032, \ 8407ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 2384, \ 8417ec681f3Smrg [MESA_SHADER_GEOMETRY] = 1032, \ 8427ec681f3Smrg } 8437ec681f3Smrg 8447ec681f3Smrgstatic const struct intel_device_info intel_device_info_icl_gt2 = { 8457ec681f3Smrg GFX11_FEATURES(2, 1, subslices(8), 8), 8467ec681f3Smrg .urb = { 8477ec681f3Smrg GFX11_URB_MIN_MAX_ENTRIES, 8487ec681f3Smrg }, 8497ec681f3Smrg .simulator_id = 19, 8507ec681f3Smrg}; 8517ec681f3Smrg 8527ec681f3Smrgstatic const struct intel_device_info intel_device_info_icl_gt1_5 = { 8537ec681f3Smrg GFX11_FEATURES(1, 1, subslices(6), 6), 8547ec681f3Smrg .urb = { 8557ec681f3Smrg GFX11_URB_MIN_MAX_ENTRIES, 8567ec681f3Smrg }, 8577ec681f3Smrg .simulator_id = 19, 8587ec681f3Smrg}; 8597ec681f3Smrg 8607ec681f3Smrgstatic const struct intel_device_info intel_device_info_icl_gt1 = { 8617ec681f3Smrg GFX11_FEATURES(1, 1, subslices(4), 6), 8627ec681f3Smrg .urb = { 8637ec681f3Smrg GFX11_URB_MIN_MAX_ENTRIES, 8647ec681f3Smrg }, 8657ec681f3Smrg .simulator_id = 19, 8667ec681f3Smrg}; 8677ec681f3Smrg 8687ec681f3Smrgstatic const struct intel_device_info intel_device_info_icl_gt0_5 = { 8697ec681f3Smrg GFX11_FEATURES(1, 1, subslices(1), 6), 8707ec681f3Smrg .urb = { 8717ec681f3Smrg GFX11_URB_MIN_MAX_ENTRIES, 8727ec681f3Smrg }, 8737ec681f3Smrg .simulator_id = 19, 8747ec681f3Smrg}; 8757ec681f3Smrg 8767ec681f3Smrg#define GFX11_LP_FEATURES \ 8777ec681f3Smrg .is_elkhartlake = true, \ 8787ec681f3Smrg .urb = { \ 8797ec681f3Smrg GFX11_URB_MIN_MAX_ENTRIES, \ 8807ec681f3Smrg }, \ 8817ec681f3Smrg .disable_ccs_repack = true, \ 8827ec681f3Smrg .simulator_id = 28 8837ec681f3Smrg 8847ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_4x8 = { 8857ec681f3Smrg GFX11_FEATURES(1, 1, subslices(4), 4), 8867ec681f3Smrg GFX11_LP_FEATURES, 8877ec681f3Smrg}; 8887ec681f3Smrg 8897ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_4x6 = { 8907ec681f3Smrg GFX11_FEATURES(1, 1, subslices(4), 4), 8917ec681f3Smrg GFX11_LP_FEATURES, 8927ec681f3Smrg .num_eu_per_subslice = 6, 8937ec681f3Smrg}; 8947ec681f3Smrg 8957ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_4x5 = { 8967ec681f3Smrg GFX11_FEATURES(1, 1, subslices(4), 4), 8977ec681f3Smrg GFX11_LP_FEATURES, 8987ec681f3Smrg .num_eu_per_subslice = 5, 8997ec681f3Smrg}; 9007ec681f3Smrg 9017ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_4x4 = { 9027ec681f3Smrg GFX11_FEATURES(1, 1, subslices(4), 4), 9037ec681f3Smrg GFX11_LP_FEATURES, 9047ec681f3Smrg .num_eu_per_subslice = 4, 9057ec681f3Smrg}; 9067ec681f3Smrg 9077ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_2x8 = { 9087ec681f3Smrg GFX11_FEATURES(1, 1, subslices(2), 4), 9097ec681f3Smrg GFX11_LP_FEATURES, 9107ec681f3Smrg}; 9117ec681f3Smrg 9127ec681f3Smrgstatic const struct intel_device_info intel_device_info_ehl_2x4 = { 9137ec681f3Smrg GFX11_FEATURES(1, 1, subslices(2), 4), 9147ec681f3Smrg GFX11_LP_FEATURES, 9157ec681f3Smrg .num_eu_per_subslice =4, 9167ec681f3Smrg}; 9177ec681f3Smrg 9187ec681f3Smrg#define GFX12_URB_MIN_MAX_ENTRIES \ 9197ec681f3Smrg .min_entries = { \ 9207ec681f3Smrg [MESA_SHADER_VERTEX] = 64, \ 9217ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 34, \ 9227ec681f3Smrg }, \ 9237ec681f3Smrg .max_entries = { \ 9247ec681f3Smrg [MESA_SHADER_VERTEX] = 3576, \ 9257ec681f3Smrg [MESA_SHADER_TESS_CTRL] = 1548, \ 9267ec681f3Smrg [MESA_SHADER_TESS_EVAL] = 3576, \ 9277ec681f3Smrg /* Wa_14013840143 */ \ 9287ec681f3Smrg [MESA_SHADER_GEOMETRY] = 1536, \ 9297ec681f3Smrg } 9307ec681f3Smrg 9317ec681f3Smrg#define GFX12_HW_INFO \ 9327ec681f3Smrg .ver = 12, \ 9337ec681f3Smrg .has_pln = false, \ 9347ec681f3Smrg .has_sample_with_hiz = false, \ 9357ec681f3Smrg .has_aux_map = true, \ 9367ec681f3Smrg .max_vs_threads = 546, \ 9377ec681f3Smrg .max_gs_threads = 336, \ 9387ec681f3Smrg .max_tcs_threads = 336, \ 9397ec681f3Smrg .max_tes_threads = 546, \ 9407ec681f3Smrg .max_cs_threads = 112, /* threads per DSS */ \ 9417ec681f3Smrg .urb = { \ 9427ec681f3Smrg GFX12_URB_MIN_MAX_ENTRIES, \ 9437ec681f3Smrg } 9447ec681f3Smrg 9457ec681f3Smrg#define GFX12_FEATURES(_gt, _slices, _l3) \ 9467ec681f3Smrg GFX8_FEATURES, \ 9477ec681f3Smrg GFX12_HW_INFO, \ 9487ec681f3Smrg .has_64bit_float = false, \ 9497ec681f3Smrg .has_64bit_int = false, \ 9507ec681f3Smrg .has_integer_dword_mul = false, \ 9517ec681f3Smrg .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ 9527ec681f3Smrg .simulator_id = 22, \ 9537ec681f3Smrg .num_eu_per_subslice = 16, \ 9547ec681f3Smrg .cs_prefetch_size = 512 9557ec681f3Smrg 9567ec681f3Smrg#define dual_subslices(args...) { args, } 9577ec681f3Smrg 9587ec681f3Smrg#define GFX12_GT05_FEATURES \ 9597ec681f3Smrg GFX12_FEATURES(1, 1, 4), \ 9607ec681f3Smrg .num_subslices = dual_subslices(1) 9617ec681f3Smrg 9627ec681f3Smrg#define GFX12_GT_FEATURES(_gt) \ 9637ec681f3Smrg GFX12_FEATURES(_gt, 1, _gt == 1 ? 4 : 8), \ 9647ec681f3Smrg .num_subslices = dual_subslices(_gt == 1 ? 2 : 6) 9657ec681f3Smrg 9667ec681f3Smrgstatic const struct intel_device_info intel_device_info_tgl_gt1 = { 9677ec681f3Smrg GFX12_GT_FEATURES(1), 9687ec681f3Smrg .is_tigerlake = true, 9697ec681f3Smrg}; 9707ec681f3Smrg 9717ec681f3Smrgstatic const struct intel_device_info intel_device_info_tgl_gt2 = { 9727ec681f3Smrg GFX12_GT_FEATURES(2), 9737ec681f3Smrg .is_tigerlake = true, 9747ec681f3Smrg}; 9757ec681f3Smrg 9767ec681f3Smrgstatic const struct intel_device_info intel_device_info_rkl_gt05 = { 9777ec681f3Smrg GFX12_GT05_FEATURES, 9787ec681f3Smrg .is_rocketlake = true, 9797ec681f3Smrg}; 9807ec681f3Smrg 9817ec681f3Smrgstatic const struct intel_device_info intel_device_info_rkl_gt1 = { 9827ec681f3Smrg GFX12_GT_FEATURES(1), 9837ec681f3Smrg .is_rocketlake = true, 9847ec681f3Smrg}; 9857ec681f3Smrg 9867ec681f3Smrgstatic const struct intel_device_info intel_device_info_adl_gt05 = { 9877ec681f3Smrg GFX12_GT05_FEATURES, 9887ec681f3Smrg .is_alderlake = true, 9897ec681f3Smrg}; 9907ec681f3Smrg 9917ec681f3Smrgstatic const struct intel_device_info intel_device_info_adl_gt1 = { 9927ec681f3Smrg GFX12_GT_FEATURES(1), 9937ec681f3Smrg .is_alderlake = true, 9947ec681f3Smrg}; 9957ec681f3Smrg 9967ec681f3Smrgstatic const struct intel_device_info intel_device_info_adl_gt2 = { 9977ec681f3Smrg GFX12_GT_FEATURES(2), 9987ec681f3Smrg .is_alderlake = true, 9997ec681f3Smrg .display_ver = 13, 10007ec681f3Smrg}; 10017ec681f3Smrg 10027ec681f3Smrg#define GFX12_DG1_SG1_FEATURES \ 10037ec681f3Smrg GFX12_GT_FEATURES(2), \ 10047ec681f3Smrg .is_dg1 = true, \ 10057ec681f3Smrg .has_llc = false, \ 10067ec681f3Smrg .has_local_mem = true, \ 10077ec681f3Smrg .urb.size = 768, \ 10087ec681f3Smrg .simulator_id = 30 10097ec681f3Smrg 10107ec681f3Smrgstatic const struct intel_device_info intel_device_info_dg1 = { 10117ec681f3Smrg GFX12_DG1_SG1_FEATURES, 10127ec681f3Smrg}; 10137ec681f3Smrg 10147ec681f3Smrgstatic const struct intel_device_info intel_device_info_sg1 = { 10157ec681f3Smrg GFX12_DG1_SG1_FEATURES, 10167ec681f3Smrg}; 10177ec681f3Smrg 10187ec681f3Smrgstatic void 10197ec681f3Smrgreset_masks(struct intel_device_info *devinfo) 10207ec681f3Smrg{ 10217ec681f3Smrg devinfo->subslice_slice_stride = 0; 10227ec681f3Smrg devinfo->eu_subslice_stride = 0; 10237ec681f3Smrg devinfo->eu_slice_stride = 0; 10247ec681f3Smrg 10257ec681f3Smrg devinfo->num_slices = 0; 10267ec681f3Smrg devinfo->num_eu_per_subslice = 0; 10277ec681f3Smrg memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); 10287ec681f3Smrg 10297ec681f3Smrg memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); 10307ec681f3Smrg memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); 10317ec681f3Smrg memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); 10327ec681f3Smrg memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices)); 10337ec681f3Smrg} 10347ec681f3Smrg 10357ec681f3Smrgstatic void 10367ec681f3Smrgupdate_from_topology(struct intel_device_info *devinfo, 10377ec681f3Smrg const struct drm_i915_query_topology_info *topology) 10387ec681f3Smrg{ 10397ec681f3Smrg reset_masks(devinfo); 10407ec681f3Smrg 10417ec681f3Smrg assert(topology->max_slices > 0); 10427ec681f3Smrg assert(topology->max_subslices > 0); 10437ec681f3Smrg assert(topology->max_eus_per_subslice > 0); 10447ec681f3Smrg 10457ec681f3Smrg devinfo->subslice_slice_stride = topology->subslice_stride; 10467ec681f3Smrg 10477ec681f3Smrg devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8); 10487ec681f3Smrg devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride; 10497ec681f3Smrg 10507ec681f3Smrg assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); 10517ec681f3Smrg memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); 10527ec681f3Smrg devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); 10537ec681f3Smrg devinfo->max_slices = topology->max_slices; 10547ec681f3Smrg devinfo->max_subslices_per_slice = topology->max_subslices; 10557ec681f3Smrg devinfo->max_eu_per_subslice = topology->max_eus_per_subslice; 10567ec681f3Smrg 10577ec681f3Smrg uint32_t subslice_mask_len = 10587ec681f3Smrg topology->max_slices * topology->subslice_stride; 10597ec681f3Smrg assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); 10607ec681f3Smrg memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset], 10617ec681f3Smrg subslice_mask_len); 10627ec681f3Smrg 10637ec681f3Smrg uint32_t n_subslices = 0; 10647ec681f3Smrg for (int s = 0; s < topology->max_slices; s++) { 10657ec681f3Smrg if ((devinfo->slice_masks & (1 << s)) == 0) 10667ec681f3Smrg continue; 10677ec681f3Smrg 10687ec681f3Smrg for (int b = 0; b < devinfo->subslice_slice_stride; b++) { 10697ec681f3Smrg devinfo->num_subslices[s] += 10707ec681f3Smrg __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]); 10717ec681f3Smrg } 10727ec681f3Smrg n_subslices += devinfo->num_subslices[s]; 10737ec681f3Smrg } 10747ec681f3Smrg assert(n_subslices > 0); 10757ec681f3Smrg 10767ec681f3Smrg if (devinfo->ver >= 11) { 10777ec681f3Smrg /* On current ICL+ hardware we only have one slice. */ 10787ec681f3Smrg assert(devinfo->slice_masks == 1); 10797ec681f3Smrg 10807ec681f3Smrg /* Count the number of subslices on each pixel pipe. Assume that every 10817ec681f3Smrg * contiguous group of 4 subslices in the mask belong to the same pixel 10827ec681f3Smrg * pipe. However note that on TGL the kernel returns a mask of enabled 10837ec681f3Smrg * *dual* subslices instead of actual subslices somewhat confusingly, so 10847ec681f3Smrg * each pixel pipe only takes 2 bits in the mask even though it's still 10857ec681f3Smrg * 4 subslices. 10867ec681f3Smrg */ 10877ec681f3Smrg const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4; 10887ec681f3Smrg for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) { 10897ec681f3Smrg const unsigned ppipe_mask = BITFIELD_RANGE(p * ppipe_bits, ppipe_bits); 10907ec681f3Smrg devinfo->ppipe_subslices[p] = 10917ec681f3Smrg __builtin_popcount(devinfo->subslice_masks[0] & ppipe_mask); 10927ec681f3Smrg } 10937ec681f3Smrg } 10947ec681f3Smrg 10957ec681f3Smrg if (devinfo->ver == 12 && devinfo->num_slices == 1) { 10967ec681f3Smrg if (n_subslices >= 6) { 10977ec681f3Smrg assert(n_subslices == 6); 10987ec681f3Smrg devinfo->l3_banks = 8; 10997ec681f3Smrg } else if (n_subslices > 2) { 11007ec681f3Smrg devinfo->l3_banks = 6; 11017ec681f3Smrg } else { 11027ec681f3Smrg devinfo->l3_banks = 4; 11037ec681f3Smrg } 11047ec681f3Smrg } 11057ec681f3Smrg 11067ec681f3Smrg uint32_t eu_mask_len = 11077ec681f3Smrg topology->eu_stride * topology->max_subslices * topology->max_slices; 11087ec681f3Smrg assert(sizeof(devinfo->eu_masks) >= eu_mask_len); 11097ec681f3Smrg memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len); 11107ec681f3Smrg 11117ec681f3Smrg uint32_t n_eus = 0; 11127ec681f3Smrg for (int b = 0; b < eu_mask_len; b++) 11137ec681f3Smrg n_eus += __builtin_popcount(devinfo->eu_masks[b]); 11147ec681f3Smrg 11157ec681f3Smrg devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 11167ec681f3Smrg} 11177ec681f3Smrg 11187ec681f3Smrg/* Generate detailed mask from the I915_PARAM_SLICE_MASK, 11197ec681f3Smrg * I915_PARAM_SUBSLICE_MASK & I915_PARAM_EU_TOTAL getparam. 11207ec681f3Smrg */ 11217ec681f3Smrgstatic bool 11227ec681f3Smrgupdate_from_masks(struct intel_device_info *devinfo, uint32_t slice_mask, 11237ec681f3Smrg uint32_t subslice_mask, uint32_t n_eus) 11247ec681f3Smrg{ 11257ec681f3Smrg struct drm_i915_query_topology_info *topology; 11267ec681f3Smrg 11277ec681f3Smrg assert((slice_mask & 0xff) == slice_mask); 11287ec681f3Smrg 11297ec681f3Smrg size_t data_length = 100; 11307ec681f3Smrg 11317ec681f3Smrg topology = calloc(1, sizeof(*topology) + data_length); 11327ec681f3Smrg if (!topology) 11337ec681f3Smrg return false; 11347ec681f3Smrg 11357ec681f3Smrg topology->max_slices = util_last_bit(slice_mask); 11367ec681f3Smrg topology->max_subslices = util_last_bit(subslice_mask); 11377ec681f3Smrg 11387ec681f3Smrg topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8); 11397ec681f3Smrg topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8); 11407ec681f3Smrg 11417ec681f3Smrg uint32_t n_subslices = __builtin_popcount(slice_mask) * 11427ec681f3Smrg __builtin_popcount(subslice_mask); 11437ec681f3Smrg uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); 11447ec681f3Smrg uint32_t eu_mask = (1U << num_eu_per_subslice) - 1; 11457ec681f3Smrg 11467ec681f3Smrg topology->max_eus_per_subslice = num_eu_per_subslice; 11477ec681f3Smrg topology->eu_offset = topology->subslice_offset + 11487ec681f3Smrg topology->max_slices * DIV_ROUND_UP(topology->max_subslices, 8); 11497ec681f3Smrg topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8); 11507ec681f3Smrg 11517ec681f3Smrg /* Set slice mask in topology */ 11527ec681f3Smrg for (int b = 0; b < topology->subslice_offset; b++) 11537ec681f3Smrg topology->data[b] = (slice_mask >> (b * 8)) & 0xff; 11547ec681f3Smrg 11557ec681f3Smrg for (int s = 0; s < topology->max_slices; s++) { 11567ec681f3Smrg 11577ec681f3Smrg /* Set subslice mask in topology */ 11587ec681f3Smrg for (int b = 0; b < topology->subslice_stride; b++) { 11597ec681f3Smrg int subslice_offset = topology->subslice_offset + 11607ec681f3Smrg s * topology->subslice_stride + b; 11617ec681f3Smrg 11627ec681f3Smrg topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; 11637ec681f3Smrg } 11647ec681f3Smrg 11657ec681f3Smrg /* Set eu mask in topology */ 11667ec681f3Smrg for (int ss = 0; ss < topology->max_subslices; ss++) { 11677ec681f3Smrg for (int b = 0; b < topology->eu_stride; b++) { 11687ec681f3Smrg int eu_offset = topology->eu_offset + 11697ec681f3Smrg (s * topology->max_subslices + ss) * topology->eu_stride + b; 11707ec681f3Smrg 11717ec681f3Smrg topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; 11727ec681f3Smrg } 11737ec681f3Smrg } 11747ec681f3Smrg } 11757ec681f3Smrg 11767ec681f3Smrg update_from_topology(devinfo, topology); 11777ec681f3Smrg free(topology); 11787ec681f3Smrg 11797ec681f3Smrg return true; 11807ec681f3Smrg} 11817ec681f3Smrg 11827ec681f3Smrg/* Generate mask from the device data. */ 11837ec681f3Smrgstatic void 11847ec681f3Smrgfill_masks(struct intel_device_info *devinfo) 11857ec681f3Smrg{ 11867ec681f3Smrg /* All of our internal device descriptions assign the same number of 11877ec681f3Smrg * subslices for each slice. Just verify that this is true. 11887ec681f3Smrg */ 11897ec681f3Smrg for (int s = 1; s < devinfo->num_slices; s++) 11907ec681f3Smrg assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]); 11917ec681f3Smrg 11927ec681f3Smrg update_from_masks(devinfo, 11937ec681f3Smrg (1U << devinfo->num_slices) - 1, 11947ec681f3Smrg (1U << devinfo->num_subslices[0]) - 1, 11957ec681f3Smrg devinfo->num_slices * devinfo->num_subslices[0] * 11967ec681f3Smrg devinfo->num_eu_per_subslice); 11977ec681f3Smrg} 11987ec681f3Smrg 11997ec681f3Smrgstatic bool 12007ec681f3Smrggetparam(int fd, uint32_t param, int *value) 12017ec681f3Smrg{ 12027ec681f3Smrg int tmp; 12037ec681f3Smrg 12047ec681f3Smrg struct drm_i915_getparam gp = { 12057ec681f3Smrg .param = param, 12067ec681f3Smrg .value = &tmp, 12077ec681f3Smrg }; 12087ec681f3Smrg 12097ec681f3Smrg int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 12107ec681f3Smrg if (ret != 0) 12117ec681f3Smrg return false; 12127ec681f3Smrg 12137ec681f3Smrg *value = tmp; 12147ec681f3Smrg return true; 12157ec681f3Smrg} 12167ec681f3Smrg 12177ec681f3Smrgstatic void 12187ec681f3Smrgupdate_cs_workgroup_threads(struct intel_device_info *devinfo) 12197ec681f3Smrg{ 12207ec681f3Smrg /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we 12217ec681f3Smrg * can program is 64 without going up to a rectangular group. This only 12227ec681f3Smrg * impacts Haswell and TGL which have higher thread counts. 12237ec681f3Smrg * 12247ec681f3Smrg * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+ 12257ec681f3Smrg * is 10 bits so we have no such restrictions. 12267ec681f3Smrg */ 12277ec681f3Smrg devinfo->max_cs_workgroup_threads = 12287ec681f3Smrg devinfo->verx10 >= 125 ? devinfo->max_cs_threads : 12297ec681f3Smrg MIN2(devinfo->max_cs_threads, 64); 12307ec681f3Smrg} 12317ec681f3Smrg 12327ec681f3Smrgbool 12337ec681f3Smrgintel_get_device_info_from_pci_id(int pci_id, 12347ec681f3Smrg struct intel_device_info *devinfo) 12357ec681f3Smrg{ 12367ec681f3Smrg switch (pci_id) { 12377ec681f3Smrg#undef CHIPSET 12387ec681f3Smrg#define CHIPSET(id, family, fam_str, name) \ 12397ec681f3Smrg case id: *devinfo = intel_device_info_##family; break; 12407ec681f3Smrg#include "pci_ids/i965_pci_ids.h" 12417ec681f3Smrg#include "pci_ids/iris_pci_ids.h" 12427ec681f3Smrg 12437ec681f3Smrg#undef CHIPSET 12447ec681f3Smrg#define CHIPSET(id, fam_str, name) \ 12457ec681f3Smrg case id: *devinfo = intel_device_info_gfx3; break; 12467ec681f3Smrg#include "pci_ids/i915_pci_ids.h" 12477ec681f3Smrg 12487ec681f3Smrg default: 12497ec681f3Smrg mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id); 12507ec681f3Smrg return false; 12517ec681f3Smrg } 12527ec681f3Smrg 12537ec681f3Smrg switch (pci_id) { 12547ec681f3Smrg#undef CHIPSET 12557ec681f3Smrg#define CHIPSET(_id, _family, _fam_str, _name) \ 12567ec681f3Smrg case _id: \ 12577ec681f3Smrg /* sizeof(str_literal) includes the null */ \ 12587ec681f3Smrg STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \ 12597ec681f3Smrg sizeof(devinfo->name)); \ 12607ec681f3Smrg strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \ 12617ec681f3Smrg break; 12627ec681f3Smrg#include "pci_ids/i965_pci_ids.h" 12637ec681f3Smrg#include "pci_ids/iris_pci_ids.h" 12647ec681f3Smrg default: 12657ec681f3Smrg strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name)); 12667ec681f3Smrg } 12677ec681f3Smrg 12687ec681f3Smrg fill_masks(devinfo); 12697ec681f3Smrg 12707ec681f3Smrg /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: 12717ec681f3Smrg * 12727ec681f3Smrg * "Scratch Space per slice is computed based on 4 sub-slices. SW must 12737ec681f3Smrg * allocate scratch space enough so that each slice has 4 slices allowed." 12747ec681f3Smrg * 12757ec681f3Smrg * The equivalent internal documentation says that this programming note 12767ec681f3Smrg * applies to all Gfx9+ platforms. 12777ec681f3Smrg * 12787ec681f3Smrg * The hardware typically calculates the scratch space pointer by taking 12797ec681f3Smrg * the base address, and adding per-thread-scratch-space * thread ID. 12807ec681f3Smrg * Extra padding can be necessary depending how the thread IDs are 12817ec681f3Smrg * calculated for a particular shader stage. 12827ec681f3Smrg */ 12837ec681f3Smrg 12847ec681f3Smrg switch(devinfo->ver) { 12857ec681f3Smrg case 9: 12867ec681f3Smrg devinfo->max_wm_threads = 64 /* threads-per-PSD */ 12877ec681f3Smrg * devinfo->num_slices 12887ec681f3Smrg * 4; /* effective subslices per slice */ 12897ec681f3Smrg break; 12907ec681f3Smrg case 11: 12917ec681f3Smrg case 12: 12927ec681f3Smrg devinfo->max_wm_threads = 128 /* threads-per-PSD */ 12937ec681f3Smrg * devinfo->num_slices 12947ec681f3Smrg * 8; /* subslices per slice */ 12957ec681f3Smrg break; 12967ec681f3Smrg default: 12977ec681f3Smrg assert(devinfo->ver < 9); 12987ec681f3Smrg break; 12997ec681f3Smrg } 13007ec681f3Smrg 13017ec681f3Smrg assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); 13027ec681f3Smrg 13037ec681f3Smrg if (devinfo->verx10 == 0) 13047ec681f3Smrg devinfo->verx10 = devinfo->ver * 10; 13057ec681f3Smrg 13067ec681f3Smrg if (devinfo->display_ver == 0) 13077ec681f3Smrg devinfo->display_ver = devinfo->ver; 13087ec681f3Smrg 13097ec681f3Smrg update_cs_workgroup_threads(devinfo); 13107ec681f3Smrg 13117ec681f3Smrg devinfo->chipset_id = pci_id; 13127ec681f3Smrg return true; 13137ec681f3Smrg} 13147ec681f3Smrg 13157ec681f3Smrg/** 13167ec681f3Smrg * for gfx8/gfx9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology 13177ec681f3Smrg * (kernel 4.13+) 13187ec681f3Smrg */ 13197ec681f3Smrgstatic bool 13207ec681f3Smrggetparam_topology(struct intel_device_info *devinfo, int fd) 13217ec681f3Smrg{ 13227ec681f3Smrg int slice_mask = 0; 13237ec681f3Smrg if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask)) 13247ec681f3Smrg goto maybe_warn; 13257ec681f3Smrg 13267ec681f3Smrg int n_eus; 13277ec681f3Smrg if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus)) 13287ec681f3Smrg goto maybe_warn; 13297ec681f3Smrg 13307ec681f3Smrg int subslice_mask = 0; 13317ec681f3Smrg if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask)) 13327ec681f3Smrg goto maybe_warn; 13337ec681f3Smrg 13347ec681f3Smrg return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus); 13357ec681f3Smrg 13367ec681f3Smrg maybe_warn: 13377ec681f3Smrg /* Only with Gfx8+ are we starting to see devices with fusing that can only 13387ec681f3Smrg * be detected at runtime. 13397ec681f3Smrg */ 13407ec681f3Smrg if (devinfo->ver >= 8) 13417ec681f3Smrg mesa_logw("Kernel 4.1 required to properly query GPU properties."); 13427ec681f3Smrg 13437ec681f3Smrg return false; 13447ec681f3Smrg} 13457ec681f3Smrg 13467ec681f3Smrg/** 13477ec681f3Smrg * preferred API for updating the topology in devinfo (kernel 4.17+) 13487ec681f3Smrg */ 13497ec681f3Smrgstatic bool 13507ec681f3Smrgquery_topology(struct intel_device_info *devinfo, int fd) 13517ec681f3Smrg{ 13527ec681f3Smrg struct drm_i915_query_topology_info *topo_info = 13537ec681f3Smrg intel_i915_query_alloc(fd, DRM_I915_QUERY_TOPOLOGY_INFO); 13547ec681f3Smrg if (topo_info == NULL) 13557ec681f3Smrg return false; 13567ec681f3Smrg 13577ec681f3Smrg update_from_topology(devinfo, topo_info); 13587ec681f3Smrg 13597ec681f3Smrg free(topo_info); 13607ec681f3Smrg 13617ec681f3Smrg return true; 13627ec681f3Smrg 13637ec681f3Smrg} 13647ec681f3Smrg 13657ec681f3Smrgint 13667ec681f3Smrgintel_get_aperture_size(int fd, uint64_t *size) 13677ec681f3Smrg{ 13687ec681f3Smrg struct drm_i915_gem_get_aperture aperture = { 0 }; 13697ec681f3Smrg 13707ec681f3Smrg int ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); 13717ec681f3Smrg if (ret == 0 && size) 13727ec681f3Smrg *size = aperture.aper_size; 13737ec681f3Smrg 13747ec681f3Smrg return ret; 13757ec681f3Smrg} 13767ec681f3Smrg 13777ec681f3Smrgstatic bool 13787ec681f3Smrghas_get_tiling(int fd) 13797ec681f3Smrg{ 13807ec681f3Smrg int ret; 13817ec681f3Smrg 13827ec681f3Smrg struct drm_i915_gem_create gem_create = { 13837ec681f3Smrg .size = 4096, 13847ec681f3Smrg }; 13857ec681f3Smrg 13867ec681f3Smrg if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { 13877ec681f3Smrg unreachable("Failed to create GEM BO"); 13887ec681f3Smrg return false; 13897ec681f3Smrg } 13907ec681f3Smrg 13917ec681f3Smrg struct drm_i915_gem_get_tiling get_tiling = { 13927ec681f3Smrg .handle = gem_create.handle, 13937ec681f3Smrg }; 13947ec681f3Smrg ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &get_tiling); 13957ec681f3Smrg 13967ec681f3Smrg struct drm_gem_close close = { 13977ec681f3Smrg .handle = gem_create.handle, 13987ec681f3Smrg }; 13997ec681f3Smrg intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); 14007ec681f3Smrg 14017ec681f3Smrg return ret == 0; 14027ec681f3Smrg} 14037ec681f3Smrg 14047ec681f3Smrgstatic void 14057ec681f3Smrgfixup_chv_device_info(struct intel_device_info *devinfo) 14067ec681f3Smrg{ 14077ec681f3Smrg assert(devinfo->is_cherryview); 14087ec681f3Smrg 14097ec681f3Smrg /* Cherryview is annoying. The number of EUs is depending on fusing and 14107ec681f3Smrg * isn't determinable from the PCI ID alone. We default to the minimum 14117ec681f3Smrg * available for that PCI ID and then compute the real value from the 14127ec681f3Smrg * subslice information we get from the kernel. 14137ec681f3Smrg */ 14147ec681f3Smrg const uint32_t subslice_total = intel_device_info_subslice_total(devinfo); 14157ec681f3Smrg const uint32_t eu_total = intel_device_info_eu_total(devinfo); 14167ec681f3Smrg 14177ec681f3Smrg /* Logical CS threads = EUs per subslice * num threads per EU */ 14187ec681f3Smrg uint32_t max_cs_threads = 14197ec681f3Smrg eu_total / subslice_total * devinfo->num_thread_per_eu; 14207ec681f3Smrg 14217ec681f3Smrg /* Fuse configurations may give more threads than expected, never less. */ 14227ec681f3Smrg if (max_cs_threads > devinfo->max_cs_threads) 14237ec681f3Smrg devinfo->max_cs_threads = max_cs_threads; 14247ec681f3Smrg 14257ec681f3Smrg update_cs_workgroup_threads(devinfo); 14267ec681f3Smrg 14277ec681f3Smrg /* Braswell is even more annoying. Its marketing name isn't determinable 14287ec681f3Smrg * from the PCI ID and is also dependent on fusing. 14297ec681f3Smrg */ 14307ec681f3Smrg if (devinfo->chipset_id != 0x22B1) 14317ec681f3Smrg return; 14327ec681f3Smrg 14337ec681f3Smrg char *bsw_model; 14347ec681f3Smrg switch (eu_total) { 14357ec681f3Smrg case 16: bsw_model = "405"; break; 14367ec681f3Smrg case 12: bsw_model = "400"; break; 14377ec681f3Smrg default: bsw_model = " "; break; 14387ec681f3Smrg } 14397ec681f3Smrg 14407ec681f3Smrg char *needle = strstr(devinfo->name, "XXX"); 14417ec681f3Smrg assert(needle); 14427ec681f3Smrg if (needle) 14437ec681f3Smrg memcpy(needle, bsw_model, 3); 14447ec681f3Smrg} 14457ec681f3Smrg 14467ec681f3Smrgstatic void 14477ec681f3Smrginit_max_scratch_ids(struct intel_device_info *devinfo) 14487ec681f3Smrg{ 14497ec681f3Smrg /* Determine the max number of subslices that potentially might be used in 14507ec681f3Smrg * scratch space ids. 14517ec681f3Smrg * 14527ec681f3Smrg * For, Gfx11+, scratch space allocation is based on the number of threads 14537ec681f3Smrg * in the base configuration. 14547ec681f3Smrg * 14557ec681f3Smrg * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and 14567ec681f3Smrg * we wish to view that there are 4 subslices per slice instead of the 14577ec681f3Smrg * actual number of subslices per slice. The documentation for 3DSTATE_PS 14587ec681f3Smrg * "Scratch Space Base Pointer" says: 14597ec681f3Smrg * 14607ec681f3Smrg * "Scratch Space per slice is computed based on 4 sub-slices. SW 14617ec681f3Smrg * must allocate scratch space enough so that each slice has 4 14627ec681f3Smrg * slices allowed." 14637ec681f3Smrg * 14647ec681f3Smrg * According to the other driver team, this applies to compute shaders 14657ec681f3Smrg * as well. This is not currently documented at all. 14667ec681f3Smrg * 14677ec681f3Smrg * For Gfx8 and older we user devinfo->subslice_total. 14687ec681f3Smrg */ 14697ec681f3Smrg unsigned subslices; 14707ec681f3Smrg if (devinfo->verx10 == 125) 14717ec681f3Smrg subslices = 32; 14727ec681f3Smrg else if (devinfo->ver == 12) 14737ec681f3Smrg subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); 14747ec681f3Smrg else if (devinfo->ver == 11) 14757ec681f3Smrg subslices = 8; 14767ec681f3Smrg else if (devinfo->ver >= 9 && devinfo->ver < 11) 14777ec681f3Smrg subslices = 4 * devinfo->num_slices; 14787ec681f3Smrg else 14797ec681f3Smrg subslices = devinfo->subslice_total; 14807ec681f3Smrg assert(subslices >= devinfo->subslice_total); 14817ec681f3Smrg 14827ec681f3Smrg unsigned scratch_ids_per_subslice; 14837ec681f3Smrg if (devinfo->ver >= 12) { 14847ec681f3Smrg /* Same as ICL below, but with 16 EUs. */ 14857ec681f3Smrg scratch_ids_per_subslice = 16 * 8; 14867ec681f3Smrg } else if (devinfo->ver >= 11) { 14877ec681f3Smrg /* The MEDIA_VFE_STATE docs say: 14887ec681f3Smrg * 14897ec681f3Smrg * "Starting with this configuration, the Maximum Number of 14907ec681f3Smrg * Threads must be set to (#EU * 8) for GPGPU dispatches. 14917ec681f3Smrg * 14927ec681f3Smrg * Although there are only 7 threads per EU in the configuration, 14937ec681f3Smrg * the FFTID is calculated as if there are 8 threads per EU, 14947ec681f3Smrg * which in turn requires a larger amount of Scratch Space to be 14957ec681f3Smrg * allocated by the driver." 14967ec681f3Smrg */ 14977ec681f3Smrg scratch_ids_per_subslice = 8 * 8; 14987ec681f3Smrg } else if (devinfo->is_haswell) { 14997ec681f3Smrg /* WaCSScratchSize:hsw 15007ec681f3Smrg * 15017ec681f3Smrg * Haswell's scratch space address calculation appears to be sparse 15027ec681f3Smrg * rather than tightly packed. The Thread ID has bits indicating 15037ec681f3Smrg * which subslice, EU within a subslice, and thread within an EU it 15047ec681f3Smrg * is. There's a maximum of two slices and two subslices, so these 15057ec681f3Smrg * can be stored with a single bit. Even though there are only 10 EUs 15067ec681f3Smrg * per subslice, this is stored in 4 bits, so there's an effective 15077ec681f3Smrg * maximum value of 16 EUs. Similarly, although there are only 7 15087ec681f3Smrg * threads per EU, this is stored in a 3 bit number, giving an 15097ec681f3Smrg * effective maximum value of 8 threads per EU. 15107ec681f3Smrg * 15117ec681f3Smrg * This means that we need to use 16 * 8 instead of 10 * 7 for the 15127ec681f3Smrg * number of threads per subslice. 15137ec681f3Smrg */ 15147ec681f3Smrg scratch_ids_per_subslice = 16 * 8; 15157ec681f3Smrg } else if (devinfo->is_cherryview) { 15167ec681f3Smrg /* Cherryview devices have either 6 or 8 EUs per subslice, and each 15177ec681f3Smrg * EU has 7 threads. The 6 EU devices appear to calculate thread IDs 15187ec681f3Smrg * as if it had 8 EUs. 15197ec681f3Smrg */ 15207ec681f3Smrg scratch_ids_per_subslice = 8 * 7; 15217ec681f3Smrg } else { 15227ec681f3Smrg scratch_ids_per_subslice = devinfo->max_cs_threads; 15237ec681f3Smrg } 15247ec681f3Smrg 15257ec681f3Smrg unsigned max_thread_ids = scratch_ids_per_subslice * subslices; 15267ec681f3Smrg 15277ec681f3Smrg if (devinfo->verx10 >= 125) { 15287ec681f3Smrg /* On GFX version 12.5, scratch access changed to a surface-based model. 15297ec681f3Smrg * Instead of each shader type having its own layout based on IDs passed 15307ec681f3Smrg * from the relevant fixed-function unit, all scratch access is based on 15317ec681f3Smrg * thread IDs like it always has been for compute. 15327ec681f3Smrg */ 15337ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) 15347ec681f3Smrg devinfo->max_scratch_ids[i] = max_thread_ids; 15357ec681f3Smrg } else { 15367ec681f3Smrg unsigned max_scratch_ids[] = { 15377ec681f3Smrg [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, 15387ec681f3Smrg [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, 15397ec681f3Smrg [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, 15407ec681f3Smrg [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, 15417ec681f3Smrg [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, 15427ec681f3Smrg [MESA_SHADER_COMPUTE] = max_thread_ids, 15437ec681f3Smrg }; 15447ec681f3Smrg STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids)); 15457ec681f3Smrg memcpy(devinfo->max_scratch_ids, max_scratch_ids, 15467ec681f3Smrg sizeof(devinfo->max_scratch_ids)); 15477ec681f3Smrg } 15487ec681f3Smrg} 15497ec681f3Smrg 15507ec681f3Smrgbool 15517ec681f3Smrgintel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) 15527ec681f3Smrg{ 15537ec681f3Smrg int devid = 0; 15547ec681f3Smrg 15557ec681f3Smrg const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); 15567ec681f3Smrg if (devid_override && strlen(devid_override) > 0) { 15577ec681f3Smrg if (geteuid() == getuid()) { 15587ec681f3Smrg devid = intel_device_name_to_pci_device_id(devid_override); 15597ec681f3Smrg /* Fallback to PCI ID. */ 15607ec681f3Smrg if (devid <= 0) 15617ec681f3Smrg devid = strtol(devid_override, NULL, 0); 15627ec681f3Smrg if (devid <= 0) { 15637ec681f3Smrg mesa_loge("Invalid INTEL_DEVID_OVERRIDE=\"%s\". " 15647ec681f3Smrg "Use a valid numeric PCI ID or one of the supported " 15657ec681f3Smrg "platform names:", devid_override); 15667ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) 15677ec681f3Smrg mesa_loge(" %s", name_map[i].name); 15687ec681f3Smrg return false; 15697ec681f3Smrg } 15707ec681f3Smrg } else { 15717ec681f3Smrg mesa_logi("Ignoring INTEL_DEVID_OVERRIDE=\"%s\" because " 15727ec681f3Smrg "real and effective user ID don't match.", devid_override); 15737ec681f3Smrg } 15747ec681f3Smrg } 15757ec681f3Smrg 15767ec681f3Smrg if (devid > 0) { 15777ec681f3Smrg if (!intel_get_device_info_from_pci_id(devid, devinfo)) 15787ec681f3Smrg return false; 15797ec681f3Smrg devinfo->no_hw = true; 15807ec681f3Smrg } else { 15817ec681f3Smrg /* query the device id */ 15827ec681f3Smrg if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid)) 15837ec681f3Smrg return false; 15847ec681f3Smrg if (!intel_get_device_info_from_pci_id(devid, devinfo)) 15857ec681f3Smrg return false; 15867ec681f3Smrg devinfo->no_hw = env_var_as_boolean("INTEL_NO_HW", false); 15877ec681f3Smrg } 15887ec681f3Smrg 15897ec681f3Smrg if (devinfo->ver == 10) { 15907ec681f3Smrg mesa_loge("Gfx10 support is redacted."); 15917ec681f3Smrg return false; 15927ec681f3Smrg } 15937ec681f3Smrg 15947ec681f3Smrg /* remaining initializion queries the kernel for device info */ 15957ec681f3Smrg if (devinfo->no_hw) 15967ec681f3Smrg return true; 15977ec681f3Smrg 15987ec681f3Smrg int timestamp_frequency; 15997ec681f3Smrg if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY, 16007ec681f3Smrg ×tamp_frequency)) 16017ec681f3Smrg devinfo->timestamp_frequency = timestamp_frequency; 16027ec681f3Smrg else if (devinfo->ver >= 10) { 16037ec681f3Smrg mesa_loge("Kernel 4.15 required to read the CS timestamp frequency."); 16047ec681f3Smrg return false; 16057ec681f3Smrg } 16067ec681f3Smrg 16077ec681f3Smrg if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision)) 16087ec681f3Smrg devinfo->revision = 0; 16097ec681f3Smrg 16107ec681f3Smrg if (!query_topology(devinfo, fd)) { 16117ec681f3Smrg if (devinfo->ver >= 10) { 16127ec681f3Smrg /* topology uAPI required for CNL+ (kernel 4.17+) */ 16137ec681f3Smrg return false; 16147ec681f3Smrg } 16157ec681f3Smrg 16167ec681f3Smrg /* else use the kernel 4.13+ api for gfx8+. For older kernels, topology 16177ec681f3Smrg * will be wrong, affecting GPU metrics. In this case, fail silently. 16187ec681f3Smrg */ 16197ec681f3Smrg getparam_topology(devinfo, fd); 16207ec681f3Smrg } 16217ec681f3Smrg 16227ec681f3Smrg if (devinfo->is_cherryview) 16237ec681f3Smrg fixup_chv_device_info(devinfo); 16247ec681f3Smrg 16257ec681f3Smrg intel_get_aperture_size(fd, &devinfo->aperture_bytes); 16267ec681f3Smrg devinfo->has_tiling_uapi = has_get_tiling(fd); 16277ec681f3Smrg 16287ec681f3Smrg devinfo->subslice_total = 0; 16297ec681f3Smrg for (uint32_t i = 0; i < devinfo->max_slices; i++) 16307ec681f3Smrg devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]); 16317ec681f3Smrg 16327ec681f3Smrg /* Gfx7 and older do not support EU/Subslice info */ 16337ec681f3Smrg assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7); 16347ec681f3Smrg devinfo->subslice_total = MAX2(devinfo->subslice_total, 1); 16357ec681f3Smrg 16367ec681f3Smrg init_max_scratch_ids(devinfo); 16377ec681f3Smrg 16387ec681f3Smrg return true; 16397ec681f3Smrg} 1640