ac_gpu_info.c revision b8e80941
1/* 2 * Copyright © 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 14 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 16 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 */ 25 26#include "ac_gpu_info.h" 27#include "sid.h" 28#include "gfx9d.h" 29 30#include "util/u_math.h" 31 32#include <stdio.h> 33 34#include <xf86drm.h> 35#include <amdgpu_drm.h> 36 37#include <amdgpu.h> 38 39#define CIK_TILE_MODE_COLOR_2D 14 40 41#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f) 42#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0 43#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4 44#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5 45#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6 46#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7 47#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8 48#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9 49#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10 50#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11 51#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12 52#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13 53#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14 54#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16 55#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17 56 57static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) 58{ 59 unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D]; 60 61 switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { 62 case CIK__PIPE_CONFIG__ADDR_SURF_P2: 63 return 2; 64 case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: 65 case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: 66 case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32: 67 case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32: 68 return 4; 69 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16: 70 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16: 71 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16: 72 case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16: 73 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16: 74 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32: 75 case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32: 76 return 8; 77 case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: 78 case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: 79 return 16; 80 default: 81 fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); 82 assert(!"this should never occur"); 83 return 2; 84 } 85} 86 87static bool has_syncobj(int fd) 88{ 89 uint64_t value; 90 if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &value)) 91 return false; 92 return value ? true : false; 93} 94 95bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, 96 struct radeon_info *info, 97 struct amdgpu_gpu_info *amdinfo) 98{ 99 struct drm_amdgpu_info_device device_info = {}; 100 struct amdgpu_buffer_size_alignments alignment_info = {}; 101 struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}; 102 struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_jpeg = {}; 103 struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {}; 104 struct amdgpu_gds_resource_info gds = {}; 105 uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; 106 int r, i, j; 107 drmDevicePtr devinfo; 108 109 /* Get PCI info. */ 110 r = drmGetDevice2(fd, 0, &devinfo); 111 if (r) { 112 fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n"); 113 return false; 114 } 115 info->pci_domain = devinfo->businfo.pci->domain; 116 info->pci_bus = devinfo->businfo.pci->bus; 117 info->pci_dev = devinfo->businfo.pci->dev; 118 info->pci_func = devinfo->businfo.pci->func; 119 drmFreeDevice(&devinfo); 120 121 /* Query hardware and driver information. */ 122 r = amdgpu_query_gpu_info(dev, amdinfo); 123 if (r) { 124 fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n"); 125 return false; 126 } 127 128 r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info), 129 &device_info); 130 if (r) { 131 fprintf(stderr, "amdgpu: amdgpu_query_info(dev_info) failed.\n"); 132 return false; 133 } 134 135 r = amdgpu_query_buffer_size_alignment(dev, &alignment_info); 136 if (r) { 137 fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n"); 138 return false; 139 } 140 141 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma); 142 if (r) { 143 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n"); 144 return false; 145 } 146 147 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &gfx); 148 if (r) { 149 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n"); 150 return false; 151 } 152 153 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute); 154 if (r) { 155 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n"); 156 return false; 157 } 158 159 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, &uvd); 160 if (r) { 161 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n"); 162 return false; 163 } 164 165 if (info->drm_major == 3 && info->drm_minor >= 17) { 166 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc); 167 if (r) { 168 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n"); 169 return false; 170 } 171 } 172 173 if (info->drm_major == 3 && info->drm_minor >= 17) { 174 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec); 175 if (r) { 176 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n"); 177 return false; 178 } 179 } 180 181 if (info->drm_major == 3 && info->drm_minor >= 17) { 182 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_ENC, 0, &vcn_enc); 183 if (r) { 184 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_enc) failed.\n"); 185 return false; 186 } 187 } 188 189 if (info->drm_major == 3 && info->drm_minor >= 27) { 190 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_JPEG, 0, &vcn_jpeg); 191 if (r) { 192 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_jpeg) failed.\n"); 193 return false; 194 } 195 } 196 197 r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, 198 &info->me_fw_version, 199 &info->me_fw_feature); 200 if (r) { 201 fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n"); 202 return false; 203 } 204 205 r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0, 206 &info->pfp_fw_version, 207 &info->pfp_fw_feature); 208 if (r) { 209 fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n"); 210 return false; 211 } 212 213 r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0, 214 &info->ce_fw_version, 215 &info->ce_fw_feature); 216 if (r) { 217 fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n"); 218 return false; 219 } 220 221 r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0, 222 &uvd_version, &uvd_feature); 223 if (r) { 224 fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n"); 225 return false; 226 } 227 228 r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCE, 0, &vce); 229 if (r) { 230 fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n"); 231 return false; 232 } 233 234 r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0, 235 &vce_version, &vce_feature); 236 if (r) { 237 fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n"); 238 return false; 239 } 240 241 r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi); 242 if (r) { 243 fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n"); 244 return false; 245 } 246 247 r = amdgpu_query_gds_info(dev, &gds); 248 if (r) { 249 fprintf(stderr, "amdgpu: amdgpu_query_gds_info failed.\n"); 250 return false; 251 } 252 253 if (info->drm_minor >= 9) { 254 struct drm_amdgpu_memory_info meminfo = {}; 255 256 r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo); 257 if (r) { 258 fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n"); 259 return false; 260 } 261 262 /* Note: usable_heap_size values can be random and can't be relied on. */ 263 info->gart_size = meminfo.gtt.total_heap_size; 264 info->vram_size = meminfo.vram.total_heap_size; 265 info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size; 266 } else { 267 /* This is a deprecated interface, which reports usable sizes 268 * (total minus pinned), but the pinned size computation is 269 * buggy, so the values returned from these functions can be 270 * random. 271 */ 272 struct amdgpu_heap_info vram, vram_vis, gtt; 273 274 r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram); 275 if (r) { 276 fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n"); 277 return false; 278 } 279 280 r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 281 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 282 &vram_vis); 283 if (r) { 284 fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n"); 285 return false; 286 } 287 288 r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, >t); 289 if (r) { 290 fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n"); 291 return false; 292 } 293 294 info->gart_size = gtt.heap_size; 295 info->vram_size = vram.heap_size; 296 info->vram_vis_size = vram_vis.heap_size; 297 } 298 299 /* Set chip identification. */ 300 info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */ 301 info->vce_harvest_config = amdinfo->vce_harvest_config; 302 303 switch (info->pci_id) { 304#define CHIPSET(pci_id, cfamily) \ 305 case pci_id: \ 306 info->family = CHIP_##cfamily; \ 307 info->name = #cfamily; \ 308 break; 309#include "pci_ids/radeonsi_pci_ids.h" 310#undef CHIPSET 311 312 default: 313 fprintf(stderr, "amdgpu: Invalid PCI ID.\n"); 314 return false; 315 } 316 317 /* Raven2 uses the same PCI IDs as Raven1, but different revision IDs. */ 318 if (info->family == CHIP_RAVEN && amdinfo->chip_rev >= 0x8) { 319 info->family = CHIP_RAVEN2; 320 info->name = "RAVEN2"; 321 } 322 323 if (info->family >= CHIP_VEGA10) 324 info->chip_class = GFX9; 325 else if (info->family >= CHIP_TONGA) 326 info->chip_class = VI; 327 else if (info->family >= CHIP_BONAIRE) 328 info->chip_class = CIK; 329 else if (info->family >= CHIP_TAHITI) 330 info->chip_class = SI; 331 else { 332 fprintf(stderr, "amdgpu: Unknown family.\n"); 333 return false; 334 } 335 336 info->marketing_name = amdgpu_get_marketing_name(dev); 337 info->is_pro_graphics = info->marketing_name && 338 (!strcmp(info->marketing_name, "Pro") || 339 !strcmp(info->marketing_name, "PRO") || 340 !strcmp(info->marketing_name, "Frontier")); 341 342 /* Set which chips have dedicated VRAM. */ 343 info->has_dedicated_vram = 344 !(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION); 345 346 /* The kernel can split large buffers in VRAM but not in GTT, so large 347 * allocations can fail or cause buffer movement failures in the kernel. 348 */ 349 if (info->has_dedicated_vram) 350 info->max_alloc_size = info->vram_size * 0.8; 351 else 352 info->max_alloc_size = info->gart_size * 0.7; 353 354 /* Set hardware information. */ 355 info->gds_size = gds.gds_total_size; 356 info->gds_gfx_partition_size = gds.gds_gfx_partition_size; 357 /* convert the shader clock from KHz to MHz */ 358 info->max_shader_clock = amdinfo->max_engine_clk / 1000; 359 info->num_tcc_blocks = device_info.num_tcc_blocks; 360 info->max_se = amdinfo->num_shader_engines; 361 info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine; 362 info->has_hw_decode = 363 (uvd.available_rings != 0) || (vcn_dec.available_rings != 0) || 364 (vcn_jpeg.available_rings != 0); 365 info->uvd_fw_version = 366 uvd.available_rings ? uvd_version : 0; 367 info->vce_fw_version = 368 vce.available_rings ? vce_version : 0; 369 info->uvd_enc_supported = 370 uvd_enc.available_rings ? true : false; 371 info->has_userptr = true; 372 info->has_syncobj = has_syncobj(fd); 373 info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20; 374 info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21; 375 info->has_ctx_priority = info->drm_minor >= 22; 376 info->has_local_buffers = info->drm_minor >= 20; 377 info->kernel_flushes_hdp_before_ib = true; 378 info->htile_cmask_support_1d_tiling = true; 379 info->si_TA_CS_BC_BASE_ADDR_allowed = true; 380 info->has_bo_metadata = true; 381 info->has_gpu_reset_status_query = true; 382 info->has_gpu_reset_counter_query = false; 383 info->has_eqaa_surface_allocator = true; 384 info->has_format_bc1_through_bc7 = true; 385 /* DRM 3.1.0 doesn't flush TC for VI correctly. */ 386 info->kernel_flushes_tc_l2_after_ib = info->chip_class != VI || 387 info->drm_minor >= 2; 388 info->has_indirect_compute_dispatch = true; 389 /* SI doesn't support unaligned loads. */ 390 info->has_unaligned_shader_loads = info->chip_class != SI; 391 /* Disable sparse mappings on SI due to VM faults in CP DMA. Enable them once 392 * these faults are mitigated in software. 393 * Disable sparse mappings on GFX9 due to hangs. 394 */ 395 info->has_sparse_vm_mappings = 396 info->chip_class >= CIK && info->chip_class <= VI && 397 info->drm_minor >= 13; 398 info->has_2d_tiling = true; 399 info->has_read_registers_query = true; 400 401 info->num_render_backends = amdinfo->rb_pipes; 402 /* The value returned by the kernel driver was wrong. */ 403 if (info->family == CHIP_KAVERI) 404 info->num_render_backends = 2; 405 406 info->clock_crystal_freq = amdinfo->gpu_counter_freq; 407 if (!info->clock_crystal_freq) { 408 fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n"); 409 info->clock_crystal_freq = 1; 410 } 411 info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */ 412 info->gb_addr_config = amdinfo->gb_addr_cfg; 413 if (info->chip_class == GFX9) { 414 info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg); 415 info->pipe_interleave_bytes = 416 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg); 417 } else { 418 info->num_tile_pipes = cik_get_num_tile_pipes(amdinfo); 419 info->pipe_interleave_bytes = 420 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo->gb_addr_cfg); 421 } 422 info->r600_has_virtual_memory = true; 423 424 assert(util_is_power_of_two_or_zero(dma.available_rings + 1)); 425 assert(util_is_power_of_two_or_zero(compute.available_rings + 1)); 426 427 info->num_sdma_rings = util_bitcount(dma.available_rings); 428 info->num_compute_rings = util_bitcount(compute.available_rings); 429 430 /* Get the number of good compute units. */ 431 info->num_good_compute_units = 0; 432 for (i = 0; i < info->max_se; i++) 433 for (j = 0; j < info->max_sh_per_se; j++) 434 info->num_good_compute_units += 435 util_bitcount(amdinfo->cu_bitmap[i][j]); 436 info->num_good_cu_per_sh = info->num_good_compute_units / 437 (info->max_se * info->max_sh_per_se); 438 439 memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, 440 sizeof(amdinfo->gb_tile_mode)); 441 info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask; 442 443 memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode, 444 sizeof(amdinfo->gb_macro_tile_mode)); 445 446 info->pte_fragment_size = alignment_info.size_local; 447 info->gart_page_size = alignment_info.size_remote; 448 449 if (info->chip_class == SI) 450 info->gfx_ib_pad_with_type2 = TRUE; 451 452 unsigned ib_align = 0; 453 ib_align = MAX2(ib_align, gfx.ib_start_alignment); 454 ib_align = MAX2(ib_align, compute.ib_start_alignment); 455 ib_align = MAX2(ib_align, dma.ib_start_alignment); 456 ib_align = MAX2(ib_align, uvd.ib_start_alignment); 457 ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment); 458 ib_align = MAX2(ib_align, vce.ib_start_alignment); 459 ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); 460 ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); 461 ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment); 462 assert(ib_align); 463 info->ib_start_alignment = ib_align; 464 465 if (info->drm_minor >= 31 && 466 (info->family == CHIP_RAVEN || 467 info->family == CHIP_RAVEN2)) { 468 if (info->num_render_backends == 1) 469 info->use_display_dcc_unaligned = true; 470 else 471 info->use_display_dcc_with_retile_blit = true; 472 } 473 return true; 474} 475 476void ac_compute_driver_uuid(char *uuid, size_t size) 477{ 478 char amd_uuid[] = "AMD-MESA-DRV"; 479 480 assert(size >= sizeof(amd_uuid)); 481 482 memset(uuid, 0, size); 483 strncpy(uuid, amd_uuid, size); 484} 485 486void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size) 487{ 488 uint32_t *uint_uuid = (uint32_t*)uuid; 489 490 assert(size >= sizeof(uint32_t)*4); 491 492 /** 493 * Use the device info directly instead of using a sha1. GL/VK UUIDs 494 * are 16 byte vs 20 byte for sha1, and the truncation that would be 495 * required would get rid of part of the little entropy we have. 496 * */ 497 memset(uuid, 0, size); 498 uint_uuid[0] = info->pci_domain; 499 uint_uuid[1] = info->pci_bus; 500 uint_uuid[2] = info->pci_dev; 501 uint_uuid[3] = info->pci_func; 502} 503 504void ac_print_gpu_info(struct radeon_info *info) 505{ 506 printf("Device info:\n"); 507 printf(" pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", 508 info->pci_domain, info->pci_bus, 509 info->pci_dev, info->pci_func); 510 printf(" pci_id = 0x%x\n", info->pci_id); 511 printf(" family = %i\n", info->family); 512 printf(" chip_class = %i\n", info->chip_class); 513 printf(" num_compute_rings = %u\n", info->num_compute_rings); 514 printf(" num_sdma_rings = %i\n", info->num_sdma_rings); 515 printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq); 516 printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); 517 518 printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); 519 printf(" use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit); 520 521 printf("Memory info:\n"); 522 printf(" pte_fragment_size = %u\n", info->pte_fragment_size); 523 printf(" gart_page_size = %u\n", info->gart_page_size); 524 printf(" gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024)); 525 printf(" vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024)); 526 printf(" vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024)); 527 printf(" gds_size = %u kB\n", info->gds_size / 1024); 528 printf(" gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024); 529 printf(" max_alloc_size = %i MB\n", 530 (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024)); 531 printf(" min_alloc_size = %u\n", info->min_alloc_size); 532 printf(" address32_hi = %u\n", info->address32_hi); 533 printf(" has_dedicated_vram = %u\n", info->has_dedicated_vram); 534 535 printf("CP info:\n"); 536 printf(" gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); 537 printf(" ib_start_alignment = %u\n", info->ib_start_alignment); 538 printf(" me_fw_version = %i\n", info->me_fw_version); 539 printf(" me_fw_feature = %i\n", info->me_fw_feature); 540 printf(" pfp_fw_version = %i\n", info->pfp_fw_version); 541 printf(" pfp_fw_feature = %i\n", info->pfp_fw_feature); 542 printf(" ce_fw_version = %i\n", info->ce_fw_version); 543 printf(" ce_fw_feature = %i\n", info->ce_fw_feature); 544 545 printf("Multimedia info:\n"); 546 printf(" has_hw_decode = %u\n", info->has_hw_decode); 547 printf(" uvd_enc_supported = %u\n", info->uvd_enc_supported); 548 printf(" uvd_fw_version = %u\n", info->uvd_fw_version); 549 printf(" vce_fw_version = %u\n", info->vce_fw_version); 550 printf(" vce_harvest_config = %i\n", info->vce_harvest_config); 551 552 printf("Kernel & winsys capabilities:\n"); 553 printf(" drm = %i.%i.%i\n", info->drm_major, 554 info->drm_minor, info->drm_patchlevel); 555 printf(" has_userptr = %i\n", info->has_userptr); 556 printf(" has_syncobj = %u\n", info->has_syncobj); 557 printf(" has_syncobj_wait_for_submit = %u\n", info->has_syncobj_wait_for_submit); 558 printf(" has_fence_to_handle = %u\n", info->has_fence_to_handle); 559 printf(" has_ctx_priority = %u\n", info->has_ctx_priority); 560 printf(" has_local_buffers = %u\n", info->has_local_buffers); 561 printf(" kernel_flushes_hdp_before_ib = %u\n", info->kernel_flushes_hdp_before_ib); 562 printf(" htile_cmask_support_1d_tiling = %u\n", info->htile_cmask_support_1d_tiling); 563 printf(" si_TA_CS_BC_BASE_ADDR_allowed = %u\n", info->si_TA_CS_BC_BASE_ADDR_allowed); 564 printf(" has_bo_metadata = %u\n", info->has_bo_metadata); 565 printf(" has_gpu_reset_status_query = %u\n", info->has_gpu_reset_status_query); 566 printf(" has_gpu_reset_counter_query = %u\n", info->has_gpu_reset_counter_query); 567 printf(" has_eqaa_surface_allocator = %u\n", info->has_eqaa_surface_allocator); 568 printf(" has_format_bc1_through_bc7 = %u\n", info->has_format_bc1_through_bc7); 569 printf(" kernel_flushes_tc_l2_after_ib = %u\n", info->kernel_flushes_tc_l2_after_ib); 570 printf(" has_indirect_compute_dispatch = %u\n", info->has_indirect_compute_dispatch); 571 printf(" has_unaligned_shader_loads = %u\n", info->has_unaligned_shader_loads); 572 printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings); 573 printf(" has_2d_tiling = %u\n", info->has_2d_tiling); 574 printf(" has_read_registers_query = %u\n", info->has_read_registers_query); 575 576 printf("Shader core info:\n"); 577 printf(" max_shader_clock = %i\n", info->max_shader_clock); 578 printf(" num_good_compute_units = %i\n", info->num_good_compute_units); 579 printf(" num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh); 580 printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks); 581 printf(" max_se = %i\n", info->max_se); 582 printf(" max_sh_per_se = %i\n", info->max_sh_per_se); 583 584 printf("Render backend info:\n"); 585 printf(" num_render_backends = %i\n", info->num_render_backends); 586 printf(" num_tile_pipes = %i\n", info->num_tile_pipes); 587 printf(" pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes); 588 printf(" enabled_rb_mask = 0x%x\n", info->enabled_rb_mask); 589 printf(" max_alignment = %u\n", (unsigned)info->max_alignment); 590 591 printf("GB_ADDR_CONFIG:\n"); 592 if (info->chip_class >= GFX9) { 593 printf(" num_pipes = %u\n", 594 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); 595 printf(" pipe_interleave_size = %u\n", 596 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)); 597 printf(" max_compressed_frags = %u\n", 598 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config)); 599 printf(" bank_interleave_size = %u\n", 600 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config)); 601 printf(" num_banks = %u\n", 602 1 << G_0098F8_NUM_BANKS(info->gb_addr_config)); 603 printf(" shader_engine_tile_size = %u\n", 604 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config)); 605 printf(" num_shader_engines = %u\n", 606 1 << G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config)); 607 printf(" num_gpus = %u (raw)\n", 608 G_0098F8_NUM_GPUS_GFX9(info->gb_addr_config)); 609 printf(" multi_gpu_tile_size = %u (raw)\n", 610 G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config)); 611 printf(" num_rb_per_se = %u\n", 612 1 << G_0098F8_NUM_RB_PER_SE(info->gb_addr_config)); 613 printf(" row_size = %u\n", 614 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config)); 615 printf(" num_lower_pipes = %u (raw)\n", 616 G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config)); 617 printf(" se_enable = %u (raw)\n", 618 G_0098F8_SE_ENABLE(info->gb_addr_config)); 619 } else { 620 printf(" num_pipes = %u\n", 621 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); 622 printf(" pipe_interleave_size = %u\n", 623 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config)); 624 printf(" bank_interleave_size = %u\n", 625 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config)); 626 printf(" num_shader_engines = %u\n", 627 1 << G_0098F8_NUM_SHADER_ENGINES_GFX6(info->gb_addr_config)); 628 printf(" shader_engine_tile_size = %u\n", 629 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config)); 630 printf(" num_gpus = %u (raw)\n", 631 G_0098F8_NUM_GPUS_GFX6(info->gb_addr_config)); 632 printf(" multi_gpu_tile_size = %u (raw)\n", 633 G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config)); 634 printf(" row_size = %u\n", 635 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config)); 636 printf(" num_lower_pipes = %u (raw)\n", 637 G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config)); 638 } 639} 640 641int 642ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family) 643{ 644 if (chip_class >= GFX9) 645 return -1; 646 647 switch (family) { 648 case CHIP_OLAND: 649 case CHIP_HAINAN: 650 case CHIP_KAVERI: 651 case CHIP_KABINI: 652 case CHIP_MULLINS: 653 case CHIP_ICELAND: 654 case CHIP_CARRIZO: 655 case CHIP_STONEY: 656 return 16; 657 case CHIP_TAHITI: 658 case CHIP_PITCAIRN: 659 case CHIP_VERDE: 660 case CHIP_BONAIRE: 661 case CHIP_HAWAII: 662 case CHIP_TONGA: 663 case CHIP_FIJI: 664 case CHIP_POLARIS10: 665 case CHIP_POLARIS11: 666 case CHIP_POLARIS12: 667 case CHIP_VEGAM: 668 return 32; 669 default: 670 unreachable("Unknown GPU"); 671 } 672} 673 674void 675ac_get_raster_config(struct radeon_info *info, 676 uint32_t *raster_config_p, 677 uint32_t *raster_config_1_p, 678 uint32_t *se_tile_repeat_p) 679{ 680 unsigned raster_config, raster_config_1, se_tile_repeat; 681 682 switch (info->family) { 683 /* 1 SE / 1 RB */ 684 case CHIP_HAINAN: 685 case CHIP_KABINI: 686 case CHIP_MULLINS: 687 case CHIP_STONEY: 688 raster_config = 0x00000000; 689 raster_config_1 = 0x00000000; 690 break; 691 /* 1 SE / 4 RBs */ 692 case CHIP_VERDE: 693 raster_config = 0x0000124a; 694 raster_config_1 = 0x00000000; 695 break; 696 /* 1 SE / 2 RBs (Oland is special) */ 697 case CHIP_OLAND: 698 raster_config = 0x00000082; 699 raster_config_1 = 0x00000000; 700 break; 701 /* 1 SE / 2 RBs */ 702 case CHIP_KAVERI: 703 case CHIP_ICELAND: 704 case CHIP_CARRIZO: 705 raster_config = 0x00000002; 706 raster_config_1 = 0x00000000; 707 break; 708 /* 2 SEs / 4 RBs */ 709 case CHIP_BONAIRE: 710 case CHIP_POLARIS11: 711 case CHIP_POLARIS12: 712 raster_config = 0x16000012; 713 raster_config_1 = 0x00000000; 714 break; 715 /* 2 SEs / 8 RBs */ 716 case CHIP_TAHITI: 717 case CHIP_PITCAIRN: 718 raster_config = 0x2a00126a; 719 raster_config_1 = 0x00000000; 720 break; 721 /* 4 SEs / 8 RBs */ 722 case CHIP_TONGA: 723 case CHIP_POLARIS10: 724 raster_config = 0x16000012; 725 raster_config_1 = 0x0000002a; 726 break; 727 /* 4 SEs / 16 RBs */ 728 case CHIP_HAWAII: 729 case CHIP_FIJI: 730 case CHIP_VEGAM: 731 raster_config = 0x3a00161a; 732 raster_config_1 = 0x0000002e; 733 break; 734 default: 735 fprintf(stderr, 736 "ac: Unknown GPU, using 0 for raster_config\n"); 737 raster_config = 0x00000000; 738 raster_config_1 = 0x00000000; 739 break; 740 } 741 742 /* drm/radeon on Kaveri is buggy, so disable 1 RB to work around it. 743 * This decreases performance by up to 50% when the RB is the bottleneck. 744 */ 745 if (info->family == CHIP_KAVERI && info->drm_major == 2) 746 raster_config = 0x00000000; 747 748 /* Fiji: Old kernels have incorrect tiling config. This decreases 749 * RB performance by 25%. (it disables 1 RB in the second packer) 750 */ 751 if (info->family == CHIP_FIJI && 752 info->cik_macrotile_mode_array[0] == 0x000000e8) { 753 raster_config = 0x16000012; 754 raster_config_1 = 0x0000002a; 755 } 756 757 unsigned se_width = 8 << G_028350_SE_XSEL_GFX6(raster_config); 758 unsigned se_height = 8 << G_028350_SE_YSEL_GFX6(raster_config); 759 760 /* I don't know how to calculate this, though this is probably a good guess. */ 761 se_tile_repeat = MAX2(se_width, se_height) * info->max_se; 762 763 *raster_config_p = raster_config; 764 *raster_config_1_p = raster_config_1; 765 if (se_tile_repeat_p) 766 *se_tile_repeat_p = se_tile_repeat; 767} 768 769void 770ac_get_harvested_configs(struct radeon_info *info, 771 unsigned raster_config, 772 unsigned *cik_raster_config_1_p, 773 unsigned *raster_config_se) 774{ 775 unsigned sh_per_se = MAX2(info->max_sh_per_se, 1); 776 unsigned num_se = MAX2(info->max_se, 1); 777 unsigned rb_mask = info->enabled_rb_mask; 778 unsigned num_rb = MIN2(info->num_render_backends, 16); 779 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 780 unsigned rb_per_se = num_rb / num_se; 781 unsigned se_mask[4]; 782 unsigned se; 783 784 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 785 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 786 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 787 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 788 789 assert(num_se == 1 || num_se == 2 || num_se == 4); 790 assert(sh_per_se == 1 || sh_per_se == 2); 791 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 792 793 794 if (info->chip_class >= CIK) { 795 unsigned raster_config_1 = *cik_raster_config_1_p; 796 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 797 (!se_mask[2] && !se_mask[3]))) { 798 raster_config_1 &= C_028354_SE_PAIR_MAP; 799 800 if (!se_mask[0] && !se_mask[1]) { 801 raster_config_1 |= 802 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 803 } else { 804 raster_config_1 |= 805 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 806 } 807 *cik_raster_config_1_p = raster_config_1; 808 } 809 } 810 811 for (se = 0; se < num_se; se++) { 812 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 813 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 814 int idx = (se / 2) * 2; 815 816 raster_config_se[se] = raster_config; 817 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 818 raster_config_se[se] &= C_028350_SE_MAP; 819 820 if (!se_mask[idx]) { 821 raster_config_se[se] |= 822 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 823 } else { 824 raster_config_se[se] |= 825 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 826 } 827 } 828 829 pkr0_mask &= rb_mask; 830 pkr1_mask &= rb_mask; 831 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 832 raster_config_se[se] &= C_028350_PKR_MAP; 833 834 if (!pkr0_mask) { 835 raster_config_se[se] |= 836 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 837 } else { 838 raster_config_se[se] |= 839 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 840 } 841 } 842 843 if (rb_per_se >= 2) { 844 unsigned rb0_mask = 1 << (se * rb_per_se); 845 unsigned rb1_mask = rb0_mask << 1; 846 847 rb0_mask &= rb_mask; 848 rb1_mask &= rb_mask; 849 if (!rb0_mask || !rb1_mask) { 850 raster_config_se[se] &= C_028350_RB_MAP_PKR0; 851 852 if (!rb0_mask) { 853 raster_config_se[se] |= 854 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 855 } else { 856 raster_config_se[se] |= 857 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 858 } 859 } 860 861 if (rb_per_se > 2) { 862 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 863 rb1_mask = rb0_mask << 1; 864 rb0_mask &= rb_mask; 865 rb1_mask &= rb_mask; 866 if (!rb0_mask || !rb1_mask) { 867 raster_config_se[se] &= C_028350_RB_MAP_PKR1; 868 869 if (!rb0_mask) { 870 raster_config_se[se] |= 871 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 872 } else { 873 raster_config_se[se] |= 874 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 875 } 876 } 877 } 878 } 879 } 880} 881