/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
26 */ 27 28#include <stdbool.h> 29#include <string.h> 30#include <unistd.h> 31#include <fcntl.h> 32#include "radv_debug.h" 33#include "radv_private.h" 34#include "radv_shader.h" 35#include "radv_cs.h" 36#include "util/disk_cache.h" 37#include "util/strtod.h" 38#include "vk_util.h" 39#include <xf86drm.h> 40#include <amdgpu.h> 41#include <amdgpu_drm.h> 42#include "winsys/amdgpu/radv_amdgpu_winsys_public.h" 43#include "ac_llvm_util.h" 44#include "vk_format.h" 45#include "sid.h" 46#include "git_sha1.h" 47#include "gfx9d.h" 48#include "util/build_id.h" 49#include "util/debug.h" 50#include "util/mesa-sha1.h" 51#include "compiler/glsl_types.h" 52#include "util/xmlpool.h" 53 54static int 55radv_device_get_cache_uuid(enum radeon_family family, void *uuid) 56{ 57 struct mesa_sha1 ctx; 58 unsigned char sha1[20]; 59 unsigned ptr_size = sizeof(void*); 60 61 memset(uuid, 0, VK_UUID_SIZE); 62 _mesa_sha1_init(&ctx); 63 64 if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) || 65 !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)) 66 return -1; 67 68 _mesa_sha1_update(&ctx, &family, sizeof(family)); 69 _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size)); 70 _mesa_sha1_final(&ctx, sha1); 71 72 memcpy(uuid, sha1, VK_UUID_SIZE); 73 return 0; 74} 75 76static void 77radv_get_driver_uuid(void *uuid) 78{ 79 ac_compute_driver_uuid(uuid, VK_UUID_SIZE); 80} 81 82static void 83radv_get_device_uuid(struct radeon_info *info, void *uuid) 84{ 85 ac_compute_device_uuid(info, uuid, VK_UUID_SIZE); 86} 87 88static void 89radv_get_device_name(enum radeon_family family, char *name, size_t name_len) 90{ 91 const char *chip_string; 92 93 switch (family) { 94 case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break; 95 case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break; 96 case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break; 97 case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break; 98 case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break; 
99 case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break; 100 case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break; 101 case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break; 102 case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break; 103 case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break; 104 case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break; 105 case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break; 106 case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break; 107 case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break; 108 case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break; 109 case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break; 110 case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break; 111 case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break; 112 case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break; 113 case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break; 114 case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break; 115 case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break; 116 case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break; 117 case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break; 118 default: chip_string = "AMD RADV unknown"; break; 119 } 120 121 snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string); 122} 123 124static uint64_t 125radv_get_visible_vram_size(struct radv_physical_device *device) 126{ 127 return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size); 128} 129 130static uint64_t 131radv_get_vram_size(struct radv_physical_device *device) 132{ 133 return device->rad_info.vram_size - radv_get_visible_vram_size(device); 134} 135 136static void 137radv_physical_device_init_mem_types(struct radv_physical_device *device) 138{ 139 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); 140 uint64_t visible_vram_size = radv_get_visible_vram_size(device); 141 uint64_t vram_size = radv_get_vram_size(device); 142 int vram_index = 
-1, visible_vram_index = -1, gart_index = -1; 143 device->memory_properties.memoryHeapCount = 0; 144 if (vram_size > 0) { 145 vram_index = device->memory_properties.memoryHeapCount++; 146 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) { 147 .size = vram_size, 148 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 149 }; 150 } 151 if (visible_vram_size) { 152 visible_vram_index = device->memory_properties.memoryHeapCount++; 153 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) { 154 .size = visible_vram_size, 155 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 156 }; 157 } 158 if (device->rad_info.gart_size > 0) { 159 gart_index = device->memory_properties.memoryHeapCount++; 160 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) { 161 .size = device->rad_info.gart_size, 162 .flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 163 }; 164 } 165 166 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES); 167 unsigned type_count = 0; 168 if (vram_index >= 0) { 169 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM; 170 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 171 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 172 .heapIndex = vram_index, 173 }; 174 } 175 if (gart_index >= 0) { 176 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE; 177 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 178 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 179 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 180 (device->rad_info.has_dedicated_vram ? 
0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), 181 .heapIndex = gart_index, 182 }; 183 } 184 if (visible_vram_index >= 0) { 185 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS; 186 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 187 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 188 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 189 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 190 .heapIndex = visible_vram_index, 191 }; 192 } 193 if (gart_index >= 0) { 194 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED; 195 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 196 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 197 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 198 VK_MEMORY_PROPERTY_HOST_CACHED_BIT | 199 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), 200 .heapIndex = gart_index, 201 }; 202 } 203 device->memory_properties.memoryTypeCount = type_count; 204} 205 206static void 207radv_handle_env_var_force_family(struct radv_physical_device *device) 208{ 209 const char *family = getenv("RADV_FORCE_FAMILY"); 210 unsigned i; 211 212 if (!family) 213 return; 214 215 for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { 216 if (!strcmp(family, ac_get_llvm_processor_name(i))) { 217 /* Override family and chip_class. 
*/ 218 device->rad_info.family = i; 219 220 if (i >= CHIP_VEGA10) 221 device->rad_info.chip_class = GFX9; 222 else if (i >= CHIP_TONGA) 223 device->rad_info.chip_class = VI; 224 else if (i >= CHIP_BONAIRE) 225 device->rad_info.chip_class = CIK; 226 else 227 device->rad_info.chip_class = SI; 228 229 return; 230 } 231 } 232 233 fprintf(stderr, "radv: Unknown family: %s\n", family); 234 exit(1); 235} 236 237static VkResult 238radv_physical_device_init(struct radv_physical_device *device, 239 struct radv_instance *instance, 240 drmDevicePtr drm_device) 241{ 242 const char *path = drm_device->nodes[DRM_NODE_RENDER]; 243 VkResult result; 244 drmVersionPtr version; 245 int fd; 246 int master_fd = -1; 247 248 fd = open(path, O_RDWR | O_CLOEXEC); 249 if (fd < 0) { 250 if (instance->debug_flags & RADV_DEBUG_STARTUP) 251 radv_logi("Could not open device '%s'", path); 252 253 return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 254 } 255 256 version = drmGetVersion(fd); 257 if (!version) { 258 close(fd); 259 260 if (instance->debug_flags & RADV_DEBUG_STARTUP) 261 radv_logi("Could not get the kernel driver version for device '%s'", path); 262 263 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 264 "failed to get version %s: %m", path); 265 } 266 267 if (strcmp(version->name, "amdgpu")) { 268 drmFreeVersion(version); 269 close(fd); 270 271 if (instance->debug_flags & RADV_DEBUG_STARTUP) 272 radv_logi("Device '%s' is not using the amdgpu kernel driver.", path); 273 274 return VK_ERROR_INCOMPATIBLE_DRIVER; 275 } 276 drmFreeVersion(version); 277 278 if (instance->debug_flags & RADV_DEBUG_STARTUP) 279 radv_logi("Found compatible device '%s'.", path); 280 281 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 282 device->instance = instance; 283 284 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, 285 instance->perftest_flags); 286 if (!device->ws) { 287 result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 288 goto fail; 289 } 290 291 if 
(instance->enabled_extensions.KHR_display) { 292 master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC); 293 if (master_fd >= 0) { 294 uint32_t accel_working = 0; 295 struct drm_amdgpu_info request = { 296 .return_pointer = (uintptr_t)&accel_working, 297 .return_size = sizeof(accel_working), 298 .query = AMDGPU_INFO_ACCEL_WORKING 299 }; 300 301 if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) { 302 close(master_fd); 303 master_fd = -1; 304 } 305 } 306 } 307 308 device->master_fd = master_fd; 309 device->local_fd = fd; 310 device->ws->query_info(device->ws, &device->rad_info); 311 312 radv_handle_env_var_force_family(device); 313 314 radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name)); 315 316 if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) { 317 device->ws->destroy(device->ws); 318 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 319 "cannot generate UUID"); 320 goto fail; 321 } 322 323 /* These flags affect shader compilation. */ 324 uint64_t shader_env_flags = 325 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) | 326 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0); 327 328 /* The gpu id is already embedded in the uuid so we just pass "radv" 329 * when creating the cache. 
330 */ 331 char buf[VK_UUID_SIZE * 2 + 1]; 332 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); 333 device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags); 334 335 if (device->rad_info.chip_class < VI || 336 device->rad_info.chip_class > GFX9) 337 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); 338 339 radv_get_driver_uuid(&device->driver_uuid); 340 radv_get_device_uuid(&device->rad_info, &device->device_uuid); 341 342 if (device->rad_info.family == CHIP_STONEY || 343 device->rad_info.chip_class >= GFX9) { 344 device->has_rbplus = true; 345 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY || 346 device->rad_info.family == CHIP_VEGA12 || 347 device->rad_info.family == CHIP_RAVEN || 348 device->rad_info.family == CHIP_RAVEN2; 349 } 350 351 /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs 352 * on SI. 353 */ 354 device->has_clear_state = device->rad_info.chip_class >= CIK; 355 356 device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI; 357 358 /* Vega10/Raven need a special workaround for a hardware bug. */ 359 device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || 360 device->rad_info.family == CHIP_RAVEN; 361 362 /* Out-of-order primitive rasterization. */ 363 device->has_out_of_order_rast = device->rad_info.chip_class >= VI && 364 device->rad_info.max_se >= 2; 365 device->out_of_order_rast_allowed = device->has_out_of_order_rast && 366 !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER); 367 368 device->dcc_msaa_allowed = 369 (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); 370 371 /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. 
*/ 372 device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || 373 (device->rad_info.chip_class >= VI && 374 device->rad_info.me_fw_feature >= 41); 375 376 radv_physical_device_init_mem_types(device); 377 radv_fill_device_extension_table(device, &device->supported_extensions); 378 379 device->bus_info = *drm_device->businfo.pci; 380 381 if ((device->instance->debug_flags & RADV_DEBUG_INFO)) 382 ac_print_gpu_info(&device->rad_info); 383 384 /* The WSI is structured as a layer on top of the driver, so this has 385 * to be the last part of initialization (at least until we get other 386 * semi-layers). 387 */ 388 result = radv_init_wsi(device); 389 if (result != VK_SUCCESS) { 390 device->ws->destroy(device->ws); 391 vk_error(instance, result); 392 goto fail; 393 } 394 395 return VK_SUCCESS; 396 397fail: 398 close(fd); 399 if (master_fd != -1) 400 close(master_fd); 401 return result; 402} 403 404static void 405radv_physical_device_finish(struct radv_physical_device *device) 406{ 407 radv_finish_wsi(device); 408 device->ws->destroy(device->ws); 409 disk_cache_destroy(device->disk_cache); 410 close(device->local_fd); 411 if (device->master_fd != -1) 412 close(device->master_fd); 413} 414 415static void * 416default_alloc_func(void *pUserData, size_t size, size_t align, 417 VkSystemAllocationScope allocationScope) 418{ 419 return malloc(size); 420} 421 422static void * 423default_realloc_func(void *pUserData, void *pOriginal, size_t size, 424 size_t align, VkSystemAllocationScope allocationScope) 425{ 426 return realloc(pOriginal, size); 427} 428 429static void 430default_free_func(void *pUserData, void *pMemory) 431{ 432 free(pMemory); 433} 434 435static const VkAllocationCallbacks default_alloc = { 436 .pUserData = NULL, 437 .pfnAllocation = default_alloc_func, 438 .pfnReallocation = default_realloc_func, 439 .pfnFree = default_free_func, 440}; 441 442static const struct debug_control radv_debug_options[] = { 443 {"nofastclears", 
RADV_DEBUG_NO_FAST_CLEARS}, 444 {"nodcc", RADV_DEBUG_NO_DCC}, 445 {"shaders", RADV_DEBUG_DUMP_SHADERS}, 446 {"nocache", RADV_DEBUG_NO_CACHE}, 447 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, 448 {"nohiz", RADV_DEBUG_NO_HIZ}, 449 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE}, 450 {"unsafemath", RADV_DEBUG_UNSAFE_MATH}, 451 {"allbos", RADV_DEBUG_ALL_BOS}, 452 {"noibs", RADV_DEBUG_NO_IBS}, 453 {"spirv", RADV_DEBUG_DUMP_SPIRV}, 454 {"vmfaults", RADV_DEBUG_VM_FAULTS}, 455 {"zerovram", RADV_DEBUG_ZERO_VRAM}, 456 {"syncshaders", RADV_DEBUG_SYNC_SHADERS}, 457 {"nosisched", RADV_DEBUG_NO_SISCHED}, 458 {"preoptir", RADV_DEBUG_PREOPTIR}, 459 {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS}, 460 {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER}, 461 {"info", RADV_DEBUG_INFO}, 462 {"errors", RADV_DEBUG_ERRORS}, 463 {"startup", RADV_DEBUG_STARTUP}, 464 {"checkir", RADV_DEBUG_CHECKIR}, 465 {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, 466 {"nobinning", RADV_DEBUG_NOBINNING}, 467 {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT}, 468 {NULL, 0} 469}; 470 471const char * 472radv_get_debug_option_name(int id) 473{ 474 assert(id < ARRAY_SIZE(radv_debug_options) - 1); 475 return radv_debug_options[id].string; 476} 477 478static const struct debug_control radv_perftest_options[] = { 479 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN}, 480 {"sisched", RADV_PERFTEST_SISCHED}, 481 {"localbos", RADV_PERFTEST_LOCAL_BOS}, 482 {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, 483 {"bolist", RADV_PERFTEST_BO_LIST}, 484 {NULL, 0} 485}; 486 487const char * 488radv_get_perftest_option_name(int id) 489{ 490 assert(id < ARRAY_SIZE(radv_perftest_options) - 1); 491 return radv_perftest_options[id].string; 492} 493 494static void 495radv_handle_per_app_options(struct radv_instance *instance, 496 const VkApplicationInfo *info) 497{ 498 const char *name = info ? 
info->pApplicationName : NULL; 499 500 if (!name) 501 return; 502 503 if (!strcmp(name, "Talos - Linux - 32bit") || 504 !strcmp(name, "Talos - Linux - 64bit")) { 505 if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) { 506 /* Force enable LLVM sisched for Talos because it looks 507 * safe and it gives few more FPS. 508 */ 509 instance->perftest_flags |= RADV_PERFTEST_SISCHED; 510 } 511 } else if (!strcmp(name, "DOOM_VFR")) { 512 /* Work around a Doom VFR game bug */ 513 instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; 514 } else if (!strcmp(name, "MonsterHunterWorld.exe")) { 515 /* Workaround for a WaW hazard when LLVM moves/merges 516 * load/store memory operations. 517 * See https://reviews.llvm.org/D61313 518 */ 519 if (HAVE_LLVM < 0x900) 520 instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT; 521 } 522} 523 524static int radv_get_instance_extension_index(const char *name) 525{ 526 for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) { 527 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0) 528 return i; 529 } 530 return -1; 531} 532 533static const char radv_dri_options_xml[] = 534DRI_CONF_BEGIN 535 DRI_CONF_SECTION_PERFORMANCE 536 DRI_CONF_ADAPTIVE_SYNC("true") 537 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) 538 DRI_CONF_SECTION_END 539DRI_CONF_END; 540 541static void radv_init_dri_options(struct radv_instance *instance) 542{ 543 driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml); 544 driParseConfigFiles(&instance->dri_options, 545 &instance->available_dri_options, 546 0, "radv", NULL); 547} 548 549VkResult radv_CreateInstance( 550 const VkInstanceCreateInfo* pCreateInfo, 551 const VkAllocationCallbacks* pAllocator, 552 VkInstance* pInstance) 553{ 554 struct radv_instance *instance; 555 VkResult result; 556 557 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 558 559 uint32_t client_version; 560 if (pCreateInfo->pApplicationInfo && 561 pCreateInfo->pApplicationInfo->apiVersion != 0) 
{ 562 client_version = pCreateInfo->pApplicationInfo->apiVersion; 563 } else { 564 client_version = VK_API_VERSION_1_0; 565 } 566 567 instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8, 568 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 569 if (!instance) 570 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 571 572 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 573 574 if (pAllocator) 575 instance->alloc = *pAllocator; 576 else 577 instance->alloc = default_alloc; 578 579 instance->apiVersion = client_version; 580 instance->physicalDeviceCount = -1; 581 582 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), 583 radv_debug_options); 584 585 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), 586 radv_perftest_options); 587 588 589 if (instance->debug_flags & RADV_DEBUG_STARTUP) 590 radv_logi("Created an instance"); 591 592 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 593 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; 594 int index = radv_get_instance_extension_index(ext_name); 595 596 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) { 597 vk_free2(&default_alloc, pAllocator, instance); 598 return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT); 599 } 600 601 instance->enabled_extensions.extensions[index] = true; 602 } 603 604 result = vk_debug_report_instance_init(&instance->debug_report_callbacks); 605 if (result != VK_SUCCESS) { 606 vk_free2(&default_alloc, pAllocator, instance); 607 return vk_error(instance, result); 608 } 609 610 _mesa_locale_init(); 611 glsl_type_singleton_init_or_ref(); 612 613 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 614 615 radv_init_dri_options(instance); 616 radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo); 617 618 *pInstance = radv_instance_to_handle(instance); 619 620 return VK_SUCCESS; 621} 622 623void radv_DestroyInstance( 624 VkInstance _instance, 625 const VkAllocationCallbacks* 
pAllocator) 626{ 627 RADV_FROM_HANDLE(radv_instance, instance, _instance); 628 629 if (!instance) 630 return; 631 632 for (int i = 0; i < instance->physicalDeviceCount; ++i) { 633 radv_physical_device_finish(instance->physicalDevices + i); 634 } 635 636 VG(VALGRIND_DESTROY_MEMPOOL(instance)); 637 638 glsl_type_singleton_decref(); 639 _mesa_locale_fini(); 640 641 driDestroyOptionCache(&instance->dri_options); 642 driDestroyOptionInfo(&instance->available_dri_options); 643 644 vk_debug_report_instance_destroy(&instance->debug_report_callbacks); 645 646 vk_free(&instance->alloc, instance); 647} 648 649static VkResult 650radv_enumerate_devices(struct radv_instance *instance) 651{ 652 /* TODO: Check for more devices ? */ 653 drmDevicePtr devices[8]; 654 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; 655 int max_devices; 656 657 instance->physicalDeviceCount = 0; 658 659 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); 660 661 if (instance->debug_flags & RADV_DEBUG_STARTUP) 662 radv_logi("Found %d drm nodes", max_devices); 663 664 if (max_devices < 1) 665 return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 666 667 for (unsigned i = 0; i < (unsigned)max_devices; i++) { 668 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && 669 devices[i]->bustype == DRM_BUS_PCI && 670 devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) { 671 672 result = radv_physical_device_init(instance->physicalDevices + 673 instance->physicalDeviceCount, 674 instance, 675 devices[i]); 676 if (result == VK_SUCCESS) 677 ++instance->physicalDeviceCount; 678 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) 679 break; 680 } 681 } 682 drmFreeDevices(devices, max_devices); 683 684 return result; 685} 686 687VkResult radv_EnumeratePhysicalDevices( 688 VkInstance _instance, 689 uint32_t* pPhysicalDeviceCount, 690 VkPhysicalDevice* pPhysicalDevices) 691{ 692 RADV_FROM_HANDLE(radv_instance, instance, _instance); 693 VkResult result; 694 695 if (instance->physicalDeviceCount < 0) { 
696 result = radv_enumerate_devices(instance); 697 if (result != VK_SUCCESS && 698 result != VK_ERROR_INCOMPATIBLE_DRIVER) 699 return result; 700 } 701 702 if (!pPhysicalDevices) { 703 *pPhysicalDeviceCount = instance->physicalDeviceCount; 704 } else { 705 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount); 706 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i) 707 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i); 708 } 709 710 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE 711 : VK_SUCCESS; 712} 713 714VkResult radv_EnumeratePhysicalDeviceGroups( 715 VkInstance _instance, 716 uint32_t* pPhysicalDeviceGroupCount, 717 VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) 718{ 719 RADV_FROM_HANDLE(radv_instance, instance, _instance); 720 VkResult result; 721 722 if (instance->physicalDeviceCount < 0) { 723 result = radv_enumerate_devices(instance); 724 if (result != VK_SUCCESS && 725 result != VK_ERROR_INCOMPATIBLE_DRIVER) 726 return result; 727 } 728 729 if (!pPhysicalDeviceGroupProperties) { 730 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount; 731 } else { 732 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount); 733 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) { 734 pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1; 735 pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i); 736 pPhysicalDeviceGroupProperties[i].subsetAllocation = false; 737 } 738 } 739 return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? 
VK_INCOMPLETE 740 : VK_SUCCESS; 741} 742 743void radv_GetPhysicalDeviceFeatures( 744 VkPhysicalDevice physicalDevice, 745 VkPhysicalDeviceFeatures* pFeatures) 746{ 747 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 748 memset(pFeatures, 0, sizeof(*pFeatures)); 749 750 *pFeatures = (VkPhysicalDeviceFeatures) { 751 .robustBufferAccess = true, 752 .fullDrawIndexUint32 = true, 753 .imageCubeArray = true, 754 .independentBlend = true, 755 .geometryShader = true, 756 .tessellationShader = true, 757 .sampleRateShading = true, 758 .dualSrcBlend = true, 759 .logicOp = true, 760 .multiDrawIndirect = true, 761 .drawIndirectFirstInstance = true, 762 .depthClamp = true, 763 .depthBiasClamp = true, 764 .fillModeNonSolid = true, 765 .depthBounds = true, 766 .wideLines = true, 767 .largePoints = true, 768 .alphaToOne = true, 769 .multiViewport = true, 770 .samplerAnisotropy = true, 771 .textureCompressionETC2 = radv_device_supports_etc(pdevice), 772 .textureCompressionASTC_LDR = false, 773 .textureCompressionBC = true, 774 .occlusionQueryPrecise = true, 775 .pipelineStatisticsQuery = true, 776 .vertexPipelineStoresAndAtomics = true, 777 .fragmentStoresAndAtomics = true, 778 .shaderTessellationAndGeometryPointSize = true, 779 .shaderImageGatherExtended = true, 780 .shaderStorageImageExtendedFormats = true, 781 .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, 782 .shaderUniformBufferArrayDynamicIndexing = true, 783 .shaderSampledImageArrayDynamicIndexing = true, 784 .shaderStorageBufferArrayDynamicIndexing = true, 785 .shaderStorageImageArrayDynamicIndexing = true, 786 .shaderStorageImageReadWithoutFormat = true, 787 .shaderStorageImageWriteWithoutFormat = true, 788 .shaderClipDistance = true, 789 .shaderCullDistance = true, 790 .shaderFloat64 = true, 791 .shaderInt64 = true, 792 .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, 793 .sparseBinding = true, 794 .variableMultisampleRate = true, 795 .inheritedQueries = true, 796 }; 797} 798 
799void radv_GetPhysicalDeviceFeatures2( 800 VkPhysicalDevice physicalDevice, 801 VkPhysicalDeviceFeatures2 *pFeatures) 802{ 803 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 804 vk_foreach_struct(ext, pFeatures->pNext) { 805 switch (ext->sType) { 806 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { 807 VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext; 808 features->variablePointersStorageBuffer = true; 809 features->variablePointers = true; 810 break; 811 } 812 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { 813 VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext; 814 features->multiview = true; 815 features->multiviewGeometryShader = true; 816 features->multiviewTessellationShader = true; 817 break; 818 } 819 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { 820 VkPhysicalDeviceShaderDrawParametersFeatures *features = 821 (VkPhysicalDeviceShaderDrawParametersFeatures*)ext; 822 features->shaderDrawParameters = true; 823 break; 824 } 825 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { 826 VkPhysicalDeviceProtectedMemoryFeatures *features = 827 (VkPhysicalDeviceProtectedMemoryFeatures*)ext; 828 features->protectedMemory = false; 829 break; 830 } 831 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { 832 VkPhysicalDevice16BitStorageFeatures *features = 833 (VkPhysicalDevice16BitStorageFeatures*)ext; 834 bool enabled = pdevice->rad_info.chip_class >= VI; 835 features->storageBuffer16BitAccess = enabled; 836 features->uniformAndStorageBuffer16BitAccess = enabled; 837 features->storagePushConstant16 = enabled; 838 features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900; 839 break; 840 } 841 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { 842 VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = 843 (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext; 844 
			/* NOTE(review): this chunk is the interior of the
			 * radv_GetPhysicalDeviceFeatures2() pNext switch; the
			 * function header and earlier cases are above this view.
			 * The first assignment presumably belongs to the
			 * SAMPLER_YCBCR_CONVERSION features case — confirm against
			 * the preceding case label. */
			features->samplerYcbcrConversion = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			/* All dynamic/non-uniform indexing and update-after-bind
			 * features are advertised unconditionally here. */
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = true;
			features->shaderSampledImageArrayNonUniformIndexing = true;
			features->shaderStorageBufferArrayNonUniformIndexing = true;
			features->shaderStorageImageArrayNonUniformIndexing = true;
			features->shaderInputAttachmentArrayNonUniformIndexing = true;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			/* Secondary command buffers inheriting the conditional
			 * rendering state is not supported. */
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			features->geometryStreams = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
			VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
				(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
			/* Only enabled on CIK and newer chips. */
			features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			/* Capture/replay and multi-device BDA are not supported. */
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
			VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
				(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
			features->hostQueryReset = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
			VkPhysicalDevice8BitStorageFeaturesKHR *features =
				(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
			/* 8-bit storage is gated on VI (GFX8) and newer. */
			bool enabled = pdevice->rad_info.chip_class >= VI;
			features->storageBuffer8BitAccess = enabled;
			features->uniformAndStorageBuffer8BitAccess = enabled;
			features->storagePushConstant8 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
			/* shaderFloat16 requires VI+ hardware and LLVM >= 8.0. */
			features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800;
			features->shaderInt8 = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
			VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
				(VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
			/* TODO: Enable this once the driver supports 64-bit
			 * compare&swap atomic operations.
			 */
			features->shaderBufferInt64Atomics = false;
			features->shaderSharedInt64Atomics = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

			features->inlineUniformBlock = true;
			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
			/* Only the linear group mode is supported, not quads. */
			features->computeDerivativeGroupQuads = false;
			features->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
			features->ycbcrImageArrays = true;
			break;
		}
		default:
			/* Unknown chained structs are left untouched, as required
			 * by the Vulkan spec. */
			break;
		}
	}
	/* Fill the core VkPhysicalDeviceFeatures part last. */
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

/* Reports the core physical-device properties and limits.
 * Limits are mostly static; a few depend on the chip generation. */
void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. the combined image & samples object count as one of
	 * both. This limit is for the pipeline layout, not for the set layout, but
	 * there is no set limit, so we just set a pipeline limit. I don't think
	 * any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D                      = (1 << 14),
		.maxImageDimension2D                      = (1 << 14),
		.maxImageDimension3D                      = (1 << 11),
		.maxImageDimensionCube                    = (1 << 14),
		.maxImageArrayLayers                      = (1 << 11),
		.maxTexelBufferElements                   = 128 * 1024 * 1024,
		.maxUniformBufferRange                    = UINT32_MAX,
		.maxStorageBufferRange                    = UINT32_MAX,
		.maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount                 = UINT32_MAX,
		.maxSamplerAllocationCount                = 64 * 1024,
		.bufferImageGranularity                   = 64, /* A cache line */
		.sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets                   = MAX_SETS,
		.maxPerStageDescriptorSamplers            = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
		.maxPerStageResources                     = max_descriptor_set_size,
		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
		.maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
		.maxVertexInputBindings                   = MAX_VBS,
		.maxVertexInputAttributeOffset            = 2047,
		.maxVertexInputBindingStride              = 2048,
		.maxVertexOutputComponents                = 128,
		.maxTessellationGenerationLevel           = 64,
		.maxTessellationPatchSize                 = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations             = 127,
		.maxGeometryInputComponents               = 64,
		.maxGeometryOutputComponents              = 128,
		.maxGeometryOutputVertices                = 256,
		.maxGeometryTotalOutputComponents         = 1024,
		.maxFragmentInputComponents               = 128,
		.maxFragmentOutputAttachments             = 8,
		.maxFragmentDualSrcAttachments            = 1,
		.maxFragmentCombinedOutputResources       = 8,
		.maxComputeSharedMemorySize               = 32768,
		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations           = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits                    = 8,
		.subTexelPrecisionBits                    = 8,
		.mipmapPrecisionBits                      = 8,
		.maxDrawIndexedIndexValue                 = UINT32_MAX,
		.maxDrawIndirectCount                     = UINT32_MAX,
		.maxSamplerLodBias                        = 16,
		.maxSamplerAnisotropy                     = 16,
		.maxViewports                             = MAX_VIEWPORTS,
		.maxViewportDimensions                    = { (1 << 14), (1 << 14) },
		.viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits                     = 8,
		.minMemoryMapAlignment                    = 4096, /* A page */
		.minTexelBufferOffsetAlignment            = 1,
		.minUniformBufferOffsetAlignment          = 4,
		.minStorageBufferOffsetAlignment          = 4,
		.minTexelOffset                           = -32,
		.maxTexelOffset                           = 31,
		.minTexelGatherOffset                     = -32,
		.maxTexelGatherOffset                     = 31,
		.minInterpolationOffset                   = -2,
		.maxInterpolationOffset                   = 2,
		.subPixelInterpolationOffsetBits          = 8,
		.maxFramebufferWidth                      = (1 << 14),
		.maxFramebufferHeight                     = (1 << 14),
		.maxFramebufferLayers                     = (1 << 10),
		.framebufferColorSampleCounts             = sample_counts,
		.framebufferDepthSampleCounts             = sample_counts,
		.framebufferStencilSampleCounts           = sample_counts,
		.framebufferNoAttachmentsSampleCounts     = sample_counts,
		.maxColorAttachments                      = MAX_RTS,
		.sampledImageColorSampleCounts            = sample_counts,
		.sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts            = sample_counts,
		.sampledImageStencilSampleCounts          = sample_counts,
		/* Multisampled storage images only on VI and newer. */
		.storageImageSampleCounts                 = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords                       = 1,
		.timestampComputeAndGraphics              = true,
		/* Nanoseconds per tick, derived from the crystal frequency. */
		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances                         = 8,
		.maxCullDistances                         = 8,
		.maxCombinedClipAndCullDistances          = 8,
		.discreteQueuePriorities                  = 2,
		.pointSizeRange                           = { 0.0, 8192.0 },
		.lineWidthRange                           = { 0.0, 7.9921875 },
		.pointSizeGranularity                     = (1.0 / 8.0),
		.lineWidthGranularity                     = (1.0 / 128.0),
		.strictLines                              = false, /* FINISHME */
		.standardSampleLocations                  = true,
		.optimalBufferCopyOffsetAlignment         = 128,
		.optimalBufferCopyRowPitchAlignment       = 128,
		.nonCoherentAtomSize                      = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

/* Fills the core properties, then walks the pNext chain and fills every
 * extension property struct RADV knows about; unknown sTypes are ignored. */
void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2                *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			/* No LUID support; LUIDs are only meaningful on Windows. */
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
			VkPhysicalDevicePointClippingProperties *properties =
				(VkPhysicalDevicePointClippingProperties*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT
*properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
				(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			/* Imported host pointers must be page-aligned. */
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
				(VkPhysicalDeviceSubgroupProperties*)ext;
			/* Wave64: a subgroup is one 64-lane wavefront. */
			properties->subgroupSize = 64;
			properties->supportedStages = VK_SHADER_STAGE_ALL;
			properties->supportedOperations =
						VK_SUBGROUP_FEATURE_BASIC_BIT |
						VK_SUBGROUP_FEATURE_BALLOT_BIT |
						VK_SUBGROUP_FEATURE_QUAD_BIT |
						VK_SUBGROUP_FEATURE_VOTE_BIT;
			/* Arithmetic and shuffle ops only on VI and newer. */
			if (pdevice->rad_info.chip_class >= VI) {
				properties->supportedOperations |=
						VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
						VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
						VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
			}
			properties->quadOperationsInAllStages = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
				(VkPhysicalDeviceMaintenance3Properties*)ext;
			/* Make sure everything is addressable by a signed 32-bit int, and
			 * our largest descriptors are 96 bytes. */
			properties->maxPerSetDescriptors = (1ull << 31) / 96;
			/* Our buffer size fields allow only this much */
			properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
			VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
			/* GFX6-8 only support single channel min/max filter. */
			properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
			properties->filterMinmaxSingleComponentFormats = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.num_good_cu_per_sh;
			properties->simdPerComputeUnit = 4;
			/* These VI-family chips run 8 waves/SIMD, the rest 10. */
			properties->wavefrontsPerSimd =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ||
				pdevice->rad_info.family == CHIP_POLARIS10 ||
				pdevice->rad_info.family == CHIP_POLARIS11 ||
				pdevice->rad_info.family == CHIP_POLARIS12 ||
				pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
			properties->minSgprAllocation =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;
			properties->maxSgprAllocation =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;

			/* VGPR. */
			properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
			properties->minVgprAllocation = 4;
			properties->maxVgprAllocation = 256;
			properties->vgprAllocationGranularity = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
			VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
				(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
			properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
			properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
			properties->shaderSampledImageArrayNonUniformIndexingNative = false;
			properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
			properties->shaderStorageImageArrayNonUniformIndexingNative = false;
			properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
			properties->robustBufferAccessUpdateAfterBind = false;
			properties->quadDivergentImplicitLod = false;

			/* Same 2 GiB budget as the non-update-after-bind limits,
			 * minus the space reserved for inline uniform blocks. */
			size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
				MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
			          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
			           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
			           32 /* sampler, largest when combined with image */ +
			           64 /* sampled image */ +
			           64 /* storage image */);
			properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
			properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			properties->protectedNoFault = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = VK_FALSE;
			properties->conservativePointAndLineRasterization = VK_FALSE;
			properties->degenerateTrianglesRasterized = VK_FALSE;
			properties->degenerateLinesRasterized = VK_FALSE;
			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
			VkPhysicalDeviceDriverPropertiesKHR *driver_props =
				(VkPhysicalDeviceDriverPropertiesKHR *) ext;

			driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
			memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
			strcpy(driver_props->driverName, "radv");

			memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
			snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
				 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
				 " (LLVM " MESA_LLVM_VERSION_STRING ")");

			driver_props->conformanceVersion = (VkConformanceVersionKHR) {
				.major = 1,
				.minor = 1,
				.subminor = 2,
				.patch = 0,
			};
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
			properties->maxTransformFeedbackStreamDataSize = 512;
			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
			properties->maxTransformFeedbackBufferDataStride = 512;
			properties->transformFeedbackQueries = true;
			properties->transformFeedbackStreamsLinesTriangles = false;
			properties->transformFeedbackRasterizationStreamSelect = false;
			properties->transformFeedbackDraw = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
			VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
				(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

			props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
			props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			break;
		}
		default:
			break;
		}
	}
}

/* Shared worker for the queue-family queries.
 * When pQueueFamilyProperties is NULL only the family count is returned in
 * *pCount; otherwise up to *pCount entries are written and *pCount is set to
 * the number actually written. A second (compute-only) family is exposed
 * unless compute rings are absent or disabled via RADV_DEBUG_NO_COMPUTE_QUEUE. */
static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		/* Family 0: the universal GFX queue. */
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount >
idx) {
			/* Family 1: the async compute queues. */
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	/* Report how many entries were actually written. */
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	/* Build the pointer array the worker expects; only the first *pCount
	 * entries are ever dereferenced. */
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2                   *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	/* Point at the embedded core structs inside the *2 wrappers. */
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	/* Memory types/heaps are precomputed at physical-device init. */
	*pMemoryProperties = physical_device->memory_properties;
}

/* Fills VK_EXT_memory_budget data from live winsys usage counters. */
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	uint64_t gtt_size = device->rad_info.gart_size;
	uint64_t heap_budget, heap_usage;

	/* For all memory heaps, the computation of budget is as follow:
	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
	 *
	 * The Vulkan spec 1.1.97 says that the budget should include any
	 * currently allocated device memory.
	 *
	 * Note that the application heap usages are not really accurate (eg.
	 * in presence of shared buffers).
	 */
	for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
		uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;

		switch (device->mem_type_indices[i]) {
		case RADV_MEM_TYPE_VRAM:
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM);

			heap_budget = vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM_VIS);

			heap_budget = visible_vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_GTT);

			heap_budget = gtt_size -
				device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		default:
			break;
		}
	}

	/* The heapBudget and heapUsage values must be zero for array elements
	 * greater than or equal to
	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
	 */
	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
		memoryBudget->heapBudget[i] = 0;
		memoryBudget->heapUsage[i] = 0;
	}
}

void radv_GetPhysicalDeviceMemoryProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);

	/* Optionally fill VK_EXT_memory_budget info if it was chained. */
	VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
		vk_find_struct(pMemoryProperties->pNext,
			       PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
	if (memory_budget)
		radv_get_memory_budget_properties(physicalDevice, memory_budget);
}

/* Reports which memory types can back an imported host pointer.
 * Only the GTT-cached type is eligible. */
VkResult radv_GetMemoryHostPointerPropertiesEXT(
	VkDevice                                    _device,
	VkExternalMemoryHandleTypeFlagBits          handleType,
	const void                                 *pHostPointer,
	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	switch (handleType)
	{
	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
		const struct radv_physical_device *physical_device = device->physical_device;
		uint32_t memoryTypeBits = 0;
		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
			if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
				memoryTypeBits = (1 << i);
				break;
			}
		}
		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
		return VK_SUCCESS;
	}
	default:
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	}
}

/* Maps the EXT global-priority enum onto the winsys context priority. */
static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

/* Initializes one radv_queue and creates its hardware context.
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY (as an int). */
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		VkDeviceQueueCreateFlags flags,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);
	queue->flags = flags;

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

/* Releases the HW context, preamble command streams and ring buffers
 * owned by a queue; each is destroyed only if it was created. */
static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

/* Sets up an empty, mutex-protected global BO list. */
static void
radv_bo_list_init(struct radv_bo_list *bo_list)
{
	pthread_mutex_init(&bo_list->mutex, NULL);
	bo_list->list.count = bo_list->capacity = 0;
	bo_list->list.bos = NULL;
}

static void
radv_bo_list_finish(struct radv_bo_list *bo_list)
{
	free(bo_list->list.bos);
	pthread_mutex_destroy(&bo_list->mutex);
}

/* Appends a BO to the device's global BO list (no-op for local BOs or
 * when the global list is disabled). The array grows by doubling. */
static VkResult radv_bo_list_add(struct radv_device *device,
				 struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (bo->is_local)
		return VK_SUCCESS;

	if (unlikely(!device->use_global_bo_list))
		return VK_SUCCESS;

	pthread_mutex_lock(&bo_list->mutex);
	if (bo_list->list.count == bo_list->capacity) {
		unsigned capacity = MAX2(4, bo_list->capacity * 2);
		/* realloc into a temporary so the old array survives failure. */
		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));

		if (!data) {
			pthread_mutex_unlock(&bo_list->mutex);
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		}

		bo_list->list.bos = (struct radeon_winsys_bo**)data;
		bo_list->capacity = capacity;
	}

	bo_list->list.bos[bo_list->list.count++] = bo;
	pthread_mutex_unlock(&bo_list->mutex);
	return VK_SUCCESS;
}

/* Removes a BO from the global list by swapping in the last element
 * (order is not preserved). No-op if the BO is absent. */
static void radv_bo_list_remove(struct radv_device *device,
				struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (bo->is_local)
		return;

	if (unlikely(!device->use_global_bo_list))
		return;

	pthread_mutex_lock(&bo_list->mutex);
	for(unsigned i = 0; i < bo_list->list.count; ++i) {
		if (bo_list->list.bos[i] == bo) {
			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
			--bo_list->list.count;
			break;
		}
	}
	pthread_mutex_unlock(&bo_list->mutex);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
						       device->physical_device->rad_info.family);
}

/* Returns the index of a device extension by name, or -1 if unknown. */
static int radv_get_device_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

/* Reads an integer from the environment variable `name`; falls back to
 * default_value when unset or not parseable as a number (base auto-detected
 * by strtol with base 0: decimal, 0x hex, or 0 octal). */
static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str;
	int result;

	str = getenv(name);
	if (!str) {
		result = default_value;
	} else {
		char *endptr;

		result = strtol(str, &endptr, 0);
		if (str == endptr) {
			/* No digits found. */
			result = default_value;
		}
	}

	return result;
}

VkResult radv_CreateDevice(
	VkPhysicalDevice                            physicalDevice,
	const VkDeviceCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDevice*                                   pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		/* VkPhysicalDeviceFeatures is treated as a flat VkBool32
		 * array so every member can be compared in one loop. */
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
			    sizeof(*device), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_device_extension_index(ext_name);
		if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
			vk_free(&device->alloc, device);
			return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		device->enabled_extensions.extensions[index] = true;
	}

	keep_shader_info = device->enabled_extensions.AMD_shader_info;

	/* With update after bind we can't attach bo's to the command buffer
	 * from the descriptor set anymore, so we have to use a global BO list.
	 */
	device->use_global_bo_list =
		(device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
		device->enabled_extensions.EXT_descriptor_indexing ||
		device->enabled_extensions.EXT_buffer_device_address;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	radv_bo_list_init(&device->bo_list);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q],
						 qfi, q, queue_create->flags,
						 global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING); 1852 1853 /* Disabled and not implemented for now. */ 1854 device->dfsm_allowed = device->pbb_allowed && 1855 (device->physical_device->rad_info.family == CHIP_RAVEN || 1856 device->physical_device->rad_info.family == CHIP_RAVEN2); 1857 1858#ifdef ANDROID 1859 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit; 1860#endif 1861 1862 /* The maximum number of scratch waves. Scratch space isn't divided 1863 * evenly between CUs. The number is only a function of the number of CUs. 1864 * We can decrease the constant to decrease the scratch buffer size. 1865 * 1866 * sctx->scratch_waves must be >= the maximum possible size of 1867 * 1 threadgroup, so that the hw doesn't hang from being unable 1868 * to start any. 1869 * 1870 * The recommended value is 4 per CU at most. Higher numbers don't 1871 * bring much benefit, but they still occupy chip resources (think 1872 * async compute). I've seen ~2% performance difference between 4 and 32. 1873 */ 1874 uint32_t max_threads_per_block = 2048; 1875 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, 1876 max_threads_per_block / 64); 1877 1878 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); 1879 1880 if (device->physical_device->rad_info.chip_class >= CIK) { 1881 /* If the KMD allows it (there is a KMD hw register for it), 1882 * allow launching waves out-of-order. 1883 */ 1884 device->dispatch_initiator |= S_00B800_ORDER_MODE(1); 1885 } 1886 1887 radv_device_init_gs_info(device); 1888 1889 device->tess_offchip_block_dw_size = 1890 device->physical_device->rad_info.family == CHIP_HAWAII ? 
4096 : 8192; 1891 device->has_distributed_tess = 1892 device->physical_device->rad_info.chip_class >= VI && 1893 device->physical_device->rad_info.max_se >= 2; 1894 1895 if (getenv("RADV_TRACE_FILE")) { 1896 const char *filename = getenv("RADV_TRACE_FILE"); 1897 1898 keep_shader_info = true; 1899 1900 if (!radv_init_trace(device)) 1901 goto fail; 1902 1903 fprintf(stderr, "*****************************************************************************\n"); 1904 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n"); 1905 fprintf(stderr, "*****************************************************************************\n"); 1906 1907 fprintf(stderr, "Trace file will be dumped to %s\n", filename); 1908 radv_dump_enabled_options(device, stderr); 1909 } 1910 1911 device->keep_shader_info = keep_shader_info; 1912 1913 result = radv_device_init_meta(device); 1914 if (result != VK_SUCCESS) 1915 goto fail; 1916 1917 radv_device_init_msaa(device); 1918 1919 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) { 1920 device->empty_cs[family] = device->ws->cs_create(device->ws, family); 1921 switch (family) { 1922 case RADV_QUEUE_GENERAL: 1923 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 1924 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1)); 1925 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1)); 1926 break; 1927 case RADV_QUEUE_COMPUTE: 1928 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0)); 1929 radeon_emit(device->empty_cs[family], 0); 1930 break; 1931 } 1932 device->ws->cs_finalize(device->empty_cs[family]); 1933 } 1934 1935 if (device->physical_device->rad_info.chip_class >= CIK) 1936 cik_create_gfx_config(device); 1937 1938 VkPipelineCacheCreateInfo ci; 1939 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; 1940 ci.pNext = NULL; 1941 ci.flags = 0; 1942 ci.pInitialData = NULL; 1943 ci.initialDataSize = 0; 1944 VkPipelineCache pc; 1945 
result = radv_CreatePipelineCache(radv_device_to_handle(device), 1946 &ci, NULL, &pc); 1947 if (result != VK_SUCCESS) 1948 goto fail_meta; 1949 1950 device->mem_cache = radv_pipeline_cache_from_handle(pc); 1951 1952 device->force_aniso = 1953 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1)); 1954 if (device->force_aniso >= 0) { 1955 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1956 1 << util_logbase2(device->force_aniso)); 1957 } 1958 1959 *pDevice = radv_device_to_handle(device); 1960 return VK_SUCCESS; 1961 1962fail_meta: 1963 radv_device_finish_meta(device); 1964fail: 1965 radv_bo_list_finish(&device->bo_list); 1966 1967 if (device->trace_bo) 1968 device->ws->buffer_destroy(device->trace_bo); 1969 1970 if (device->gfx_init) 1971 device->ws->buffer_destroy(device->gfx_init); 1972 1973 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 1974 for (unsigned q = 0; q < device->queue_count[i]; q++) 1975 radv_queue_finish(&device->queues[i][q]); 1976 if (device->queue_count[i]) 1977 vk_free(&device->alloc, device->queues[i]); 1978 } 1979 1980 vk_free(&device->alloc, device); 1981 return result; 1982} 1983 1984void radv_DestroyDevice( 1985 VkDevice _device, 1986 const VkAllocationCallbacks* pAllocator) 1987{ 1988 RADV_FROM_HANDLE(radv_device, device, _device); 1989 1990 if (!device) 1991 return; 1992 1993 if (device->trace_bo) 1994 device->ws->buffer_destroy(device->trace_bo); 1995 1996 if (device->gfx_init) 1997 device->ws->buffer_destroy(device->gfx_init); 1998 1999 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 2000 for (unsigned q = 0; q < device->queue_count[i]; q++) 2001 radv_queue_finish(&device->queues[i][q]); 2002 if (device->queue_count[i]) 2003 vk_free(&device->alloc, device->queues[i]); 2004 if (device->empty_cs[i]) 2005 device->ws->cs_destroy(device->empty_cs[i]); 2006 } 2007 radv_device_finish_meta(device); 2008 2009 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); 2010 
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	radv_bo_list_finish(&device->bo_list);
	vk_free(&device->alloc, device);
}

/* vkEnumerateInstanceLayerProperties: RADV exposes no layers, so a count
 * query returns 0 and a property query reports LAYER_NOT_PRESENT. */
VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

/* vkEnumerateDeviceLayerProperties: same story as the instance-level
 * query — no device layers are supported. */
VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

/* vkGetDeviceQueue2: look up the queue by family/index and hand back its
 * handle, or VK_NULL_HANDLE when the requested flags don't match the
 * flags the queue was created with (required by the spec, quoted below).
 * Indices are trusted; out-of-range values are the caller's bug. */
void radv_GetDeviceQueue2(
	VkDevice                                    _device,
	const VkDeviceQueueInfo2*                   pQueueInfo,
	VkQueue*                                    pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_queue *queue;

	queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
	if (pQueueInfo->flags != queue->flags) {
		/* From the Vulkan 1.1.70 spec:
		 *
		 * "The queue returned by vkGetDeviceQueue2 must have the same
		 * flags value from this structure as that used at device
		 * creation time in a VkDeviceQueueCreateInfo instance. If no
		 * matching flags were specified at device creation time then
		 * pQueue will return VK_NULL_HANDLE."
2062 */ 2063 *pQueue = VK_NULL_HANDLE; 2064 return; 2065 } 2066 2067 *pQueue = radv_queue_to_handle(queue); 2068} 2069 2070void radv_GetDeviceQueue( 2071 VkDevice _device, 2072 uint32_t queueFamilyIndex, 2073 uint32_t queueIndex, 2074 VkQueue* pQueue) 2075{ 2076 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) { 2077 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, 2078 .queueFamilyIndex = queueFamilyIndex, 2079 .queueIndex = queueIndex 2080 }; 2081 2082 radv_GetDeviceQueue2(_device, &info, pQueue); 2083} 2084 2085static void 2086fill_geom_tess_rings(struct radv_queue *queue, 2087 uint32_t *map, 2088 bool add_sample_positions, 2089 uint32_t esgs_ring_size, 2090 struct radeon_winsys_bo *esgs_ring_bo, 2091 uint32_t gsvs_ring_size, 2092 struct radeon_winsys_bo *gsvs_ring_bo, 2093 uint32_t tess_factor_ring_size, 2094 uint32_t tess_offchip_ring_offset, 2095 uint32_t tess_offchip_ring_size, 2096 struct radeon_winsys_bo *tess_rings_bo) 2097{ 2098 uint32_t *desc = &map[4]; 2099 2100 if (esgs_ring_bo) { 2101 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo); 2102 2103 /* stride 0, num records - size, add tid, swizzle, elsize4, 2104 index stride 64 */ 2105 desc[0] = esgs_va; 2106 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | 2107 S_008F04_STRIDE(0) | 2108 S_008F04_SWIZZLE_ENABLE(true); 2109 desc[2] = esgs_ring_size; 2110 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2111 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2112 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2113 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2114 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2115 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2116 S_008F0C_ELEMENT_SIZE(1) | 2117 S_008F0C_INDEX_STRIDE(3) | 2118 S_008F0C_ADD_TID_ENABLE(true); 2119 2120 /* GS entry for ES->GS ring */ 2121 /* stride 0, num records - size, elsize0, 2122 index stride 0 */ 2123 desc[4] = esgs_va; 2124 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| 2125 S_008F04_STRIDE(0) | 2126 S_008F04_SWIZZLE_ENABLE(false); 
2127 desc[6] = esgs_ring_size; 2128 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2129 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2130 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2131 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2132 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2133 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2134 S_008F0C_ELEMENT_SIZE(0) | 2135 S_008F0C_INDEX_STRIDE(0) | 2136 S_008F0C_ADD_TID_ENABLE(false); 2137 } 2138 2139 desc += 8; 2140 2141 if (gsvs_ring_bo) { 2142 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo); 2143 2144 /* VS entry for GS->VS ring */ 2145 /* stride 0, num records - size, elsize0, 2146 index stride 0 */ 2147 desc[0] = gsvs_va; 2148 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| 2149 S_008F04_STRIDE(0) | 2150 S_008F04_SWIZZLE_ENABLE(false); 2151 desc[2] = gsvs_ring_size; 2152 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2153 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2154 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2155 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2156 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2157 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2158 S_008F0C_ELEMENT_SIZE(0) | 2159 S_008F0C_INDEX_STRIDE(0) | 2160 S_008F0C_ADD_TID_ENABLE(false); 2161 2162 /* stride gsvs_itemsize, num records 64 2163 elsize 4, index stride 16 */ 2164 /* shader will patch stride and desc[2] */ 2165 desc[4] = gsvs_va; 2166 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| 2167 S_008F04_STRIDE(0) | 2168 S_008F04_SWIZZLE_ENABLE(true); 2169 desc[6] = 0; 2170 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2171 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2172 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2173 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2174 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2175 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2176 S_008F0C_ELEMENT_SIZE(1) | 2177 S_008F0C_INDEX_STRIDE(1) | 2178 S_008F0C_ADD_TID_ENABLE(true); 2179 } 2180 2181 desc += 8; 2182 2183 if (tess_rings_bo) { 
2184 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); 2185 uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; 2186 2187 desc[0] = tess_va; 2188 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | 2189 S_008F04_STRIDE(0) | 2190 S_008F04_SWIZZLE_ENABLE(false); 2191 desc[2] = tess_factor_ring_size; 2192 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2193 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2194 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2195 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2196 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2197 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2198 S_008F0C_ELEMENT_SIZE(0) | 2199 S_008F0C_INDEX_STRIDE(0) | 2200 S_008F0C_ADD_TID_ENABLE(false); 2201 2202 desc[4] = tess_offchip_va; 2203 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | 2204 S_008F04_STRIDE(0) | 2205 S_008F04_SWIZZLE_ENABLE(false); 2206 desc[6] = tess_offchip_ring_size; 2207 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2208 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2209 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2210 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2211 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2212 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2213 S_008F0C_ELEMENT_SIZE(0) | 2214 S_008F0C_INDEX_STRIDE(0) | 2215 S_008F0C_ADD_TID_ENABLE(false); 2216 } 2217 2218 desc += 8; 2219 2220 if (add_sample_positions) { 2221 /* add sample positions after all rings */ 2222 memcpy(desc, queue->device->sample_locations_1x, 8); 2223 desc += 2; 2224 memcpy(desc, queue->device->sample_locations_2x, 16); 2225 desc += 4; 2226 memcpy(desc, queue->device->sample_locations_4x, 32); 2227 desc += 8; 2228 memcpy(desc, queue->device->sample_locations_8x, 64); 2229 } 2230} 2231 2232static unsigned 2233radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p) 2234{ 2235 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK && 2236 device->physical_device->rad_info.family 
				      != CHIP_CARRIZO &&
				      device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;

	/*
	 * Per RadeonSI:
	 * This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs in SI, CIK, and GFX9 need this.
	 *
	 * Per AMDVLK:
	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
	 * Gfx7 should limit max_offchip_buffers to 508
	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
	 *
	 * Follow AMDVLK here.
	 */
	if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
	    device->physical_device->rad_info.chip_class == CIK ||
	    device->physical_device->rad_info.chip_class == SI)
		--max_offchip_buffers_per_se;

	max_offchip_buffers = max_offchip_buffers_per_se *
			      device->physical_device->rad_info.max_se;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (device->tess_offchip_block_dw_size == 4096) {
		/* 4096-dword blocks are only ever configured for Hawaii
		 * (see the device-creation code that sets
		 * tess_offchip_block_dw_size). */
		assert(device->physical_device->rad_info.family == CHIP_HAWAII);
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		assert(device->tess_offchip_block_dw_size == 8192);
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	/* Clamp to the per-generation limits documented above. */
	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		/* VI+ encodes the buffer count minus one in the register. */
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		/* SI uses a different register layout with no granularity
		 * field. */
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}

/* Emit the ES->GS and GS->VS ring sizes into the preamble command stream
 * and reference both ring BOs from it. A no-op when neither ring exists.
 * CIK+ uses uconfig registers; SI uses config registers; the two register
 * blocks carry the same pair of size values (in units of 256 bytes). */
static void
radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
			struct radeon_winsys_bo *esgs_ring_bo,
			uint32_t esgs_ring_size,
			struct radeon_winsys_bo *gsvs_ring_bo,
			uint32_t gsvs_ring_size)
{
	if (!esgs_ring_bo && !gsvs_ring_bo)
		return;

	if (esgs_ring_bo)
		radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);

	if (gsvs_ring_bo)
		radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);

	if (queue->device->physical_device->rad_info.chip_class >= CIK) {
		radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
		radeon_emit(cs, esgs_ring_size >> 8);
		radeon_emit(cs, gsvs_ring_size >> 8);
	} else {
		radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
		radeon_emit(cs, esgs_ring_size >> 8);
		radeon_emit(cs, gsvs_ring_size >> 8);
	}
}

static void
2328radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs, 2329 unsigned hs_offchip_param, unsigned tf_ring_size, 2330 struct radeon_winsys_bo *tess_rings_bo) 2331{ 2332 uint64_t tf_va; 2333 2334 if (!tess_rings_bo) 2335 return; 2336 2337 tf_va = radv_buffer_get_va(tess_rings_bo); 2338 2339 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo); 2340 2341 if (queue->device->physical_device->rad_info.chip_class >= CIK) { 2342 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, 2343 S_030938_SIZE(tf_ring_size / 4)); 2344 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, 2345 tf_va >> 8); 2346 if (queue->device->physical_device->rad_info.chip_class >= GFX9) { 2347 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, 2348 S_030944_BASE_HI(tf_va >> 40)); 2349 } 2350 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, 2351 hs_offchip_param); 2352 } else { 2353 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, 2354 S_008988_SIZE(tf_ring_size / 4)); 2355 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, 2356 tf_va >> 8); 2357 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, 2358 hs_offchip_param); 2359 } 2360} 2361 2362static void 2363radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs, 2364 struct radeon_winsys_bo *compute_scratch_bo) 2365{ 2366 uint64_t scratch_va; 2367 2368 if (!compute_scratch_bo) 2369 return; 2370 2371 scratch_va = radv_buffer_get_va(compute_scratch_bo); 2372 2373 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo); 2374 2375 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2); 2376 radeon_emit(cs, scratch_va); 2377 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | 2378 S_008F04_SWIZZLE_ENABLE(1)); 2379} 2380 2381static void 2382radv_emit_global_shader_pointers(struct radv_queue *queue, 2383 struct radeon_cmdbuf *cs, 2384 struct radeon_winsys_bo *descriptor_bo) 2385{ 2386 uint64_t va; 2387 2388 if (!descriptor_bo) 2389 return; 2390 2391 
va = radv_buffer_get_va(descriptor_bo); 2392 2393 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo); 2394 2395 if (queue->device->physical_device->rad_info.chip_class >= GFX9) { 2396 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, 2397 R_00B130_SPI_SHADER_USER_DATA_VS_0, 2398 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, 2399 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; 2400 2401 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 2402 radv_emit_shader_pointer(queue->device, cs, regs[i], 2403 va, true); 2404 } 2405 } else { 2406 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, 2407 R_00B130_SPI_SHADER_USER_DATA_VS_0, 2408 R_00B230_SPI_SHADER_USER_DATA_GS_0, 2409 R_00B330_SPI_SHADER_USER_DATA_ES_0, 2410 R_00B430_SPI_SHADER_USER_DATA_HS_0, 2411 R_00B530_SPI_SHADER_USER_DATA_LS_0}; 2412 2413 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 2414 radv_emit_shader_pointer(queue->device, cs, regs[i], 2415 va, true); 2416 } 2417 } 2418} 2419 2420static void 2421radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) 2422{ 2423 struct radv_device *device = queue->device; 2424 2425 if (device->gfx_init) { 2426 uint64_t va = radv_buffer_get_va(device->gfx_init); 2427 2428 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 2429 radeon_emit(cs, va); 2430 radeon_emit(cs, va >> 32); 2431 radeon_emit(cs, device->gfx_init_size_dw & 0xffff); 2432 2433 radv_cs_add_buffer(device->ws, cs, device->gfx_init); 2434 } else { 2435 struct radv_physical_device *physical_device = device->physical_device; 2436 si_emit_graphics(physical_device, cs); 2437 } 2438} 2439 2440static void 2441radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) 2442{ 2443 struct radv_physical_device *physical_device = queue->device->physical_device; 2444 si_emit_compute(physical_device, cs); 2445} 2446 2447static VkResult 2448radv_get_preamble_cs(struct radv_queue *queue, 2449 uint32_t scratch_size, 2450 uint32_t compute_scratch_size, 2451 uint32_t esgs_ring_size, 
2452 uint32_t gsvs_ring_size, 2453 bool needs_tess_rings, 2454 bool needs_sample_positions, 2455 struct radeon_cmdbuf **initial_full_flush_preamble_cs, 2456 struct radeon_cmdbuf **initial_preamble_cs, 2457 struct radeon_cmdbuf **continue_preamble_cs) 2458{ 2459 struct radeon_winsys_bo *scratch_bo = NULL; 2460 struct radeon_winsys_bo *descriptor_bo = NULL; 2461 struct radeon_winsys_bo *compute_scratch_bo = NULL; 2462 struct radeon_winsys_bo *esgs_ring_bo = NULL; 2463 struct radeon_winsys_bo *gsvs_ring_bo = NULL; 2464 struct radeon_winsys_bo *tess_rings_bo = NULL; 2465 struct radeon_cmdbuf *dest_cs[3] = {0}; 2466 bool add_tess_rings = false, add_sample_positions = false; 2467 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; 2468 unsigned max_offchip_buffers; 2469 unsigned hs_offchip_param = 0; 2470 unsigned tess_offchip_ring_offset; 2471 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING; 2472 if (!queue->has_tess_rings) { 2473 if (needs_tess_rings) 2474 add_tess_rings = true; 2475 } 2476 if (!queue->has_sample_positions) { 2477 if (needs_sample_positions) 2478 add_sample_positions = true; 2479 } 2480 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se; 2481 hs_offchip_param = radv_get_hs_offchip_param(queue->device, 2482 &max_offchip_buffers); 2483 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024); 2484 tess_offchip_ring_size = max_offchip_buffers * 2485 queue->device->tess_offchip_block_dw_size * 4; 2486 2487 if (scratch_size <= queue->scratch_size && 2488 compute_scratch_size <= queue->compute_scratch_size && 2489 esgs_ring_size <= queue->esgs_ring_size && 2490 gsvs_ring_size <= queue->gsvs_ring_size && 2491 !add_tess_rings && !add_sample_positions && 2492 queue->initial_preamble_cs) { 2493 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; 2494 *initial_preamble_cs = queue->initial_preamble_cs; 2495 *continue_preamble_cs = 
queue->continue_preamble_cs; 2496 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) 2497 *continue_preamble_cs = NULL; 2498 return VK_SUCCESS; 2499 } 2500 2501 if (scratch_size > queue->scratch_size) { 2502 scratch_bo = queue->device->ws->buffer_create(queue->device->ws, 2503 scratch_size, 2504 4096, 2505 RADEON_DOMAIN_VRAM, 2506 ring_bo_flags, 2507 RADV_BO_PRIORITY_SCRATCH); 2508 if (!scratch_bo) 2509 goto fail; 2510 } else 2511 scratch_bo = queue->scratch_bo; 2512 2513 if (compute_scratch_size > queue->compute_scratch_size) { 2514 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, 2515 compute_scratch_size, 2516 4096, 2517 RADEON_DOMAIN_VRAM, 2518 ring_bo_flags, 2519 RADV_BO_PRIORITY_SCRATCH); 2520 if (!compute_scratch_bo) 2521 goto fail; 2522 2523 } else 2524 compute_scratch_bo = queue->compute_scratch_bo; 2525 2526 if (esgs_ring_size > queue->esgs_ring_size) { 2527 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, 2528 esgs_ring_size, 2529 4096, 2530 RADEON_DOMAIN_VRAM, 2531 ring_bo_flags, 2532 RADV_BO_PRIORITY_SCRATCH); 2533 if (!esgs_ring_bo) 2534 goto fail; 2535 } else { 2536 esgs_ring_bo = queue->esgs_ring_bo; 2537 esgs_ring_size = queue->esgs_ring_size; 2538 } 2539 2540 if (gsvs_ring_size > queue->gsvs_ring_size) { 2541 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, 2542 gsvs_ring_size, 2543 4096, 2544 RADEON_DOMAIN_VRAM, 2545 ring_bo_flags, 2546 RADV_BO_PRIORITY_SCRATCH); 2547 if (!gsvs_ring_bo) 2548 goto fail; 2549 } else { 2550 gsvs_ring_bo = queue->gsvs_ring_bo; 2551 gsvs_ring_size = queue->gsvs_ring_size; 2552 } 2553 2554 if (add_tess_rings) { 2555 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws, 2556 tess_offchip_ring_offset + tess_offchip_ring_size, 2557 256, 2558 RADEON_DOMAIN_VRAM, 2559 ring_bo_flags, 2560 RADV_BO_PRIORITY_SCRATCH); 2561 if (!tess_rings_bo) 2562 goto fail; 2563 } else { 2564 tess_rings_bo = queue->tess_rings_bo; 2565 } 
2566 2567 if (scratch_bo != queue->scratch_bo || 2568 esgs_ring_bo != queue->esgs_ring_bo || 2569 gsvs_ring_bo != queue->gsvs_ring_bo || 2570 tess_rings_bo != queue->tess_rings_bo || 2571 add_sample_positions) { 2572 uint32_t size = 0; 2573 if (gsvs_ring_bo || esgs_ring_bo || 2574 tess_rings_bo || add_sample_positions) { 2575 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ 2576 if (add_sample_positions) 2577 size += 128; /* 64+32+16+8 = 120 bytes */ 2578 } 2579 else if (scratch_bo) 2580 size = 8; /* 2 dword */ 2581 2582 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws, 2583 size, 2584 4096, 2585 RADEON_DOMAIN_VRAM, 2586 RADEON_FLAG_CPU_ACCESS | 2587 RADEON_FLAG_NO_INTERPROCESS_SHARING | 2588 RADEON_FLAG_READ_ONLY, 2589 RADV_BO_PRIORITY_DESCRIPTOR); 2590 if (!descriptor_bo) 2591 goto fail; 2592 } else 2593 descriptor_bo = queue->descriptor_bo; 2594 2595 if (descriptor_bo != queue->descriptor_bo) { 2596 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); 2597 2598 if (scratch_bo) { 2599 uint64_t scratch_va = radv_buffer_get_va(scratch_bo); 2600 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | 2601 S_008F04_SWIZZLE_ENABLE(1); 2602 map[0] = scratch_va; 2603 map[1] = rsrc1; 2604 } 2605 2606 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions) 2607 fill_geom_tess_rings(queue, map, add_sample_positions, 2608 esgs_ring_size, esgs_ring_bo, 2609 gsvs_ring_size, gsvs_ring_bo, 2610 tess_factor_ring_size, 2611 tess_offchip_ring_offset, 2612 tess_offchip_ring_size, 2613 tess_rings_bo); 2614 2615 queue->device->ws->buffer_unmap(descriptor_bo); 2616 } 2617 2618 for(int i = 0; i < 3; ++i) { 2619 struct radeon_cmdbuf *cs = NULL; 2620 cs = queue->device->ws->cs_create(queue->device->ws, 2621 queue->queue_family_index ? 
RING_COMPUTE : RING_GFX); 2622 if (!cs) 2623 goto fail; 2624 2625 dest_cs[i] = cs; 2626 2627 if (scratch_bo) 2628 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo); 2629 2630 /* Emit initial configuration. */ 2631 switch (queue->queue_family_index) { 2632 case RADV_QUEUE_GENERAL: 2633 radv_init_graphics_state(cs, queue); 2634 break; 2635 case RADV_QUEUE_COMPUTE: 2636 radv_init_compute_state(cs, queue); 2637 break; 2638 case RADV_QUEUE_TRANSFER: 2639 break; 2640 } 2641 2642 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) { 2643 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2644 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2645 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2646 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 2647 } 2648 2649 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, 2650 gsvs_ring_bo, gsvs_ring_size); 2651 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, 2652 tess_factor_ring_size, tess_rings_bo); 2653 radv_emit_global_shader_pointers(queue, cs, descriptor_bo); 2654 radv_emit_compute_scratch(queue, cs, compute_scratch_bo); 2655 2656 if (i == 0) { 2657 si_cs_emit_cache_flush(cs, 2658 queue->device->physical_device->rad_info.chip_class, 2659 NULL, 0, 2660 queue->queue_family_index == RING_COMPUTE && 2661 queue->device->physical_device->rad_info.chip_class >= CIK, 2662 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? 
			                       RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
			                                                         RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SMEM_L1 |
			                       RADV_CMD_FLAG_INV_VMEM_L1 |
			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
			                       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		} else if (i == 1) {
			/* i == 1 is the non-full-flush initial preamble: only cache
			 * invalidations are emitted here, no partial flushes. */
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
			                       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                       queue->device->physical_device->rad_info.chip_class >= CIK,
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SMEM_L1 |
			                       RADV_CMD_FLAG_INV_VMEM_L1 |
			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
			                       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		}

		if (!queue->device->ws->cs_finalize(cs))
			goto fail;
	}

	/* All three preambles were built successfully: retire the queue's old
	 * cached preambles and install the new ones. */
	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_full_flush_preamble_cs = dest_cs[0];
	queue->initial_preamble_cs = dest_cs[1];
	queue->continue_preamble_cs = dest_cs[2];

	/* Adopt any newly allocated scratch buffers and rings, destroying the
	 * buffers they replace. */
	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (tess_rings_bo != queue->tess_rings_bo) {
		/* NOTE(review): unlike the rings above, the previous
		 * tess_rings_bo is not destroyed here — presumably it is only
		 * ever allocated once; confirm against the allocation logic
		 * earlier in this function. */
		queue->tess_rings_bo = tess_rings_bo;
		queue->has_tess_rings = true;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	if (add_sample_positions)
		queue->has_sample_positions = true;

	*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	/* With no scratch or rings, continuation submissions need no preamble
	 * at all. */
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
		*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	/* Destroy only the objects created by this call; anything already
	 * owned by the queue (pointer equality) is left untouched. */
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(tess_rings_bo);
	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}

static VkResult
radv_alloc_sem_counts(struct radv_instance *instance,
		      struct radv_winsys_sem_counts *counts,
		      int num_sems,
		      const VkSemaphore *sems,
		      VkFence _fence,
		      bool reset_temp)
{
	/* Split the given semaphores (plus an optional fence) into DRM syncobj
	 * handles and legacy winsys semaphores, allocating the two arrays in
	 * *counts with malloc (freed by radv_free_sem_info).
	 * Returns VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure.
	 * NOTE(review): reset_temp is not used in this function — confirm
	 * whether it can be dropped from the signature. */
	int syncobj_idx = 0, sem_idx = 0;

	if (num_sems == 0 && _fence == VK_NULL_HANDLE)
		return VK_SUCCESS;

	/* First pass: count each kind so the arrays can be sized. */
	for (uint32_t i = 0; i < num_sems; i++) {
		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

		if (sem->temp_syncobj || sem->syncobj)
			counts->syncobj_count++;
		else
			counts->sem_count++;
	}

	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		if (fence->temp_syncobj || fence->syncobj)
			counts->syncobj_count++;
	}

	if (counts->syncobj_count) {
		counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
		if (!counts->syncobj)
			return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	if (counts->sem_count) {
		counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
		if (!counts->sem) {
			free(counts->syncobj);
			return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	}

	/* Second pass: fill the arrays.  A temporary syncobj takes precedence
	 * over the permanent one for both semaphores and the fence. */
	for (uint32_t i = 0; i < num_sems; i++) {
		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

		if (sem->temp_syncobj) {
			counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
		}
		else if (sem->syncobj)
			counts->syncobj[syncobj_idx++] = sem->syncobj;
		else {
			assert(sem->sem);
			counts->sem[sem_idx++] = sem->sem;
		}
	}

	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		if (fence->temp_syncobj)
			counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
		else if (fence->syncobj)
			counts->syncobj[syncobj_idx++] = fence->syncobj;
	}

	return VK_SUCCESS;
}

/* Free the arrays allocated by radv_alloc_sem_counts() for both the wait
 * and signal halves of sem_info. */
static void
radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
{
	free(sem_info->wait.syncobj);
	free(sem_info->wait.sem);
	free(sem_info->signal.syncobj);
	free(sem_info->signal.sem);
}


/* Destroy and clear the temporary (imported) syncobjs of the given
 * semaphores; permanent syncobjs are left alone. */
static void radv_free_temp_syncobjs(struct radv_device *device,
				    int num_sems,
				    const VkSemaphore *sems)
{
	for (uint32_t i = 0; i < num_sems; i++) {
		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

		if (sem->temp_syncobj) {
			device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
			sem->temp_syncobj = 0;
		}
	}
}

/* Build a radv_winsys_sem_info from wait/signal semaphore lists plus an
 * optional fence (attached to the signal half).  On failure of the second
 * allocation the first is freed again, so the caller never has to clean
 * up a partially initialized sem_info. */
static VkResult
radv_alloc_sem_info(struct radv_instance *instance,
		    struct radv_winsys_sem_info *sem_info,
		    int num_wait_sems,
		    const VkSemaphore *wait_sems,
		    int num_signal_sems,
		    const VkSemaphore *signal_sems,
		    VkFence fence)
{
	VkResult ret;
	memset(sem_info, 0, sizeof(*sem_info));

	ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
	if (ret)
		return ret;
	ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
	if (ret)
		radv_free_sem_info(sem_info);

	/* caller can override these */
	sem_info->cs_emit_wait = true;
	sem_info->cs_emit_signal = true;
	return ret;
}

/* Signals fence as soon as all the work currently put on queue is done.
*/ 2882static VkResult radv_signal_fence(struct radv_queue *queue, 2883 struct radv_fence *fence) 2884{ 2885 int ret; 2886 VkResult result; 2887 struct radv_winsys_sem_info sem_info; 2888 2889 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL, 2890 radv_fence_to_handle(fence)); 2891 if (result != VK_SUCCESS) 2892 return result; 2893 2894 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, 2895 &queue->device->empty_cs[queue->queue_family_index], 2896 1, NULL, NULL, &sem_info, NULL, 2897 false, fence->fence); 2898 radv_free_sem_info(&sem_info); 2899 2900 if (ret) 2901 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST); 2902 2903 return VK_SUCCESS; 2904} 2905 2906VkResult radv_QueueSubmit( 2907 VkQueue _queue, 2908 uint32_t submitCount, 2909 const VkSubmitInfo* pSubmits, 2910 VkFence _fence) 2911{ 2912 RADV_FROM_HANDLE(radv_queue, queue, _queue); 2913 RADV_FROM_HANDLE(radv_fence, fence, _fence); 2914 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; 2915 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 2916 int ret; 2917 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; 2918 uint32_t scratch_size = 0; 2919 uint32_t compute_scratch_size = 0; 2920 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; 2921 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL; 2922 VkResult result; 2923 bool fence_emitted = false; 2924 bool tess_rings_needed = false; 2925 bool sample_positions_needed = false; 2926 2927 /* Do this first so failing to allocate scratch buffers can't result in 2928 * partially executed submissions. 
*/ 2929 for (uint32_t i = 0; i < submitCount; i++) { 2930 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { 2931 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 2932 pSubmits[i].pCommandBuffers[j]); 2933 2934 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed); 2935 compute_scratch_size = MAX2(compute_scratch_size, 2936 cmd_buffer->compute_scratch_size_needed); 2937 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed); 2938 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); 2939 tess_rings_needed |= cmd_buffer->tess_rings_needed; 2940 sample_positions_needed |= cmd_buffer->sample_positions_needed; 2941 } 2942 } 2943 2944 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, 2945 esgs_ring_size, gsvs_ring_size, tess_rings_needed, 2946 sample_positions_needed, &initial_flush_preamble_cs, 2947 &initial_preamble_cs, &continue_preamble_cs); 2948 if (result != VK_SUCCESS) 2949 return result; 2950 2951 for (uint32_t i = 0; i < submitCount; i++) { 2952 struct radeon_cmdbuf **cs_array; 2953 bool do_flush = !i || pSubmits[i].pWaitDstStageMask; 2954 bool can_patch = true; 2955 uint32_t advance; 2956 struct radv_winsys_sem_info sem_info; 2957 2958 result = radv_alloc_sem_info(queue->device->instance, 2959 &sem_info, 2960 pSubmits[i].waitSemaphoreCount, 2961 pSubmits[i].pWaitSemaphores, 2962 pSubmits[i].signalSemaphoreCount, 2963 pSubmits[i].pSignalSemaphores, 2964 _fence); 2965 if (result != VK_SUCCESS) 2966 return result; 2967 2968 if (!pSubmits[i].commandBufferCount) { 2969 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { 2970 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, 2971 &queue->device->empty_cs[queue->queue_family_index], 2972 1, NULL, NULL, 2973 &sem_info, NULL, 2974 false, base_fence); 2975 if (ret) { 2976 radv_loge("failed to submit CS %d\n", i); 2977 abort(); 2978 } 2979 fence_emitted = true; 2980 } 2981 radv_free_sem_info(&sem_info); 2982 
continue; 2983 } 2984 2985 cs_array = malloc(sizeof(struct radeon_cmdbuf *) * 2986 (pSubmits[i].commandBufferCount)); 2987 2988 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { 2989 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 2990 pSubmits[i].pCommandBuffers[j]); 2991 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 2992 2993 cs_array[j] = cmd_buffer->cs; 2994 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) 2995 can_patch = false; 2996 2997 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING; 2998 } 2999 3000 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { 3001 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; 3002 const struct radv_winsys_bo_list *bo_list = NULL; 3003 3004 advance = MIN2(max_cs_submission, 3005 pSubmits[i].commandBufferCount - j); 3006 3007 if (queue->device->trace_bo) 3008 *queue->device->trace_id_ptr = 0; 3009 3010 sem_info.cs_emit_wait = j == 0; 3011 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount; 3012 3013 if (unlikely(queue->device->use_global_bo_list)) { 3014 pthread_mutex_lock(&queue->device->bo_list.mutex); 3015 bo_list = &queue->device->bo_list.list; 3016 } 3017 3018 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, 3019 advance, initial_preamble, continue_preamble_cs, 3020 &sem_info, bo_list, 3021 can_patch, base_fence); 3022 3023 if (unlikely(queue->device->use_global_bo_list)) 3024 pthread_mutex_unlock(&queue->device->bo_list.mutex); 3025 3026 if (ret) { 3027 radv_loge("failed to submit CS %d\n", i); 3028 abort(); 3029 } 3030 fence_emitted = true; 3031 if (queue->device->trace_bo) { 3032 radv_check_gpu_hangs(queue, cs_array[j]); 3033 } 3034 } 3035 3036 radv_free_temp_syncobjs(queue->device, 3037 pSubmits[i].waitSemaphoreCount, 3038 pSubmits[i].pWaitSemaphores); 3039 radv_free_sem_info(&sem_info); 3040 free(cs_array); 3041 } 3042 3043 if (fence) { 
3044 if (!fence_emitted) { 3045 result = radv_signal_fence(queue, fence); 3046 if (result != VK_SUCCESS) 3047 return result; 3048 } 3049 fence->submitted = true; 3050 } 3051 3052 return VK_SUCCESS; 3053} 3054 3055VkResult radv_QueueWaitIdle( 3056 VkQueue _queue) 3057{ 3058 RADV_FROM_HANDLE(radv_queue, queue, _queue); 3059 3060 queue->device->ws->ctx_wait_idle(queue->hw_ctx, 3061 radv_queue_family_to_ring(queue->queue_family_index), 3062 queue->queue_idx); 3063 return VK_SUCCESS; 3064} 3065 3066VkResult radv_DeviceWaitIdle( 3067 VkDevice _device) 3068{ 3069 RADV_FROM_HANDLE(radv_device, device, _device); 3070 3071 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 3072 for (unsigned q = 0; q < device->queue_count[i]; q++) { 3073 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q])); 3074 } 3075 } 3076 return VK_SUCCESS; 3077} 3078 3079VkResult radv_EnumerateInstanceExtensionProperties( 3080 const char* pLayerName, 3081 uint32_t* pPropertyCount, 3082 VkExtensionProperties* pProperties) 3083{ 3084 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 3085 3086 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) { 3087 if (radv_supported_instance_extensions.extensions[i]) { 3088 vk_outarray_append(&out, prop) { 3089 *prop = radv_instance_extensions[i]; 3090 } 3091 } 3092 } 3093 3094 return vk_outarray_status(&out); 3095} 3096 3097VkResult radv_EnumerateDeviceExtensionProperties( 3098 VkPhysicalDevice physicalDevice, 3099 const char* pLayerName, 3100 uint32_t* pPropertyCount, 3101 VkExtensionProperties* pProperties) 3102{ 3103 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); 3104 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 3105 3106 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) { 3107 if (device->supported_extensions.extensions[i]) { 3108 vk_outarray_append(&out, prop) { 3109 *prop = radv_device_extensions[i]; 3110 } 3111 } 3112 } 3113 3114 return vk_outarray_status(&out); 3115} 3116 3117PFN_vkVoidFunction 
radv_GetInstanceProcAddr(
	VkInstance                                  _instance,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	/* Entry points are filtered against the instance's API version and
	 * enabled extensions; with a NULL instance only global commands can
	 * be resolved. */
	return radv_lookup_entrypoint_checked(pName,
	                                      instance ? instance->apiVersion : 0,
	                                      instance ? &instance->enabled_extensions : NULL,
	                                      NULL);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice                                    _device,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	return radv_lookup_entrypoint_checked(pName,
	                                      device->instance->apiVersion,
	                                      &device->instance->enabled_extensions,
	                                      &device->enabled_extensions);
}

/* Export the BO backing a memory object as a file descriptor.  For image
 * memory, the image's tiling metadata is written to the BO first so the
 * importer can interpret its contents. */
bool radv_get_memory_fd(struct radv_device *device,
			struct radv_device_memory *memory,
			int *pFD)
{
	struct radeon_bo_metadata metadata;

	if (memory->image) {
		radv_init_metadata(device, memory->image, &metadata);
		device->ws->buffer_set_metadata(memory->bo, &metadata);
	}

	return device->ws->buffer_get_fd(device->ws, memory->bo,
					 pFD);
}

/* Worker for vkAllocateMemory: handles fd imports, host-pointer imports
 * and plain winsys allocations, then registers the BO in the device's BO
 * list.  Returns VK_SUCCESS with *pMem = VK_NULL_HANDLE for a zero-sized
 * allocation. */
static VkResult radv_alloc_memory(struct radv_device *device,
				  const VkMemoryAllocateInfo*     pAllocateInfo,
				  const VkAllocationCallbacks*    pAllocator,
				  VkDeviceMemory*                 pMem)
{
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];

	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	const VkImportMemoryFdInfoKHR *import_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
	const VkMemoryDedicatedAllocateInfo *dedicate_info =
		vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
	const VkExportMemoryAllocateInfo *export_info =
		vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
	const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

	const struct wsi_memory_allocate_info *wsi_info =
		vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	if (wsi_info && wsi_info->implicit_sync)
		flags |= RADEON_FLAG_IMPLICIT_SYNC;

	if (dedicate_info) {
		mem->image = radv_image_from_handle(dedicate_info->image);
		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
	} else {
		mem->image = NULL;
		mem->buffer = NULL;
	}

	/* Map the [0.0, 1.0] VK_EXT_memory_priority value onto the winsys
	 * priority range, capped below the application maximum. */
	float priority_float = 0.5;
	const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
		vk_find_struct_const(pAllocateInfo->pNext,
				     MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
	if (priority_ext)
		priority_float = priority_ext->priority;

	unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
	                         (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));

	mem->user_ptr = NULL;

	if (import_info) {
		assert(import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
		       import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
						     priority, NULL, NULL);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			/* Import succeeded: the fd is consumed. */
			close(import_info->fd);
		}
	} else if (host_ptr_info) {
		assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
		assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
		mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
		                                      pAllocateInfo->allocationSize,
		                                      priority);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			mem->user_ptr = host_ptr_info->pHostPointer;
		}
	} else {
		uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
		if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
		    mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
			domain = RADEON_DOMAIN_GTT;
		else
			domain = RADEON_DOMAIN_VRAM;

		if (mem_type_index == RADV_MEM_TYPE_VRAM)
			flags |= RADEON_FLAG_NO_CPU_ACCESS;
		else
			flags |= RADEON_FLAG_CPU_ACCESS;

		if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
			flags |= RADEON_FLAG_GTT_WC;

		if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
			/* The BO can never be shared with another process. */
			flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
			if (device->use_global_bo_list) {
				flags |= RADEON_FLAG_PREFER_LOCAL_BO;
			}
		}

		mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
		                                    domain, flags, priority);

		if (!mem->bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
		mem->type_index = mem_type_index;
	}

	result = radv_bo_list_add(device, mem->bo);
	if (result != VK_SUCCESS)
		goto fail_bo;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail_bo:
	device->ws->buffer_destroy(mem->bo);
fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}

VkResult radv_AllocateMemory(
	VkDevice                                    _device,
	const VkMemoryAllocateInfo*                 pAllocateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDeviceMemory*                             pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}

void radv_FreeMemory(
	VkDevice                                    _device,
	VkDeviceMemory                              _mem,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

	if (mem == NULL)
		return;

	radv_bo_list_remove(device, mem->bo);
	device->ws->buffer_destroy(mem->bo);
	mem->bo = NULL;

	vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
	VkDevice                                    _device,
	VkDeviceMemory                              _memory,
	VkDeviceSize                                offset,
	VkDeviceSize                                size,
	VkMemoryMapFlags                            flags,
	void**                                      ppData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL) {
		*ppData = NULL;
		return VK_SUCCESS;
	}

	/* Host-pointer imports are already CPU-visible; everything else goes
	 * through the winsys mapping. */
	if (mem->user_ptr)
		*ppData = mem->user_ptr;
	else
		*ppData = device->ws->buffer_map(mem->bo);

	if (*ppData) {
		*ppData += offset;
		return VK_SUCCESS;
	}

	return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
}

void radv_UnmapMemory(
	VkDevice                                    _device,
	VkDeviceMemory                              _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL)
		return;

	/* Host-pointer imports were never mapped by us, so nothing to undo. */
	if (mem->user_ptr == NULL)
		device->ws->buffer_unmap(mem->bo);
}

VkResult
radv_FlushMappedMemoryRanges( 3377 VkDevice _device, 3378 uint32_t memoryRangeCount, 3379 const VkMappedMemoryRange* pMemoryRanges) 3380{ 3381 return VK_SUCCESS; 3382} 3383 3384VkResult radv_InvalidateMappedMemoryRanges( 3385 VkDevice _device, 3386 uint32_t memoryRangeCount, 3387 const VkMappedMemoryRange* pMemoryRanges) 3388{ 3389 return VK_SUCCESS; 3390} 3391 3392void radv_GetBufferMemoryRequirements( 3393 VkDevice _device, 3394 VkBuffer _buffer, 3395 VkMemoryRequirements* pMemoryRequirements) 3396{ 3397 RADV_FROM_HANDLE(radv_device, device, _device); 3398 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3399 3400 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; 3401 3402 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) 3403 pMemoryRequirements->alignment = 4096; 3404 else 3405 pMemoryRequirements->alignment = 16; 3406 3407 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment); 3408} 3409 3410void radv_GetBufferMemoryRequirements2( 3411 VkDevice device, 3412 const VkBufferMemoryRequirementsInfo2 *pInfo, 3413 VkMemoryRequirements2 *pMemoryRequirements) 3414{ 3415 radv_GetBufferMemoryRequirements(device, pInfo->buffer, 3416 &pMemoryRequirements->memoryRequirements); 3417 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); 3418 vk_foreach_struct(ext, pMemoryRequirements->pNext) { 3419 switch (ext->sType) { 3420 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 3421 VkMemoryDedicatedRequirements *req = 3422 (VkMemoryDedicatedRequirements *) ext; 3423 req->requiresDedicatedAllocation = buffer->shareable; 3424 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; 3425 break; 3426 } 3427 default: 3428 break; 3429 } 3430 } 3431} 3432 3433void radv_GetImageMemoryRequirements( 3434 VkDevice _device, 3435 VkImage _image, 3436 VkMemoryRequirements* pMemoryRequirements) 3437{ 3438 RADV_FROM_HANDLE(radv_device, device, _device); 3439 
RADV_FROM_HANDLE(radv_image, image, _image); 3440 3441 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; 3442 3443 pMemoryRequirements->size = image->size; 3444 pMemoryRequirements->alignment = image->alignment; 3445} 3446 3447void radv_GetImageMemoryRequirements2( 3448 VkDevice device, 3449 const VkImageMemoryRequirementsInfo2 *pInfo, 3450 VkMemoryRequirements2 *pMemoryRequirements) 3451{ 3452 radv_GetImageMemoryRequirements(device, pInfo->image, 3453 &pMemoryRequirements->memoryRequirements); 3454 3455 RADV_FROM_HANDLE(radv_image, image, pInfo->image); 3456 3457 vk_foreach_struct(ext, pMemoryRequirements->pNext) { 3458 switch (ext->sType) { 3459 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 3460 VkMemoryDedicatedRequirements *req = 3461 (VkMemoryDedicatedRequirements *) ext; 3462 req->requiresDedicatedAllocation = image->shareable; 3463 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; 3464 break; 3465 } 3466 default: 3467 break; 3468 } 3469 } 3470} 3471 3472void radv_GetImageSparseMemoryRequirements( 3473 VkDevice device, 3474 VkImage image, 3475 uint32_t* pSparseMemoryRequirementCount, 3476 VkSparseImageMemoryRequirements* pSparseMemoryRequirements) 3477{ 3478 stub(); 3479} 3480 3481void radv_GetImageSparseMemoryRequirements2( 3482 VkDevice device, 3483 const VkImageSparseMemoryRequirementsInfo2 *pInfo, 3484 uint32_t* pSparseMemoryRequirementCount, 3485 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) 3486{ 3487 stub(); 3488} 3489 3490void radv_GetDeviceMemoryCommitment( 3491 VkDevice device, 3492 VkDeviceMemory memory, 3493 VkDeviceSize* pCommittedMemoryInBytes) 3494{ 3495 *pCommittedMemoryInBytes = 0; 3496} 3497 3498VkResult radv_BindBufferMemory2(VkDevice device, 3499 uint32_t bindInfoCount, 3500 const VkBindBufferMemoryInfo *pBindInfos) 3501{ 3502 for (uint32_t i = 0; i < bindInfoCount; ++i) { 3503 RADV_FROM_HANDLE(radv_device_memory, mem, 
pBindInfos[i].memory); 3504 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer); 3505 3506 if (mem) { 3507 buffer->bo = mem->bo; 3508 buffer->offset = pBindInfos[i].memoryOffset; 3509 } else { 3510 buffer->bo = NULL; 3511 } 3512 } 3513 return VK_SUCCESS; 3514} 3515 3516VkResult radv_BindBufferMemory( 3517 VkDevice device, 3518 VkBuffer buffer, 3519 VkDeviceMemory memory, 3520 VkDeviceSize memoryOffset) 3521{ 3522 const VkBindBufferMemoryInfo info = { 3523 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 3524 .buffer = buffer, 3525 .memory = memory, 3526 .memoryOffset = memoryOffset 3527 }; 3528 3529 return radv_BindBufferMemory2(device, 1, &info); 3530} 3531 3532VkResult radv_BindImageMemory2(VkDevice device, 3533 uint32_t bindInfoCount, 3534 const VkBindImageMemoryInfo *pBindInfos) 3535{ 3536 for (uint32_t i = 0; i < bindInfoCount; ++i) { 3537 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); 3538 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image); 3539 3540 if (mem) { 3541 image->bo = mem->bo; 3542 image->offset = pBindInfos[i].memoryOffset; 3543 } else { 3544 image->bo = NULL; 3545 image->offset = 0; 3546 } 3547 } 3548 return VK_SUCCESS; 3549} 3550 3551 3552VkResult radv_BindImageMemory( 3553 VkDevice device, 3554 VkImage image, 3555 VkDeviceMemory memory, 3556 VkDeviceSize memoryOffset) 3557{ 3558 const VkBindImageMemoryInfo info = { 3559 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 3560 .image = image, 3561 .memory = memory, 3562 .memoryOffset = memoryOffset 3563 }; 3564 3565 return radv_BindImageMemory2(device, 1, &info); 3566} 3567 3568 3569static void 3570radv_sparse_buffer_bind_memory(struct radv_device *device, 3571 const VkSparseBufferMemoryBindInfo *bind) 3572{ 3573 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer); 3574 3575 for (uint32_t i = 0; i < bind->bindCount; ++i) { 3576 struct radv_device_memory *mem = NULL; 3577 3578 if (bind->pBinds[i].memory != VK_NULL_HANDLE) 3579 mem = 
radv_device_memory_from_handle(bind->pBinds[i].memory); 3580 3581 device->ws->buffer_virtual_bind(buffer->bo, 3582 bind->pBinds[i].resourceOffset, 3583 bind->pBinds[i].size, 3584 mem ? mem->bo : NULL, 3585 bind->pBinds[i].memoryOffset); 3586 } 3587} 3588 3589static void 3590radv_sparse_image_opaque_bind_memory(struct radv_device *device, 3591 const VkSparseImageOpaqueMemoryBindInfo *bind) 3592{ 3593 RADV_FROM_HANDLE(radv_image, image, bind->image); 3594 3595 for (uint32_t i = 0; i < bind->bindCount; ++i) { 3596 struct radv_device_memory *mem = NULL; 3597 3598 if (bind->pBinds[i].memory != VK_NULL_HANDLE) 3599 mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 3600 3601 device->ws->buffer_virtual_bind(image->bo, 3602 bind->pBinds[i].resourceOffset, 3603 bind->pBinds[i].size, 3604 mem ? mem->bo : NULL, 3605 bind->pBinds[i].memoryOffset); 3606 } 3607} 3608 3609 VkResult radv_QueueBindSparse( 3610 VkQueue _queue, 3611 uint32_t bindInfoCount, 3612 const VkBindSparseInfo* pBindInfo, 3613 VkFence _fence) 3614{ 3615 RADV_FROM_HANDLE(radv_fence, fence, _fence); 3616 RADV_FROM_HANDLE(radv_queue, queue, _queue); 3617 struct radeon_winsys_fence *base_fence = fence ? 
		fence->fence : NULL;
	bool fence_emitted = false;
	VkResult result;
	int ret;

	for (uint32_t i = 0; i < bindInfoCount; ++i) {
		struct radv_winsys_sem_info sem_info;
		/* Apply the sparse buffer / opaque image memory bindings for
		 * this batch before any dependent submission. */
		for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
			radv_sparse_buffer_bind_memory(queue->device,
			                               pBindInfo[i].pBufferBinds + j);
		}

		for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
			radv_sparse_image_opaque_bind_memory(queue->device,
			                                     pBindInfo[i].pImageOpaqueBinds + j);
		}

		/* NOTE(review): this inner declaration shadows the outer
		 * 'result' declared at function scope. */
		VkResult result;
		result = radv_alloc_sem_info(queue->device->instance,
					     &sem_info,
					     pBindInfo[i].waitSemaphoreCount,
					     pBindInfo[i].pWaitSemaphores,
					     pBindInfo[i].signalSemaphoreCount,
					     pBindInfo[i].pSignalSemaphores,
					     _fence);
		if (result != VK_SUCCESS)
			return result;

		/* Semaphores can only be waited/signalled through an actual
		 * submission, so submit the queue's empty CS just to carry
		 * them (and the fence, if any). */
		if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
			ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
							  &queue->device->empty_cs[queue->queue_family_index],
							  1, NULL, NULL,
							  &sem_info, NULL,
							  false, base_fence);
			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}

			fence_emitted = true;
			if (fence)
				fence->submitted = true;
		}

		radv_free_sem_info(&sem_info);

	}

	/* If no batch emitted the fence, signal it directly so the caller
	 * can still wait on it. */
	if (fence) {
		if (!fence_emitted) {
			result = radv_signal_fence(queue, fence);
			if (result != VK_SUCCESS)
				return result;
		}
		fence->submitted = true;
	}

	return VK_SUCCESS;
}

/* Create a fence.  Uses a kernel DRM syncobj when the fence must be
 * exportable (or the device always uses syncobjs); otherwise a plain
 * winsys fence is used. */
VkResult radv_CreateFence(
	VkDevice                                    _device,
	const VkFenceCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFence*                                    pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkExportFenceCreateInfo *export =
		vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
	VkExternalFenceHandleTypeFlags handleTypes =
		export ? export->handleTypes : 0;

	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	fence->fence_wsi = NULL;
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->temp_syncobj = 0;
	if (device->always_use_syncobj || handleTypes) {
		int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
		if (ret) {
			vk_free2(&device->alloc, pAllocator, fence);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		/* Honour VK_FENCE_CREATE_SIGNALED_BIT on the syncobj path. */
		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
			device->ws->signal_syncobj(device->ws, fence->syncobj);
		}
		fence->fence = NULL;
	} else {
		fence->fence = device->ws->create_fence();
		if (!fence->fence) {
			vk_free2(&device->alloc, pAllocator, fence);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		fence->syncobj = 0;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

/* Destroy a fence and whichever backing payloads (temporary/permanent
 * syncobj, winsys fence, WSI fence) it holds. */
void radv_DestroyFence(
	VkDevice                                    _device,
	VkFence                                     _fence,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;

	if (fence->temp_syncobj)
		device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
	if (fence->syncobj)
		device->ws->destroy_syncobj(device->ws, fence->syncobj);
	if (fence->fence)
		device->ws->destroy_fence(fence->fence);
	if (fence->fence_wsi)
		fence->fence_wsi->destroy(fence->fence_wsi);
	vk_free2(&device->alloc, pAllocator, fence);
}


/* Current CLOCK_MONOTONIC time in nanoseconds. */
uint64_t radv_get_current_time(void)
{
	struct timespec tv;
	clock_gettime(CLOCK_MONOTONIC, &tv);
	return tv.tv_nsec + tv.tv_sec*1000000000ull;
}

/* Convert a relative timeout (ns) into an absolute monotonic deadline,
 * clamping so the addition cannot overflow UINT64_MAX. */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time = radv_get_current_time();

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}


/* True if every fence is a plain (non-syncobj) winsys fence that has
 * been submitted or already signalled — the precondition for the
 * batched fences_wait fast path. */
static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
{
	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		if (fence->fence == NULL || fence->syncobj ||
		    fence->temp_syncobj ||
		    (!fence->signalled && !fence->submitted))
			return false;
	}
	return true;
}

/* True if every fence is backed by a syncobj (temporary or permanent). */
static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
{
	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		if (fence->syncobj == 0 && fence->temp_syncobj == 0)
			return false;
	}
	return true;
}

/* Wait on a set of fences, honouring waitAll and the (relative) timeout.
 * Fast paths: a single kernel wait when everything is a syncobj, and a
 * batched winsys wait for plain submitted fences. */
VkResult radv_WaitForFences(
	VkDevice                                    _device,
	uint32_t                                    fenceCount,
	const VkFence*                              pFences,
	VkBool32                                    waitAll,
	uint64_t                                    timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	if (device->always_use_syncobj &&
	    radv_all_fences_syncobj(fenceCount, pFences))
	{
		uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
		if (!handles)
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

		/* A temporary (imported) payload takes precedence over the
		 * permanent one, per the external-fence rules. */
		for (uint32_t i = 0; i < fenceCount; ++i) {
			RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
			handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
		}

		bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);

		free(handles);
		return success ? VK_SUCCESS : VK_TIMEOUT;
	}

	if (!waitAll && fenceCount > 1) {
		/* Not doing this by default for waitAll, due to needing to allocate twice.
		 */
		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
			uint32_t wait_count = 0;
			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
			if (!fences)
				return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

			for (uint32_t i = 0; i < fenceCount; ++i) {
				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

				/* Any-of semantics: an already-signalled fence
				 * satisfies the wait immediately. */
				if (fence->signalled) {
					free(fences);
					return VK_SUCCESS;
				}

				fences[wait_count++] = fence->fence;
			}

			/* 'timeout' is absolute here; convert back to the
			 * remaining relative time for the winsys call. */
			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
							       waitAll, timeout - radv_get_current_time());

			free(fences);
			return success ? VK_SUCCESS : VK_TIMEOUT;
		}

		/* NOTE(review): fallback busy-polls fence status until the
		 * deadline passes; no sleep between iterations. */
		while(radv_get_current_time() <= timeout) {
			for (uint32_t i = 0; i < fenceCount; ++i) {
				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
					return VK_SUCCESS;
			}
		}
		return VK_TIMEOUT;
	}

	/* waitAll path: wait on each fence in turn with the same absolute
	 * deadline. */
	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->temp_syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->signalled)
			continue;

		if (fence->fence) {
			if (!fence->submitted) {
				/* NOTE(review): busy-wait for another thread
				 * to submit this fence. */
				while(radv_get_current_time() <= timeout &&
				      !fence->submitted)
					/* Do nothing */;

				if (!fence->submitted)
					return VK_TIMEOUT;

				/* Recheck as it may have been set by
				 * submitting operations.
				 */

				if (fence->signalled)
					continue;
			}

			expired = device->ws->fence_wait(device->ws,
							 fence->fence,
							 true, timeout);
			if (!expired)
				return VK_TIMEOUT;
		}

		if (fence->fence_wsi) {
			VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
			if (result != VK_SUCCESS)
				return result;
		}

		/* Cache the result so later waits/status queries are cheap. */
		fence->signalled = true;
	}

	return VK_SUCCESS;
}

/* Reset fences to the unsignalled state.  A temporary syncobj payload
 * is dropped (restoring the permanent one) and the permanent syncobj is
 * reset, per the external-fence reset rules. */
VkResult radv_ResetFences(VkDevice _device,
			  uint32_t fenceCount,
			  const VkFence *pFences)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		fence->submitted = fence->signalled = false;

		/* Per spec, we first restore the permanent payload, and then reset, so
		 * having a temp syncobj should not skip resetting the permanent syncobj. */
		if (fence->temp_syncobj) {
			device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
			fence->temp_syncobj = 0;
		}

		if (fence->syncobj) {
			device->ws->reset_syncobj(device->ws, fence->syncobj);
		}
	}

	return VK_SUCCESS;
}

/* Non-blocking fence query: VK_SUCCESS if signalled, VK_NOT_READY
 * otherwise.  Syncobj-backed fences are probed with a zero-timeout
 * kernel wait. */
VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence->temp_syncobj) {
		bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
		return success ? VK_SUCCESS : VK_NOT_READY;
	}

	if (fence->syncobj) {
		bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
		return success ?
VK_SUCCESS : VK_NOT_READY;
	}

	if (fence->signalled)
		return VK_SUCCESS;
	if (!fence->submitted)
		return VK_NOT_READY;
	if (fence->fence) {
		if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
			return VK_NOT_READY;
	}
	if (fence->fence_wsi) {
		VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);

		if (result != VK_SUCCESS) {
			if (result == VK_TIMEOUT)
				return VK_NOT_READY;
			return result;
		}
	}
	return VK_SUCCESS;
}


// Queue semaphore functions

/* Create a semaphore.  A kernel DRM syncobj backs it when it must be
 * exportable (or the device always uses syncobjs); otherwise a winsys
 * semaphore object is used. */
VkResult radv_CreateSemaphore(
	VkDevice                                    _device,
	const VkSemaphoreCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSemaphore*                                pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkExportSemaphoreCreateInfo *export =
		vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
	VkExternalSemaphoreHandleTypeFlags handleTypes =
		export ? export->handleTypes : 0;

	struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
					       sizeof(*sem), 8,
					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sem)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	sem->temp_syncobj = 0;
	/* create a syncobject if we are going to export this semaphore */
	if (device->always_use_syncobj || handleTypes) {
		assert (device->physical_device->rad_info.has_syncobj);
		int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
		if (ret) {
			vk_free2(&device->alloc, pAllocator, sem);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		sem->sem = NULL;
	} else {
		sem->sem = device->ws->create_sem(device->ws);
		if (!sem->sem) {
			vk_free2(&device->alloc, pAllocator, sem);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		sem->syncobj = 0;
	}

	*pSemaphore = radv_semaphore_to_handle(sem);
	return VK_SUCCESS;
}

/* Destroy a semaphore and its syncobj or winsys backing object. */
void radv_DestroySemaphore(
	VkDevice                                    _device,
	VkSemaphore                                 _semaphore,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
	if (!_semaphore)
		return;

	if (sem->syncobj)
		device->ws->destroy_syncobj(device->ws, sem->syncobj);
	else
		device->ws->destroy_sem(sem->sem);
	vk_free2(&device->alloc, pAllocator, sem);
}

/* Create an event, backed by a small CPU-mappable GTT buffer whose
 * first dword holds the set/reset state (1 = set, 0 = reset). */
VkResult radv_CreateEvent(
	VkDevice                                    _device,
	const VkEventCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkEvent*                                    pEvent)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*event), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!event)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	event->bo = device->ws->buffer_create(device->ws, 8, 8,
					      RADEON_DOMAIN_GTT,
					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
					      RADV_BO_PRIORITY_FENCE);
	if (!event->bo) {
		vk_free2(&device->alloc, pAllocator, event);
		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
	}

	/* Keep the buffer mapped for the event's lifetime so Get/Set/Reset
	 * are plain CPU loads/stores. */
	event->map = (uint64_t*)device->ws->buffer_map(event->bo);

	*pEvent = radv_event_to_handle(event);

	return VK_SUCCESS;
}

/* Destroy an event and its backing buffer. */
void radv_DestroyEvent(
	VkDevice                                    _device,
	VkEvent                                     _event,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (!event)
		return;
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
}

/* Query the event's mapped state word. */
VkResult radv_GetEventStatus(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (*event->map == 1)
		return VK_EVENT_SET;
	return VK_EVENT_RESET;
}

/* Set the event from the host. */
VkResult radv_SetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 1;

	return VK_SUCCESS;
}

/* Reset the event from the host. */
VkResult radv_ResetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 0;

	return VK_SUCCESS;
}

/* Create a buffer.  Only sparse buffers get a BO here (a virtual
 * allocation); ordinary buffers are bound to memory later via
 * vkBindBufferMemory. */
VkResult radv_CreateBuffer(
	VkDevice                                    _device,
	const VkBufferCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkBuffer*                                   pBuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer *buffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (buffer == NULL)
		return
vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	buffer->size = pCreateInfo->size;
	buffer->usage = pCreateInfo->usage;
	buffer->bo = NULL;
	buffer->offset = 0;
	buffer->flags = pCreateInfo->flags;

	/* Exportable buffers need a dedicated, shareable allocation. */
	buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
						 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;

	if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
		/* Sparse buffers reserve a page-aligned virtual range now;
		 * pages are bound later via vkQueueBindSparse. */
		buffer->bo = device->ws->buffer_create(device->ws,
		                                       align64(buffer->size, 4096),
		                                       4096, 0, RADEON_FLAG_VIRTUAL,
		                                       RADV_BO_PRIORITY_VIRTUAL);
		if (!buffer->bo) {
			vk_free2(&device->alloc, pAllocator, buffer);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pBuffer = radv_buffer_to_handle(buffer);

	return VK_SUCCESS;
}

/* Destroy a buffer.  Only sparse buffers own their BO (see
 * radv_CreateBuffer); non-sparse buffers reference the bound
 * device memory's BO, which is freed with the memory object. */
void radv_DestroyBuffer(
	VkDevice                                    _device,
	VkBuffer                                    _buffer,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (!buffer)
		return;

	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(buffer->bo);

	vk_free2(&device->alloc, pAllocator, buffer);
}

/* GPU virtual address of the buffer (BO VA plus bind offset). */
VkDeviceAddress radv_GetBufferDeviceAddressEXT(
	VkDevice                                    device,
	const VkBufferDeviceAddressInfoEXT*         pInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
	return radv_buffer_get_va(buffer->bo) + buffer->offset;
}


/* Pre-GFX9 tiling index for a mip level, for either the depth or the
 * stencil part of the surface. */
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
	if (stencil)
		return plane->surface.u.legacy.stencil_tiling_index[level];
	else
		return plane->surface.u.legacy.tiling_index[level];
}

/* Highest layer index (exclusive) addressed by the view: the full depth
 * for 3D views, base_layer + layer_count otherwise. */
static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
{
	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
}

/* Compute the CB_DCC_CONTROL register value for a color view, or 0 when
 * the image has no DCC.  Block sizes follow the image's sample count,
 * bpe and intended usage. */
static uint32_t
radv_init_dcc_control_reg(struct radv_device *device,
			  struct radv_image_view *iview)
{
	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
	unsigned max_compressed_block_size;
	unsigned independent_64b_blocks;

	if (!radv_image_has_dcc(iview->image))
		return 0;

	if (iview->image->info.samples > 1) {
		if (iview->image->planes[0].surface.bpe == 1)
			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
		else if (iview->image->planes[0].surface.bpe == 2)
			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
	}

	if (!device->physical_device->rad_info.has_dedicated_vram) {
		/* amdvlk: [min-compressed-block-size] should be set to 32 for
		 * dGPU and 64 for APU because all of our APUs to date use
		 * DIMMs which have a request granularity size of 64B while all
		 * other chips have a 32B request size.
		 */
		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
	}

	if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
				   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
		/* If this DCC image is potentially going to be used in texture
		 * fetches, we need some special settings.
		 */
		independent_64b_blocks = 1;
		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
	} else {
		/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
		 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
		 * big as possible for better compression state.
		 */
		independent_64b_blocks = 0;
		max_compressed_block_size = max_uncompressed_block_size;
	}

	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
}

/* Fill the CB_* register state for a color attachment view: base/meta
 * addresses (CMASK/FMASK/DCC), tiling, format/swap/number-type and the
 * per-chip (GFX9 vs legacy) attribute fields. */
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	uint64_t va;
	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
	const struct radeon_surf *surf = &plane->surface;

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Intensity is implemented as Red, so treat it that way.
	 */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;

	cb->cb_color_base = va >> 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* On GFX9, rb/pipe alignment comes from whichever metadata
		 * surface (DCC or CMASK) the image actually uses. */
		struct gfx9_surf_meta_flags meta;
		if (iview->image->dcc_offset)
			meta = surf->u.gfx9.dcc;
		else
			meta = surf->u.gfx9.cmask;

		cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
			S_028C74_RB_ALIGNED(meta.rb_aligned) |
			S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
		cb->cb_color_base |= surf->tile_swizzle;

		cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
	} else {
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

		cb->cb_color_base += level_info->offset >> 8;
		if (level_info->mode == RADEON_SURF_MODE_2D)
			cb->cb_color_base |= surf->tile_swizzle;

		/* Tile counts: 8x8 pixel tiles, 64 pixels per tile. */
		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (radv_image_has_fmask(iview->image)) {
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}
	}

	/* CMASK variables */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;

	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;
	cb->cb_dcc_base |= surf->tile_swizzle;

	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(max_slice);

	if (iview->image->info.samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->info.samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (radv_image_has_fmask(iview->image)) {
		va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
	} else {
		cb->cb_color_fmask = cb->cb_color_base;
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (radv_image_has_fmask(iview->image)) {
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
		if (device->physical_device->rad_info.chip_class == SI) {
			unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
		}
	}

	if (radv_image_has_cmask(iview->image) &&
	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (radv_dcc_enabled(iview->image, iview->base_mip))
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

	/* This must be set for fast clear to work without FMASK.
	 */
	if (!radv_image_has_fmask(iview->image) &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);

		/* GFX9 needs the mip0 extents and mip level in the view, with
		 * the per-plane subsampling divisors applied for non-zero
		 * planes. */
		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);

		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
			S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
			S_028C68_MIP0_HEIGHT(height - 1) |
			S_028C68_MAX_MIP(iview->image->info.levels - 1);
	}
}

/* Number of Z planes to decompress on-the-fly for a TC-compatible HTILE
 * surface, chosen per chip generation, format and sample count. */
static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device,
				 struct radv_image_view *iview)
{
	unsigned max_zplanes = 0;

	assert(radv_image_is_tc_compat_htile(iview->image));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* Default value for 32-bit depth surfaces. */
		max_zplanes = 4;

		if (iview->vk_format == VK_FORMAT_D16_UNORM &&
		    iview->image->info.samples > 1)
			max_zplanes = 2;

		max_zplanes = max_zplanes + 1;
	} else {
		if (iview->vk_format == VK_FORMAT_D16_UNORM) {
			/* Do not enable Z plane compression for 16-bit depth
			 * surfaces because isn't supported on GFX8. Only
			 * 32-bit depth surfaces are supported by the hardware.
			 * This allows to maintain shader compatibility and to
			 * reduce the number of depth decompressions.
			 */
			max_zplanes = 1;
		} else {
			if (iview->image->info.samples <= 1)
				max_zplanes = 5;
			else if (iview->image->info.samples <= 4)
				max_zplanes = 3;
			else
				max_zplanes = 2;
		}
	}

	return max_zplanes;
}

/* Fill the DB_* register state for a depth/stencil attachment view:
 * format, tiling, HTILE setup and the depth/stencil base addresses for
 * both the GFX9 and legacy paths. */
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	const struct radv_image_plane *plane = &iview->image->planes[0];
	const struct radeon_surf *surf = &plane->surface;

	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

	memset(ds, 0, sizeof(*ds));
	/* Polygon-offset scaling depends on the depth format's bit depth. */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = surf->has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		assert(surf->u.gfx9.surf_offset == 0);
		s_offs += surf->u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1) |
			S_028038_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

		ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
		ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
		ds->db_depth_view |= S_028008_MIPID(level);

		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
					 S_028038_ITERATE_FLUSH(1);
				ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (!surf->has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil.
				 */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned) |
				S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
		}
	} else {
		/* Legacy (pre-GFX9) path: per-level offsets, with stencil
		 * using its own level table when the view is stencil-only. */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

		if (stencil_only)
			level_info = &surf->u.legacy.stencil_level[level];

		z_offs += surf->u.legacy.level[level].offset;
		s_offs += surf->u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= CIK) {
			/* CIK+: decode the tiling parameters from the tile
			 * mode arrays into the DB registers. */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = surf->u.legacy.tiling_index[level];
			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
			unsigned macro_index = surf->u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!surf->has_stencil &&
			    !radv_image_is_tc_compat_htile(iview->image))
				/* Use all of the htile_buffer for depth if there's no stencil.
				 */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
			}
		}
	}

	/* Depth and stencil base addresses, in 256-byte units. */
	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

/* Create a framebuffer: pre-computes the CB or DB register state for
 * every attachment and clamps width/height/layers to the smallest
 * attachment extents. */
VkResult radv_CreateFramebuffer(
	VkDevice                                    _device,
	const VkFramebufferCreateInfo*              pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFramebuffer*                              pFramebuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_framebuffer *framebuffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

	/* Attachment info is stored inline after the framebuffer struct. */
	size_t size = sizeof(*framebuffer) +
		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
				VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (framebuffer == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	framebuffer->attachment_count = pCreateInfo->attachmentCount;
	framebuffer->width = pCreateInfo->width;
	framebuffer->height = pCreateInfo->height;
	framebuffer->layers = pCreateInfo->layers;
	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
		VkImageView _iview = pCreateInfo->pAttachments[i];
		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
		framebuffer->attachments[i].attachment = iview;
		if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
		} else {
			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
		}
		framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
		framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
		framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
	}

	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
	return VK_SUCCESS;
}

/* Destroy a framebuffer (attachment views are owned by the caller). */
void radv_DestroyFramebuffer(
	VkDevice                                    _device,
	VkFramebuffer                               _fb,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (!fb)
		return;
	vk_free2(&device->alloc, pAllocator, fb);
}

/* Translate a Vulkan address mode to the SQ_TEX wrap field. */
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}

/* Translate a Vulkan compare op to the SQ_TEX depth-compare field. */
static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}

/* Translate a Vulkan min/mag filter to the SQ_TEX XY filter field,
 * picking the aniso variant when anisotropy is enabled. */
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter");
		return 0;
	}
}

/* Translate a Vulkan mipmap mode to the SQ_TEX Z filter field. */
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

/* Translate a Vulkan border color to the SQ_TEX border color field
 * (float/int variants map to the same hardware value). */
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}

/* Map a max-anisotropy value to the hardware's log2-encoded ratio. */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
return 3; 4782 return 4; 4783} 4784 4785static unsigned 4786radv_tex_filter_mode(VkSamplerReductionModeEXT mode) 4787{ 4788 switch (mode) { 4789 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT: 4790 return V_008F30_SQ_IMG_FILTER_MODE_BLEND; 4791 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT: 4792 return V_008F30_SQ_IMG_FILTER_MODE_MIN; 4793 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT: 4794 return V_008F30_SQ_IMG_FILTER_MODE_MAX; 4795 default: 4796 break; 4797 } 4798 return 0; 4799} 4800 4801static uint32_t 4802radv_get_max_anisotropy(struct radv_device *device, 4803 const VkSamplerCreateInfo *pCreateInfo) 4804{ 4805 if (device->force_aniso >= 0) 4806 return device->force_aniso; 4807 4808 if (pCreateInfo->anisotropyEnable && 4809 pCreateInfo->maxAnisotropy > 1.0f) 4810 return (uint32_t)pCreateInfo->maxAnisotropy; 4811 4812 return 0; 4813} 4814 4815static void 4816radv_init_sampler(struct radv_device *device, 4817 struct radv_sampler *sampler, 4818 const VkSamplerCreateInfo *pCreateInfo) 4819{ 4820 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); 4821 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); 4822 bool is_vi = (device->physical_device->rad_info.chip_class >= VI); 4823 unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; 4824 4825 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction = 4826 vk_find_struct_const(pCreateInfo->pNext, 4827 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT); 4828 if (sampler_reduction) 4829 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode); 4830 4831 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) | 4832 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) | 4833 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) | 4834 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4835 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) | 4836 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 
1 : 0) | 4837 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 4838 S_008F30_ANISO_BIAS(max_aniso_ratio) | 4839 S_008F30_DISABLE_CUBE_WRAP(0) | 4840 S_008F30_COMPAT_MODE(is_vi) | 4841 S_008F30_FILTER_MODE(filter_mode)); 4842 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) | 4843 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) | 4844 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 4845 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | 4846 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) | 4847 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) | 4848 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) | 4849 S_008F38_MIP_POINT_PRECLAMP(0) | 4850 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) | 4851 S_008F38_FILTER_PREC_FIX(1) | 4852 S_008F38_ANISO_OVERRIDE(is_vi)); 4853 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) | 4854 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor))); 4855} 4856 4857VkResult radv_CreateSampler( 4858 VkDevice _device, 4859 const VkSamplerCreateInfo* pCreateInfo, 4860 const VkAllocationCallbacks* pAllocator, 4861 VkSampler* pSampler) 4862{ 4863 RADV_FROM_HANDLE(radv_device, device, _device); 4864 struct radv_sampler *sampler; 4865 4866 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion = 4867 vk_find_struct_const(pCreateInfo->pNext, 4868 SAMPLER_YCBCR_CONVERSION_INFO); 4869 4870 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); 4871 4872 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, 4873 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 4874 if (!sampler) 4875 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 4876 4877 radv_init_sampler(device, sampler, pCreateInfo); 4878 4879 sampler->ycbcr_sampler = ycbcr_conversion ? 
radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL; 4880 *pSampler = radv_sampler_to_handle(sampler); 4881 4882 return VK_SUCCESS; 4883} 4884 4885void radv_DestroySampler( 4886 VkDevice _device, 4887 VkSampler _sampler, 4888 const VkAllocationCallbacks* pAllocator) 4889{ 4890 RADV_FROM_HANDLE(radv_device, device, _device); 4891 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler); 4892 4893 if (!sampler) 4894 return; 4895 vk_free2(&device->alloc, pAllocator, sampler); 4896} 4897 4898/* vk_icd.h does not declare this function, so we declare it here to 4899 * suppress Wmissing-prototypes. 4900 */ 4901PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 4902vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); 4903 4904PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 4905vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) 4906{ 4907 /* For the full details on loader interface versioning, see 4908 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. 4909 * What follows is a condensed summary, to help you navigate the large and 4910 * confusing official doc. 4911 * 4912 * - Loader interface v0 is incompatible with later versions. We don't 4913 * support it. 4914 * 4915 * - In loader interface v1: 4916 * - The first ICD entrypoint called by the loader is 4917 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this 4918 * entrypoint. 4919 * - The ICD must statically expose no other Vulkan symbol unless it is 4920 * linked with -Bsymbolic. 4921 * - Each dispatchable Vulkan handle created by the ICD must be 4922 * a pointer to a struct whose first member is VK_LOADER_DATA. The 4923 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. 4924 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and 4925 * vkDestroySurfaceKHR(). The ICD must be capable of working with 4926 * such loader-managed surfaces. 
4927 * 4928 * - Loader interface v2 differs from v1 in: 4929 * - The first ICD entrypoint called by the loader is 4930 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must 4931 * statically expose this entrypoint. 4932 * 4933 * - Loader interface v3 differs from v2 in: 4934 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), 4935 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, 4936 * because the loader no longer does so. 4937 */ 4938 *pSupportedVersion = MIN2(*pSupportedVersion, 3u); 4939 return VK_SUCCESS; 4940} 4941 4942VkResult radv_GetMemoryFdKHR(VkDevice _device, 4943 const VkMemoryGetFdInfoKHR *pGetFdInfo, 4944 int *pFD) 4945{ 4946 RADV_FROM_HANDLE(radv_device, device, _device); 4947 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory); 4948 4949 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); 4950 4951 /* At the moment, we support only the below handle types. */ 4952 assert(pGetFdInfo->handleType == 4953 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 4954 pGetFdInfo->handleType == 4955 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 4956 4957 bool ret = radv_get_memory_fd(device, memory, pFD); 4958 if (ret == false) 4959 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 4960 return VK_SUCCESS; 4961} 4962 4963VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, 4964 VkExternalMemoryHandleTypeFlagBits handleType, 4965 int fd, 4966 VkMemoryFdPropertiesKHR *pMemoryFdProperties) 4967{ 4968 RADV_FROM_HANDLE(radv_device, device, _device); 4969 4970 switch (handleType) { 4971 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: 4972 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1; 4973 return VK_SUCCESS; 4974 4975 default: 4976 /* The valid usage section for this function says: 4977 * 4978 * "handleType must not be one of the handle types defined as 4979 * opaque." 4980 * 4981 * So opaque handle types fall into the default "unsupported" case. 
4982 */ 4983 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 4984 } 4985} 4986 4987static VkResult radv_import_opaque_fd(struct radv_device *device, 4988 int fd, 4989 uint32_t *syncobj) 4990{ 4991 uint32_t syncobj_handle = 0; 4992 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle); 4993 if (ret != 0) 4994 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 4995 4996 if (*syncobj) 4997 device->ws->destroy_syncobj(device->ws, *syncobj); 4998 4999 *syncobj = syncobj_handle; 5000 close(fd); 5001 5002 return VK_SUCCESS; 5003} 5004 5005static VkResult radv_import_sync_fd(struct radv_device *device, 5006 int fd, 5007 uint32_t *syncobj) 5008{ 5009 /* If we create a syncobj we do it locally so that if we have an error, we don't 5010 * leave a syncobj in an undetermined state in the fence. */ 5011 uint32_t syncobj_handle = *syncobj; 5012 if (!syncobj_handle) { 5013 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle); 5014 if (ret) { 5015 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5016 } 5017 } 5018 5019 if (fd == -1) { 5020 device->ws->signal_syncobj(device->ws, syncobj_handle); 5021 } else { 5022 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd); 5023 if (ret != 0) 5024 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5025 } 5026 5027 *syncobj = syncobj_handle; 5028 if (fd != -1) 5029 close(fd); 5030 5031 return VK_SUCCESS; 5032} 5033 5034VkResult radv_ImportSemaphoreFdKHR(VkDevice _device, 5035 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) 5036{ 5037 RADV_FROM_HANDLE(radv_device, device, _device); 5038 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); 5039 uint32_t *syncobj_dst = NULL; 5040 5041 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { 5042 syncobj_dst = &sem->temp_syncobj; 5043 } else { 5044 syncobj_dst = &sem->syncobj; 5045 } 5046 5047 
switch(pImportSemaphoreFdInfo->handleType) { 5048 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: 5049 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); 5050 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: 5051 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); 5052 default: 5053 unreachable("Unhandled semaphore handle type"); 5054 } 5055} 5056 5057VkResult radv_GetSemaphoreFdKHR(VkDevice _device, 5058 const VkSemaphoreGetFdInfoKHR *pGetFdInfo, 5059 int *pFd) 5060{ 5061 RADV_FROM_HANDLE(radv_device, device, _device); 5062 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore); 5063 int ret; 5064 uint32_t syncobj_handle; 5065 5066 if (sem->temp_syncobj) 5067 syncobj_handle = sem->temp_syncobj; 5068 else 5069 syncobj_handle = sem->syncobj; 5070 5071 switch(pGetFdInfo->handleType) { 5072 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: 5073 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); 5074 break; 5075 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: 5076 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); 5077 if (!ret) { 5078 if (sem->temp_syncobj) { 5079 close (sem->temp_syncobj); 5080 sem->temp_syncobj = 0; 5081 } else { 5082 device->ws->reset_syncobj(device->ws, syncobj_handle); 5083 } 5084 } 5085 break; 5086 default: 5087 unreachable("Unhandled semaphore handle type"); 5088 } 5089 5090 if (ret) 5091 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5092 return VK_SUCCESS; 5093} 5094 5095void radv_GetPhysicalDeviceExternalSemaphoreProperties( 5096 VkPhysicalDevice physicalDevice, 5097 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, 5098 VkExternalSemaphoreProperties *pExternalSemaphoreProperties) 5099{ 5100 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 5101 5102 /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */ 5103 if 
(pdevice->rad_info.has_syncobj_wait_for_submit && 5104 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || 5105 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) { 5106 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; 5107 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; 5108 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | 5109 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5110 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { 5111 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; 5112 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; 5113 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | 5114 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5115 } else { 5116 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; 5117 pExternalSemaphoreProperties->compatibleHandleTypes = 0; 5118 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; 5119 } 5120} 5121 5122VkResult radv_ImportFenceFdKHR(VkDevice _device, 5123 const VkImportFenceFdInfoKHR *pImportFenceFdInfo) 5124{ 5125 RADV_FROM_HANDLE(radv_device, device, _device); 5126 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence); 5127 uint32_t *syncobj_dst = NULL; 5128 5129 5130 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) { 5131 syncobj_dst = &fence->temp_syncobj; 5132 } else { 5133 syncobj_dst = &fence->syncobj; 5134 } 5135 5136 switch(pImportFenceFdInfo->handleType) { 5137 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: 5138 
return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst); 5139 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: 5140 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst); 5141 default: 5142 unreachable("Unhandled fence handle type"); 5143 } 5144} 5145 5146VkResult radv_GetFenceFdKHR(VkDevice _device, 5147 const VkFenceGetFdInfoKHR *pGetFdInfo, 5148 int *pFd) 5149{ 5150 RADV_FROM_HANDLE(radv_device, device, _device); 5151 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence); 5152 int ret; 5153 uint32_t syncobj_handle; 5154 5155 if (fence->temp_syncobj) 5156 syncobj_handle = fence->temp_syncobj; 5157 else 5158 syncobj_handle = fence->syncobj; 5159 5160 switch(pGetFdInfo->handleType) { 5161 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: 5162 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); 5163 break; 5164 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: 5165 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); 5166 if (!ret) { 5167 if (fence->temp_syncobj) { 5168 close (fence->temp_syncobj); 5169 fence->temp_syncobj = 0; 5170 } else { 5171 device->ws->reset_syncobj(device->ws, syncobj_handle); 5172 } 5173 } 5174 break; 5175 default: 5176 unreachable("Unhandled fence handle type"); 5177 } 5178 5179 if (ret) 5180 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5181 return VK_SUCCESS; 5182} 5183 5184void radv_GetPhysicalDeviceExternalFenceProperties( 5185 VkPhysicalDevice physicalDevice, 5186 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, 5187 VkExternalFenceProperties *pExternalFenceProperties) 5188{ 5189 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 5190 5191 if (pdevice->rad_info.has_syncobj_wait_for_submit && 5192 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT || 5193 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) { 5194 
pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; 5195 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; 5196 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | 5197 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5198 } else { 5199 pExternalFenceProperties->exportFromImportedHandleTypes = 0; 5200 pExternalFenceProperties->compatibleHandleTypes = 0; 5201 pExternalFenceProperties->externalFenceFeatures = 0; 5202 } 5203} 5204 5205VkResult 5206radv_CreateDebugReportCallbackEXT(VkInstance _instance, 5207 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, 5208 const VkAllocationCallbacks* pAllocator, 5209 VkDebugReportCallbackEXT* pCallback) 5210{ 5211 RADV_FROM_HANDLE(radv_instance, instance, _instance); 5212 return vk_create_debug_report_callback(&instance->debug_report_callbacks, 5213 pCreateInfo, pAllocator, &instance->alloc, 5214 pCallback); 5215} 5216 5217void 5218radv_DestroyDebugReportCallbackEXT(VkInstance _instance, 5219 VkDebugReportCallbackEXT _callback, 5220 const VkAllocationCallbacks* pAllocator) 5221{ 5222 RADV_FROM_HANDLE(radv_instance, instance, _instance); 5223 vk_destroy_debug_report_callback(&instance->debug_report_callbacks, 5224 _callback, pAllocator, &instance->alloc); 5225} 5226 5227void 5228radv_DebugReportMessageEXT(VkInstance _instance, 5229 VkDebugReportFlagsEXT flags, 5230 VkDebugReportObjectTypeEXT objectType, 5231 uint64_t object, 5232 size_t location, 5233 int32_t messageCode, 5234 const char* pLayerPrefix, 5235 const char* pMessage) 5236{ 5237 RADV_FROM_HANDLE(radv_instance, instance, _instance); 5238 vk_debug_report(&instance->debug_report_callbacks, flags, objectType, 5239 object, location, messageCode, pLayerPrefix, pMessage); 5240} 5241 5242void 5243radv_GetDeviceGroupPeerMemoryFeatures( 5244 VkDevice 
device, 5245 uint32_t heapIndex, 5246 uint32_t localDeviceIndex, 5247 uint32_t remoteDeviceIndex, 5248 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) 5249{ 5250 assert(localDeviceIndex == remoteDeviceIndex); 5251 5252 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | 5253 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | 5254 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | 5255 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; 5256} 5257 5258static const VkTimeDomainEXT radv_time_domains[] = { 5259 VK_TIME_DOMAIN_DEVICE_EXT, 5260 VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT, 5261 VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT, 5262}; 5263 5264VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT( 5265 VkPhysicalDevice physicalDevice, 5266 uint32_t *pTimeDomainCount, 5267 VkTimeDomainEXT *pTimeDomains) 5268{ 5269 int d; 5270 VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount); 5271 5272 for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) { 5273 vk_outarray_append(&out, i) { 5274 *i = radv_time_domains[d]; 5275 } 5276 } 5277 5278 return vk_outarray_status(&out); 5279} 5280 5281static uint64_t 5282radv_clock_gettime(clockid_t clock_id) 5283{ 5284 struct timespec current; 5285 int ret; 5286 5287 ret = clock_gettime(clock_id, ¤t); 5288 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW) 5289 ret = clock_gettime(CLOCK_MONOTONIC, ¤t); 5290 if (ret < 0) 5291 return 0; 5292 5293 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec; 5294} 5295 5296VkResult radv_GetCalibratedTimestampsEXT( 5297 VkDevice _device, 5298 uint32_t timestampCount, 5299 const VkCalibratedTimestampInfoEXT *pTimestampInfos, 5300 uint64_t *pTimestamps, 5301 uint64_t *pMaxDeviation) 5302{ 5303 RADV_FROM_HANDLE(radv_device, device, _device); 5304 uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq; 5305 int d; 5306 uint64_t begin, end; 5307 uint64_t max_clock_period = 0; 5308 5309 begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW); 5310 5311 for (d = 0; d < timestampCount; d++) { 5312 switch 
(pTimestampInfos[d].timeDomain) { 5313 case VK_TIME_DOMAIN_DEVICE_EXT: 5314 pTimestamps[d] = device->ws->query_value(device->ws, 5315 RADEON_TIMESTAMP); 5316 uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq); 5317 max_clock_period = MAX2(max_clock_period, device_period); 5318 break; 5319 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT: 5320 pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC); 5321 max_clock_period = MAX2(max_clock_period, 1); 5322 break; 5323 5324 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT: 5325 pTimestamps[d] = begin; 5326 break; 5327 default: 5328 pTimestamps[d] = 0; 5329 break; 5330 } 5331 } 5332 5333 end = radv_clock_gettime(CLOCK_MONOTONIC_RAW); 5334 5335 /* 5336 * The maximum deviation is the sum of the interval over which we 5337 * perform the sampling and the maximum period of any sampled 5338 * clock. That's because the maximum skew between any two sampled 5339 * clock edges is when the sampled clock with the largest period is 5340 * sampled at the end of that period but right at the beginning of the 5341 * sampling interval and some other clock is sampled right at the 5342 * begining of its sampling period and right at the end of the 5343 * sampling interval. 
Let's assume the GPU has the longest clock 5344 * period and that the application is sampling GPU and monotonic: 5345 * 5346 * s e 5347 * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f 5348 * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 5349 * 5350 * g 5351 * 0 1 2 3 5352 * GPU -----_____-----_____-----_____-----_____ 5353 * 5354 * m 5355 * x y z 0 1 2 3 4 5 6 7 8 9 a b c 5356 * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 5357 * 5358 * Interval <-----------------> 5359 * Deviation <--------------------------> 5360 * 5361 * s = read(raw) 2 5362 * g = read(GPU) 1 5363 * m = read(monotonic) 2 5364 * e = read(raw) b 5365 * 5366 * We round the sample interval up by one tick to cover sampling error 5367 * in the interval clock 5368 */ 5369 5370 uint64_t sample_interval = end - begin + 1; 5371 5372 *pMaxDeviation = sample_interval + max_clock_period; 5373 5374 return VK_SUCCESS; 5375} 5376