1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016 Red Hat. 3b8e80941Smrg * Copyright © 2016 Bas Nieuwenhuizen 4b8e80941Smrg * 5b8e80941Smrg * based in part on anv driver which is: 6b8e80941Smrg * Copyright © 2015 Intel Corporation 7b8e80941Smrg * 8b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 9b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 10b8e80941Smrg * to deal in the Software without restriction, including without limitation 11b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 13b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 14b8e80941Smrg * 15b8e80941Smrg * The above copyright notice and this permission notice (including the next 16b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 17b8e80941Smrg * Software. 18b8e80941Smrg * 19b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25b8e80941Smrg * IN THE SOFTWARE. 
26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#include <stdbool.h> 29b8e80941Smrg#include <string.h> 30b8e80941Smrg#include <unistd.h> 31b8e80941Smrg#include <fcntl.h> 32b8e80941Smrg#include "radv_debug.h" 33b8e80941Smrg#include "radv_private.h" 34b8e80941Smrg#include "radv_shader.h" 35b8e80941Smrg#include "radv_cs.h" 36b8e80941Smrg#include "util/disk_cache.h" 37b8e80941Smrg#include "util/strtod.h" 38b8e80941Smrg#include "vk_util.h" 39b8e80941Smrg#include <xf86drm.h> 40b8e80941Smrg#include <amdgpu.h> 41b8e80941Smrg#include <amdgpu_drm.h> 42b8e80941Smrg#include "winsys/amdgpu/radv_amdgpu_winsys_public.h" 43b8e80941Smrg#include "ac_llvm_util.h" 44b8e80941Smrg#include "vk_format.h" 45b8e80941Smrg#include "sid.h" 46b8e80941Smrg#include "git_sha1.h" 47b8e80941Smrg#include "gfx9d.h" 48b8e80941Smrg#include "util/build_id.h" 49b8e80941Smrg#include "util/debug.h" 50b8e80941Smrg#include "util/mesa-sha1.h" 51b8e80941Smrg#include "compiler/glsl_types.h" 52b8e80941Smrg#include "util/xmlpool.h" 53b8e80941Smrg 54b8e80941Smrgstatic int 55b8e80941Smrgradv_device_get_cache_uuid(enum radeon_family family, void *uuid) 56b8e80941Smrg{ 57b8e80941Smrg struct mesa_sha1 ctx; 58b8e80941Smrg unsigned char sha1[20]; 59b8e80941Smrg unsigned ptr_size = sizeof(void*); 60b8e80941Smrg 61b8e80941Smrg memset(uuid, 0, VK_UUID_SIZE); 62b8e80941Smrg _mesa_sha1_init(&ctx); 63b8e80941Smrg 64b8e80941Smrg if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) || 65b8e80941Smrg !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)) 66b8e80941Smrg return -1; 67b8e80941Smrg 68b8e80941Smrg _mesa_sha1_update(&ctx, &family, sizeof(family)); 69b8e80941Smrg _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size)); 70b8e80941Smrg _mesa_sha1_final(&ctx, sha1); 71b8e80941Smrg 72b8e80941Smrg memcpy(uuid, sha1, VK_UUID_SIZE); 73b8e80941Smrg return 0; 74b8e80941Smrg} 75b8e80941Smrg 76b8e80941Smrgstatic void 77b8e80941Smrgradv_get_driver_uuid(void *uuid) 78b8e80941Smrg{ 79b8e80941Smrg 
ac_compute_driver_uuid(uuid, VK_UUID_SIZE); 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic void 83b8e80941Smrgradv_get_device_uuid(struct radeon_info *info, void *uuid) 84b8e80941Smrg{ 85b8e80941Smrg ac_compute_device_uuid(info, uuid, VK_UUID_SIZE); 86b8e80941Smrg} 87b8e80941Smrg 88b8e80941Smrgstatic void 89b8e80941Smrgradv_get_device_name(enum radeon_family family, char *name, size_t name_len) 90b8e80941Smrg{ 91b8e80941Smrg const char *chip_string; 92b8e80941Smrg 93b8e80941Smrg switch (family) { 94b8e80941Smrg case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break; 95b8e80941Smrg case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break; 96b8e80941Smrg case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break; 97b8e80941Smrg case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break; 98b8e80941Smrg case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break; 99b8e80941Smrg case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break; 100b8e80941Smrg case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break; 101b8e80941Smrg case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break; 102b8e80941Smrg case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break; 103b8e80941Smrg case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break; 104b8e80941Smrg case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break; 105b8e80941Smrg case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break; 106b8e80941Smrg case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break; 107b8e80941Smrg case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break; 108b8e80941Smrg case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break; 109b8e80941Smrg case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break; 110b8e80941Smrg case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break; 111b8e80941Smrg case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break; 112b8e80941Smrg case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break; 113b8e80941Smrg case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; 
break; 114b8e80941Smrg case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break; 115b8e80941Smrg case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break; 116b8e80941Smrg case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break; 117b8e80941Smrg case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break; 118b8e80941Smrg default: chip_string = "AMD RADV unknown"; break; 119b8e80941Smrg } 120b8e80941Smrg 121b8e80941Smrg snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string); 122b8e80941Smrg} 123b8e80941Smrg 124b8e80941Smrgstatic uint64_t 125b8e80941Smrgradv_get_visible_vram_size(struct radv_physical_device *device) 126b8e80941Smrg{ 127b8e80941Smrg return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size); 128b8e80941Smrg} 129b8e80941Smrg 130b8e80941Smrgstatic uint64_t 131b8e80941Smrgradv_get_vram_size(struct radv_physical_device *device) 132b8e80941Smrg{ 133b8e80941Smrg return device->rad_info.vram_size - radv_get_visible_vram_size(device); 134b8e80941Smrg} 135b8e80941Smrg 136b8e80941Smrgstatic void 137b8e80941Smrgradv_physical_device_init_mem_types(struct radv_physical_device *device) 138b8e80941Smrg{ 139b8e80941Smrg STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); 140b8e80941Smrg uint64_t visible_vram_size = radv_get_visible_vram_size(device); 141b8e80941Smrg uint64_t vram_size = radv_get_vram_size(device); 142b8e80941Smrg int vram_index = -1, visible_vram_index = -1, gart_index = -1; 143b8e80941Smrg device->memory_properties.memoryHeapCount = 0; 144b8e80941Smrg if (vram_size > 0) { 145b8e80941Smrg vram_index = device->memory_properties.memoryHeapCount++; 146b8e80941Smrg device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) { 147b8e80941Smrg .size = vram_size, 148b8e80941Smrg .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 149b8e80941Smrg }; 150b8e80941Smrg } 151b8e80941Smrg if (visible_vram_size) { 152b8e80941Smrg visible_vram_index = device->memory_properties.memoryHeapCount++; 153b8e80941Smrg 
device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) { 154b8e80941Smrg .size = visible_vram_size, 155b8e80941Smrg .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 156b8e80941Smrg }; 157b8e80941Smrg } 158b8e80941Smrg if (device->rad_info.gart_size > 0) { 159b8e80941Smrg gart_index = device->memory_properties.memoryHeapCount++; 160b8e80941Smrg device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) { 161b8e80941Smrg .size = device->rad_info.gart_size, 162b8e80941Smrg .flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 163b8e80941Smrg }; 164b8e80941Smrg } 165b8e80941Smrg 166b8e80941Smrg STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES); 167b8e80941Smrg unsigned type_count = 0; 168b8e80941Smrg if (vram_index >= 0) { 169b8e80941Smrg device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM; 170b8e80941Smrg device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 171b8e80941Smrg .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 172b8e80941Smrg .heapIndex = vram_index, 173b8e80941Smrg }; 174b8e80941Smrg } 175b8e80941Smrg if (gart_index >= 0) { 176b8e80941Smrg device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE; 177b8e80941Smrg device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 178b8e80941Smrg .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 179b8e80941Smrg VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 180b8e80941Smrg (device->rad_info.has_dedicated_vram ? 
0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), 181b8e80941Smrg .heapIndex = gart_index, 182b8e80941Smrg }; 183b8e80941Smrg } 184b8e80941Smrg if (visible_vram_index >= 0) { 185b8e80941Smrg device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS; 186b8e80941Smrg device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 187b8e80941Smrg .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 188b8e80941Smrg VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 189b8e80941Smrg VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 190b8e80941Smrg .heapIndex = visible_vram_index, 191b8e80941Smrg }; 192b8e80941Smrg } 193b8e80941Smrg if (gart_index >= 0) { 194b8e80941Smrg device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED; 195b8e80941Smrg device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { 196b8e80941Smrg .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 197b8e80941Smrg VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 198b8e80941Smrg VK_MEMORY_PROPERTY_HOST_CACHED_BIT | 199b8e80941Smrg (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), 200b8e80941Smrg .heapIndex = gart_index, 201b8e80941Smrg }; 202b8e80941Smrg } 203b8e80941Smrg device->memory_properties.memoryTypeCount = type_count; 204b8e80941Smrg} 205b8e80941Smrg 206b8e80941Smrgstatic void 207b8e80941Smrgradv_handle_env_var_force_family(struct radv_physical_device *device) 208b8e80941Smrg{ 209b8e80941Smrg const char *family = getenv("RADV_FORCE_FAMILY"); 210b8e80941Smrg unsigned i; 211b8e80941Smrg 212b8e80941Smrg if (!family) 213b8e80941Smrg return; 214b8e80941Smrg 215b8e80941Smrg for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { 216b8e80941Smrg if (!strcmp(family, ac_get_llvm_processor_name(i))) { 217b8e80941Smrg /* Override family and chip_class. 
*/ 218b8e80941Smrg device->rad_info.family = i; 219b8e80941Smrg 220b8e80941Smrg if (i >= CHIP_VEGA10) 221b8e80941Smrg device->rad_info.chip_class = GFX9; 222b8e80941Smrg else if (i >= CHIP_TONGA) 223b8e80941Smrg device->rad_info.chip_class = VI; 224b8e80941Smrg else if (i >= CHIP_BONAIRE) 225b8e80941Smrg device->rad_info.chip_class = CIK; 226b8e80941Smrg else 227b8e80941Smrg device->rad_info.chip_class = SI; 228b8e80941Smrg 229b8e80941Smrg return; 230b8e80941Smrg } 231b8e80941Smrg } 232b8e80941Smrg 233b8e80941Smrg fprintf(stderr, "radv: Unknown family: %s\n", family); 234b8e80941Smrg exit(1); 235b8e80941Smrg} 236b8e80941Smrg 237b8e80941Smrgstatic VkResult 238b8e80941Smrgradv_physical_device_init(struct radv_physical_device *device, 239b8e80941Smrg struct radv_instance *instance, 240b8e80941Smrg drmDevicePtr drm_device) 241b8e80941Smrg{ 242b8e80941Smrg const char *path = drm_device->nodes[DRM_NODE_RENDER]; 243b8e80941Smrg VkResult result; 244b8e80941Smrg drmVersionPtr version; 245b8e80941Smrg int fd; 246b8e80941Smrg int master_fd = -1; 247b8e80941Smrg 248b8e80941Smrg fd = open(path, O_RDWR | O_CLOEXEC); 249b8e80941Smrg if (fd < 0) { 250b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 251b8e80941Smrg radv_logi("Could not open device '%s'", path); 252b8e80941Smrg 253b8e80941Smrg return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 254b8e80941Smrg } 255b8e80941Smrg 256b8e80941Smrg version = drmGetVersion(fd); 257b8e80941Smrg if (!version) { 258b8e80941Smrg close(fd); 259b8e80941Smrg 260b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 261b8e80941Smrg radv_logi("Could not get the kernel driver version for device '%s'", path); 262b8e80941Smrg 263b8e80941Smrg return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, 264b8e80941Smrg "failed to get version %s: %m", path); 265b8e80941Smrg } 266b8e80941Smrg 267b8e80941Smrg if (strcmp(version->name, "amdgpu")) { 268b8e80941Smrg drmFreeVersion(version); 269b8e80941Smrg close(fd); 270b8e80941Smrg 
271b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 272b8e80941Smrg radv_logi("Device '%s' is not using the amdgpu kernel driver.", path); 273b8e80941Smrg 274b8e80941Smrg return VK_ERROR_INCOMPATIBLE_DRIVER; 275b8e80941Smrg } 276b8e80941Smrg drmFreeVersion(version); 277b8e80941Smrg 278b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 279b8e80941Smrg radv_logi("Found compatible device '%s'.", path); 280b8e80941Smrg 281b8e80941Smrg device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 282b8e80941Smrg device->instance = instance; 283b8e80941Smrg 284b8e80941Smrg device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, 285b8e80941Smrg instance->perftest_flags); 286b8e80941Smrg if (!device->ws) { 287b8e80941Smrg result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 288b8e80941Smrg goto fail; 289b8e80941Smrg } 290b8e80941Smrg 291b8e80941Smrg if (instance->enabled_extensions.KHR_display) { 292b8e80941Smrg master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC); 293b8e80941Smrg if (master_fd >= 0) { 294b8e80941Smrg uint32_t accel_working = 0; 295b8e80941Smrg struct drm_amdgpu_info request = { 296b8e80941Smrg .return_pointer = (uintptr_t)&accel_working, 297b8e80941Smrg .return_size = sizeof(accel_working), 298b8e80941Smrg .query = AMDGPU_INFO_ACCEL_WORKING 299b8e80941Smrg }; 300b8e80941Smrg 301b8e80941Smrg if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) { 302b8e80941Smrg close(master_fd); 303b8e80941Smrg master_fd = -1; 304b8e80941Smrg } 305b8e80941Smrg } 306b8e80941Smrg } 307b8e80941Smrg 308b8e80941Smrg device->master_fd = master_fd; 309b8e80941Smrg device->local_fd = fd; 310b8e80941Smrg device->ws->query_info(device->ws, &device->rad_info); 311b8e80941Smrg 312b8e80941Smrg radv_handle_env_var_force_family(device); 313b8e80941Smrg 314b8e80941Smrg radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name)); 315b8e80941Smrg 
316b8e80941Smrg if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) { 317b8e80941Smrg device->ws->destroy(device->ws); 318b8e80941Smrg result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 319b8e80941Smrg "cannot generate UUID"); 320b8e80941Smrg goto fail; 321b8e80941Smrg } 322b8e80941Smrg 323b8e80941Smrg /* These flags affect shader compilation. */ 324b8e80941Smrg uint64_t shader_env_flags = 325b8e80941Smrg (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) | 326b8e80941Smrg (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0); 327b8e80941Smrg 328b8e80941Smrg /* The gpu id is already embedded in the uuid so we just pass "radv" 329b8e80941Smrg * when creating the cache. 330b8e80941Smrg */ 331b8e80941Smrg char buf[VK_UUID_SIZE * 2 + 1]; 332b8e80941Smrg disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); 333b8e80941Smrg device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags); 334b8e80941Smrg 335b8e80941Smrg if (device->rad_info.chip_class < VI || 336b8e80941Smrg device->rad_info.chip_class > GFX9) 337b8e80941Smrg fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); 338b8e80941Smrg 339b8e80941Smrg radv_get_driver_uuid(&device->driver_uuid); 340b8e80941Smrg radv_get_device_uuid(&device->rad_info, &device->device_uuid); 341b8e80941Smrg 342b8e80941Smrg if (device->rad_info.family == CHIP_STONEY || 343b8e80941Smrg device->rad_info.chip_class >= GFX9) { 344b8e80941Smrg device->has_rbplus = true; 345b8e80941Smrg device->rbplus_allowed = device->rad_info.family == CHIP_STONEY || 346b8e80941Smrg device->rad_info.family == CHIP_VEGA12 || 347b8e80941Smrg device->rad_info.family == CHIP_RAVEN || 348b8e80941Smrg device->rad_info.family == CHIP_RAVEN2; 349b8e80941Smrg } 350b8e80941Smrg 351b8e80941Smrg /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs 352b8e80941Smrg * on SI. 
353b8e80941Smrg */ 354b8e80941Smrg device->has_clear_state = device->rad_info.chip_class >= CIK; 355b8e80941Smrg 356b8e80941Smrg device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI; 357b8e80941Smrg 358b8e80941Smrg /* Vega10/Raven need a special workaround for a hardware bug. */ 359b8e80941Smrg device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || 360b8e80941Smrg device->rad_info.family == CHIP_RAVEN; 361b8e80941Smrg 362b8e80941Smrg /* Out-of-order primitive rasterization. */ 363b8e80941Smrg device->has_out_of_order_rast = device->rad_info.chip_class >= VI && 364b8e80941Smrg device->rad_info.max_se >= 2; 365b8e80941Smrg device->out_of_order_rast_allowed = device->has_out_of_order_rast && 366b8e80941Smrg !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER); 367b8e80941Smrg 368b8e80941Smrg device->dcc_msaa_allowed = 369b8e80941Smrg (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); 370b8e80941Smrg 371b8e80941Smrg /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */ 372b8e80941Smrg device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || 373b8e80941Smrg (device->rad_info.chip_class >= VI && 374b8e80941Smrg device->rad_info.me_fw_feature >= 41); 375b8e80941Smrg 376b8e80941Smrg radv_physical_device_init_mem_types(device); 377b8e80941Smrg radv_fill_device_extension_table(device, &device->supported_extensions); 378b8e80941Smrg 379b8e80941Smrg device->bus_info = *drm_device->businfo.pci; 380b8e80941Smrg 381b8e80941Smrg if ((device->instance->debug_flags & RADV_DEBUG_INFO)) 382b8e80941Smrg ac_print_gpu_info(&device->rad_info); 383b8e80941Smrg 384b8e80941Smrg /* The WSI is structured as a layer on top of the driver, so this has 385b8e80941Smrg * to be the last part of initialization (at least until we get other 386b8e80941Smrg * semi-layers). 
387b8e80941Smrg */ 388b8e80941Smrg result = radv_init_wsi(device); 389b8e80941Smrg if (result != VK_SUCCESS) { 390b8e80941Smrg device->ws->destroy(device->ws); 391b8e80941Smrg vk_error(instance, result); 392b8e80941Smrg goto fail; 393b8e80941Smrg } 394b8e80941Smrg 395b8e80941Smrg return VK_SUCCESS; 396b8e80941Smrg 397b8e80941Smrgfail: 398b8e80941Smrg close(fd); 399b8e80941Smrg if (master_fd != -1) 400b8e80941Smrg close(master_fd); 401b8e80941Smrg return result; 402b8e80941Smrg} 403b8e80941Smrg 404b8e80941Smrgstatic void 405b8e80941Smrgradv_physical_device_finish(struct radv_physical_device *device) 406b8e80941Smrg{ 407b8e80941Smrg radv_finish_wsi(device); 408b8e80941Smrg device->ws->destroy(device->ws); 409b8e80941Smrg disk_cache_destroy(device->disk_cache); 410b8e80941Smrg close(device->local_fd); 411b8e80941Smrg if (device->master_fd != -1) 412b8e80941Smrg close(device->master_fd); 413b8e80941Smrg} 414b8e80941Smrg 415b8e80941Smrgstatic void * 416b8e80941Smrgdefault_alloc_func(void *pUserData, size_t size, size_t align, 417b8e80941Smrg VkSystemAllocationScope allocationScope) 418b8e80941Smrg{ 419b8e80941Smrg return malloc(size); 420b8e80941Smrg} 421b8e80941Smrg 422b8e80941Smrgstatic void * 423b8e80941Smrgdefault_realloc_func(void *pUserData, void *pOriginal, size_t size, 424b8e80941Smrg size_t align, VkSystemAllocationScope allocationScope) 425b8e80941Smrg{ 426b8e80941Smrg return realloc(pOriginal, size); 427b8e80941Smrg} 428b8e80941Smrg 429b8e80941Smrgstatic void 430b8e80941Smrgdefault_free_func(void *pUserData, void *pMemory) 431b8e80941Smrg{ 432b8e80941Smrg free(pMemory); 433b8e80941Smrg} 434b8e80941Smrg 435b8e80941Smrgstatic const VkAllocationCallbacks default_alloc = { 436b8e80941Smrg .pUserData = NULL, 437b8e80941Smrg .pfnAllocation = default_alloc_func, 438b8e80941Smrg .pfnReallocation = default_realloc_func, 439b8e80941Smrg .pfnFree = default_free_func, 440b8e80941Smrg}; 441b8e80941Smrg 442b8e80941Smrgstatic const struct debug_control radv_debug_options[] 
= { 443b8e80941Smrg {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS}, 444b8e80941Smrg {"nodcc", RADV_DEBUG_NO_DCC}, 445b8e80941Smrg {"shaders", RADV_DEBUG_DUMP_SHADERS}, 446b8e80941Smrg {"nocache", RADV_DEBUG_NO_CACHE}, 447b8e80941Smrg {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, 448b8e80941Smrg {"nohiz", RADV_DEBUG_NO_HIZ}, 449b8e80941Smrg {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE}, 450b8e80941Smrg {"unsafemath", RADV_DEBUG_UNSAFE_MATH}, 451b8e80941Smrg {"allbos", RADV_DEBUG_ALL_BOS}, 452b8e80941Smrg {"noibs", RADV_DEBUG_NO_IBS}, 453b8e80941Smrg {"spirv", RADV_DEBUG_DUMP_SPIRV}, 454b8e80941Smrg {"vmfaults", RADV_DEBUG_VM_FAULTS}, 455b8e80941Smrg {"zerovram", RADV_DEBUG_ZERO_VRAM}, 456b8e80941Smrg {"syncshaders", RADV_DEBUG_SYNC_SHADERS}, 457b8e80941Smrg {"nosisched", RADV_DEBUG_NO_SISCHED}, 458b8e80941Smrg {"preoptir", RADV_DEBUG_PREOPTIR}, 459b8e80941Smrg {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS}, 460b8e80941Smrg {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER}, 461b8e80941Smrg {"info", RADV_DEBUG_INFO}, 462b8e80941Smrg {"errors", RADV_DEBUG_ERRORS}, 463b8e80941Smrg {"startup", RADV_DEBUG_STARTUP}, 464b8e80941Smrg {"checkir", RADV_DEBUG_CHECKIR}, 465b8e80941Smrg {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, 466b8e80941Smrg {"nobinning", RADV_DEBUG_NOBINNING}, 467b8e80941Smrg {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT}, 468b8e80941Smrg {NULL, 0} 469b8e80941Smrg}; 470b8e80941Smrg 471b8e80941Smrgconst char * 472b8e80941Smrgradv_get_debug_option_name(int id) 473b8e80941Smrg{ 474b8e80941Smrg assert(id < ARRAY_SIZE(radv_debug_options) - 1); 475b8e80941Smrg return radv_debug_options[id].string; 476b8e80941Smrg} 477b8e80941Smrg 478b8e80941Smrgstatic const struct debug_control radv_perftest_options[] = { 479b8e80941Smrg {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN}, 480b8e80941Smrg {"sisched", RADV_PERFTEST_SISCHED}, 481b8e80941Smrg {"localbos", RADV_PERFTEST_LOCAL_BOS}, 482b8e80941Smrg {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, 483b8e80941Smrg {"bolist", 
RADV_PERFTEST_BO_LIST}, 484b8e80941Smrg {NULL, 0} 485b8e80941Smrg}; 486b8e80941Smrg 487b8e80941Smrgconst char * 488b8e80941Smrgradv_get_perftest_option_name(int id) 489b8e80941Smrg{ 490b8e80941Smrg assert(id < ARRAY_SIZE(radv_perftest_options) - 1); 491b8e80941Smrg return radv_perftest_options[id].string; 492b8e80941Smrg} 493b8e80941Smrg 494b8e80941Smrgstatic void 495b8e80941Smrgradv_handle_per_app_options(struct radv_instance *instance, 496b8e80941Smrg const VkApplicationInfo *info) 497b8e80941Smrg{ 498b8e80941Smrg const char *name = info ? info->pApplicationName : NULL; 499b8e80941Smrg 500b8e80941Smrg if (!name) 501b8e80941Smrg return; 502b8e80941Smrg 503b8e80941Smrg if (!strcmp(name, "Talos - Linux - 32bit") || 504b8e80941Smrg !strcmp(name, "Talos - Linux - 64bit")) { 505b8e80941Smrg if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) { 506b8e80941Smrg /* Force enable LLVM sisched for Talos because it looks 507b8e80941Smrg * safe and it gives few more FPS. 508b8e80941Smrg */ 509b8e80941Smrg instance->perftest_flags |= RADV_PERFTEST_SISCHED; 510b8e80941Smrg } 511b8e80941Smrg } else if (!strcmp(name, "DOOM_VFR")) { 512b8e80941Smrg /* Work around a Doom VFR game bug */ 513b8e80941Smrg instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; 514b8e80941Smrg } else if (!strcmp(name, "MonsterHunterWorld.exe")) { 515b8e80941Smrg /* Workaround for a WaW hazard when LLVM moves/merges 516b8e80941Smrg * load/store memory operations. 
517b8e80941Smrg * See https://reviews.llvm.org/D61313 518b8e80941Smrg */ 519b8e80941Smrg if (HAVE_LLVM < 0x900) 520b8e80941Smrg instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT; 521b8e80941Smrg } 522b8e80941Smrg} 523b8e80941Smrg 524b8e80941Smrgstatic int radv_get_instance_extension_index(const char *name) 525b8e80941Smrg{ 526b8e80941Smrg for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) { 527b8e80941Smrg if (strcmp(name, radv_instance_extensions[i].extensionName) == 0) 528b8e80941Smrg return i; 529b8e80941Smrg } 530b8e80941Smrg return -1; 531b8e80941Smrg} 532b8e80941Smrg 533b8e80941Smrgstatic const char radv_dri_options_xml[] = 534b8e80941SmrgDRI_CONF_BEGIN 535b8e80941Smrg DRI_CONF_SECTION_PERFORMANCE 536b8e80941Smrg DRI_CONF_ADAPTIVE_SYNC("true") 537b8e80941Smrg DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) 538b8e80941Smrg DRI_CONF_SECTION_END 539b8e80941SmrgDRI_CONF_END; 540b8e80941Smrg 541b8e80941Smrgstatic void radv_init_dri_options(struct radv_instance *instance) 542b8e80941Smrg{ 543b8e80941Smrg driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml); 544b8e80941Smrg driParseConfigFiles(&instance->dri_options, 545b8e80941Smrg &instance->available_dri_options, 546b8e80941Smrg 0, "radv", NULL); 547b8e80941Smrg} 548b8e80941Smrg 549b8e80941SmrgVkResult radv_CreateInstance( 550b8e80941Smrg const VkInstanceCreateInfo* pCreateInfo, 551b8e80941Smrg const VkAllocationCallbacks* pAllocator, 552b8e80941Smrg VkInstance* pInstance) 553b8e80941Smrg{ 554b8e80941Smrg struct radv_instance *instance; 555b8e80941Smrg VkResult result; 556b8e80941Smrg 557b8e80941Smrg assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 558b8e80941Smrg 559b8e80941Smrg uint32_t client_version; 560b8e80941Smrg if (pCreateInfo->pApplicationInfo && 561b8e80941Smrg pCreateInfo->pApplicationInfo->apiVersion != 0) { 562b8e80941Smrg client_version = pCreateInfo->pApplicationInfo->apiVersion; 563b8e80941Smrg } else { 564b8e80941Smrg client_version = 
VK_API_VERSION_1_0; 565b8e80941Smrg } 566b8e80941Smrg 567b8e80941Smrg instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8, 568b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 569b8e80941Smrg if (!instance) 570b8e80941Smrg return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 571b8e80941Smrg 572b8e80941Smrg instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 573b8e80941Smrg 574b8e80941Smrg if (pAllocator) 575b8e80941Smrg instance->alloc = *pAllocator; 576b8e80941Smrg else 577b8e80941Smrg instance->alloc = default_alloc; 578b8e80941Smrg 579b8e80941Smrg instance->apiVersion = client_version; 580b8e80941Smrg instance->physicalDeviceCount = -1; 581b8e80941Smrg 582b8e80941Smrg instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), 583b8e80941Smrg radv_debug_options); 584b8e80941Smrg 585b8e80941Smrg instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), 586b8e80941Smrg radv_perftest_options); 587b8e80941Smrg 588b8e80941Smrg 589b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 590b8e80941Smrg radv_logi("Created an instance"); 591b8e80941Smrg 592b8e80941Smrg for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 593b8e80941Smrg const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; 594b8e80941Smrg int index = radv_get_instance_extension_index(ext_name); 595b8e80941Smrg 596b8e80941Smrg if (index < 0 || !radv_supported_instance_extensions.extensions[index]) { 597b8e80941Smrg vk_free2(&default_alloc, pAllocator, instance); 598b8e80941Smrg return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT); 599b8e80941Smrg } 600b8e80941Smrg 601b8e80941Smrg instance->enabled_extensions.extensions[index] = true; 602b8e80941Smrg } 603b8e80941Smrg 604b8e80941Smrg result = vk_debug_report_instance_init(&instance->debug_report_callbacks); 605b8e80941Smrg if (result != VK_SUCCESS) { 606b8e80941Smrg vk_free2(&default_alloc, pAllocator, instance); 607b8e80941Smrg return vk_error(instance, result); 608b8e80941Smrg } 
609b8e80941Smrg 610b8e80941Smrg _mesa_locale_init(); 611b8e80941Smrg glsl_type_singleton_init_or_ref(); 612b8e80941Smrg 613b8e80941Smrg VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 614b8e80941Smrg 615b8e80941Smrg radv_init_dri_options(instance); 616b8e80941Smrg radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo); 617b8e80941Smrg 618b8e80941Smrg *pInstance = radv_instance_to_handle(instance); 619b8e80941Smrg 620b8e80941Smrg return VK_SUCCESS; 621b8e80941Smrg} 622b8e80941Smrg 623b8e80941Smrgvoid radv_DestroyInstance( 624b8e80941Smrg VkInstance _instance, 625b8e80941Smrg const VkAllocationCallbacks* pAllocator) 626b8e80941Smrg{ 627b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 628b8e80941Smrg 629b8e80941Smrg if (!instance) 630b8e80941Smrg return; 631b8e80941Smrg 632b8e80941Smrg for (int i = 0; i < instance->physicalDeviceCount; ++i) { 633b8e80941Smrg radv_physical_device_finish(instance->physicalDevices + i); 634b8e80941Smrg } 635b8e80941Smrg 636b8e80941Smrg VG(VALGRIND_DESTROY_MEMPOOL(instance)); 637b8e80941Smrg 638b8e80941Smrg glsl_type_singleton_decref(); 639b8e80941Smrg _mesa_locale_fini(); 640b8e80941Smrg 641b8e80941Smrg driDestroyOptionCache(&instance->dri_options); 642b8e80941Smrg driDestroyOptionInfo(&instance->available_dri_options); 643b8e80941Smrg 644b8e80941Smrg vk_debug_report_instance_destroy(&instance->debug_report_callbacks); 645b8e80941Smrg 646b8e80941Smrg vk_free(&instance->alloc, instance); 647b8e80941Smrg} 648b8e80941Smrg 649b8e80941Smrgstatic VkResult 650b8e80941Smrgradv_enumerate_devices(struct radv_instance *instance) 651b8e80941Smrg{ 652b8e80941Smrg /* TODO: Check for more devices ? 
*/ 653b8e80941Smrg drmDevicePtr devices[8]; 654b8e80941Smrg VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; 655b8e80941Smrg int max_devices; 656b8e80941Smrg 657b8e80941Smrg instance->physicalDeviceCount = 0; 658b8e80941Smrg 659b8e80941Smrg max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); 660b8e80941Smrg 661b8e80941Smrg if (instance->debug_flags & RADV_DEBUG_STARTUP) 662b8e80941Smrg radv_logi("Found %d drm nodes", max_devices); 663b8e80941Smrg 664b8e80941Smrg if (max_devices < 1) 665b8e80941Smrg return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); 666b8e80941Smrg 667b8e80941Smrg for (unsigned i = 0; i < (unsigned)max_devices; i++) { 668b8e80941Smrg if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && 669b8e80941Smrg devices[i]->bustype == DRM_BUS_PCI && 670b8e80941Smrg devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) { 671b8e80941Smrg 672b8e80941Smrg result = radv_physical_device_init(instance->physicalDevices + 673b8e80941Smrg instance->physicalDeviceCount, 674b8e80941Smrg instance, 675b8e80941Smrg devices[i]); 676b8e80941Smrg if (result == VK_SUCCESS) 677b8e80941Smrg ++instance->physicalDeviceCount; 678b8e80941Smrg else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) 679b8e80941Smrg break; 680b8e80941Smrg } 681b8e80941Smrg } 682b8e80941Smrg drmFreeDevices(devices, max_devices); 683b8e80941Smrg 684b8e80941Smrg return result; 685b8e80941Smrg} 686b8e80941Smrg 687b8e80941SmrgVkResult radv_EnumeratePhysicalDevices( 688b8e80941Smrg VkInstance _instance, 689b8e80941Smrg uint32_t* pPhysicalDeviceCount, 690b8e80941Smrg VkPhysicalDevice* pPhysicalDevices) 691b8e80941Smrg{ 692b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 693b8e80941Smrg VkResult result; 694b8e80941Smrg 695b8e80941Smrg if (instance->physicalDeviceCount < 0) { 696b8e80941Smrg result = radv_enumerate_devices(instance); 697b8e80941Smrg if (result != VK_SUCCESS && 698b8e80941Smrg result != VK_ERROR_INCOMPATIBLE_DRIVER) 699b8e80941Smrg return result; 700b8e80941Smrg } 
701b8e80941Smrg 702b8e80941Smrg if (!pPhysicalDevices) { 703b8e80941Smrg *pPhysicalDeviceCount = instance->physicalDeviceCount; 704b8e80941Smrg } else { 705b8e80941Smrg *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount); 706b8e80941Smrg for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i) 707b8e80941Smrg pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i); 708b8e80941Smrg } 709b8e80941Smrg 710b8e80941Smrg return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE 711b8e80941Smrg : VK_SUCCESS; 712b8e80941Smrg} 713b8e80941Smrg 714b8e80941SmrgVkResult radv_EnumeratePhysicalDeviceGroups( 715b8e80941Smrg VkInstance _instance, 716b8e80941Smrg uint32_t* pPhysicalDeviceGroupCount, 717b8e80941Smrg VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) 718b8e80941Smrg{ 719b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 720b8e80941Smrg VkResult result; 721b8e80941Smrg 722b8e80941Smrg if (instance->physicalDeviceCount < 0) { 723b8e80941Smrg result = radv_enumerate_devices(instance); 724b8e80941Smrg if (result != VK_SUCCESS && 725b8e80941Smrg result != VK_ERROR_INCOMPATIBLE_DRIVER) 726b8e80941Smrg return result; 727b8e80941Smrg } 728b8e80941Smrg 729b8e80941Smrg if (!pPhysicalDeviceGroupProperties) { 730b8e80941Smrg *pPhysicalDeviceGroupCount = instance->physicalDeviceCount; 731b8e80941Smrg } else { 732b8e80941Smrg *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount); 733b8e80941Smrg for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) { 734b8e80941Smrg pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1; 735b8e80941Smrg pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i); 736b8e80941Smrg pPhysicalDeviceGroupProperties[i].subsetAllocation = false; 737b8e80941Smrg } 738b8e80941Smrg } 739b8e80941Smrg return *pPhysicalDeviceGroupCount < 
instance->physicalDeviceCount ? VK_INCOMPLETE 740b8e80941Smrg : VK_SUCCESS; 741b8e80941Smrg} 742b8e80941Smrg 743b8e80941Smrgvoid radv_GetPhysicalDeviceFeatures( 744b8e80941Smrg VkPhysicalDevice physicalDevice, 745b8e80941Smrg VkPhysicalDeviceFeatures* pFeatures) 746b8e80941Smrg{ 747b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 748b8e80941Smrg memset(pFeatures, 0, sizeof(*pFeatures)); 749b8e80941Smrg 750b8e80941Smrg *pFeatures = (VkPhysicalDeviceFeatures) { 751b8e80941Smrg .robustBufferAccess = true, 752b8e80941Smrg .fullDrawIndexUint32 = true, 753b8e80941Smrg .imageCubeArray = true, 754b8e80941Smrg .independentBlend = true, 755b8e80941Smrg .geometryShader = true, 756b8e80941Smrg .tessellationShader = true, 757b8e80941Smrg .sampleRateShading = true, 758b8e80941Smrg .dualSrcBlend = true, 759b8e80941Smrg .logicOp = true, 760b8e80941Smrg .multiDrawIndirect = true, 761b8e80941Smrg .drawIndirectFirstInstance = true, 762b8e80941Smrg .depthClamp = true, 763b8e80941Smrg .depthBiasClamp = true, 764b8e80941Smrg .fillModeNonSolid = true, 765b8e80941Smrg .depthBounds = true, 766b8e80941Smrg .wideLines = true, 767b8e80941Smrg .largePoints = true, 768b8e80941Smrg .alphaToOne = true, 769b8e80941Smrg .multiViewport = true, 770b8e80941Smrg .samplerAnisotropy = true, 771b8e80941Smrg .textureCompressionETC2 = radv_device_supports_etc(pdevice), 772b8e80941Smrg .textureCompressionASTC_LDR = false, 773b8e80941Smrg .textureCompressionBC = true, 774b8e80941Smrg .occlusionQueryPrecise = true, 775b8e80941Smrg .pipelineStatisticsQuery = true, 776b8e80941Smrg .vertexPipelineStoresAndAtomics = true, 777b8e80941Smrg .fragmentStoresAndAtomics = true, 778b8e80941Smrg .shaderTessellationAndGeometryPointSize = true, 779b8e80941Smrg .shaderImageGatherExtended = true, 780b8e80941Smrg .shaderStorageImageExtendedFormats = true, 781b8e80941Smrg .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, 782b8e80941Smrg .shaderUniformBufferArrayDynamicIndexing = 
true, 783b8e80941Smrg .shaderSampledImageArrayDynamicIndexing = true, 784b8e80941Smrg .shaderStorageBufferArrayDynamicIndexing = true, 785b8e80941Smrg .shaderStorageImageArrayDynamicIndexing = true, 786b8e80941Smrg .shaderStorageImageReadWithoutFormat = true, 787b8e80941Smrg .shaderStorageImageWriteWithoutFormat = true, 788b8e80941Smrg .shaderClipDistance = true, 789b8e80941Smrg .shaderCullDistance = true, 790b8e80941Smrg .shaderFloat64 = true, 791b8e80941Smrg .shaderInt64 = true, 792b8e80941Smrg .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, 793b8e80941Smrg .sparseBinding = true, 794b8e80941Smrg .variableMultisampleRate = true, 795b8e80941Smrg .inheritedQueries = true, 796b8e80941Smrg }; 797b8e80941Smrg} 798b8e80941Smrg 799b8e80941Smrgvoid radv_GetPhysicalDeviceFeatures2( 800b8e80941Smrg VkPhysicalDevice physicalDevice, 801b8e80941Smrg VkPhysicalDeviceFeatures2 *pFeatures) 802b8e80941Smrg{ 803b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 804b8e80941Smrg vk_foreach_struct(ext, pFeatures->pNext) { 805b8e80941Smrg switch (ext->sType) { 806b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { 807b8e80941Smrg VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext; 808b8e80941Smrg features->variablePointersStorageBuffer = true; 809b8e80941Smrg features->variablePointers = true; 810b8e80941Smrg break; 811b8e80941Smrg } 812b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { 813b8e80941Smrg VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext; 814b8e80941Smrg features->multiview = true; 815b8e80941Smrg features->multiviewGeometryShader = true; 816b8e80941Smrg features->multiviewTessellationShader = true; 817b8e80941Smrg break; 818b8e80941Smrg } 819b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { 820b8e80941Smrg VkPhysicalDeviceShaderDrawParametersFeatures *features = 821b8e80941Smrg 
(VkPhysicalDeviceShaderDrawParametersFeatures*)ext; 822b8e80941Smrg features->shaderDrawParameters = true; 823b8e80941Smrg break; 824b8e80941Smrg } 825b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { 826b8e80941Smrg VkPhysicalDeviceProtectedMemoryFeatures *features = 827b8e80941Smrg (VkPhysicalDeviceProtectedMemoryFeatures*)ext; 828b8e80941Smrg features->protectedMemory = false; 829b8e80941Smrg break; 830b8e80941Smrg } 831b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { 832b8e80941Smrg VkPhysicalDevice16BitStorageFeatures *features = 833b8e80941Smrg (VkPhysicalDevice16BitStorageFeatures*)ext; 834b8e80941Smrg bool enabled = pdevice->rad_info.chip_class >= VI; 835b8e80941Smrg features->storageBuffer16BitAccess = enabled; 836b8e80941Smrg features->uniformAndStorageBuffer16BitAccess = enabled; 837b8e80941Smrg features->storagePushConstant16 = enabled; 838b8e80941Smrg features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900; 839b8e80941Smrg break; 840b8e80941Smrg } 841b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { 842b8e80941Smrg VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = 843b8e80941Smrg (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext; 844b8e80941Smrg features->samplerYcbcrConversion = true; 845b8e80941Smrg break; 846b8e80941Smrg } 847b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { 848b8e80941Smrg VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features = 849b8e80941Smrg (VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext; 850b8e80941Smrg features->shaderInputAttachmentArrayDynamicIndexing = true; 851b8e80941Smrg features->shaderUniformTexelBufferArrayDynamicIndexing = true; 852b8e80941Smrg features->shaderStorageTexelBufferArrayDynamicIndexing = true; 853b8e80941Smrg features->shaderUniformBufferArrayNonUniformIndexing = true; 854b8e80941Smrg features->shaderSampledImageArrayNonUniformIndexing = true; 
855b8e80941Smrg features->shaderStorageBufferArrayNonUniformIndexing = true; 856b8e80941Smrg features->shaderStorageImageArrayNonUniformIndexing = true; 857b8e80941Smrg features->shaderInputAttachmentArrayNonUniformIndexing = true; 858b8e80941Smrg features->shaderUniformTexelBufferArrayNonUniformIndexing = true; 859b8e80941Smrg features->shaderStorageTexelBufferArrayNonUniformIndexing = true; 860b8e80941Smrg features->descriptorBindingUniformBufferUpdateAfterBind = true; 861b8e80941Smrg features->descriptorBindingSampledImageUpdateAfterBind = true; 862b8e80941Smrg features->descriptorBindingStorageImageUpdateAfterBind = true; 863b8e80941Smrg features->descriptorBindingStorageBufferUpdateAfterBind = true; 864b8e80941Smrg features->descriptorBindingUniformTexelBufferUpdateAfterBind = true; 865b8e80941Smrg features->descriptorBindingStorageTexelBufferUpdateAfterBind = true; 866b8e80941Smrg features->descriptorBindingUpdateUnusedWhilePending = true; 867b8e80941Smrg features->descriptorBindingPartiallyBound = true; 868b8e80941Smrg features->descriptorBindingVariableDescriptorCount = true; 869b8e80941Smrg features->runtimeDescriptorArray = true; 870b8e80941Smrg break; 871b8e80941Smrg } 872b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { 873b8e80941Smrg VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = 874b8e80941Smrg (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext; 875b8e80941Smrg features->conditionalRendering = true; 876b8e80941Smrg features->inheritedConditionalRendering = false; 877b8e80941Smrg break; 878b8e80941Smrg } 879b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { 880b8e80941Smrg VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = 881b8e80941Smrg (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; 882b8e80941Smrg features->vertexAttributeInstanceRateDivisor = VK_TRUE; 883b8e80941Smrg features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE; 
884b8e80941Smrg break; 885b8e80941Smrg } 886b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { 887b8e80941Smrg VkPhysicalDeviceTransformFeedbackFeaturesEXT *features = 888b8e80941Smrg (VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext; 889b8e80941Smrg features->transformFeedback = true; 890b8e80941Smrg features->geometryStreams = true; 891b8e80941Smrg break; 892b8e80941Smrg } 893b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { 894b8e80941Smrg VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = 895b8e80941Smrg (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; 896b8e80941Smrg features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK; 897b8e80941Smrg break; 898b8e80941Smrg } 899b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: { 900b8e80941Smrg VkPhysicalDeviceMemoryPriorityFeaturesEXT *features = 901b8e80941Smrg (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext; 902b8e80941Smrg features->memoryPriority = VK_TRUE; 903b8e80941Smrg break; 904b8e80941Smrg } 905b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: { 906b8e80941Smrg VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = 907b8e80941Smrg (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext; 908b8e80941Smrg features->bufferDeviceAddress = true; 909b8e80941Smrg features->bufferDeviceAddressCaptureReplay = false; 910b8e80941Smrg features->bufferDeviceAddressMultiDevice = false; 911b8e80941Smrg break; 912b8e80941Smrg } 913b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: { 914b8e80941Smrg VkPhysicalDeviceDepthClipEnableFeaturesEXT *features = 915b8e80941Smrg (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext; 916b8e80941Smrg features->depthClipEnable = true; 917b8e80941Smrg break; 918b8e80941Smrg } 919b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: { 920b8e80941Smrg 
VkPhysicalDeviceHostQueryResetFeaturesEXT *features = 921b8e80941Smrg (VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext; 922b8e80941Smrg features->hostQueryReset = true; 923b8e80941Smrg break; 924b8e80941Smrg } 925b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { 926b8e80941Smrg VkPhysicalDevice8BitStorageFeaturesKHR *features = 927b8e80941Smrg (VkPhysicalDevice8BitStorageFeaturesKHR*)ext; 928b8e80941Smrg bool enabled = pdevice->rad_info.chip_class >= VI; 929b8e80941Smrg features->storageBuffer8BitAccess = enabled; 930b8e80941Smrg features->uniformAndStorageBuffer8BitAccess = enabled; 931b8e80941Smrg features->storagePushConstant8 = enabled; 932b8e80941Smrg break; 933b8e80941Smrg } 934b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: { 935b8e80941Smrg VkPhysicalDeviceFloat16Int8FeaturesKHR *features = 936b8e80941Smrg (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext; 937b8e80941Smrg features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800; 938b8e80941Smrg features->shaderInt8 = true; 939b8e80941Smrg break; 940b8e80941Smrg } 941b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { 942b8e80941Smrg VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = 943b8e80941Smrg (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext; 944b8e80941Smrg /* TODO: Enable this once the driver supports 64-bit 945b8e80941Smrg * compare&swap atomic operations. 
946b8e80941Smrg */ 947b8e80941Smrg features->shaderBufferInt64Atomics = false; 948b8e80941Smrg features->shaderSharedInt64Atomics = false; 949b8e80941Smrg break; 950b8e80941Smrg } 951b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { 952b8e80941Smrg VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features = 953b8e80941Smrg (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext; 954b8e80941Smrg 955b8e80941Smrg features->inlineUniformBlock = true; 956b8e80941Smrg features->descriptorBindingInlineUniformBlockUpdateAfterBind = true; 957b8e80941Smrg break; 958b8e80941Smrg } 959b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: { 960b8e80941Smrg VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features = 961b8e80941Smrg (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext; 962b8e80941Smrg features->computeDerivativeGroupQuads = false; 963b8e80941Smrg features->computeDerivativeGroupLinear = true; 964b8e80941Smrg break; 965b8e80941Smrg } 966b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: { 967b8e80941Smrg VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features = 968b8e80941Smrg (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext; 969b8e80941Smrg features->ycbcrImageArrays = true; 970b8e80941Smrg break; 971b8e80941Smrg } 972b8e80941Smrg default: 973b8e80941Smrg break; 974b8e80941Smrg } 975b8e80941Smrg } 976b8e80941Smrg return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); 977b8e80941Smrg} 978b8e80941Smrg 979b8e80941Smrgvoid radv_GetPhysicalDeviceProperties( 980b8e80941Smrg VkPhysicalDevice physicalDevice, 981b8e80941Smrg VkPhysicalDeviceProperties* pProperties) 982b8e80941Smrg{ 983b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 984b8e80941Smrg VkSampleCountFlags sample_counts = 0xf; 985b8e80941Smrg 986b8e80941Smrg /* make sure that the entire descriptor set is addressable with a signed 987b8e80941Smrg * 
32-bit int. So the sum of all limits scaled by descriptor size has to 988b8e80941Smrg * be at most 2 GiB. the combined image & samples object count as one of 989b8e80941Smrg * both. This limit is for the pipeline layout, not for the set layout, but 990b8e80941Smrg * there is no set limit, so we just set a pipeline limit. I don't think 991b8e80941Smrg * any app is going to hit this soon. */ 992b8e80941Smrg size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) / 993b8e80941Smrg (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + 994b8e80941Smrg 32 /* storage buffer, 32 due to potential space wasted on alignment */ + 995b8e80941Smrg 32 /* sampler, largest when combined with image */ + 996b8e80941Smrg 64 /* sampled image */ + 997b8e80941Smrg 64 /* storage image */); 998b8e80941Smrg 999b8e80941Smrg VkPhysicalDeviceLimits limits = { 1000b8e80941Smrg .maxImageDimension1D = (1 << 14), 1001b8e80941Smrg .maxImageDimension2D = (1 << 14), 1002b8e80941Smrg .maxImageDimension3D = (1 << 11), 1003b8e80941Smrg .maxImageDimensionCube = (1 << 14), 1004b8e80941Smrg .maxImageArrayLayers = (1 << 11), 1005b8e80941Smrg .maxTexelBufferElements = 128 * 1024 * 1024, 1006b8e80941Smrg .maxUniformBufferRange = UINT32_MAX, 1007b8e80941Smrg .maxStorageBufferRange = UINT32_MAX, 1008b8e80941Smrg .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, 1009b8e80941Smrg .maxMemoryAllocationCount = UINT32_MAX, 1010b8e80941Smrg .maxSamplerAllocationCount = 64 * 1024, 1011b8e80941Smrg .bufferImageGranularity = 64, /* A cache line */ 1012b8e80941Smrg .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */ 1013b8e80941Smrg .maxBoundDescriptorSets = MAX_SETS, 1014b8e80941Smrg .maxPerStageDescriptorSamplers = max_descriptor_set_size, 1015b8e80941Smrg .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size, 1016b8e80941Smrg .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size, 1017b8e80941Smrg .maxPerStageDescriptorSampledImages = 
max_descriptor_set_size, 1018b8e80941Smrg .maxPerStageDescriptorStorageImages = max_descriptor_set_size, 1019b8e80941Smrg .maxPerStageDescriptorInputAttachments = max_descriptor_set_size, 1020b8e80941Smrg .maxPerStageResources = max_descriptor_set_size, 1021b8e80941Smrg .maxDescriptorSetSamplers = max_descriptor_set_size, 1022b8e80941Smrg .maxDescriptorSetUniformBuffers = max_descriptor_set_size, 1023b8e80941Smrg .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS, 1024b8e80941Smrg .maxDescriptorSetStorageBuffers = max_descriptor_set_size, 1025b8e80941Smrg .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS, 1026b8e80941Smrg .maxDescriptorSetSampledImages = max_descriptor_set_size, 1027b8e80941Smrg .maxDescriptorSetStorageImages = max_descriptor_set_size, 1028b8e80941Smrg .maxDescriptorSetInputAttachments = max_descriptor_set_size, 1029b8e80941Smrg .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS, 1030b8e80941Smrg .maxVertexInputBindings = MAX_VBS, 1031b8e80941Smrg .maxVertexInputAttributeOffset = 2047, 1032b8e80941Smrg .maxVertexInputBindingStride = 2048, 1033b8e80941Smrg .maxVertexOutputComponents = 128, 1034b8e80941Smrg .maxTessellationGenerationLevel = 64, 1035b8e80941Smrg .maxTessellationPatchSize = 32, 1036b8e80941Smrg .maxTessellationControlPerVertexInputComponents = 128, 1037b8e80941Smrg .maxTessellationControlPerVertexOutputComponents = 128, 1038b8e80941Smrg .maxTessellationControlPerPatchOutputComponents = 120, 1039b8e80941Smrg .maxTessellationControlTotalOutputComponents = 4096, 1040b8e80941Smrg .maxTessellationEvaluationInputComponents = 128, 1041b8e80941Smrg .maxTessellationEvaluationOutputComponents = 128, 1042b8e80941Smrg .maxGeometryShaderInvocations = 127, 1043b8e80941Smrg .maxGeometryInputComponents = 64, 1044b8e80941Smrg .maxGeometryOutputComponents = 128, 1045b8e80941Smrg .maxGeometryOutputVertices = 256, 1046b8e80941Smrg .maxGeometryTotalOutputComponents = 1024, 1047b8e80941Smrg .maxFragmentInputComponents = 128, 
1048b8e80941Smrg .maxFragmentOutputAttachments = 8, 1049b8e80941Smrg .maxFragmentDualSrcAttachments = 1, 1050b8e80941Smrg .maxFragmentCombinedOutputResources = 8, 1051b8e80941Smrg .maxComputeSharedMemorySize = 32768, 1052b8e80941Smrg .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, 1053b8e80941Smrg .maxComputeWorkGroupInvocations = 2048, 1054b8e80941Smrg .maxComputeWorkGroupSize = { 1055b8e80941Smrg 2048, 1056b8e80941Smrg 2048, 1057b8e80941Smrg 2048 1058b8e80941Smrg }, 1059b8e80941Smrg .subPixelPrecisionBits = 8, 1060b8e80941Smrg .subTexelPrecisionBits = 8, 1061b8e80941Smrg .mipmapPrecisionBits = 8, 1062b8e80941Smrg .maxDrawIndexedIndexValue = UINT32_MAX, 1063b8e80941Smrg .maxDrawIndirectCount = UINT32_MAX, 1064b8e80941Smrg .maxSamplerLodBias = 16, 1065b8e80941Smrg .maxSamplerAnisotropy = 16, 1066b8e80941Smrg .maxViewports = MAX_VIEWPORTS, 1067b8e80941Smrg .maxViewportDimensions = { (1 << 14), (1 << 14) }, 1068b8e80941Smrg .viewportBoundsRange = { INT16_MIN, INT16_MAX }, 1069b8e80941Smrg .viewportSubPixelBits = 8, 1070b8e80941Smrg .minMemoryMapAlignment = 4096, /* A page */ 1071b8e80941Smrg .minTexelBufferOffsetAlignment = 1, 1072b8e80941Smrg .minUniformBufferOffsetAlignment = 4, 1073b8e80941Smrg .minStorageBufferOffsetAlignment = 4, 1074b8e80941Smrg .minTexelOffset = -32, 1075b8e80941Smrg .maxTexelOffset = 31, 1076b8e80941Smrg .minTexelGatherOffset = -32, 1077b8e80941Smrg .maxTexelGatherOffset = 31, 1078b8e80941Smrg .minInterpolationOffset = -2, 1079b8e80941Smrg .maxInterpolationOffset = 2, 1080b8e80941Smrg .subPixelInterpolationOffsetBits = 8, 1081b8e80941Smrg .maxFramebufferWidth = (1 << 14), 1082b8e80941Smrg .maxFramebufferHeight = (1 << 14), 1083b8e80941Smrg .maxFramebufferLayers = (1 << 10), 1084b8e80941Smrg .framebufferColorSampleCounts = sample_counts, 1085b8e80941Smrg .framebufferDepthSampleCounts = sample_counts, 1086b8e80941Smrg .framebufferStencilSampleCounts = sample_counts, 1087b8e80941Smrg .framebufferNoAttachmentsSampleCounts = sample_counts, 
1088b8e80941Smrg .maxColorAttachments = MAX_RTS, 1089b8e80941Smrg .sampledImageColorSampleCounts = sample_counts, 1090b8e80941Smrg .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, 1091b8e80941Smrg .sampledImageDepthSampleCounts = sample_counts, 1092b8e80941Smrg .sampledImageStencilSampleCounts = sample_counts, 1093b8e80941Smrg .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT, 1094b8e80941Smrg .maxSampleMaskWords = 1, 1095b8e80941Smrg .timestampComputeAndGraphics = true, 1096b8e80941Smrg .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, 1097b8e80941Smrg .maxClipDistances = 8, 1098b8e80941Smrg .maxCullDistances = 8, 1099b8e80941Smrg .maxCombinedClipAndCullDistances = 8, 1100b8e80941Smrg .discreteQueuePriorities = 2, 1101b8e80941Smrg .pointSizeRange = { 0.0, 8192.0 }, 1102b8e80941Smrg .lineWidthRange = { 0.0, 7.9921875 }, 1103b8e80941Smrg .pointSizeGranularity = (1.0 / 8.0), 1104b8e80941Smrg .lineWidthGranularity = (1.0 / 128.0), 1105b8e80941Smrg .strictLines = false, /* FINISHME */ 1106b8e80941Smrg .standardSampleLocations = true, 1107b8e80941Smrg .optimalBufferCopyOffsetAlignment = 128, 1108b8e80941Smrg .optimalBufferCopyRowPitchAlignment = 128, 1109b8e80941Smrg .nonCoherentAtomSize = 64, 1110b8e80941Smrg }; 1111b8e80941Smrg 1112b8e80941Smrg *pProperties = (VkPhysicalDeviceProperties) { 1113b8e80941Smrg .apiVersion = radv_physical_device_api_version(pdevice), 1114b8e80941Smrg .driverVersion = vk_get_driver_version(), 1115b8e80941Smrg .vendorID = ATI_VENDOR_ID, 1116b8e80941Smrg .deviceID = pdevice->rad_info.pci_id, 1117b8e80941Smrg .deviceType = pdevice->rad_info.has_dedicated_vram ? 
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, 1118b8e80941Smrg .limits = limits, 1119b8e80941Smrg .sparseProperties = {0}, 1120b8e80941Smrg }; 1121b8e80941Smrg 1122b8e80941Smrg strcpy(pProperties->deviceName, pdevice->name); 1123b8e80941Smrg memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE); 1124b8e80941Smrg} 1125b8e80941Smrg 1126b8e80941Smrgvoid radv_GetPhysicalDeviceProperties2( 1127b8e80941Smrg VkPhysicalDevice physicalDevice, 1128b8e80941Smrg VkPhysicalDeviceProperties2 *pProperties) 1129b8e80941Smrg{ 1130b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 1131b8e80941Smrg radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); 1132b8e80941Smrg 1133b8e80941Smrg vk_foreach_struct(ext, pProperties->pNext) { 1134b8e80941Smrg switch (ext->sType) { 1135b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { 1136b8e80941Smrg VkPhysicalDevicePushDescriptorPropertiesKHR *properties = 1137b8e80941Smrg (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; 1138b8e80941Smrg properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; 1139b8e80941Smrg break; 1140b8e80941Smrg } 1141b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { 1142b8e80941Smrg VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext; 1143b8e80941Smrg memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); 1144b8e80941Smrg memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); 1145b8e80941Smrg properties->deviceLUIDValid = false; 1146b8e80941Smrg break; 1147b8e80941Smrg } 1148b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { 1149b8e80941Smrg VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext; 1150b8e80941Smrg properties->maxMultiviewViewCount = MAX_VIEWS; 1151b8e80941Smrg properties->maxMultiviewInstanceIndex = INT_MAX; 1152b8e80941Smrg break; 
1153b8e80941Smrg } 1154b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { 1155b8e80941Smrg VkPhysicalDevicePointClippingProperties *properties = 1156b8e80941Smrg (VkPhysicalDevicePointClippingProperties*)ext; 1157b8e80941Smrg properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; 1158b8e80941Smrg break; 1159b8e80941Smrg } 1160b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: { 1161b8e80941Smrg VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties = 1162b8e80941Smrg (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext; 1163b8e80941Smrg properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES; 1164b8e80941Smrg break; 1165b8e80941Smrg } 1166b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: { 1167b8e80941Smrg VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties = 1168b8e80941Smrg (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext; 1169b8e80941Smrg properties->minImportedHostPointerAlignment = 4096; 1170b8e80941Smrg break; 1171b8e80941Smrg } 1172b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { 1173b8e80941Smrg VkPhysicalDeviceSubgroupProperties *properties = 1174b8e80941Smrg (VkPhysicalDeviceSubgroupProperties*)ext; 1175b8e80941Smrg properties->subgroupSize = 64; 1176b8e80941Smrg properties->supportedStages = VK_SHADER_STAGE_ALL; 1177b8e80941Smrg properties->supportedOperations = 1178b8e80941Smrg VK_SUBGROUP_FEATURE_BASIC_BIT | 1179b8e80941Smrg VK_SUBGROUP_FEATURE_BALLOT_BIT | 1180b8e80941Smrg VK_SUBGROUP_FEATURE_QUAD_BIT | 1181b8e80941Smrg VK_SUBGROUP_FEATURE_VOTE_BIT; 1182b8e80941Smrg if (pdevice->rad_info.chip_class >= VI) { 1183b8e80941Smrg properties->supportedOperations |= 1184b8e80941Smrg VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | 1185b8e80941Smrg VK_SUBGROUP_FEATURE_SHUFFLE_BIT | 1186b8e80941Smrg VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; 1187b8e80941Smrg } 1188b8e80941Smrg 
properties->quadOperationsInAllStages = true; 1189b8e80941Smrg break; 1190b8e80941Smrg } 1191b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { 1192b8e80941Smrg VkPhysicalDeviceMaintenance3Properties *properties = 1193b8e80941Smrg (VkPhysicalDeviceMaintenance3Properties*)ext; 1194b8e80941Smrg /* Make sure everything is addressable by a signed 32-bit int, and 1195b8e80941Smrg * our largest descriptors are 96 bytes. */ 1196b8e80941Smrg properties->maxPerSetDescriptors = (1ull << 31) / 96; 1197b8e80941Smrg /* Our buffer size fields allow only this much */ 1198b8e80941Smrg properties->maxMemoryAllocationSize = 0xFFFFFFFFull; 1199b8e80941Smrg break; 1200b8e80941Smrg } 1201b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { 1202b8e80941Smrg VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties = 1203b8e80941Smrg (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext; 1204b8e80941Smrg /* GFX6-8 only support single channel min/max filter. */ 1205b8e80941Smrg properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9; 1206b8e80941Smrg properties->filterMinmaxSingleComponentFormats = true; 1207b8e80941Smrg break; 1208b8e80941Smrg } 1209b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: { 1210b8e80941Smrg VkPhysicalDeviceShaderCorePropertiesAMD *properties = 1211b8e80941Smrg (VkPhysicalDeviceShaderCorePropertiesAMD *)ext; 1212b8e80941Smrg 1213b8e80941Smrg /* Shader engines. 
*/ 1214b8e80941Smrg properties->shaderEngineCount = 1215b8e80941Smrg pdevice->rad_info.max_se; 1216b8e80941Smrg properties->shaderArraysPerEngineCount = 1217b8e80941Smrg pdevice->rad_info.max_sh_per_se; 1218b8e80941Smrg properties->computeUnitsPerShaderArray = 1219b8e80941Smrg pdevice->rad_info.num_good_cu_per_sh; 1220b8e80941Smrg properties->simdPerComputeUnit = 4; 1221b8e80941Smrg properties->wavefrontsPerSimd = 1222b8e80941Smrg pdevice->rad_info.family == CHIP_TONGA || 1223b8e80941Smrg pdevice->rad_info.family == CHIP_ICELAND || 1224b8e80941Smrg pdevice->rad_info.family == CHIP_POLARIS10 || 1225b8e80941Smrg pdevice->rad_info.family == CHIP_POLARIS11 || 1226b8e80941Smrg pdevice->rad_info.family == CHIP_POLARIS12 || 1227b8e80941Smrg pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10; 1228b8e80941Smrg properties->wavefrontSize = 64; 1229b8e80941Smrg 1230b8e80941Smrg /* SGPR. */ 1231b8e80941Smrg properties->sgprsPerSimd = 1232b8e80941Smrg ac_get_num_physical_sgprs(pdevice->rad_info.chip_class); 1233b8e80941Smrg properties->minSgprAllocation = 1234b8e80941Smrg pdevice->rad_info.chip_class >= VI ? 16 : 8; 1235b8e80941Smrg properties->maxSgprAllocation = 1236b8e80941Smrg pdevice->rad_info.family == CHIP_TONGA || 1237b8e80941Smrg pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104; 1238b8e80941Smrg properties->sgprAllocationGranularity = 1239b8e80941Smrg pdevice->rad_info.chip_class >= VI ? 16 : 8; 1240b8e80941Smrg 1241b8e80941Smrg /* VGPR. 
*/ 1242b8e80941Smrg properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS; 1243b8e80941Smrg properties->minVgprAllocation = 4; 1244b8e80941Smrg properties->maxVgprAllocation = 256; 1245b8e80941Smrg properties->vgprAllocationGranularity = 4; 1246b8e80941Smrg break; 1247b8e80941Smrg } 1248b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { 1249b8e80941Smrg VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties = 1250b8e80941Smrg (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; 1251b8e80941Smrg properties->maxVertexAttribDivisor = UINT32_MAX; 1252b8e80941Smrg break; 1253b8e80941Smrg } 1254b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: { 1255b8e80941Smrg VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties = 1256b8e80941Smrg (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext; 1257b8e80941Smrg properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64; 1258b8e80941Smrg properties->shaderUniformBufferArrayNonUniformIndexingNative = false; 1259b8e80941Smrg properties->shaderSampledImageArrayNonUniformIndexingNative = false; 1260b8e80941Smrg properties->shaderStorageBufferArrayNonUniformIndexingNative = false; 1261b8e80941Smrg properties->shaderStorageImageArrayNonUniformIndexingNative = false; 1262b8e80941Smrg properties->shaderInputAttachmentArrayNonUniformIndexingNative = false; 1263b8e80941Smrg properties->robustBufferAccessUpdateAfterBind = false; 1264b8e80941Smrg properties->quadDivergentImplicitLod = false; 1265b8e80941Smrg 1266b8e80941Smrg size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - 1267b8e80941Smrg MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) / 1268b8e80941Smrg (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + 1269b8e80941Smrg 32 /* storage buffer, 32 due to potential space wasted on alignment */ + 1270b8e80941Smrg 32 /* sampler, largest when combined with image */ + 
1271b8e80941Smrg 64 /* sampled image */ + 1272b8e80941Smrg 64 /* storage image */); 1273b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size; 1274b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size; 1275b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size; 1276b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size; 1277b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size; 1278b8e80941Smrg properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size; 1279b8e80941Smrg properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size; 1280b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size; 1281b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size; 1282b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS; 1283b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size; 1284b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS; 1285b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size; 1286b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size; 1287b8e80941Smrg properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size; 1288b8e80941Smrg break; 1289b8e80941Smrg } 1290b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { 1291b8e80941Smrg VkPhysicalDeviceProtectedMemoryProperties *properties = 1292b8e80941Smrg (VkPhysicalDeviceProtectedMemoryProperties *)ext; 1293b8e80941Smrg properties->protectedNoFault = false; 1294b8e80941Smrg break; 
1295b8e80941Smrg } 1296b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: { 1297b8e80941Smrg VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties = 1298b8e80941Smrg (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext; 1299b8e80941Smrg properties->primitiveOverestimationSize = 0; 1300b8e80941Smrg properties->maxExtraPrimitiveOverestimationSize = 0; 1301b8e80941Smrg properties->extraPrimitiveOverestimationSizeGranularity = 0; 1302b8e80941Smrg properties->primitiveUnderestimation = VK_FALSE; 1303b8e80941Smrg properties->conservativePointAndLineRasterization = VK_FALSE; 1304b8e80941Smrg properties->degenerateTrianglesRasterized = VK_FALSE; 1305b8e80941Smrg properties->degenerateLinesRasterized = VK_FALSE; 1306b8e80941Smrg properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE; 1307b8e80941Smrg properties->conservativeRasterizationPostDepthCoverage = VK_FALSE; 1308b8e80941Smrg break; 1309b8e80941Smrg } 1310b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { 1311b8e80941Smrg VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = 1312b8e80941Smrg (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; 1313b8e80941Smrg properties->pciDomain = pdevice->bus_info.domain; 1314b8e80941Smrg properties->pciBus = pdevice->bus_info.bus; 1315b8e80941Smrg properties->pciDevice = pdevice->bus_info.dev; 1316b8e80941Smrg properties->pciFunction = pdevice->bus_info.func; 1317b8e80941Smrg break; 1318b8e80941Smrg } 1319b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: { 1320b8e80941Smrg VkPhysicalDeviceDriverPropertiesKHR *driver_props = 1321b8e80941Smrg (VkPhysicalDeviceDriverPropertiesKHR *) ext; 1322b8e80941Smrg 1323b8e80941Smrg driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR; 1324b8e80941Smrg memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR); 1325b8e80941Smrg strcpy(driver_props->driverName, "radv"); 1326b8e80941Smrg 1327b8e80941Smrg 
memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR); 1328b8e80941Smrg snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR, 1329b8e80941Smrg "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 1330b8e80941Smrg " (LLVM " MESA_LLVM_VERSION_STRING ")"); 1331b8e80941Smrg 1332b8e80941Smrg driver_props->conformanceVersion = (VkConformanceVersionKHR) { 1333b8e80941Smrg .major = 1, 1334b8e80941Smrg .minor = 1, 1335b8e80941Smrg .subminor = 2, 1336b8e80941Smrg .patch = 0, 1337b8e80941Smrg }; 1338b8e80941Smrg break; 1339b8e80941Smrg } 1340b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { 1341b8e80941Smrg VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties = 1342b8e80941Smrg (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext; 1343b8e80941Smrg properties->maxTransformFeedbackStreams = MAX_SO_STREAMS; 1344b8e80941Smrg properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS; 1345b8e80941Smrg properties->maxTransformFeedbackBufferSize = UINT32_MAX; 1346b8e80941Smrg properties->maxTransformFeedbackStreamDataSize = 512; 1347b8e80941Smrg properties->maxTransformFeedbackBufferDataSize = UINT32_MAX; 1348b8e80941Smrg properties->maxTransformFeedbackBufferDataStride = 512; 1349b8e80941Smrg properties->transformFeedbackQueries = true; 1350b8e80941Smrg properties->transformFeedbackStreamsLinesTriangles = false; 1351b8e80941Smrg properties->transformFeedbackRasterizationStreamSelect = false; 1352b8e80941Smrg properties->transformFeedbackDraw = true; 1353b8e80941Smrg break; 1354b8e80941Smrg } 1355b8e80941Smrg case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { 1356b8e80941Smrg VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props = 1357b8e80941Smrg (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext; 1358b8e80941Smrg 1359b8e80941Smrg props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; 1360b8e80941Smrg props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * 
MAX_SETS; 1361b8e80941Smrg props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; 1362b8e80941Smrg props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT; 1363b8e80941Smrg props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT; 1364b8e80941Smrg break; 1365b8e80941Smrg } 1366b8e80941Smrg default: 1367b8e80941Smrg break; 1368b8e80941Smrg } 1369b8e80941Smrg } 1370b8e80941Smrg} 1371b8e80941Smrg 1372b8e80941Smrgstatic void radv_get_physical_device_queue_family_properties( 1373b8e80941Smrg struct radv_physical_device* pdevice, 1374b8e80941Smrg uint32_t* pCount, 1375b8e80941Smrg VkQueueFamilyProperties** pQueueFamilyProperties) 1376b8e80941Smrg{ 1377b8e80941Smrg int num_queue_families = 1; 1378b8e80941Smrg int idx; 1379b8e80941Smrg if (pdevice->rad_info.num_compute_rings > 0 && 1380b8e80941Smrg !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) 1381b8e80941Smrg num_queue_families++; 1382b8e80941Smrg 1383b8e80941Smrg if (pQueueFamilyProperties == NULL) { 1384b8e80941Smrg *pCount = num_queue_families; 1385b8e80941Smrg return; 1386b8e80941Smrg } 1387b8e80941Smrg 1388b8e80941Smrg if (!*pCount) 1389b8e80941Smrg return; 1390b8e80941Smrg 1391b8e80941Smrg idx = 0; 1392b8e80941Smrg if (*pCount >= 1) { 1393b8e80941Smrg *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { 1394b8e80941Smrg .queueFlags = VK_QUEUE_GRAPHICS_BIT | 1395b8e80941Smrg VK_QUEUE_COMPUTE_BIT | 1396b8e80941Smrg VK_QUEUE_TRANSFER_BIT | 1397b8e80941Smrg VK_QUEUE_SPARSE_BINDING_BIT, 1398b8e80941Smrg .queueCount = 1, 1399b8e80941Smrg .timestampValidBits = 64, 1400b8e80941Smrg .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, 1401b8e80941Smrg }; 1402b8e80941Smrg idx++; 1403b8e80941Smrg } 1404b8e80941Smrg 1405b8e80941Smrg if (pdevice->rad_info.num_compute_rings > 0 && 1406b8e80941Smrg !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { 1407b8e80941Smrg if (*pCount > 
idx) { 1408b8e80941Smrg *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { 1409b8e80941Smrg .queueFlags = VK_QUEUE_COMPUTE_BIT | 1410b8e80941Smrg VK_QUEUE_TRANSFER_BIT | 1411b8e80941Smrg VK_QUEUE_SPARSE_BINDING_BIT, 1412b8e80941Smrg .queueCount = pdevice->rad_info.num_compute_rings, 1413b8e80941Smrg .timestampValidBits = 64, 1414b8e80941Smrg .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, 1415b8e80941Smrg }; 1416b8e80941Smrg idx++; 1417b8e80941Smrg } 1418b8e80941Smrg } 1419b8e80941Smrg *pCount = idx; 1420b8e80941Smrg} 1421b8e80941Smrg 1422b8e80941Smrgvoid radv_GetPhysicalDeviceQueueFamilyProperties( 1423b8e80941Smrg VkPhysicalDevice physicalDevice, 1424b8e80941Smrg uint32_t* pCount, 1425b8e80941Smrg VkQueueFamilyProperties* pQueueFamilyProperties) 1426b8e80941Smrg{ 1427b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 1428b8e80941Smrg if (!pQueueFamilyProperties) { 1429b8e80941Smrg radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); 1430b8e80941Smrg return; 1431b8e80941Smrg } 1432b8e80941Smrg VkQueueFamilyProperties *properties[] = { 1433b8e80941Smrg pQueueFamilyProperties + 0, 1434b8e80941Smrg pQueueFamilyProperties + 1, 1435b8e80941Smrg pQueueFamilyProperties + 2, 1436b8e80941Smrg }; 1437b8e80941Smrg radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); 1438b8e80941Smrg assert(*pCount <= 3); 1439b8e80941Smrg} 1440b8e80941Smrg 1441b8e80941Smrgvoid radv_GetPhysicalDeviceQueueFamilyProperties2( 1442b8e80941Smrg VkPhysicalDevice physicalDevice, 1443b8e80941Smrg uint32_t* pCount, 1444b8e80941Smrg VkQueueFamilyProperties2 *pQueueFamilyProperties) 1445b8e80941Smrg{ 1446b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 1447b8e80941Smrg if (!pQueueFamilyProperties) { 1448b8e80941Smrg radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); 1449b8e80941Smrg return; 1450b8e80941Smrg } 1451b8e80941Smrg VkQueueFamilyProperties *properties[] = 
{ 1452b8e80941Smrg &pQueueFamilyProperties[0].queueFamilyProperties, 1453b8e80941Smrg &pQueueFamilyProperties[1].queueFamilyProperties, 1454b8e80941Smrg &pQueueFamilyProperties[2].queueFamilyProperties, 1455b8e80941Smrg }; 1456b8e80941Smrg radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); 1457b8e80941Smrg assert(*pCount <= 3); 1458b8e80941Smrg} 1459b8e80941Smrg 1460b8e80941Smrgvoid radv_GetPhysicalDeviceMemoryProperties( 1461b8e80941Smrg VkPhysicalDevice physicalDevice, 1462b8e80941Smrg VkPhysicalDeviceMemoryProperties *pMemoryProperties) 1463b8e80941Smrg{ 1464b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); 1465b8e80941Smrg 1466b8e80941Smrg *pMemoryProperties = physical_device->memory_properties; 1467b8e80941Smrg} 1468b8e80941Smrg 1469b8e80941Smrgstatic void 1470b8e80941Smrgradv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, 1471b8e80941Smrg VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget) 1472b8e80941Smrg{ 1473b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); 1474b8e80941Smrg VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; 1475b8e80941Smrg uint64_t visible_vram_size = radv_get_visible_vram_size(device); 1476b8e80941Smrg uint64_t vram_size = radv_get_vram_size(device); 1477b8e80941Smrg uint64_t gtt_size = device->rad_info.gart_size; 1478b8e80941Smrg uint64_t heap_budget, heap_usage; 1479b8e80941Smrg 1480b8e80941Smrg /* For all memory heaps, the computation of budget is as follow: 1481b8e80941Smrg * heap_budget = heap_size - global_heap_usage + app_heap_usage 1482b8e80941Smrg * 1483b8e80941Smrg * The Vulkan spec 1.1.97 says that the budget should include any 1484b8e80941Smrg * currently allocated device memory. 1485b8e80941Smrg * 1486b8e80941Smrg * Note that the application heap usages are not really accurate (eg. 1487b8e80941Smrg * in presence of shared buffers). 
1488b8e80941Smrg */ 1489b8e80941Smrg for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) { 1490b8e80941Smrg uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex; 1491b8e80941Smrg 1492b8e80941Smrg switch (device->mem_type_indices[i]) { 1493b8e80941Smrg case RADV_MEM_TYPE_VRAM: 1494b8e80941Smrg heap_usage = device->ws->query_value(device->ws, 1495b8e80941Smrg RADEON_ALLOCATED_VRAM); 1496b8e80941Smrg 1497b8e80941Smrg heap_budget = vram_size - 1498b8e80941Smrg device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + 1499b8e80941Smrg heap_usage; 1500b8e80941Smrg 1501b8e80941Smrg memoryBudget->heapBudget[heap_index] = heap_budget; 1502b8e80941Smrg memoryBudget->heapUsage[heap_index] = heap_usage; 1503b8e80941Smrg break; 1504b8e80941Smrg case RADV_MEM_TYPE_VRAM_CPU_ACCESS: 1505b8e80941Smrg heap_usage = device->ws->query_value(device->ws, 1506b8e80941Smrg RADEON_ALLOCATED_VRAM_VIS); 1507b8e80941Smrg 1508b8e80941Smrg heap_budget = visible_vram_size - 1509b8e80941Smrg device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + 1510b8e80941Smrg heap_usage; 1511b8e80941Smrg 1512b8e80941Smrg memoryBudget->heapBudget[heap_index] = heap_budget; 1513b8e80941Smrg memoryBudget->heapUsage[heap_index] = heap_usage; 1514b8e80941Smrg break; 1515b8e80941Smrg case RADV_MEM_TYPE_GTT_WRITE_COMBINE: 1516b8e80941Smrg heap_usage = device->ws->query_value(device->ws, 1517b8e80941Smrg RADEON_ALLOCATED_GTT); 1518b8e80941Smrg 1519b8e80941Smrg heap_budget = gtt_size - 1520b8e80941Smrg device->ws->query_value(device->ws, RADEON_GTT_USAGE) + 1521b8e80941Smrg heap_usage; 1522b8e80941Smrg 1523b8e80941Smrg memoryBudget->heapBudget[heap_index] = heap_budget; 1524b8e80941Smrg memoryBudget->heapUsage[heap_index] = heap_usage; 1525b8e80941Smrg break; 1526b8e80941Smrg default: 1527b8e80941Smrg break; 1528b8e80941Smrg } 1529b8e80941Smrg } 1530b8e80941Smrg 1531b8e80941Smrg /* The heapBudget and heapUsage values must be zero for array elements 1532b8e80941Smrg * greater than 
or equal to 1533b8e80941Smrg * VkPhysicalDeviceMemoryProperties::memoryHeapCount. 1534b8e80941Smrg */ 1535b8e80941Smrg for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) { 1536b8e80941Smrg memoryBudget->heapBudget[i] = 0; 1537b8e80941Smrg memoryBudget->heapUsage[i] = 0; 1538b8e80941Smrg } 1539b8e80941Smrg} 1540b8e80941Smrg 1541b8e80941Smrgvoid radv_GetPhysicalDeviceMemoryProperties2( 1542b8e80941Smrg VkPhysicalDevice physicalDevice, 1543b8e80941Smrg VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) 1544b8e80941Smrg{ 1545b8e80941Smrg radv_GetPhysicalDeviceMemoryProperties(physicalDevice, 1546b8e80941Smrg &pMemoryProperties->memoryProperties); 1547b8e80941Smrg 1548b8e80941Smrg VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget = 1549b8e80941Smrg vk_find_struct(pMemoryProperties->pNext, 1550b8e80941Smrg PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT); 1551b8e80941Smrg if (memory_budget) 1552b8e80941Smrg radv_get_memory_budget_properties(physicalDevice, memory_budget); 1553b8e80941Smrg} 1554b8e80941Smrg 1555b8e80941SmrgVkResult radv_GetMemoryHostPointerPropertiesEXT( 1556b8e80941Smrg VkDevice _device, 1557b8e80941Smrg VkExternalMemoryHandleTypeFlagBits handleType, 1558b8e80941Smrg const void *pHostPointer, 1559b8e80941Smrg VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) 1560b8e80941Smrg{ 1561b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 1562b8e80941Smrg 1563b8e80941Smrg switch (handleType) 1564b8e80941Smrg { 1565b8e80941Smrg case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: { 1566b8e80941Smrg const struct radv_physical_device *physical_device = device->physical_device; 1567b8e80941Smrg uint32_t memoryTypeBits = 0; 1568b8e80941Smrg for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) { 1569b8e80941Smrg if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) { 1570b8e80941Smrg memoryTypeBits = (1 << i); 1571b8e80941Smrg break; 1572b8e80941Smrg } 
1573b8e80941Smrg } 1574b8e80941Smrg pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits; 1575b8e80941Smrg return VK_SUCCESS; 1576b8e80941Smrg } 1577b8e80941Smrg default: 1578b8e80941Smrg return VK_ERROR_INVALID_EXTERNAL_HANDLE; 1579b8e80941Smrg } 1580b8e80941Smrg} 1581b8e80941Smrg 1582b8e80941Smrgstatic enum radeon_ctx_priority 1583b8e80941Smrgradv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj) 1584b8e80941Smrg{ 1585b8e80941Smrg /* Default to MEDIUM when a specific global priority isn't requested */ 1586b8e80941Smrg if (!pObj) 1587b8e80941Smrg return RADEON_CTX_PRIORITY_MEDIUM; 1588b8e80941Smrg 1589b8e80941Smrg switch(pObj->globalPriority) { 1590b8e80941Smrg case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT: 1591b8e80941Smrg return RADEON_CTX_PRIORITY_REALTIME; 1592b8e80941Smrg case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: 1593b8e80941Smrg return RADEON_CTX_PRIORITY_HIGH; 1594b8e80941Smrg case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: 1595b8e80941Smrg return RADEON_CTX_PRIORITY_MEDIUM; 1596b8e80941Smrg case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: 1597b8e80941Smrg return RADEON_CTX_PRIORITY_LOW; 1598b8e80941Smrg default: 1599b8e80941Smrg unreachable("Illegal global priority value"); 1600b8e80941Smrg return RADEON_CTX_PRIORITY_INVALID; 1601b8e80941Smrg } 1602b8e80941Smrg} 1603b8e80941Smrg 1604b8e80941Smrgstatic int 1605b8e80941Smrgradv_queue_init(struct radv_device *device, struct radv_queue *queue, 1606b8e80941Smrg uint32_t queue_family_index, int idx, 1607b8e80941Smrg VkDeviceQueueCreateFlags flags, 1608b8e80941Smrg const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority) 1609b8e80941Smrg{ 1610b8e80941Smrg queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 1611b8e80941Smrg queue->device = device; 1612b8e80941Smrg queue->queue_family_index = queue_family_index; 1613b8e80941Smrg queue->queue_idx = idx; 1614b8e80941Smrg queue->priority = radv_get_queue_global_priority(global_priority); 1615b8e80941Smrg queue->flags = flags; 1616b8e80941Smrg 
1617b8e80941Smrg queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority); 1618b8e80941Smrg if (!queue->hw_ctx) 1619b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 1620b8e80941Smrg 1621b8e80941Smrg return VK_SUCCESS; 1622b8e80941Smrg} 1623b8e80941Smrg 1624b8e80941Smrgstatic void 1625b8e80941Smrgradv_queue_finish(struct radv_queue *queue) 1626b8e80941Smrg{ 1627b8e80941Smrg if (queue->hw_ctx) 1628b8e80941Smrg queue->device->ws->ctx_destroy(queue->hw_ctx); 1629b8e80941Smrg 1630b8e80941Smrg if (queue->initial_full_flush_preamble_cs) 1631b8e80941Smrg queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs); 1632b8e80941Smrg if (queue->initial_preamble_cs) 1633b8e80941Smrg queue->device->ws->cs_destroy(queue->initial_preamble_cs); 1634b8e80941Smrg if (queue->continue_preamble_cs) 1635b8e80941Smrg queue->device->ws->cs_destroy(queue->continue_preamble_cs); 1636b8e80941Smrg if (queue->descriptor_bo) 1637b8e80941Smrg queue->device->ws->buffer_destroy(queue->descriptor_bo); 1638b8e80941Smrg if (queue->scratch_bo) 1639b8e80941Smrg queue->device->ws->buffer_destroy(queue->scratch_bo); 1640b8e80941Smrg if (queue->esgs_ring_bo) 1641b8e80941Smrg queue->device->ws->buffer_destroy(queue->esgs_ring_bo); 1642b8e80941Smrg if (queue->gsvs_ring_bo) 1643b8e80941Smrg queue->device->ws->buffer_destroy(queue->gsvs_ring_bo); 1644b8e80941Smrg if (queue->tess_rings_bo) 1645b8e80941Smrg queue->device->ws->buffer_destroy(queue->tess_rings_bo); 1646b8e80941Smrg if (queue->compute_scratch_bo) 1647b8e80941Smrg queue->device->ws->buffer_destroy(queue->compute_scratch_bo); 1648b8e80941Smrg} 1649b8e80941Smrg 1650b8e80941Smrgstatic void 1651b8e80941Smrgradv_bo_list_init(struct radv_bo_list *bo_list) 1652b8e80941Smrg{ 1653b8e80941Smrg pthread_mutex_init(&bo_list->mutex, NULL); 1654b8e80941Smrg bo_list->list.count = bo_list->capacity = 0; 1655b8e80941Smrg bo_list->list.bos = NULL; 1656b8e80941Smrg} 1657b8e80941Smrg 1658b8e80941Smrgstatic void 
1659b8e80941Smrgradv_bo_list_finish(struct radv_bo_list *bo_list) 1660b8e80941Smrg{ 1661b8e80941Smrg free(bo_list->list.bos); 1662b8e80941Smrg pthread_mutex_destroy(&bo_list->mutex); 1663b8e80941Smrg} 1664b8e80941Smrg 1665b8e80941Smrgstatic VkResult radv_bo_list_add(struct radv_device *device, 1666b8e80941Smrg struct radeon_winsys_bo *bo) 1667b8e80941Smrg{ 1668b8e80941Smrg struct radv_bo_list *bo_list = &device->bo_list; 1669b8e80941Smrg 1670b8e80941Smrg if (bo->is_local) 1671b8e80941Smrg return VK_SUCCESS; 1672b8e80941Smrg 1673b8e80941Smrg if (unlikely(!device->use_global_bo_list)) 1674b8e80941Smrg return VK_SUCCESS; 1675b8e80941Smrg 1676b8e80941Smrg pthread_mutex_lock(&bo_list->mutex); 1677b8e80941Smrg if (bo_list->list.count == bo_list->capacity) { 1678b8e80941Smrg unsigned capacity = MAX2(4, bo_list->capacity * 2); 1679b8e80941Smrg void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*)); 1680b8e80941Smrg 1681b8e80941Smrg if (!data) { 1682b8e80941Smrg pthread_mutex_unlock(&bo_list->mutex); 1683b8e80941Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 1684b8e80941Smrg } 1685b8e80941Smrg 1686b8e80941Smrg bo_list->list.bos = (struct radeon_winsys_bo**)data; 1687b8e80941Smrg bo_list->capacity = capacity; 1688b8e80941Smrg } 1689b8e80941Smrg 1690b8e80941Smrg bo_list->list.bos[bo_list->list.count++] = bo; 1691b8e80941Smrg pthread_mutex_unlock(&bo_list->mutex); 1692b8e80941Smrg return VK_SUCCESS; 1693b8e80941Smrg} 1694b8e80941Smrg 1695b8e80941Smrgstatic void radv_bo_list_remove(struct radv_device *device, 1696b8e80941Smrg struct radeon_winsys_bo *bo) 1697b8e80941Smrg{ 1698b8e80941Smrg struct radv_bo_list *bo_list = &device->bo_list; 1699b8e80941Smrg 1700b8e80941Smrg if (bo->is_local) 1701b8e80941Smrg return; 1702b8e80941Smrg 1703b8e80941Smrg if (unlikely(!device->use_global_bo_list)) 1704b8e80941Smrg return; 1705b8e80941Smrg 1706b8e80941Smrg pthread_mutex_lock(&bo_list->mutex); 1707b8e80941Smrg for(unsigned i = 0; i < bo_list->list.count; ++i) { 
1708b8e80941Smrg if (bo_list->list.bos[i] == bo) { 1709b8e80941Smrg bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1]; 1710b8e80941Smrg --bo_list->list.count; 1711b8e80941Smrg break; 1712b8e80941Smrg } 1713b8e80941Smrg } 1714b8e80941Smrg pthread_mutex_unlock(&bo_list->mutex); 1715b8e80941Smrg} 1716b8e80941Smrg 1717b8e80941Smrgstatic void 1718b8e80941Smrgradv_device_init_gs_info(struct radv_device *device) 1719b8e80941Smrg{ 1720b8e80941Smrg device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class, 1721b8e80941Smrg device->physical_device->rad_info.family); 1722b8e80941Smrg} 1723b8e80941Smrg 1724b8e80941Smrgstatic int radv_get_device_extension_index(const char *name) 1725b8e80941Smrg{ 1726b8e80941Smrg for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) { 1727b8e80941Smrg if (strcmp(name, radv_device_extensions[i].extensionName) == 0) 1728b8e80941Smrg return i; 1729b8e80941Smrg } 1730b8e80941Smrg return -1; 1731b8e80941Smrg} 1732b8e80941Smrg 1733b8e80941Smrgstatic int 1734b8e80941Smrgradv_get_int_debug_option(const char *name, int default_value) 1735b8e80941Smrg{ 1736b8e80941Smrg const char *str; 1737b8e80941Smrg int result; 1738b8e80941Smrg 1739b8e80941Smrg str = getenv(name); 1740b8e80941Smrg if (!str) { 1741b8e80941Smrg result = default_value; 1742b8e80941Smrg } else { 1743b8e80941Smrg char *endptr; 1744b8e80941Smrg 1745b8e80941Smrg result = strtol(str, &endptr, 0); 1746b8e80941Smrg if (str == endptr) { 1747b8e80941Smrg /* No digits founs. 
*/ 1748b8e80941Smrg result = default_value; 1749b8e80941Smrg } 1750b8e80941Smrg } 1751b8e80941Smrg 1752b8e80941Smrg return result; 1753b8e80941Smrg} 1754b8e80941Smrg 1755b8e80941SmrgVkResult radv_CreateDevice( 1756b8e80941Smrg VkPhysicalDevice physicalDevice, 1757b8e80941Smrg const VkDeviceCreateInfo* pCreateInfo, 1758b8e80941Smrg const VkAllocationCallbacks* pAllocator, 1759b8e80941Smrg VkDevice* pDevice) 1760b8e80941Smrg{ 1761b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); 1762b8e80941Smrg VkResult result; 1763b8e80941Smrg struct radv_device *device; 1764b8e80941Smrg 1765b8e80941Smrg bool keep_shader_info = false; 1766b8e80941Smrg 1767b8e80941Smrg /* Check enabled features */ 1768b8e80941Smrg if (pCreateInfo->pEnabledFeatures) { 1769b8e80941Smrg VkPhysicalDeviceFeatures supported_features; 1770b8e80941Smrg radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); 1771b8e80941Smrg VkBool32 *supported_feature = (VkBool32 *)&supported_features; 1772b8e80941Smrg VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; 1773b8e80941Smrg unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); 1774b8e80941Smrg for (uint32_t i = 0; i < num_features; i++) { 1775b8e80941Smrg if (enabled_feature[i] && !supported_feature[i]) 1776b8e80941Smrg return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT); 1777b8e80941Smrg } 1778b8e80941Smrg } 1779b8e80941Smrg 1780b8e80941Smrg device = vk_zalloc2(&physical_device->instance->alloc, pAllocator, 1781b8e80941Smrg sizeof(*device), 8, 1782b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1783b8e80941Smrg if (!device) 1784b8e80941Smrg return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 1785b8e80941Smrg 1786b8e80941Smrg device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 1787b8e80941Smrg device->instance = physical_device->instance; 1788b8e80941Smrg device->physical_device = physical_device; 1789b8e80941Smrg 
1790b8e80941Smrg device->ws = physical_device->ws; 1791b8e80941Smrg if (pAllocator) 1792b8e80941Smrg device->alloc = *pAllocator; 1793b8e80941Smrg else 1794b8e80941Smrg device->alloc = physical_device->instance->alloc; 1795b8e80941Smrg 1796b8e80941Smrg for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 1797b8e80941Smrg const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; 1798b8e80941Smrg int index = radv_get_device_extension_index(ext_name); 1799b8e80941Smrg if (index < 0 || !physical_device->supported_extensions.extensions[index]) { 1800b8e80941Smrg vk_free(&device->alloc, device); 1801b8e80941Smrg return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT); 1802b8e80941Smrg } 1803b8e80941Smrg 1804b8e80941Smrg device->enabled_extensions.extensions[index] = true; 1805b8e80941Smrg } 1806b8e80941Smrg 1807b8e80941Smrg keep_shader_info = device->enabled_extensions.AMD_shader_info; 1808b8e80941Smrg 1809b8e80941Smrg /* With update after bind we can't attach bo's to the command buffer 1810b8e80941Smrg * from the descriptor set anymore, so we have to use a global BO list. 
1811b8e80941Smrg */ 1812b8e80941Smrg device->use_global_bo_list = 1813b8e80941Smrg (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || 1814b8e80941Smrg device->enabled_extensions.EXT_descriptor_indexing || 1815b8e80941Smrg device->enabled_extensions.EXT_buffer_device_address; 1816b8e80941Smrg 1817b8e80941Smrg mtx_init(&device->shader_slab_mutex, mtx_plain); 1818b8e80941Smrg list_inithead(&device->shader_slabs); 1819b8e80941Smrg 1820b8e80941Smrg radv_bo_list_init(&device->bo_list); 1821b8e80941Smrg 1822b8e80941Smrg for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 1823b8e80941Smrg const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; 1824b8e80941Smrg uint32_t qfi = queue_create->queueFamilyIndex; 1825b8e80941Smrg const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority = 1826b8e80941Smrg vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT); 1827b8e80941Smrg 1828b8e80941Smrg assert(!global_priority || device->physical_device->rad_info.has_ctx_priority); 1829b8e80941Smrg 1830b8e80941Smrg device->queues[qfi] = vk_alloc(&device->alloc, 1831b8e80941Smrg queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1832b8e80941Smrg if (!device->queues[qfi]) { 1833b8e80941Smrg result = VK_ERROR_OUT_OF_HOST_MEMORY; 1834b8e80941Smrg goto fail; 1835b8e80941Smrg } 1836b8e80941Smrg 1837b8e80941Smrg memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue)); 1838b8e80941Smrg 1839b8e80941Smrg device->queue_count[qfi] = queue_create->queueCount; 1840b8e80941Smrg 1841b8e80941Smrg for (unsigned q = 0; q < queue_create->queueCount; q++) { 1842b8e80941Smrg result = radv_queue_init(device, &device->queues[qfi][q], 1843b8e80941Smrg qfi, q, queue_create->flags, 1844b8e80941Smrg global_priority); 1845b8e80941Smrg if (result != VK_SUCCESS) 1846b8e80941Smrg goto fail; 1847b8e80941Smrg } 1848b8e80941Smrg } 1849b8e80941Smrg 1850b8e80941Smrg 
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 && 1851b8e80941Smrg !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); 1852b8e80941Smrg 1853b8e80941Smrg /* Disabled and not implemented for now. */ 1854b8e80941Smrg device->dfsm_allowed = device->pbb_allowed && 1855b8e80941Smrg (device->physical_device->rad_info.family == CHIP_RAVEN || 1856b8e80941Smrg device->physical_device->rad_info.family == CHIP_RAVEN2); 1857b8e80941Smrg 1858b8e80941Smrg#ifdef ANDROID 1859b8e80941Smrg device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit; 1860b8e80941Smrg#endif 1861b8e80941Smrg 1862b8e80941Smrg /* The maximum number of scratch waves. Scratch space isn't divided 1863b8e80941Smrg * evenly between CUs. The number is only a function of the number of CUs. 1864b8e80941Smrg * We can decrease the constant to decrease the scratch buffer size. 1865b8e80941Smrg * 1866b8e80941Smrg * sctx->scratch_waves must be >= the maximum possible size of 1867b8e80941Smrg * 1 threadgroup, so that the hw doesn't hang from being unable 1868b8e80941Smrg * to start any. 1869b8e80941Smrg * 1870b8e80941Smrg * The recommended value is 4 per CU at most. Higher numbers don't 1871b8e80941Smrg * bring much benefit, but they still occupy chip resources (think 1872b8e80941Smrg * async compute). I've seen ~2% performance difference between 4 and 32. 1873b8e80941Smrg */ 1874b8e80941Smrg uint32_t max_threads_per_block = 2048; 1875b8e80941Smrg device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, 1876b8e80941Smrg max_threads_per_block / 64); 1877b8e80941Smrg 1878b8e80941Smrg device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); 1879b8e80941Smrg 1880b8e80941Smrg if (device->physical_device->rad_info.chip_class >= CIK) { 1881b8e80941Smrg /* If the KMD allows it (there is a KMD hw register for it), 1882b8e80941Smrg * allow launching waves out-of-order. 
1883b8e80941Smrg */ 1884b8e80941Smrg device->dispatch_initiator |= S_00B800_ORDER_MODE(1); 1885b8e80941Smrg } 1886b8e80941Smrg 1887b8e80941Smrg radv_device_init_gs_info(device); 1888b8e80941Smrg 1889b8e80941Smrg device->tess_offchip_block_dw_size = 1890b8e80941Smrg device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192; 1891b8e80941Smrg device->has_distributed_tess = 1892b8e80941Smrg device->physical_device->rad_info.chip_class >= VI && 1893b8e80941Smrg device->physical_device->rad_info.max_se >= 2; 1894b8e80941Smrg 1895b8e80941Smrg if (getenv("RADV_TRACE_FILE")) { 1896b8e80941Smrg const char *filename = getenv("RADV_TRACE_FILE"); 1897b8e80941Smrg 1898b8e80941Smrg keep_shader_info = true; 1899b8e80941Smrg 1900b8e80941Smrg if (!radv_init_trace(device)) 1901b8e80941Smrg goto fail; 1902b8e80941Smrg 1903b8e80941Smrg fprintf(stderr, "*****************************************************************************\n"); 1904b8e80941Smrg fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! 
*\n"); 1905b8e80941Smrg fprintf(stderr, "*****************************************************************************\n"); 1906b8e80941Smrg 1907b8e80941Smrg fprintf(stderr, "Trace file will be dumped to %s\n", filename); 1908b8e80941Smrg radv_dump_enabled_options(device, stderr); 1909b8e80941Smrg } 1910b8e80941Smrg 1911b8e80941Smrg device->keep_shader_info = keep_shader_info; 1912b8e80941Smrg 1913b8e80941Smrg result = radv_device_init_meta(device); 1914b8e80941Smrg if (result != VK_SUCCESS) 1915b8e80941Smrg goto fail; 1916b8e80941Smrg 1917b8e80941Smrg radv_device_init_msaa(device); 1918b8e80941Smrg 1919b8e80941Smrg for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) { 1920b8e80941Smrg device->empty_cs[family] = device->ws->cs_create(device->ws, family); 1921b8e80941Smrg switch (family) { 1922b8e80941Smrg case RADV_QUEUE_GENERAL: 1923b8e80941Smrg radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 1924b8e80941Smrg radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1)); 1925b8e80941Smrg radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1)); 1926b8e80941Smrg break; 1927b8e80941Smrg case RADV_QUEUE_COMPUTE: 1928b8e80941Smrg radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0)); 1929b8e80941Smrg radeon_emit(device->empty_cs[family], 0); 1930b8e80941Smrg break; 1931b8e80941Smrg } 1932b8e80941Smrg device->ws->cs_finalize(device->empty_cs[family]); 1933b8e80941Smrg } 1934b8e80941Smrg 1935b8e80941Smrg if (device->physical_device->rad_info.chip_class >= CIK) 1936b8e80941Smrg cik_create_gfx_config(device); 1937b8e80941Smrg 1938b8e80941Smrg VkPipelineCacheCreateInfo ci; 1939b8e80941Smrg ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; 1940b8e80941Smrg ci.pNext = NULL; 1941b8e80941Smrg ci.flags = 0; 1942b8e80941Smrg ci.pInitialData = NULL; 1943b8e80941Smrg ci.initialDataSize = 0; 1944b8e80941Smrg VkPipelineCache pc; 1945b8e80941Smrg result = radv_CreatePipelineCache(radv_device_to_handle(device), 
1946b8e80941Smrg &ci, NULL, &pc); 1947b8e80941Smrg if (result != VK_SUCCESS) 1948b8e80941Smrg goto fail_meta; 1949b8e80941Smrg 1950b8e80941Smrg device->mem_cache = radv_pipeline_cache_from_handle(pc); 1951b8e80941Smrg 1952b8e80941Smrg device->force_aniso = 1953b8e80941Smrg MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1)); 1954b8e80941Smrg if (device->force_aniso >= 0) { 1955b8e80941Smrg fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1956b8e80941Smrg 1 << util_logbase2(device->force_aniso)); 1957b8e80941Smrg } 1958b8e80941Smrg 1959b8e80941Smrg *pDevice = radv_device_to_handle(device); 1960b8e80941Smrg return VK_SUCCESS; 1961b8e80941Smrg 1962b8e80941Smrgfail_meta: 1963b8e80941Smrg radv_device_finish_meta(device); 1964b8e80941Smrgfail: 1965b8e80941Smrg radv_bo_list_finish(&device->bo_list); 1966b8e80941Smrg 1967b8e80941Smrg if (device->trace_bo) 1968b8e80941Smrg device->ws->buffer_destroy(device->trace_bo); 1969b8e80941Smrg 1970b8e80941Smrg if (device->gfx_init) 1971b8e80941Smrg device->ws->buffer_destroy(device->gfx_init); 1972b8e80941Smrg 1973b8e80941Smrg for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 1974b8e80941Smrg for (unsigned q = 0; q < device->queue_count[i]; q++) 1975b8e80941Smrg radv_queue_finish(&device->queues[i][q]); 1976b8e80941Smrg if (device->queue_count[i]) 1977b8e80941Smrg vk_free(&device->alloc, device->queues[i]); 1978b8e80941Smrg } 1979b8e80941Smrg 1980b8e80941Smrg vk_free(&device->alloc, device); 1981b8e80941Smrg return result; 1982b8e80941Smrg} 1983b8e80941Smrg 1984b8e80941Smrgvoid radv_DestroyDevice( 1985b8e80941Smrg VkDevice _device, 1986b8e80941Smrg const VkAllocationCallbacks* pAllocator) 1987b8e80941Smrg{ 1988b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 1989b8e80941Smrg 1990b8e80941Smrg if (!device) 1991b8e80941Smrg return; 1992b8e80941Smrg 1993b8e80941Smrg if (device->trace_bo) 1994b8e80941Smrg device->ws->buffer_destroy(device->trace_bo); 1995b8e80941Smrg 1996b8e80941Smrg if 
(device->gfx_init) 1997b8e80941Smrg device->ws->buffer_destroy(device->gfx_init); 1998b8e80941Smrg 1999b8e80941Smrg for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 2000b8e80941Smrg for (unsigned q = 0; q < device->queue_count[i]; q++) 2001b8e80941Smrg radv_queue_finish(&device->queues[i][q]); 2002b8e80941Smrg if (device->queue_count[i]) 2003b8e80941Smrg vk_free(&device->alloc, device->queues[i]); 2004b8e80941Smrg if (device->empty_cs[i]) 2005b8e80941Smrg device->ws->cs_destroy(device->empty_cs[i]); 2006b8e80941Smrg } 2007b8e80941Smrg radv_device_finish_meta(device); 2008b8e80941Smrg 2009b8e80941Smrg VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); 2010b8e80941Smrg radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); 2011b8e80941Smrg 2012b8e80941Smrg radv_destroy_shader_slabs(device); 2013b8e80941Smrg 2014b8e80941Smrg radv_bo_list_finish(&device->bo_list); 2015b8e80941Smrg vk_free(&device->alloc, device); 2016b8e80941Smrg} 2017b8e80941Smrg 2018b8e80941SmrgVkResult radv_EnumerateInstanceLayerProperties( 2019b8e80941Smrg uint32_t* pPropertyCount, 2020b8e80941Smrg VkLayerProperties* pProperties) 2021b8e80941Smrg{ 2022b8e80941Smrg if (pProperties == NULL) { 2023b8e80941Smrg *pPropertyCount = 0; 2024b8e80941Smrg return VK_SUCCESS; 2025b8e80941Smrg } 2026b8e80941Smrg 2027b8e80941Smrg /* None supported at this time */ 2028b8e80941Smrg return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 2029b8e80941Smrg} 2030b8e80941Smrg 2031b8e80941SmrgVkResult radv_EnumerateDeviceLayerProperties( 2032b8e80941Smrg VkPhysicalDevice physicalDevice, 2033b8e80941Smrg uint32_t* pPropertyCount, 2034b8e80941Smrg VkLayerProperties* pProperties) 2035b8e80941Smrg{ 2036b8e80941Smrg if (pProperties == NULL) { 2037b8e80941Smrg *pPropertyCount = 0; 2038b8e80941Smrg return VK_SUCCESS; 2039b8e80941Smrg } 2040b8e80941Smrg 2041b8e80941Smrg /* None supported at this time */ 2042b8e80941Smrg return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 2043b8e80941Smrg} 
2044b8e80941Smrg 2045b8e80941Smrgvoid radv_GetDeviceQueue2( 2046b8e80941Smrg VkDevice _device, 2047b8e80941Smrg const VkDeviceQueueInfo2* pQueueInfo, 2048b8e80941Smrg VkQueue* pQueue) 2049b8e80941Smrg{ 2050b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 2051b8e80941Smrg struct radv_queue *queue; 2052b8e80941Smrg 2053b8e80941Smrg queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]; 2054b8e80941Smrg if (pQueueInfo->flags != queue->flags) { 2055b8e80941Smrg /* From the Vulkan 1.1.70 spec: 2056b8e80941Smrg * 2057b8e80941Smrg * "The queue returned by vkGetDeviceQueue2 must have the same 2058b8e80941Smrg * flags value from this structure as that used at device 2059b8e80941Smrg * creation time in a VkDeviceQueueCreateInfo instance. If no 2060b8e80941Smrg * matching flags were specified at device creation time then 2061b8e80941Smrg * pQueue will return VK_NULL_HANDLE." 2062b8e80941Smrg */ 2063b8e80941Smrg *pQueue = VK_NULL_HANDLE; 2064b8e80941Smrg return; 2065b8e80941Smrg } 2066b8e80941Smrg 2067b8e80941Smrg *pQueue = radv_queue_to_handle(queue); 2068b8e80941Smrg} 2069b8e80941Smrg 2070b8e80941Smrgvoid radv_GetDeviceQueue( 2071b8e80941Smrg VkDevice _device, 2072b8e80941Smrg uint32_t queueFamilyIndex, 2073b8e80941Smrg uint32_t queueIndex, 2074b8e80941Smrg VkQueue* pQueue) 2075b8e80941Smrg{ 2076b8e80941Smrg const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) { 2077b8e80941Smrg .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, 2078b8e80941Smrg .queueFamilyIndex = queueFamilyIndex, 2079b8e80941Smrg .queueIndex = queueIndex 2080b8e80941Smrg }; 2081b8e80941Smrg 2082b8e80941Smrg radv_GetDeviceQueue2(_device, &info, pQueue); 2083b8e80941Smrg} 2084b8e80941Smrg 2085b8e80941Smrgstatic void 2086b8e80941Smrgfill_geom_tess_rings(struct radv_queue *queue, 2087b8e80941Smrg uint32_t *map, 2088b8e80941Smrg bool add_sample_positions, 2089b8e80941Smrg uint32_t esgs_ring_size, 2090b8e80941Smrg struct radeon_winsys_bo *esgs_ring_bo, 2091b8e80941Smrg uint32_t 
gsvs_ring_size, 2092b8e80941Smrg struct radeon_winsys_bo *gsvs_ring_bo, 2093b8e80941Smrg uint32_t tess_factor_ring_size, 2094b8e80941Smrg uint32_t tess_offchip_ring_offset, 2095b8e80941Smrg uint32_t tess_offchip_ring_size, 2096b8e80941Smrg struct radeon_winsys_bo *tess_rings_bo) 2097b8e80941Smrg{ 2098b8e80941Smrg uint32_t *desc = &map[4]; 2099b8e80941Smrg 2100b8e80941Smrg if (esgs_ring_bo) { 2101b8e80941Smrg uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo); 2102b8e80941Smrg 2103b8e80941Smrg /* stride 0, num records - size, add tid, swizzle, elsize4, 2104b8e80941Smrg index stride 64 */ 2105b8e80941Smrg desc[0] = esgs_va; 2106b8e80941Smrg desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | 2107b8e80941Smrg S_008F04_STRIDE(0) | 2108b8e80941Smrg S_008F04_SWIZZLE_ENABLE(true); 2109b8e80941Smrg desc[2] = esgs_ring_size; 2110b8e80941Smrg desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2111b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2112b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2113b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2114b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2115b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2116b8e80941Smrg S_008F0C_ELEMENT_SIZE(1) | 2117b8e80941Smrg S_008F0C_INDEX_STRIDE(3) | 2118b8e80941Smrg S_008F0C_ADD_TID_ENABLE(true); 2119b8e80941Smrg 2120b8e80941Smrg /* GS entry for ES->GS ring */ 2121b8e80941Smrg /* stride 0, num records - size, elsize0, 2122b8e80941Smrg index stride 0 */ 2123b8e80941Smrg desc[4] = esgs_va; 2124b8e80941Smrg desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| 2125b8e80941Smrg S_008F04_STRIDE(0) | 2126b8e80941Smrg S_008F04_SWIZZLE_ENABLE(false); 2127b8e80941Smrg desc[6] = esgs_ring_size; 2128b8e80941Smrg desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2129b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2130b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2131b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2132b8e80941Smrg 
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2133b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2134b8e80941Smrg S_008F0C_ELEMENT_SIZE(0) | 2135b8e80941Smrg S_008F0C_INDEX_STRIDE(0) | 2136b8e80941Smrg S_008F0C_ADD_TID_ENABLE(false); 2137b8e80941Smrg } 2138b8e80941Smrg 2139b8e80941Smrg desc += 8; 2140b8e80941Smrg 2141b8e80941Smrg if (gsvs_ring_bo) { 2142b8e80941Smrg uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo); 2143b8e80941Smrg 2144b8e80941Smrg /* VS entry for GS->VS ring */ 2145b8e80941Smrg /* stride 0, num records - size, elsize0, 2146b8e80941Smrg index stride 0 */ 2147b8e80941Smrg desc[0] = gsvs_va; 2148b8e80941Smrg desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| 2149b8e80941Smrg S_008F04_STRIDE(0) | 2150b8e80941Smrg S_008F04_SWIZZLE_ENABLE(false); 2151b8e80941Smrg desc[2] = gsvs_ring_size; 2152b8e80941Smrg desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2153b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2154b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2155b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2156b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2157b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2158b8e80941Smrg S_008F0C_ELEMENT_SIZE(0) | 2159b8e80941Smrg S_008F0C_INDEX_STRIDE(0) | 2160b8e80941Smrg S_008F0C_ADD_TID_ENABLE(false); 2161b8e80941Smrg 2162b8e80941Smrg /* stride gsvs_itemsize, num records 64 2163b8e80941Smrg elsize 4, index stride 16 */ 2164b8e80941Smrg /* shader will patch stride and desc[2] */ 2165b8e80941Smrg desc[4] = gsvs_va; 2166b8e80941Smrg desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| 2167b8e80941Smrg S_008F04_STRIDE(0) | 2168b8e80941Smrg S_008F04_SWIZZLE_ENABLE(true); 2169b8e80941Smrg desc[6] = 0; 2170b8e80941Smrg desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2171b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2172b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2173b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 
2174b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2175b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2176b8e80941Smrg S_008F0C_ELEMENT_SIZE(1) | 2177b8e80941Smrg S_008F0C_INDEX_STRIDE(1) | 2178b8e80941Smrg S_008F0C_ADD_TID_ENABLE(true); 2179b8e80941Smrg } 2180b8e80941Smrg 2181b8e80941Smrg desc += 8; 2182b8e80941Smrg 2183b8e80941Smrg if (tess_rings_bo) { 2184b8e80941Smrg uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); 2185b8e80941Smrg uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; 2186b8e80941Smrg 2187b8e80941Smrg desc[0] = tess_va; 2188b8e80941Smrg desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | 2189b8e80941Smrg S_008F04_STRIDE(0) | 2190b8e80941Smrg S_008F04_SWIZZLE_ENABLE(false); 2191b8e80941Smrg desc[2] = tess_factor_ring_size; 2192b8e80941Smrg desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2193b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2194b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2195b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2196b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2197b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2198b8e80941Smrg S_008F0C_ELEMENT_SIZE(0) | 2199b8e80941Smrg S_008F0C_INDEX_STRIDE(0) | 2200b8e80941Smrg S_008F0C_ADD_TID_ENABLE(false); 2201b8e80941Smrg 2202b8e80941Smrg desc[4] = tess_offchip_va; 2203b8e80941Smrg desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | 2204b8e80941Smrg S_008F04_STRIDE(0) | 2205b8e80941Smrg S_008F04_SWIZZLE_ENABLE(false); 2206b8e80941Smrg desc[6] = tess_offchip_ring_size; 2207b8e80941Smrg desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2208b8e80941Smrg S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2209b8e80941Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2210b8e80941Smrg S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2211b8e80941Smrg S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2212b8e80941Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 2213b8e80941Smrg 
S_008F0C_ELEMENT_SIZE(0) | 2214b8e80941Smrg S_008F0C_INDEX_STRIDE(0) | 2215b8e80941Smrg S_008F0C_ADD_TID_ENABLE(false); 2216b8e80941Smrg } 2217b8e80941Smrg 2218b8e80941Smrg desc += 8; 2219b8e80941Smrg 2220b8e80941Smrg if (add_sample_positions) { 2221b8e80941Smrg /* add sample positions after all rings */ 2222b8e80941Smrg memcpy(desc, queue->device->sample_locations_1x, 8); 2223b8e80941Smrg desc += 2; 2224b8e80941Smrg memcpy(desc, queue->device->sample_locations_2x, 16); 2225b8e80941Smrg desc += 4; 2226b8e80941Smrg memcpy(desc, queue->device->sample_locations_4x, 32); 2227b8e80941Smrg desc += 8; 2228b8e80941Smrg memcpy(desc, queue->device->sample_locations_8x, 64); 2229b8e80941Smrg } 2230b8e80941Smrg} 2231b8e80941Smrg 2232b8e80941Smrgstatic unsigned 2233b8e80941Smrgradv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p) 2234b8e80941Smrg{ 2235b8e80941Smrg bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK && 2236b8e80941Smrg device->physical_device->rad_info.family != CHIP_CARRIZO && 2237b8e80941Smrg device->physical_device->rad_info.family != CHIP_STONEY; 2238b8e80941Smrg unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; 2239b8e80941Smrg unsigned max_offchip_buffers; 2240b8e80941Smrg unsigned offchip_granularity; 2241b8e80941Smrg unsigned hs_offchip_param; 2242b8e80941Smrg 2243b8e80941Smrg /* 2244b8e80941Smrg * Per RadeonSI: 2245b8e80941Smrg * This must be one less than the maximum number due to a hw limitation. 2246b8e80941Smrg * Various hardware bugs in SI, CIK, and GFX9 need this. 2247b8e80941Smrg * 2248b8e80941Smrg * Per AMDVLK: 2249b8e80941Smrg * Vega10 should limit max_offchip_buffers to 508 (4 * 127). 2250b8e80941Smrg * Gfx7 should limit max_offchip_buffers to 508 2251b8e80941Smrg * Gfx6 should limit max_offchip_buffers to 126 (2 * 63) 2252b8e80941Smrg * 2253b8e80941Smrg * Follow AMDVLK here. 
2254b8e80941Smrg */ 2255b8e80941Smrg if (device->physical_device->rad_info.family == CHIP_VEGA10 || 2256b8e80941Smrg device->physical_device->rad_info.chip_class == CIK || 2257b8e80941Smrg device->physical_device->rad_info.chip_class == SI) 2258b8e80941Smrg --max_offchip_buffers_per_se; 2259b8e80941Smrg 2260b8e80941Smrg max_offchip_buffers = max_offchip_buffers_per_se * 2261b8e80941Smrg device->physical_device->rad_info.max_se; 2262b8e80941Smrg 2263b8e80941Smrg /* Hawaii has a bug with offchip buffers > 256 that can be worked 2264b8e80941Smrg * around by setting 4K granularity. 2265b8e80941Smrg */ 2266b8e80941Smrg if (device->tess_offchip_block_dw_size == 4096) { 2267b8e80941Smrg assert(device->physical_device->rad_info.family == CHIP_HAWAII); 2268b8e80941Smrg offchip_granularity = V_03093C_X_4K_DWORDS; 2269b8e80941Smrg } else { 2270b8e80941Smrg assert(device->tess_offchip_block_dw_size == 8192); 2271b8e80941Smrg offchip_granularity = V_03093C_X_8K_DWORDS; 2272b8e80941Smrg } 2273b8e80941Smrg 2274b8e80941Smrg switch (device->physical_device->rad_info.chip_class) { 2275b8e80941Smrg case SI: 2276b8e80941Smrg max_offchip_buffers = MIN2(max_offchip_buffers, 126); 2277b8e80941Smrg break; 2278b8e80941Smrg case CIK: 2279b8e80941Smrg case VI: 2280b8e80941Smrg case GFX9: 2281b8e80941Smrg default: 2282b8e80941Smrg max_offchip_buffers = MIN2(max_offchip_buffers, 508); 2283b8e80941Smrg break; 2284b8e80941Smrg } 2285b8e80941Smrg 2286b8e80941Smrg *max_offchip_buffers_p = max_offchip_buffers; 2287b8e80941Smrg if (device->physical_device->rad_info.chip_class >= CIK) { 2288b8e80941Smrg if (device->physical_device->rad_info.chip_class >= VI) 2289b8e80941Smrg --max_offchip_buffers; 2290b8e80941Smrg hs_offchip_param = 2291b8e80941Smrg S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | 2292b8e80941Smrg S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); 2293b8e80941Smrg } else { 2294b8e80941Smrg hs_offchip_param = 2295b8e80941Smrg S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); 
2296b8e80941Smrg } 2297b8e80941Smrg return hs_offchip_param; 2298b8e80941Smrg} 2299b8e80941Smrg 2300b8e80941Smrgstatic void 2301b8e80941Smrgradv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs, 2302b8e80941Smrg struct radeon_winsys_bo *esgs_ring_bo, 2303b8e80941Smrg uint32_t esgs_ring_size, 2304b8e80941Smrg struct radeon_winsys_bo *gsvs_ring_bo, 2305b8e80941Smrg uint32_t gsvs_ring_size) 2306b8e80941Smrg{ 2307b8e80941Smrg if (!esgs_ring_bo && !gsvs_ring_bo) 2308b8e80941Smrg return; 2309b8e80941Smrg 2310b8e80941Smrg if (esgs_ring_bo) 2311b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo); 2312b8e80941Smrg 2313b8e80941Smrg if (gsvs_ring_bo) 2314b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo); 2315b8e80941Smrg 2316b8e80941Smrg if (queue->device->physical_device->rad_info.chip_class >= CIK) { 2317b8e80941Smrg radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); 2318b8e80941Smrg radeon_emit(cs, esgs_ring_size >> 8); 2319b8e80941Smrg radeon_emit(cs, gsvs_ring_size >> 8); 2320b8e80941Smrg } else { 2321b8e80941Smrg radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2); 2322b8e80941Smrg radeon_emit(cs, esgs_ring_size >> 8); 2323b8e80941Smrg radeon_emit(cs, gsvs_ring_size >> 8); 2324b8e80941Smrg } 2325b8e80941Smrg} 2326b8e80941Smrg 2327b8e80941Smrgstatic void 2328b8e80941Smrgradv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs, 2329b8e80941Smrg unsigned hs_offchip_param, unsigned tf_ring_size, 2330b8e80941Smrg struct radeon_winsys_bo *tess_rings_bo) 2331b8e80941Smrg{ 2332b8e80941Smrg uint64_t tf_va; 2333b8e80941Smrg 2334b8e80941Smrg if (!tess_rings_bo) 2335b8e80941Smrg return; 2336b8e80941Smrg 2337b8e80941Smrg tf_va = radv_buffer_get_va(tess_rings_bo); 2338b8e80941Smrg 2339b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo); 2340b8e80941Smrg 2341b8e80941Smrg if (queue->device->physical_device->rad_info.chip_class >= CIK) { 2342b8e80941Smrg 
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, 2343b8e80941Smrg S_030938_SIZE(tf_ring_size / 4)); 2344b8e80941Smrg radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, 2345b8e80941Smrg tf_va >> 8); 2346b8e80941Smrg if (queue->device->physical_device->rad_info.chip_class >= GFX9) { 2347b8e80941Smrg radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, 2348b8e80941Smrg S_030944_BASE_HI(tf_va >> 40)); 2349b8e80941Smrg } 2350b8e80941Smrg radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, 2351b8e80941Smrg hs_offchip_param); 2352b8e80941Smrg } else { 2353b8e80941Smrg radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, 2354b8e80941Smrg S_008988_SIZE(tf_ring_size / 4)); 2355b8e80941Smrg radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, 2356b8e80941Smrg tf_va >> 8); 2357b8e80941Smrg radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, 2358b8e80941Smrg hs_offchip_param); 2359b8e80941Smrg } 2360b8e80941Smrg} 2361b8e80941Smrg 2362b8e80941Smrgstatic void 2363b8e80941Smrgradv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs, 2364b8e80941Smrg struct radeon_winsys_bo *compute_scratch_bo) 2365b8e80941Smrg{ 2366b8e80941Smrg uint64_t scratch_va; 2367b8e80941Smrg 2368b8e80941Smrg if (!compute_scratch_bo) 2369b8e80941Smrg return; 2370b8e80941Smrg 2371b8e80941Smrg scratch_va = radv_buffer_get_va(compute_scratch_bo); 2372b8e80941Smrg 2373b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo); 2374b8e80941Smrg 2375b8e80941Smrg radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2); 2376b8e80941Smrg radeon_emit(cs, scratch_va); 2377b8e80941Smrg radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | 2378b8e80941Smrg S_008F04_SWIZZLE_ENABLE(1)); 2379b8e80941Smrg} 2380b8e80941Smrg 2381b8e80941Smrgstatic void 2382b8e80941Smrgradv_emit_global_shader_pointers(struct radv_queue *queue, 2383b8e80941Smrg struct radeon_cmdbuf *cs, 2384b8e80941Smrg struct radeon_winsys_bo *descriptor_bo) 2385b8e80941Smrg{ 
2386b8e80941Smrg uint64_t va; 2387b8e80941Smrg 2388b8e80941Smrg if (!descriptor_bo) 2389b8e80941Smrg return; 2390b8e80941Smrg 2391b8e80941Smrg va = radv_buffer_get_va(descriptor_bo); 2392b8e80941Smrg 2393b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo); 2394b8e80941Smrg 2395b8e80941Smrg if (queue->device->physical_device->rad_info.chip_class >= GFX9) { 2396b8e80941Smrg uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, 2397b8e80941Smrg R_00B130_SPI_SHADER_USER_DATA_VS_0, 2398b8e80941Smrg R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, 2399b8e80941Smrg R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; 2400b8e80941Smrg 2401b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 2402b8e80941Smrg radv_emit_shader_pointer(queue->device, cs, regs[i], 2403b8e80941Smrg va, true); 2404b8e80941Smrg } 2405b8e80941Smrg } else { 2406b8e80941Smrg uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, 2407b8e80941Smrg R_00B130_SPI_SHADER_USER_DATA_VS_0, 2408b8e80941Smrg R_00B230_SPI_SHADER_USER_DATA_GS_0, 2409b8e80941Smrg R_00B330_SPI_SHADER_USER_DATA_ES_0, 2410b8e80941Smrg R_00B430_SPI_SHADER_USER_DATA_HS_0, 2411b8e80941Smrg R_00B530_SPI_SHADER_USER_DATA_LS_0}; 2412b8e80941Smrg 2413b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 2414b8e80941Smrg radv_emit_shader_pointer(queue->device, cs, regs[i], 2415b8e80941Smrg va, true); 2416b8e80941Smrg } 2417b8e80941Smrg } 2418b8e80941Smrg} 2419b8e80941Smrg 2420b8e80941Smrgstatic void 2421b8e80941Smrgradv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) 2422b8e80941Smrg{ 2423b8e80941Smrg struct radv_device *device = queue->device; 2424b8e80941Smrg 2425b8e80941Smrg if (device->gfx_init) { 2426b8e80941Smrg uint64_t va = radv_buffer_get_va(device->gfx_init); 2427b8e80941Smrg 2428b8e80941Smrg radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 2429b8e80941Smrg radeon_emit(cs, va); 2430b8e80941Smrg radeon_emit(cs, va >> 32); 2431b8e80941Smrg radeon_emit(cs, device->gfx_init_size_dw & 0xffff); 
2432b8e80941Smrg 2433b8e80941Smrg radv_cs_add_buffer(device->ws, cs, device->gfx_init); 2434b8e80941Smrg } else { 2435b8e80941Smrg struct radv_physical_device *physical_device = device->physical_device; 2436b8e80941Smrg si_emit_graphics(physical_device, cs); 2437b8e80941Smrg } 2438b8e80941Smrg} 2439b8e80941Smrg 2440b8e80941Smrgstatic void 2441b8e80941Smrgradv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) 2442b8e80941Smrg{ 2443b8e80941Smrg struct radv_physical_device *physical_device = queue->device->physical_device; 2444b8e80941Smrg si_emit_compute(physical_device, cs); 2445b8e80941Smrg} 2446b8e80941Smrg 2447b8e80941Smrgstatic VkResult 2448b8e80941Smrgradv_get_preamble_cs(struct radv_queue *queue, 2449b8e80941Smrg uint32_t scratch_size, 2450b8e80941Smrg uint32_t compute_scratch_size, 2451b8e80941Smrg uint32_t esgs_ring_size, 2452b8e80941Smrg uint32_t gsvs_ring_size, 2453b8e80941Smrg bool needs_tess_rings, 2454b8e80941Smrg bool needs_sample_positions, 2455b8e80941Smrg struct radeon_cmdbuf **initial_full_flush_preamble_cs, 2456b8e80941Smrg struct radeon_cmdbuf **initial_preamble_cs, 2457b8e80941Smrg struct radeon_cmdbuf **continue_preamble_cs) 2458b8e80941Smrg{ 2459b8e80941Smrg struct radeon_winsys_bo *scratch_bo = NULL; 2460b8e80941Smrg struct radeon_winsys_bo *descriptor_bo = NULL; 2461b8e80941Smrg struct radeon_winsys_bo *compute_scratch_bo = NULL; 2462b8e80941Smrg struct radeon_winsys_bo *esgs_ring_bo = NULL; 2463b8e80941Smrg struct radeon_winsys_bo *gsvs_ring_bo = NULL; 2464b8e80941Smrg struct radeon_winsys_bo *tess_rings_bo = NULL; 2465b8e80941Smrg struct radeon_cmdbuf *dest_cs[3] = {0}; 2466b8e80941Smrg bool add_tess_rings = false, add_sample_positions = false; 2467b8e80941Smrg unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; 2468b8e80941Smrg unsigned max_offchip_buffers; 2469b8e80941Smrg unsigned hs_offchip_param = 0; 2470b8e80941Smrg unsigned tess_offchip_ring_offset; 2471b8e80941Smrg uint32_t ring_bo_flags = 
RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING; 2472b8e80941Smrg if (!queue->has_tess_rings) { 2473b8e80941Smrg if (needs_tess_rings) 2474b8e80941Smrg add_tess_rings = true; 2475b8e80941Smrg } 2476b8e80941Smrg if (!queue->has_sample_positions) { 2477b8e80941Smrg if (needs_sample_positions) 2478b8e80941Smrg add_sample_positions = true; 2479b8e80941Smrg } 2480b8e80941Smrg tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se; 2481b8e80941Smrg hs_offchip_param = radv_get_hs_offchip_param(queue->device, 2482b8e80941Smrg &max_offchip_buffers); 2483b8e80941Smrg tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024); 2484b8e80941Smrg tess_offchip_ring_size = max_offchip_buffers * 2485b8e80941Smrg queue->device->tess_offchip_block_dw_size * 4; 2486b8e80941Smrg 2487b8e80941Smrg if (scratch_size <= queue->scratch_size && 2488b8e80941Smrg compute_scratch_size <= queue->compute_scratch_size && 2489b8e80941Smrg esgs_ring_size <= queue->esgs_ring_size && 2490b8e80941Smrg gsvs_ring_size <= queue->gsvs_ring_size && 2491b8e80941Smrg !add_tess_rings && !add_sample_positions && 2492b8e80941Smrg queue->initial_preamble_cs) { 2493b8e80941Smrg *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; 2494b8e80941Smrg *initial_preamble_cs = queue->initial_preamble_cs; 2495b8e80941Smrg *continue_preamble_cs = queue->continue_preamble_cs; 2496b8e80941Smrg if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) 2497b8e80941Smrg *continue_preamble_cs = NULL; 2498b8e80941Smrg return VK_SUCCESS; 2499b8e80941Smrg } 2500b8e80941Smrg 2501b8e80941Smrg if (scratch_size > queue->scratch_size) { 2502b8e80941Smrg scratch_bo = queue->device->ws->buffer_create(queue->device->ws, 2503b8e80941Smrg scratch_size, 2504b8e80941Smrg 4096, 2505b8e80941Smrg RADEON_DOMAIN_VRAM, 2506b8e80941Smrg ring_bo_flags, 2507b8e80941Smrg RADV_BO_PRIORITY_SCRATCH); 2508b8e80941Smrg if (!scratch_bo) 2509b8e80941Smrg goto fail; 
2510b8e80941Smrg } else 2511b8e80941Smrg scratch_bo = queue->scratch_bo; 2512b8e80941Smrg 2513b8e80941Smrg if (compute_scratch_size > queue->compute_scratch_size) { 2514b8e80941Smrg compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, 2515b8e80941Smrg compute_scratch_size, 2516b8e80941Smrg 4096, 2517b8e80941Smrg RADEON_DOMAIN_VRAM, 2518b8e80941Smrg ring_bo_flags, 2519b8e80941Smrg RADV_BO_PRIORITY_SCRATCH); 2520b8e80941Smrg if (!compute_scratch_bo) 2521b8e80941Smrg goto fail; 2522b8e80941Smrg 2523b8e80941Smrg } else 2524b8e80941Smrg compute_scratch_bo = queue->compute_scratch_bo; 2525b8e80941Smrg 2526b8e80941Smrg if (esgs_ring_size > queue->esgs_ring_size) { 2527b8e80941Smrg esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, 2528b8e80941Smrg esgs_ring_size, 2529b8e80941Smrg 4096, 2530b8e80941Smrg RADEON_DOMAIN_VRAM, 2531b8e80941Smrg ring_bo_flags, 2532b8e80941Smrg RADV_BO_PRIORITY_SCRATCH); 2533b8e80941Smrg if (!esgs_ring_bo) 2534b8e80941Smrg goto fail; 2535b8e80941Smrg } else { 2536b8e80941Smrg esgs_ring_bo = queue->esgs_ring_bo; 2537b8e80941Smrg esgs_ring_size = queue->esgs_ring_size; 2538b8e80941Smrg } 2539b8e80941Smrg 2540b8e80941Smrg if (gsvs_ring_size > queue->gsvs_ring_size) { 2541b8e80941Smrg gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, 2542b8e80941Smrg gsvs_ring_size, 2543b8e80941Smrg 4096, 2544b8e80941Smrg RADEON_DOMAIN_VRAM, 2545b8e80941Smrg ring_bo_flags, 2546b8e80941Smrg RADV_BO_PRIORITY_SCRATCH); 2547b8e80941Smrg if (!gsvs_ring_bo) 2548b8e80941Smrg goto fail; 2549b8e80941Smrg } else { 2550b8e80941Smrg gsvs_ring_bo = queue->gsvs_ring_bo; 2551b8e80941Smrg gsvs_ring_size = queue->gsvs_ring_size; 2552b8e80941Smrg } 2553b8e80941Smrg 2554b8e80941Smrg if (add_tess_rings) { 2555b8e80941Smrg tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws, 2556b8e80941Smrg tess_offchip_ring_offset + tess_offchip_ring_size, 2557b8e80941Smrg 256, 2558b8e80941Smrg RADEON_DOMAIN_VRAM, 2559b8e80941Smrg 
ring_bo_flags, 2560b8e80941Smrg RADV_BO_PRIORITY_SCRATCH); 2561b8e80941Smrg if (!tess_rings_bo) 2562b8e80941Smrg goto fail; 2563b8e80941Smrg } else { 2564b8e80941Smrg tess_rings_bo = queue->tess_rings_bo; 2565b8e80941Smrg } 2566b8e80941Smrg 2567b8e80941Smrg if (scratch_bo != queue->scratch_bo || 2568b8e80941Smrg esgs_ring_bo != queue->esgs_ring_bo || 2569b8e80941Smrg gsvs_ring_bo != queue->gsvs_ring_bo || 2570b8e80941Smrg tess_rings_bo != queue->tess_rings_bo || 2571b8e80941Smrg add_sample_positions) { 2572b8e80941Smrg uint32_t size = 0; 2573b8e80941Smrg if (gsvs_ring_bo || esgs_ring_bo || 2574b8e80941Smrg tess_rings_bo || add_sample_positions) { 2575b8e80941Smrg size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ 2576b8e80941Smrg if (add_sample_positions) 2577b8e80941Smrg size += 128; /* 64+32+16+8 = 120 bytes */ 2578b8e80941Smrg } 2579b8e80941Smrg else if (scratch_bo) 2580b8e80941Smrg size = 8; /* 2 dword */ 2581b8e80941Smrg 2582b8e80941Smrg descriptor_bo = queue->device->ws->buffer_create(queue->device->ws, 2583b8e80941Smrg size, 2584b8e80941Smrg 4096, 2585b8e80941Smrg RADEON_DOMAIN_VRAM, 2586b8e80941Smrg RADEON_FLAG_CPU_ACCESS | 2587b8e80941Smrg RADEON_FLAG_NO_INTERPROCESS_SHARING | 2588b8e80941Smrg RADEON_FLAG_READ_ONLY, 2589b8e80941Smrg RADV_BO_PRIORITY_DESCRIPTOR); 2590b8e80941Smrg if (!descriptor_bo) 2591b8e80941Smrg goto fail; 2592b8e80941Smrg } else 2593b8e80941Smrg descriptor_bo = queue->descriptor_bo; 2594b8e80941Smrg 2595b8e80941Smrg if (descriptor_bo != queue->descriptor_bo) { 2596b8e80941Smrg uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); 2597b8e80941Smrg 2598b8e80941Smrg if (scratch_bo) { 2599b8e80941Smrg uint64_t scratch_va = radv_buffer_get_va(scratch_bo); 2600b8e80941Smrg uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | 2601b8e80941Smrg S_008F04_SWIZZLE_ENABLE(1); 2602b8e80941Smrg map[0] = scratch_va; 2603b8e80941Smrg map[1] = rsrc1; 2604b8e80941Smrg } 2605b8e80941Smrg 2606b8e80941Smrg if (esgs_ring_bo || 
gsvs_ring_bo || tess_rings_bo || add_sample_positions) 2607b8e80941Smrg fill_geom_tess_rings(queue, map, add_sample_positions, 2608b8e80941Smrg esgs_ring_size, esgs_ring_bo, 2609b8e80941Smrg gsvs_ring_size, gsvs_ring_bo, 2610b8e80941Smrg tess_factor_ring_size, 2611b8e80941Smrg tess_offchip_ring_offset, 2612b8e80941Smrg tess_offchip_ring_size, 2613b8e80941Smrg tess_rings_bo); 2614b8e80941Smrg 2615b8e80941Smrg queue->device->ws->buffer_unmap(descriptor_bo); 2616b8e80941Smrg } 2617b8e80941Smrg 2618b8e80941Smrg for(int i = 0; i < 3; ++i) { 2619b8e80941Smrg struct radeon_cmdbuf *cs = NULL; 2620b8e80941Smrg cs = queue->device->ws->cs_create(queue->device->ws, 2621b8e80941Smrg queue->queue_family_index ? RING_COMPUTE : RING_GFX); 2622b8e80941Smrg if (!cs) 2623b8e80941Smrg goto fail; 2624b8e80941Smrg 2625b8e80941Smrg dest_cs[i] = cs; 2626b8e80941Smrg 2627b8e80941Smrg if (scratch_bo) 2628b8e80941Smrg radv_cs_add_buffer(queue->device->ws, cs, scratch_bo); 2629b8e80941Smrg 2630b8e80941Smrg /* Emit initial configuration. 
*/ 2631b8e80941Smrg switch (queue->queue_family_index) { 2632b8e80941Smrg case RADV_QUEUE_GENERAL: 2633b8e80941Smrg radv_init_graphics_state(cs, queue); 2634b8e80941Smrg break; 2635b8e80941Smrg case RADV_QUEUE_COMPUTE: 2636b8e80941Smrg radv_init_compute_state(cs, queue); 2637b8e80941Smrg break; 2638b8e80941Smrg case RADV_QUEUE_TRANSFER: 2639b8e80941Smrg break; 2640b8e80941Smrg } 2641b8e80941Smrg 2642b8e80941Smrg if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) { 2643b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2644b8e80941Smrg radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2645b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2646b8e80941Smrg radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 2647b8e80941Smrg } 2648b8e80941Smrg 2649b8e80941Smrg radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, 2650b8e80941Smrg gsvs_ring_bo, gsvs_ring_size); 2651b8e80941Smrg radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, 2652b8e80941Smrg tess_factor_ring_size, tess_rings_bo); 2653b8e80941Smrg radv_emit_global_shader_pointers(queue, cs, descriptor_bo); 2654b8e80941Smrg radv_emit_compute_scratch(queue, cs, compute_scratch_bo); 2655b8e80941Smrg 2656b8e80941Smrg if (i == 0) { 2657b8e80941Smrg si_cs_emit_cache_flush(cs, 2658b8e80941Smrg queue->device->physical_device->rad_info.chip_class, 2659b8e80941Smrg NULL, 0, 2660b8e80941Smrg queue->queue_family_index == RING_COMPUTE && 2661b8e80941Smrg queue->device->physical_device->rad_info.chip_class >= CIK, 2662b8e80941Smrg (queue->queue_family_index == RADV_QUEUE_COMPUTE ? 
RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | 2663b8e80941Smrg RADV_CMD_FLAG_INV_ICACHE | 2664b8e80941Smrg RADV_CMD_FLAG_INV_SMEM_L1 | 2665b8e80941Smrg RADV_CMD_FLAG_INV_VMEM_L1 | 2666b8e80941Smrg RADV_CMD_FLAG_INV_GLOBAL_L2 | 2667b8e80941Smrg RADV_CMD_FLAG_START_PIPELINE_STATS, 0); 2668b8e80941Smrg } else if (i == 1) { 2669b8e80941Smrg si_cs_emit_cache_flush(cs, 2670b8e80941Smrg queue->device->physical_device->rad_info.chip_class, 2671b8e80941Smrg NULL, 0, 2672b8e80941Smrg queue->queue_family_index == RING_COMPUTE && 2673b8e80941Smrg queue->device->physical_device->rad_info.chip_class >= CIK, 2674b8e80941Smrg RADV_CMD_FLAG_INV_ICACHE | 2675b8e80941Smrg RADV_CMD_FLAG_INV_SMEM_L1 | 2676b8e80941Smrg RADV_CMD_FLAG_INV_VMEM_L1 | 2677b8e80941Smrg RADV_CMD_FLAG_INV_GLOBAL_L2 | 2678b8e80941Smrg RADV_CMD_FLAG_START_PIPELINE_STATS, 0); 2679b8e80941Smrg } 2680b8e80941Smrg 2681b8e80941Smrg if (!queue->device->ws->cs_finalize(cs)) 2682b8e80941Smrg goto fail; 2683b8e80941Smrg } 2684b8e80941Smrg 2685b8e80941Smrg if (queue->initial_full_flush_preamble_cs) 2686b8e80941Smrg queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs); 2687b8e80941Smrg 2688b8e80941Smrg if (queue->initial_preamble_cs) 2689b8e80941Smrg queue->device->ws->cs_destroy(queue->initial_preamble_cs); 2690b8e80941Smrg 2691b8e80941Smrg if (queue->continue_preamble_cs) 2692b8e80941Smrg queue->device->ws->cs_destroy(queue->continue_preamble_cs); 2693b8e80941Smrg 2694b8e80941Smrg queue->initial_full_flush_preamble_cs = dest_cs[0]; 2695b8e80941Smrg queue->initial_preamble_cs = dest_cs[1]; 2696b8e80941Smrg queue->continue_preamble_cs = dest_cs[2]; 2697b8e80941Smrg 2698b8e80941Smrg if (scratch_bo != queue->scratch_bo) { 2699b8e80941Smrg if (queue->scratch_bo) 2700b8e80941Smrg queue->device->ws->buffer_destroy(queue->scratch_bo); 2701b8e80941Smrg queue->scratch_bo = scratch_bo; 2702b8e80941Smrg queue->scratch_size = scratch_size; 2703b8e80941Smrg } 
2704b8e80941Smrg 2705b8e80941Smrg if (compute_scratch_bo != queue->compute_scratch_bo) { 2706b8e80941Smrg if (queue->compute_scratch_bo) 2707b8e80941Smrg queue->device->ws->buffer_destroy(queue->compute_scratch_bo); 2708b8e80941Smrg queue->compute_scratch_bo = compute_scratch_bo; 2709b8e80941Smrg queue->compute_scratch_size = compute_scratch_size; 2710b8e80941Smrg } 2711b8e80941Smrg 2712b8e80941Smrg if (esgs_ring_bo != queue->esgs_ring_bo) { 2713b8e80941Smrg if (queue->esgs_ring_bo) 2714b8e80941Smrg queue->device->ws->buffer_destroy(queue->esgs_ring_bo); 2715b8e80941Smrg queue->esgs_ring_bo = esgs_ring_bo; 2716b8e80941Smrg queue->esgs_ring_size = esgs_ring_size; 2717b8e80941Smrg } 2718b8e80941Smrg 2719b8e80941Smrg if (gsvs_ring_bo != queue->gsvs_ring_bo) { 2720b8e80941Smrg if (queue->gsvs_ring_bo) 2721b8e80941Smrg queue->device->ws->buffer_destroy(queue->gsvs_ring_bo); 2722b8e80941Smrg queue->gsvs_ring_bo = gsvs_ring_bo; 2723b8e80941Smrg queue->gsvs_ring_size = gsvs_ring_size; 2724b8e80941Smrg } 2725b8e80941Smrg 2726b8e80941Smrg if (tess_rings_bo != queue->tess_rings_bo) { 2727b8e80941Smrg queue->tess_rings_bo = tess_rings_bo; 2728b8e80941Smrg queue->has_tess_rings = true; 2729b8e80941Smrg } 2730b8e80941Smrg 2731b8e80941Smrg if (descriptor_bo != queue->descriptor_bo) { 2732b8e80941Smrg if (queue->descriptor_bo) 2733b8e80941Smrg queue->device->ws->buffer_destroy(queue->descriptor_bo); 2734b8e80941Smrg 2735b8e80941Smrg queue->descriptor_bo = descriptor_bo; 2736b8e80941Smrg } 2737b8e80941Smrg 2738b8e80941Smrg if (add_sample_positions) 2739b8e80941Smrg queue->has_sample_positions = true; 2740b8e80941Smrg 2741b8e80941Smrg *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; 2742b8e80941Smrg *initial_preamble_cs = queue->initial_preamble_cs; 2743b8e80941Smrg *continue_preamble_cs = queue->continue_preamble_cs; 2744b8e80941Smrg if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) 2745b8e80941Smrg *continue_preamble_cs = 
NULL; 2746b8e80941Smrg return VK_SUCCESS; 2747b8e80941Smrgfail: 2748b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i) 2749b8e80941Smrg if (dest_cs[i]) 2750b8e80941Smrg queue->device->ws->cs_destroy(dest_cs[i]); 2751b8e80941Smrg if (descriptor_bo && descriptor_bo != queue->descriptor_bo) 2752b8e80941Smrg queue->device->ws->buffer_destroy(descriptor_bo); 2753b8e80941Smrg if (scratch_bo && scratch_bo != queue->scratch_bo) 2754b8e80941Smrg queue->device->ws->buffer_destroy(scratch_bo); 2755b8e80941Smrg if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo) 2756b8e80941Smrg queue->device->ws->buffer_destroy(compute_scratch_bo); 2757b8e80941Smrg if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo) 2758b8e80941Smrg queue->device->ws->buffer_destroy(esgs_ring_bo); 2759b8e80941Smrg if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo) 2760b8e80941Smrg queue->device->ws->buffer_destroy(gsvs_ring_bo); 2761b8e80941Smrg if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo) 2762b8e80941Smrg queue->device->ws->buffer_destroy(tess_rings_bo); 2763b8e80941Smrg return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2764b8e80941Smrg} 2765b8e80941Smrg 2766b8e80941Smrgstatic VkResult radv_alloc_sem_counts(struct radv_instance *instance, 2767b8e80941Smrg struct radv_winsys_sem_counts *counts, 2768b8e80941Smrg int num_sems, 2769b8e80941Smrg const VkSemaphore *sems, 2770b8e80941Smrg VkFence _fence, 2771b8e80941Smrg bool reset_temp) 2772b8e80941Smrg{ 2773b8e80941Smrg int syncobj_idx = 0, sem_idx = 0; 2774b8e80941Smrg 2775b8e80941Smrg if (num_sems == 0 && _fence == VK_NULL_HANDLE) 2776b8e80941Smrg return VK_SUCCESS; 2777b8e80941Smrg 2778b8e80941Smrg for (uint32_t i = 0; i < num_sems; i++) { 2779b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); 2780b8e80941Smrg 2781b8e80941Smrg if (sem->temp_syncobj || sem->syncobj) 2782b8e80941Smrg counts->syncobj_count++; 2783b8e80941Smrg else 2784b8e80941Smrg counts->sem_count++; 
2785b8e80941Smrg } 2786b8e80941Smrg 2787b8e80941Smrg if (_fence != VK_NULL_HANDLE) { 2788b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 2789b8e80941Smrg if (fence->temp_syncobj || fence->syncobj) 2790b8e80941Smrg counts->syncobj_count++; 2791b8e80941Smrg } 2792b8e80941Smrg 2793b8e80941Smrg if (counts->syncobj_count) { 2794b8e80941Smrg counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count); 2795b8e80941Smrg if (!counts->syncobj) 2796b8e80941Smrg return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); 2797b8e80941Smrg } 2798b8e80941Smrg 2799b8e80941Smrg if (counts->sem_count) { 2800b8e80941Smrg counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count); 2801b8e80941Smrg if (!counts->sem) { 2802b8e80941Smrg free(counts->syncobj); 2803b8e80941Smrg return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); 2804b8e80941Smrg } 2805b8e80941Smrg } 2806b8e80941Smrg 2807b8e80941Smrg for (uint32_t i = 0; i < num_sems; i++) { 2808b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); 2809b8e80941Smrg 2810b8e80941Smrg if (sem->temp_syncobj) { 2811b8e80941Smrg counts->syncobj[syncobj_idx++] = sem->temp_syncobj; 2812b8e80941Smrg } 2813b8e80941Smrg else if (sem->syncobj) 2814b8e80941Smrg counts->syncobj[syncobj_idx++] = sem->syncobj; 2815b8e80941Smrg else { 2816b8e80941Smrg assert(sem->sem); 2817b8e80941Smrg counts->sem[sem_idx++] = sem->sem; 2818b8e80941Smrg } 2819b8e80941Smrg } 2820b8e80941Smrg 2821b8e80941Smrg if (_fence != VK_NULL_HANDLE) { 2822b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 2823b8e80941Smrg if (fence->temp_syncobj) 2824b8e80941Smrg counts->syncobj[syncobj_idx++] = fence->temp_syncobj; 2825b8e80941Smrg else if (fence->syncobj) 2826b8e80941Smrg counts->syncobj[syncobj_idx++] = fence->syncobj; 2827b8e80941Smrg } 2828b8e80941Smrg 2829b8e80941Smrg return VK_SUCCESS; 2830b8e80941Smrg} 2831b8e80941Smrg 2832b8e80941Smrgstatic void 2833b8e80941Smrgradv_free_sem_info(struct 
radv_winsys_sem_info *sem_info) 2834b8e80941Smrg{ 2835b8e80941Smrg free(sem_info->wait.syncobj); 2836b8e80941Smrg free(sem_info->wait.sem); 2837b8e80941Smrg free(sem_info->signal.syncobj); 2838b8e80941Smrg free(sem_info->signal.sem); 2839b8e80941Smrg} 2840b8e80941Smrg 2841b8e80941Smrg 2842b8e80941Smrgstatic void radv_free_temp_syncobjs(struct radv_device *device, 2843b8e80941Smrg int num_sems, 2844b8e80941Smrg const VkSemaphore *sems) 2845b8e80941Smrg{ 2846b8e80941Smrg for (uint32_t i = 0; i < num_sems; i++) { 2847b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); 2848b8e80941Smrg 2849b8e80941Smrg if (sem->temp_syncobj) { 2850b8e80941Smrg device->ws->destroy_syncobj(device->ws, sem->temp_syncobj); 2851b8e80941Smrg sem->temp_syncobj = 0; 2852b8e80941Smrg } 2853b8e80941Smrg } 2854b8e80941Smrg} 2855b8e80941Smrg 2856b8e80941Smrgstatic VkResult 2857b8e80941Smrgradv_alloc_sem_info(struct radv_instance *instance, 2858b8e80941Smrg struct radv_winsys_sem_info *sem_info, 2859b8e80941Smrg int num_wait_sems, 2860b8e80941Smrg const VkSemaphore *wait_sems, 2861b8e80941Smrg int num_signal_sems, 2862b8e80941Smrg const VkSemaphore *signal_sems, 2863b8e80941Smrg VkFence fence) 2864b8e80941Smrg{ 2865b8e80941Smrg VkResult ret; 2866b8e80941Smrg memset(sem_info, 0, sizeof(*sem_info)); 2867b8e80941Smrg 2868b8e80941Smrg ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true); 2869b8e80941Smrg if (ret) 2870b8e80941Smrg return ret; 2871b8e80941Smrg ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false); 2872b8e80941Smrg if (ret) 2873b8e80941Smrg radv_free_sem_info(sem_info); 2874b8e80941Smrg 2875b8e80941Smrg /* caller can override these */ 2876b8e80941Smrg sem_info->cs_emit_wait = true; 2877b8e80941Smrg sem_info->cs_emit_signal = true; 2878b8e80941Smrg return ret; 2879b8e80941Smrg} 2880b8e80941Smrg 2881b8e80941Smrg/* Signals fence as soon as all the work currently put on queue is done. 
*/ 2882b8e80941Smrgstatic VkResult radv_signal_fence(struct radv_queue *queue, 2883b8e80941Smrg struct radv_fence *fence) 2884b8e80941Smrg{ 2885b8e80941Smrg int ret; 2886b8e80941Smrg VkResult result; 2887b8e80941Smrg struct radv_winsys_sem_info sem_info; 2888b8e80941Smrg 2889b8e80941Smrg result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL, 2890b8e80941Smrg radv_fence_to_handle(fence)); 2891b8e80941Smrg if (result != VK_SUCCESS) 2892b8e80941Smrg return result; 2893b8e80941Smrg 2894b8e80941Smrg ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, 2895b8e80941Smrg &queue->device->empty_cs[queue->queue_family_index], 2896b8e80941Smrg 1, NULL, NULL, &sem_info, NULL, 2897b8e80941Smrg false, fence->fence); 2898b8e80941Smrg radv_free_sem_info(&sem_info); 2899b8e80941Smrg 2900b8e80941Smrg if (ret) 2901b8e80941Smrg return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST); 2902b8e80941Smrg 2903b8e80941Smrg return VK_SUCCESS; 2904b8e80941Smrg} 2905b8e80941Smrg 2906b8e80941SmrgVkResult radv_QueueSubmit( 2907b8e80941Smrg VkQueue _queue, 2908b8e80941Smrg uint32_t submitCount, 2909b8e80941Smrg const VkSubmitInfo* pSubmits, 2910b8e80941Smrg VkFence _fence) 2911b8e80941Smrg{ 2912b8e80941Smrg RADV_FROM_HANDLE(radv_queue, queue, _queue); 2913b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 2914b8e80941Smrg struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; 2915b8e80941Smrg struct radeon_winsys_ctx *ctx = queue->hw_ctx; 2916b8e80941Smrg int ret; 2917b8e80941Smrg uint32_t max_cs_submission = queue->device->trace_bo ? 
1 : RADV_MAX_IBS_PER_SUBMIT; 2918b8e80941Smrg uint32_t scratch_size = 0; 2919b8e80941Smrg uint32_t compute_scratch_size = 0; 2920b8e80941Smrg uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; 2921b8e80941Smrg struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL; 2922b8e80941Smrg VkResult result; 2923b8e80941Smrg bool fence_emitted = false; 2924b8e80941Smrg bool tess_rings_needed = false; 2925b8e80941Smrg bool sample_positions_needed = false; 2926b8e80941Smrg 2927b8e80941Smrg /* Do this first so failing to allocate scratch buffers can't result in 2928b8e80941Smrg * partially executed submissions. */ 2929b8e80941Smrg for (uint32_t i = 0; i < submitCount; i++) { 2930b8e80941Smrg for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { 2931b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 2932b8e80941Smrg pSubmits[i].pCommandBuffers[j]); 2933b8e80941Smrg 2934b8e80941Smrg scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed); 2935b8e80941Smrg compute_scratch_size = MAX2(compute_scratch_size, 2936b8e80941Smrg cmd_buffer->compute_scratch_size_needed); 2937b8e80941Smrg esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed); 2938b8e80941Smrg gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); 2939b8e80941Smrg tess_rings_needed |= cmd_buffer->tess_rings_needed; 2940b8e80941Smrg sample_positions_needed |= cmd_buffer->sample_positions_needed; 2941b8e80941Smrg } 2942b8e80941Smrg } 2943b8e80941Smrg 2944b8e80941Smrg result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, 2945b8e80941Smrg esgs_ring_size, gsvs_ring_size, tess_rings_needed, 2946b8e80941Smrg sample_positions_needed, &initial_flush_preamble_cs, 2947b8e80941Smrg &initial_preamble_cs, &continue_preamble_cs); 2948b8e80941Smrg if (result != VK_SUCCESS) 2949b8e80941Smrg return result; 2950b8e80941Smrg 2951b8e80941Smrg for (uint32_t i = 0; i < submitCount; i++) { 2952b8e80941Smrg 
struct radeon_cmdbuf **cs_array; 2953b8e80941Smrg bool do_flush = !i || pSubmits[i].pWaitDstStageMask; 2954b8e80941Smrg bool can_patch = true; 2955b8e80941Smrg uint32_t advance; 2956b8e80941Smrg struct radv_winsys_sem_info sem_info; 2957b8e80941Smrg 2958b8e80941Smrg result = radv_alloc_sem_info(queue->device->instance, 2959b8e80941Smrg &sem_info, 2960b8e80941Smrg pSubmits[i].waitSemaphoreCount, 2961b8e80941Smrg pSubmits[i].pWaitSemaphores, 2962b8e80941Smrg pSubmits[i].signalSemaphoreCount, 2963b8e80941Smrg pSubmits[i].pSignalSemaphores, 2964b8e80941Smrg _fence); 2965b8e80941Smrg if (result != VK_SUCCESS) 2966b8e80941Smrg return result; 2967b8e80941Smrg 2968b8e80941Smrg if (!pSubmits[i].commandBufferCount) { 2969b8e80941Smrg if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { 2970b8e80941Smrg ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, 2971b8e80941Smrg &queue->device->empty_cs[queue->queue_family_index], 2972b8e80941Smrg 1, NULL, NULL, 2973b8e80941Smrg &sem_info, NULL, 2974b8e80941Smrg false, base_fence); 2975b8e80941Smrg if (ret) { 2976b8e80941Smrg radv_loge("failed to submit CS %d\n", i); 2977b8e80941Smrg abort(); 2978b8e80941Smrg } 2979b8e80941Smrg fence_emitted = true; 2980b8e80941Smrg } 2981b8e80941Smrg radv_free_sem_info(&sem_info); 2982b8e80941Smrg continue; 2983b8e80941Smrg } 2984b8e80941Smrg 2985b8e80941Smrg cs_array = malloc(sizeof(struct radeon_cmdbuf *) * 2986b8e80941Smrg (pSubmits[i].commandBufferCount)); 2987b8e80941Smrg 2988b8e80941Smrg for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { 2989b8e80941Smrg RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 2990b8e80941Smrg pSubmits[i].pCommandBuffers[j]); 2991b8e80941Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 2992b8e80941Smrg 2993b8e80941Smrg cs_array[j] = cmd_buffer->cs; 2994b8e80941Smrg if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) 2995b8e80941Smrg can_patch = false; 2996b8e80941Smrg 2997b8e80941Smrg 
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING; 2998b8e80941Smrg } 2999b8e80941Smrg 3000b8e80941Smrg for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { 3001b8e80941Smrg struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; 3002b8e80941Smrg const struct radv_winsys_bo_list *bo_list = NULL; 3003b8e80941Smrg 3004b8e80941Smrg advance = MIN2(max_cs_submission, 3005b8e80941Smrg pSubmits[i].commandBufferCount - j); 3006b8e80941Smrg 3007b8e80941Smrg if (queue->device->trace_bo) 3008b8e80941Smrg *queue->device->trace_id_ptr = 0; 3009b8e80941Smrg 3010b8e80941Smrg sem_info.cs_emit_wait = j == 0; 3011b8e80941Smrg sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount; 3012b8e80941Smrg 3013b8e80941Smrg if (unlikely(queue->device->use_global_bo_list)) { 3014b8e80941Smrg pthread_mutex_lock(&queue->device->bo_list.mutex); 3015b8e80941Smrg bo_list = &queue->device->bo_list.list; 3016b8e80941Smrg } 3017b8e80941Smrg 3018b8e80941Smrg ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, 3019b8e80941Smrg advance, initial_preamble, continue_preamble_cs, 3020b8e80941Smrg &sem_info, bo_list, 3021b8e80941Smrg can_patch, base_fence); 3022b8e80941Smrg 3023b8e80941Smrg if (unlikely(queue->device->use_global_bo_list)) 3024b8e80941Smrg pthread_mutex_unlock(&queue->device->bo_list.mutex); 3025b8e80941Smrg 3026b8e80941Smrg if (ret) { 3027b8e80941Smrg radv_loge("failed to submit CS %d\n", i); 3028b8e80941Smrg abort(); 3029b8e80941Smrg } 3030b8e80941Smrg fence_emitted = true; 3031b8e80941Smrg if (queue->device->trace_bo) { 3032b8e80941Smrg radv_check_gpu_hangs(queue, cs_array[j]); 3033b8e80941Smrg } 3034b8e80941Smrg } 3035b8e80941Smrg 3036b8e80941Smrg radv_free_temp_syncobjs(queue->device, 3037b8e80941Smrg pSubmits[i].waitSemaphoreCount, 3038b8e80941Smrg pSubmits[i].pWaitSemaphores); 3039b8e80941Smrg radv_free_sem_info(&sem_info); 3040b8e80941Smrg free(cs_array); 3041b8e80941Smrg } 
3042b8e80941Smrg 3043b8e80941Smrg if (fence) { 3044b8e80941Smrg if (!fence_emitted) { 3045b8e80941Smrg result = radv_signal_fence(queue, fence); 3046b8e80941Smrg if (result != VK_SUCCESS) 3047b8e80941Smrg return result; 3048b8e80941Smrg } 3049b8e80941Smrg fence->submitted = true; 3050b8e80941Smrg } 3051b8e80941Smrg 3052b8e80941Smrg return VK_SUCCESS; 3053b8e80941Smrg} 3054b8e80941Smrg 3055b8e80941SmrgVkResult radv_QueueWaitIdle( 3056b8e80941Smrg VkQueue _queue) 3057b8e80941Smrg{ 3058b8e80941Smrg RADV_FROM_HANDLE(radv_queue, queue, _queue); 3059b8e80941Smrg 3060b8e80941Smrg queue->device->ws->ctx_wait_idle(queue->hw_ctx, 3061b8e80941Smrg radv_queue_family_to_ring(queue->queue_family_index), 3062b8e80941Smrg queue->queue_idx); 3063b8e80941Smrg return VK_SUCCESS; 3064b8e80941Smrg} 3065b8e80941Smrg 3066b8e80941SmrgVkResult radv_DeviceWaitIdle( 3067b8e80941Smrg VkDevice _device) 3068b8e80941Smrg{ 3069b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3070b8e80941Smrg 3071b8e80941Smrg for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 3072b8e80941Smrg for (unsigned q = 0; q < device->queue_count[i]; q++) { 3073b8e80941Smrg radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q])); 3074b8e80941Smrg } 3075b8e80941Smrg } 3076b8e80941Smrg return VK_SUCCESS; 3077b8e80941Smrg} 3078b8e80941Smrg 3079b8e80941SmrgVkResult radv_EnumerateInstanceExtensionProperties( 3080b8e80941Smrg const char* pLayerName, 3081b8e80941Smrg uint32_t* pPropertyCount, 3082b8e80941Smrg VkExtensionProperties* pProperties) 3083b8e80941Smrg{ 3084b8e80941Smrg VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 3085b8e80941Smrg 3086b8e80941Smrg for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) { 3087b8e80941Smrg if (radv_supported_instance_extensions.extensions[i]) { 3088b8e80941Smrg vk_outarray_append(&out, prop) { 3089b8e80941Smrg *prop = radv_instance_extensions[i]; 3090b8e80941Smrg } 3091b8e80941Smrg } 3092b8e80941Smrg } 3093b8e80941Smrg 3094b8e80941Smrg return 
vk_outarray_status(&out); 3095b8e80941Smrg} 3096b8e80941Smrg 3097b8e80941SmrgVkResult radv_EnumerateDeviceExtensionProperties( 3098b8e80941Smrg VkPhysicalDevice physicalDevice, 3099b8e80941Smrg const char* pLayerName, 3100b8e80941Smrg uint32_t* pPropertyCount, 3101b8e80941Smrg VkExtensionProperties* pProperties) 3102b8e80941Smrg{ 3103b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); 3104b8e80941Smrg VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 3105b8e80941Smrg 3106b8e80941Smrg for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) { 3107b8e80941Smrg if (device->supported_extensions.extensions[i]) { 3108b8e80941Smrg vk_outarray_append(&out, prop) { 3109b8e80941Smrg *prop = radv_device_extensions[i]; 3110b8e80941Smrg } 3111b8e80941Smrg } 3112b8e80941Smrg } 3113b8e80941Smrg 3114b8e80941Smrg return vk_outarray_status(&out); 3115b8e80941Smrg} 3116b8e80941Smrg 3117b8e80941SmrgPFN_vkVoidFunction radv_GetInstanceProcAddr( 3118b8e80941Smrg VkInstance _instance, 3119b8e80941Smrg const char* pName) 3120b8e80941Smrg{ 3121b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 3122b8e80941Smrg 3123b8e80941Smrg return radv_lookup_entrypoint_checked(pName, 3124b8e80941Smrg instance ? instance->apiVersion : 0, 3125b8e80941Smrg instance ? &instance->enabled_extensions : NULL, 3126b8e80941Smrg NULL); 3127b8e80941Smrg} 3128b8e80941Smrg 3129b8e80941Smrg/* The loader wants us to expose a second GetInstanceProcAddr function 3130b8e80941Smrg * to work around certain LD_PRELOAD issues seen in apps. 
3131b8e80941Smrg */ 3132b8e80941SmrgPUBLIC 3133b8e80941SmrgVKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 3134b8e80941Smrg VkInstance instance, 3135b8e80941Smrg const char* pName); 3136b8e80941Smrg 3137b8e80941SmrgPUBLIC 3138b8e80941SmrgVKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 3139b8e80941Smrg VkInstance instance, 3140b8e80941Smrg const char* pName) 3141b8e80941Smrg{ 3142b8e80941Smrg return radv_GetInstanceProcAddr(instance, pName); 3143b8e80941Smrg} 3144b8e80941Smrg 3145b8e80941SmrgPFN_vkVoidFunction radv_GetDeviceProcAddr( 3146b8e80941Smrg VkDevice _device, 3147b8e80941Smrg const char* pName) 3148b8e80941Smrg{ 3149b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3150b8e80941Smrg 3151b8e80941Smrg return radv_lookup_entrypoint_checked(pName, 3152b8e80941Smrg device->instance->apiVersion, 3153b8e80941Smrg &device->instance->enabled_extensions, 3154b8e80941Smrg &device->enabled_extensions); 3155b8e80941Smrg} 3156b8e80941Smrg 3157b8e80941Smrgbool radv_get_memory_fd(struct radv_device *device, 3158b8e80941Smrg struct radv_device_memory *memory, 3159b8e80941Smrg int *pFD) 3160b8e80941Smrg{ 3161b8e80941Smrg struct radeon_bo_metadata metadata; 3162b8e80941Smrg 3163b8e80941Smrg if (memory->image) { 3164b8e80941Smrg radv_init_metadata(device, memory->image, &metadata); 3165b8e80941Smrg device->ws->buffer_set_metadata(memory->bo, &metadata); 3166b8e80941Smrg } 3167b8e80941Smrg 3168b8e80941Smrg return device->ws->buffer_get_fd(device->ws, memory->bo, 3169b8e80941Smrg pFD); 3170b8e80941Smrg} 3171b8e80941Smrg 3172b8e80941Smrgstatic VkResult radv_alloc_memory(struct radv_device *device, 3173b8e80941Smrg const VkMemoryAllocateInfo* pAllocateInfo, 3174b8e80941Smrg const VkAllocationCallbacks* pAllocator, 3175b8e80941Smrg VkDeviceMemory* pMem) 3176b8e80941Smrg{ 3177b8e80941Smrg struct radv_device_memory *mem; 3178b8e80941Smrg VkResult result; 3179b8e80941Smrg enum radeon_bo_domain domain; 3180b8e80941Smrg uint32_t flags = 
0; 3181b8e80941Smrg enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex]; 3182b8e80941Smrg 3183b8e80941Smrg assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); 3184b8e80941Smrg 3185b8e80941Smrg if (pAllocateInfo->allocationSize == 0) { 3186b8e80941Smrg /* Apparently, this is allowed */ 3187b8e80941Smrg *pMem = VK_NULL_HANDLE; 3188b8e80941Smrg return VK_SUCCESS; 3189b8e80941Smrg } 3190b8e80941Smrg 3191b8e80941Smrg const VkImportMemoryFdInfoKHR *import_info = 3192b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); 3193b8e80941Smrg const VkMemoryDedicatedAllocateInfo *dedicate_info = 3194b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); 3195b8e80941Smrg const VkExportMemoryAllocateInfo *export_info = 3196b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); 3197b8e80941Smrg const VkImportMemoryHostPointerInfoEXT *host_ptr_info = 3198b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT); 3199b8e80941Smrg 3200b8e80941Smrg const struct wsi_memory_allocate_info *wsi_info = 3201b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA); 3202b8e80941Smrg 3203b8e80941Smrg mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, 3204b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3205b8e80941Smrg if (mem == NULL) 3206b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3207b8e80941Smrg 3208b8e80941Smrg if (wsi_info && wsi_info->implicit_sync) 3209b8e80941Smrg flags |= RADEON_FLAG_IMPLICIT_SYNC; 3210b8e80941Smrg 3211b8e80941Smrg if (dedicate_info) { 3212b8e80941Smrg mem->image = radv_image_from_handle(dedicate_info->image); 3213b8e80941Smrg mem->buffer = radv_buffer_from_handle(dedicate_info->buffer); 3214b8e80941Smrg } else { 3215b8e80941Smrg mem->image = NULL; 3216b8e80941Smrg mem->buffer = NULL; 
3217b8e80941Smrg } 3218b8e80941Smrg 3219b8e80941Smrg float priority_float = 0.5; 3220b8e80941Smrg const struct VkMemoryPriorityAllocateInfoEXT *priority_ext = 3221b8e80941Smrg vk_find_struct_const(pAllocateInfo->pNext, 3222b8e80941Smrg MEMORY_PRIORITY_ALLOCATE_INFO_EXT); 3223b8e80941Smrg if (priority_ext) 3224b8e80941Smrg priority_float = priority_ext->priority; 3225b8e80941Smrg 3226b8e80941Smrg unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, 3227b8e80941Smrg (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); 3228b8e80941Smrg 3229b8e80941Smrg mem->user_ptr = NULL; 3230b8e80941Smrg 3231b8e80941Smrg if (import_info) { 3232b8e80941Smrg assert(import_info->handleType == 3233b8e80941Smrg VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 3234b8e80941Smrg import_info->handleType == 3235b8e80941Smrg VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 3236b8e80941Smrg mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd, 3237b8e80941Smrg priority, NULL, NULL); 3238b8e80941Smrg if (!mem->bo) { 3239b8e80941Smrg result = VK_ERROR_INVALID_EXTERNAL_HANDLE; 3240b8e80941Smrg goto fail; 3241b8e80941Smrg } else { 3242b8e80941Smrg close(import_info->fd); 3243b8e80941Smrg } 3244b8e80941Smrg } else if (host_ptr_info) { 3245b8e80941Smrg assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); 3246b8e80941Smrg assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED); 3247b8e80941Smrg mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, 3248b8e80941Smrg pAllocateInfo->allocationSize, 3249b8e80941Smrg priority); 3250b8e80941Smrg if (!mem->bo) { 3251b8e80941Smrg result = VK_ERROR_INVALID_EXTERNAL_HANDLE; 3252b8e80941Smrg goto fail; 3253b8e80941Smrg } else { 3254b8e80941Smrg mem->user_ptr = host_ptr_info->pHostPointer; 3255b8e80941Smrg } 3256b8e80941Smrg } else { 3257b8e80941Smrg uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); 3258b8e80941Smrg if (mem_type_index == 
RADV_MEM_TYPE_GTT_WRITE_COMBINE || 3259b8e80941Smrg mem_type_index == RADV_MEM_TYPE_GTT_CACHED) 3260b8e80941Smrg domain = RADEON_DOMAIN_GTT; 3261b8e80941Smrg else 3262b8e80941Smrg domain = RADEON_DOMAIN_VRAM; 3263b8e80941Smrg 3264b8e80941Smrg if (mem_type_index == RADV_MEM_TYPE_VRAM) 3265b8e80941Smrg flags |= RADEON_FLAG_NO_CPU_ACCESS; 3266b8e80941Smrg else 3267b8e80941Smrg flags |= RADEON_FLAG_CPU_ACCESS; 3268b8e80941Smrg 3269b8e80941Smrg if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE) 3270b8e80941Smrg flags |= RADEON_FLAG_GTT_WC; 3271b8e80941Smrg 3272b8e80941Smrg if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) { 3273b8e80941Smrg flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; 3274b8e80941Smrg if (device->use_global_bo_list) { 3275b8e80941Smrg flags |= RADEON_FLAG_PREFER_LOCAL_BO; 3276b8e80941Smrg } 3277b8e80941Smrg } 3278b8e80941Smrg 3279b8e80941Smrg mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, 3280b8e80941Smrg domain, flags, priority); 3281b8e80941Smrg 3282b8e80941Smrg if (!mem->bo) { 3283b8e80941Smrg result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 3284b8e80941Smrg goto fail; 3285b8e80941Smrg } 3286b8e80941Smrg mem->type_index = mem_type_index; 3287b8e80941Smrg } 3288b8e80941Smrg 3289b8e80941Smrg result = radv_bo_list_add(device, mem->bo); 3290b8e80941Smrg if (result != VK_SUCCESS) 3291b8e80941Smrg goto fail_bo; 3292b8e80941Smrg 3293b8e80941Smrg *pMem = radv_device_memory_to_handle(mem); 3294b8e80941Smrg 3295b8e80941Smrg return VK_SUCCESS; 3296b8e80941Smrg 3297b8e80941Smrgfail_bo: 3298b8e80941Smrg device->ws->buffer_destroy(mem->bo); 3299b8e80941Smrgfail: 3300b8e80941Smrg vk_free2(&device->alloc, pAllocator, mem); 3301b8e80941Smrg 3302b8e80941Smrg return result; 3303b8e80941Smrg} 3304b8e80941Smrg 3305b8e80941SmrgVkResult radv_AllocateMemory( 3306b8e80941Smrg VkDevice _device, 3307b8e80941Smrg const VkMemoryAllocateInfo* pAllocateInfo, 3308b8e80941Smrg const 
VkAllocationCallbacks* pAllocator, 3309b8e80941Smrg VkDeviceMemory* pMem) 3310b8e80941Smrg{ 3311b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3312b8e80941Smrg return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem); 3313b8e80941Smrg} 3314b8e80941Smrg 3315b8e80941Smrgvoid radv_FreeMemory( 3316b8e80941Smrg VkDevice _device, 3317b8e80941Smrg VkDeviceMemory _mem, 3318b8e80941Smrg const VkAllocationCallbacks* pAllocator) 3319b8e80941Smrg{ 3320b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3321b8e80941Smrg RADV_FROM_HANDLE(radv_device_memory, mem, _mem); 3322b8e80941Smrg 3323b8e80941Smrg if (mem == NULL) 3324b8e80941Smrg return; 3325b8e80941Smrg 3326b8e80941Smrg radv_bo_list_remove(device, mem->bo); 3327b8e80941Smrg device->ws->buffer_destroy(mem->bo); 3328b8e80941Smrg mem->bo = NULL; 3329b8e80941Smrg 3330b8e80941Smrg vk_free2(&device->alloc, pAllocator, mem); 3331b8e80941Smrg} 3332b8e80941Smrg 3333b8e80941SmrgVkResult radv_MapMemory( 3334b8e80941Smrg VkDevice _device, 3335b8e80941Smrg VkDeviceMemory _memory, 3336b8e80941Smrg VkDeviceSize offset, 3337b8e80941Smrg VkDeviceSize size, 3338b8e80941Smrg VkMemoryMapFlags flags, 3339b8e80941Smrg void** ppData) 3340b8e80941Smrg{ 3341b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3342b8e80941Smrg RADV_FROM_HANDLE(radv_device_memory, mem, _memory); 3343b8e80941Smrg 3344b8e80941Smrg if (mem == NULL) { 3345b8e80941Smrg *ppData = NULL; 3346b8e80941Smrg return VK_SUCCESS; 3347b8e80941Smrg } 3348b8e80941Smrg 3349b8e80941Smrg if (mem->user_ptr) 3350b8e80941Smrg *ppData = mem->user_ptr; 3351b8e80941Smrg else 3352b8e80941Smrg *ppData = device->ws->buffer_map(mem->bo); 3353b8e80941Smrg 3354b8e80941Smrg if (*ppData) { 3355b8e80941Smrg *ppData += offset; 3356b8e80941Smrg return VK_SUCCESS; 3357b8e80941Smrg } 3358b8e80941Smrg 3359b8e80941Smrg return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED); 3360b8e80941Smrg} 3361b8e80941Smrg 3362b8e80941Smrgvoid radv_UnmapMemory( 
3363b8e80941Smrg VkDevice _device, 3364b8e80941Smrg VkDeviceMemory _memory) 3365b8e80941Smrg{ 3366b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3367b8e80941Smrg RADV_FROM_HANDLE(radv_device_memory, mem, _memory); 3368b8e80941Smrg 3369b8e80941Smrg if (mem == NULL) 3370b8e80941Smrg return; 3371b8e80941Smrg 3372b8e80941Smrg if (mem->user_ptr == NULL) 3373b8e80941Smrg device->ws->buffer_unmap(mem->bo); 3374b8e80941Smrg} 3375b8e80941Smrg 3376b8e80941SmrgVkResult radv_FlushMappedMemoryRanges( 3377b8e80941Smrg VkDevice _device, 3378b8e80941Smrg uint32_t memoryRangeCount, 3379b8e80941Smrg const VkMappedMemoryRange* pMemoryRanges) 3380b8e80941Smrg{ 3381b8e80941Smrg return VK_SUCCESS; 3382b8e80941Smrg} 3383b8e80941Smrg 3384b8e80941SmrgVkResult radv_InvalidateMappedMemoryRanges( 3385b8e80941Smrg VkDevice _device, 3386b8e80941Smrg uint32_t memoryRangeCount, 3387b8e80941Smrg const VkMappedMemoryRange* pMemoryRanges) 3388b8e80941Smrg{ 3389b8e80941Smrg return VK_SUCCESS; 3390b8e80941Smrg} 3391b8e80941Smrg 3392b8e80941Smrgvoid radv_GetBufferMemoryRequirements( 3393b8e80941Smrg VkDevice _device, 3394b8e80941Smrg VkBuffer _buffer, 3395b8e80941Smrg VkMemoryRequirements* pMemoryRequirements) 3396b8e80941Smrg{ 3397b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3398b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3399b8e80941Smrg 3400b8e80941Smrg pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; 3401b8e80941Smrg 3402b8e80941Smrg if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) 3403b8e80941Smrg pMemoryRequirements->alignment = 4096; 3404b8e80941Smrg else 3405b8e80941Smrg pMemoryRequirements->alignment = 16; 3406b8e80941Smrg 3407b8e80941Smrg pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment); 3408b8e80941Smrg} 3409b8e80941Smrg 3410b8e80941Smrgvoid radv_GetBufferMemoryRequirements2( 3411b8e80941Smrg VkDevice device, 3412b8e80941Smrg const 
VkBufferMemoryRequirementsInfo2 *pInfo, 3413b8e80941Smrg VkMemoryRequirements2 *pMemoryRequirements) 3414b8e80941Smrg{ 3415b8e80941Smrg radv_GetBufferMemoryRequirements(device, pInfo->buffer, 3416b8e80941Smrg &pMemoryRequirements->memoryRequirements); 3417b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); 3418b8e80941Smrg vk_foreach_struct(ext, pMemoryRequirements->pNext) { 3419b8e80941Smrg switch (ext->sType) { 3420b8e80941Smrg case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 3421b8e80941Smrg VkMemoryDedicatedRequirements *req = 3422b8e80941Smrg (VkMemoryDedicatedRequirements *) ext; 3423b8e80941Smrg req->requiresDedicatedAllocation = buffer->shareable; 3424b8e80941Smrg req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; 3425b8e80941Smrg break; 3426b8e80941Smrg } 3427b8e80941Smrg default: 3428b8e80941Smrg break; 3429b8e80941Smrg } 3430b8e80941Smrg } 3431b8e80941Smrg} 3432b8e80941Smrg 3433b8e80941Smrgvoid radv_GetImageMemoryRequirements( 3434b8e80941Smrg VkDevice _device, 3435b8e80941Smrg VkImage _image, 3436b8e80941Smrg VkMemoryRequirements* pMemoryRequirements) 3437b8e80941Smrg{ 3438b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3439b8e80941Smrg RADV_FROM_HANDLE(radv_image, image, _image); 3440b8e80941Smrg 3441b8e80941Smrg pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; 3442b8e80941Smrg 3443b8e80941Smrg pMemoryRequirements->size = image->size; 3444b8e80941Smrg pMemoryRequirements->alignment = image->alignment; 3445b8e80941Smrg} 3446b8e80941Smrg 3447b8e80941Smrgvoid radv_GetImageMemoryRequirements2( 3448b8e80941Smrg VkDevice device, 3449b8e80941Smrg const VkImageMemoryRequirementsInfo2 *pInfo, 3450b8e80941Smrg VkMemoryRequirements2 *pMemoryRequirements) 3451b8e80941Smrg{ 3452b8e80941Smrg radv_GetImageMemoryRequirements(device, pInfo->image, 3453b8e80941Smrg &pMemoryRequirements->memoryRequirements); 3454b8e80941Smrg 3455b8e80941Smrg 
RADV_FROM_HANDLE(radv_image, image, pInfo->image); 3456b8e80941Smrg 3457b8e80941Smrg vk_foreach_struct(ext, pMemoryRequirements->pNext) { 3458b8e80941Smrg switch (ext->sType) { 3459b8e80941Smrg case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 3460b8e80941Smrg VkMemoryDedicatedRequirements *req = 3461b8e80941Smrg (VkMemoryDedicatedRequirements *) ext; 3462b8e80941Smrg req->requiresDedicatedAllocation = image->shareable; 3463b8e80941Smrg req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; 3464b8e80941Smrg break; 3465b8e80941Smrg } 3466b8e80941Smrg default: 3467b8e80941Smrg break; 3468b8e80941Smrg } 3469b8e80941Smrg } 3470b8e80941Smrg} 3471b8e80941Smrg 3472b8e80941Smrgvoid radv_GetImageSparseMemoryRequirements( 3473b8e80941Smrg VkDevice device, 3474b8e80941Smrg VkImage image, 3475b8e80941Smrg uint32_t* pSparseMemoryRequirementCount, 3476b8e80941Smrg VkSparseImageMemoryRequirements* pSparseMemoryRequirements) 3477b8e80941Smrg{ 3478b8e80941Smrg stub(); 3479b8e80941Smrg} 3480b8e80941Smrg 3481b8e80941Smrgvoid radv_GetImageSparseMemoryRequirements2( 3482b8e80941Smrg VkDevice device, 3483b8e80941Smrg const VkImageSparseMemoryRequirementsInfo2 *pInfo, 3484b8e80941Smrg uint32_t* pSparseMemoryRequirementCount, 3485b8e80941Smrg VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) 3486b8e80941Smrg{ 3487b8e80941Smrg stub(); 3488b8e80941Smrg} 3489b8e80941Smrg 3490b8e80941Smrgvoid radv_GetDeviceMemoryCommitment( 3491b8e80941Smrg VkDevice device, 3492b8e80941Smrg VkDeviceMemory memory, 3493b8e80941Smrg VkDeviceSize* pCommittedMemoryInBytes) 3494b8e80941Smrg{ 3495b8e80941Smrg *pCommittedMemoryInBytes = 0; 3496b8e80941Smrg} 3497b8e80941Smrg 3498b8e80941SmrgVkResult radv_BindBufferMemory2(VkDevice device, 3499b8e80941Smrg uint32_t bindInfoCount, 3500b8e80941Smrg const VkBindBufferMemoryInfo *pBindInfos) 3501b8e80941Smrg{ 3502b8e80941Smrg for (uint32_t i = 0; i < bindInfoCount; ++i) { 3503b8e80941Smrg RADV_FROM_HANDLE(radv_device_memory, mem, 
pBindInfos[i].memory); 3504b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer); 3505b8e80941Smrg 3506b8e80941Smrg if (mem) { 3507b8e80941Smrg buffer->bo = mem->bo; 3508b8e80941Smrg buffer->offset = pBindInfos[i].memoryOffset; 3509b8e80941Smrg } else { 3510b8e80941Smrg buffer->bo = NULL; 3511b8e80941Smrg } 3512b8e80941Smrg } 3513b8e80941Smrg return VK_SUCCESS; 3514b8e80941Smrg} 3515b8e80941Smrg 3516b8e80941SmrgVkResult radv_BindBufferMemory( 3517b8e80941Smrg VkDevice device, 3518b8e80941Smrg VkBuffer buffer, 3519b8e80941Smrg VkDeviceMemory memory, 3520b8e80941Smrg VkDeviceSize memoryOffset) 3521b8e80941Smrg{ 3522b8e80941Smrg const VkBindBufferMemoryInfo info = { 3523b8e80941Smrg .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 3524b8e80941Smrg .buffer = buffer, 3525b8e80941Smrg .memory = memory, 3526b8e80941Smrg .memoryOffset = memoryOffset 3527b8e80941Smrg }; 3528b8e80941Smrg 3529b8e80941Smrg return radv_BindBufferMemory2(device, 1, &info); 3530b8e80941Smrg} 3531b8e80941Smrg 3532b8e80941SmrgVkResult radv_BindImageMemory2(VkDevice device, 3533b8e80941Smrg uint32_t bindInfoCount, 3534b8e80941Smrg const VkBindImageMemoryInfo *pBindInfos) 3535b8e80941Smrg{ 3536b8e80941Smrg for (uint32_t i = 0; i < bindInfoCount; ++i) { 3537b8e80941Smrg RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); 3538b8e80941Smrg RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image); 3539b8e80941Smrg 3540b8e80941Smrg if (mem) { 3541b8e80941Smrg image->bo = mem->bo; 3542b8e80941Smrg image->offset = pBindInfos[i].memoryOffset; 3543b8e80941Smrg } else { 3544b8e80941Smrg image->bo = NULL; 3545b8e80941Smrg image->offset = 0; 3546b8e80941Smrg } 3547b8e80941Smrg } 3548b8e80941Smrg return VK_SUCCESS; 3549b8e80941Smrg} 3550b8e80941Smrg 3551b8e80941Smrg 3552b8e80941SmrgVkResult radv_BindImageMemory( 3553b8e80941Smrg VkDevice device, 3554b8e80941Smrg VkImage image, 3555b8e80941Smrg VkDeviceMemory memory, 3556b8e80941Smrg VkDeviceSize memoryOffset) 3557b8e80941Smrg{ 
3558b8e80941Smrg const VkBindImageMemoryInfo info = { 3559b8e80941Smrg .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 3560b8e80941Smrg .image = image, 3561b8e80941Smrg .memory = memory, 3562b8e80941Smrg .memoryOffset = memoryOffset 3563b8e80941Smrg }; 3564b8e80941Smrg 3565b8e80941Smrg return radv_BindImageMemory2(device, 1, &info); 3566b8e80941Smrg} 3567b8e80941Smrg 3568b8e80941Smrg 3569b8e80941Smrgstatic void 3570b8e80941Smrgradv_sparse_buffer_bind_memory(struct radv_device *device, 3571b8e80941Smrg const VkSparseBufferMemoryBindInfo *bind) 3572b8e80941Smrg{ 3573b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer); 3574b8e80941Smrg 3575b8e80941Smrg for (uint32_t i = 0; i < bind->bindCount; ++i) { 3576b8e80941Smrg struct radv_device_memory *mem = NULL; 3577b8e80941Smrg 3578b8e80941Smrg if (bind->pBinds[i].memory != VK_NULL_HANDLE) 3579b8e80941Smrg mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 3580b8e80941Smrg 3581b8e80941Smrg device->ws->buffer_virtual_bind(buffer->bo, 3582b8e80941Smrg bind->pBinds[i].resourceOffset, 3583b8e80941Smrg bind->pBinds[i].size, 3584b8e80941Smrg mem ? mem->bo : NULL, 3585b8e80941Smrg bind->pBinds[i].memoryOffset); 3586b8e80941Smrg } 3587b8e80941Smrg} 3588b8e80941Smrg 3589b8e80941Smrgstatic void 3590b8e80941Smrgradv_sparse_image_opaque_bind_memory(struct radv_device *device, 3591b8e80941Smrg const VkSparseImageOpaqueMemoryBindInfo *bind) 3592b8e80941Smrg{ 3593b8e80941Smrg RADV_FROM_HANDLE(radv_image, image, bind->image); 3594b8e80941Smrg 3595b8e80941Smrg for (uint32_t i = 0; i < bind->bindCount; ++i) { 3596b8e80941Smrg struct radv_device_memory *mem = NULL; 3597b8e80941Smrg 3598b8e80941Smrg if (bind->pBinds[i].memory != VK_NULL_HANDLE) 3599b8e80941Smrg mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 3600b8e80941Smrg 3601b8e80941Smrg device->ws->buffer_virtual_bind(image->bo, 3602b8e80941Smrg bind->pBinds[i].resourceOffset, 3603b8e80941Smrg bind->pBinds[i].size, 3604b8e80941Smrg mem ? 
mem->bo : NULL, 3605b8e80941Smrg bind->pBinds[i].memoryOffset); 3606b8e80941Smrg } 3607b8e80941Smrg} 3608b8e80941Smrg 3609b8e80941Smrg VkResult radv_QueueBindSparse( 3610b8e80941Smrg VkQueue _queue, 3611b8e80941Smrg uint32_t bindInfoCount, 3612b8e80941Smrg const VkBindSparseInfo* pBindInfo, 3613b8e80941Smrg VkFence _fence) 3614b8e80941Smrg{ 3615b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 3616b8e80941Smrg RADV_FROM_HANDLE(radv_queue, queue, _queue); 3617b8e80941Smrg struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; 3618b8e80941Smrg bool fence_emitted = false; 3619b8e80941Smrg VkResult result; 3620b8e80941Smrg int ret; 3621b8e80941Smrg 3622b8e80941Smrg for (uint32_t i = 0; i < bindInfoCount; ++i) { 3623b8e80941Smrg struct radv_winsys_sem_info sem_info; 3624b8e80941Smrg for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { 3625b8e80941Smrg radv_sparse_buffer_bind_memory(queue->device, 3626b8e80941Smrg pBindInfo[i].pBufferBinds + j); 3627b8e80941Smrg } 3628b8e80941Smrg 3629b8e80941Smrg for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) { 3630b8e80941Smrg radv_sparse_image_opaque_bind_memory(queue->device, 3631b8e80941Smrg pBindInfo[i].pImageOpaqueBinds + j); 3632b8e80941Smrg } 3633b8e80941Smrg 3634b8e80941Smrg VkResult result; 3635b8e80941Smrg result = radv_alloc_sem_info(queue->device->instance, 3636b8e80941Smrg &sem_info, 3637b8e80941Smrg pBindInfo[i].waitSemaphoreCount, 3638b8e80941Smrg pBindInfo[i].pWaitSemaphores, 3639b8e80941Smrg pBindInfo[i].signalSemaphoreCount, 3640b8e80941Smrg pBindInfo[i].pSignalSemaphores, 3641b8e80941Smrg _fence); 3642b8e80941Smrg if (result != VK_SUCCESS) 3643b8e80941Smrg return result; 3644b8e80941Smrg 3645b8e80941Smrg if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) { 3646b8e80941Smrg ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, 3647b8e80941Smrg &queue->device->empty_cs[queue->queue_family_index], 3648b8e80941Smrg 1, NULL, NULL, 
3649b8e80941Smrg &sem_info, NULL, 3650b8e80941Smrg false, base_fence); 3651b8e80941Smrg if (ret) { 3652b8e80941Smrg radv_loge("failed to submit CS %d\n", i); 3653b8e80941Smrg abort(); 3654b8e80941Smrg } 3655b8e80941Smrg 3656b8e80941Smrg fence_emitted = true; 3657b8e80941Smrg if (fence) 3658b8e80941Smrg fence->submitted = true; 3659b8e80941Smrg } 3660b8e80941Smrg 3661b8e80941Smrg radv_free_sem_info(&sem_info); 3662b8e80941Smrg 3663b8e80941Smrg } 3664b8e80941Smrg 3665b8e80941Smrg if (fence) { 3666b8e80941Smrg if (!fence_emitted) { 3667b8e80941Smrg result = radv_signal_fence(queue, fence); 3668b8e80941Smrg if (result != VK_SUCCESS) 3669b8e80941Smrg return result; 3670b8e80941Smrg } 3671b8e80941Smrg fence->submitted = true; 3672b8e80941Smrg } 3673b8e80941Smrg 3674b8e80941Smrg return VK_SUCCESS; 3675b8e80941Smrg} 3676b8e80941Smrg 3677b8e80941SmrgVkResult radv_CreateFence( 3678b8e80941Smrg VkDevice _device, 3679b8e80941Smrg const VkFenceCreateInfo* pCreateInfo, 3680b8e80941Smrg const VkAllocationCallbacks* pAllocator, 3681b8e80941Smrg VkFence* pFence) 3682b8e80941Smrg{ 3683b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3684b8e80941Smrg const VkExportFenceCreateInfo *export = 3685b8e80941Smrg vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); 3686b8e80941Smrg VkExternalFenceHandleTypeFlags handleTypes = 3687b8e80941Smrg export ? 
export->handleTypes : 0; 3688b8e80941Smrg 3689b8e80941Smrg struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator, 3690b8e80941Smrg sizeof(*fence), 8, 3691b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3692b8e80941Smrg 3693b8e80941Smrg if (!fence) 3694b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3695b8e80941Smrg 3696b8e80941Smrg fence->fence_wsi = NULL; 3697b8e80941Smrg fence->submitted = false; 3698b8e80941Smrg fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT); 3699b8e80941Smrg fence->temp_syncobj = 0; 3700b8e80941Smrg if (device->always_use_syncobj || handleTypes) { 3701b8e80941Smrg int ret = device->ws->create_syncobj(device->ws, &fence->syncobj); 3702b8e80941Smrg if (ret) { 3703b8e80941Smrg vk_free2(&device->alloc, pAllocator, fence); 3704b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3705b8e80941Smrg } 3706b8e80941Smrg if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { 3707b8e80941Smrg device->ws->signal_syncobj(device->ws, fence->syncobj); 3708b8e80941Smrg } 3709b8e80941Smrg fence->fence = NULL; 3710b8e80941Smrg } else { 3711b8e80941Smrg fence->fence = device->ws->create_fence(); 3712b8e80941Smrg if (!fence->fence) { 3713b8e80941Smrg vk_free2(&device->alloc, pAllocator, fence); 3714b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3715b8e80941Smrg } 3716b8e80941Smrg fence->syncobj = 0; 3717b8e80941Smrg } 3718b8e80941Smrg 3719b8e80941Smrg *pFence = radv_fence_to_handle(fence); 3720b8e80941Smrg 3721b8e80941Smrg return VK_SUCCESS; 3722b8e80941Smrg} 3723b8e80941Smrg 3724b8e80941Smrgvoid radv_DestroyFence( 3725b8e80941Smrg VkDevice _device, 3726b8e80941Smrg VkFence _fence, 3727b8e80941Smrg const VkAllocationCallbacks* pAllocator) 3728b8e80941Smrg{ 3729b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3730b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 3731b8e80941Smrg 3732b8e80941Smrg if (!fence) 3733b8e80941Smrg 
return; 3734b8e80941Smrg 3735b8e80941Smrg if (fence->temp_syncobj) 3736b8e80941Smrg device->ws->destroy_syncobj(device->ws, fence->temp_syncobj); 3737b8e80941Smrg if (fence->syncobj) 3738b8e80941Smrg device->ws->destroy_syncobj(device->ws, fence->syncobj); 3739b8e80941Smrg if (fence->fence) 3740b8e80941Smrg device->ws->destroy_fence(fence->fence); 3741b8e80941Smrg if (fence->fence_wsi) 3742b8e80941Smrg fence->fence_wsi->destroy(fence->fence_wsi); 3743b8e80941Smrg vk_free2(&device->alloc, pAllocator, fence); 3744b8e80941Smrg} 3745b8e80941Smrg 3746b8e80941Smrg 3747b8e80941Smrguint64_t radv_get_current_time(void) 3748b8e80941Smrg{ 3749b8e80941Smrg struct timespec tv; 3750b8e80941Smrg clock_gettime(CLOCK_MONOTONIC, &tv); 3751b8e80941Smrg return tv.tv_nsec + tv.tv_sec*1000000000ull; 3752b8e80941Smrg} 3753b8e80941Smrg 3754b8e80941Smrgstatic uint64_t radv_get_absolute_timeout(uint64_t timeout) 3755b8e80941Smrg{ 3756b8e80941Smrg uint64_t current_time = radv_get_current_time(); 3757b8e80941Smrg 3758b8e80941Smrg timeout = MIN2(UINT64_MAX - current_time, timeout); 3759b8e80941Smrg 3760b8e80941Smrg return current_time + timeout; 3761b8e80941Smrg} 3762b8e80941Smrg 3763b8e80941Smrg 3764b8e80941Smrgstatic bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences) 3765b8e80941Smrg{ 3766b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3767b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3768b8e80941Smrg if (fence->fence == NULL || fence->syncobj || 3769b8e80941Smrg fence->temp_syncobj || 3770b8e80941Smrg (!fence->signalled && !fence->submitted)) 3771b8e80941Smrg return false; 3772b8e80941Smrg } 3773b8e80941Smrg return true; 3774b8e80941Smrg} 3775b8e80941Smrg 3776b8e80941Smrgstatic bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences) 3777b8e80941Smrg{ 3778b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3779b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3780b8e80941Smrg if (fence->syncobj == 
0 && fence->temp_syncobj == 0) 3781b8e80941Smrg return false; 3782b8e80941Smrg } 3783b8e80941Smrg return true; 3784b8e80941Smrg} 3785b8e80941Smrg 3786b8e80941SmrgVkResult radv_WaitForFences( 3787b8e80941Smrg VkDevice _device, 3788b8e80941Smrg uint32_t fenceCount, 3789b8e80941Smrg const VkFence* pFences, 3790b8e80941Smrg VkBool32 waitAll, 3791b8e80941Smrg uint64_t timeout) 3792b8e80941Smrg{ 3793b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3794b8e80941Smrg timeout = radv_get_absolute_timeout(timeout); 3795b8e80941Smrg 3796b8e80941Smrg if (device->always_use_syncobj && 3797b8e80941Smrg radv_all_fences_syncobj(fenceCount, pFences)) 3798b8e80941Smrg { 3799b8e80941Smrg uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount); 3800b8e80941Smrg if (!handles) 3801b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3802b8e80941Smrg 3803b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3804b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3805b8e80941Smrg handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj; 3806b8e80941Smrg } 3807b8e80941Smrg 3808b8e80941Smrg bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout); 3809b8e80941Smrg 3810b8e80941Smrg free(handles); 3811b8e80941Smrg return success ? VK_SUCCESS : VK_TIMEOUT; 3812b8e80941Smrg } 3813b8e80941Smrg 3814b8e80941Smrg if (!waitAll && fenceCount > 1) { 3815b8e80941Smrg /* Not doing this by default for waitAll, due to needing to allocate twice. 
*/ 3816b8e80941Smrg if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) { 3817b8e80941Smrg uint32_t wait_count = 0; 3818b8e80941Smrg struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount); 3819b8e80941Smrg if (!fences) 3820b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3821b8e80941Smrg 3822b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3823b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3824b8e80941Smrg 3825b8e80941Smrg if (fence->signalled) { 3826b8e80941Smrg free(fences); 3827b8e80941Smrg return VK_SUCCESS; 3828b8e80941Smrg } 3829b8e80941Smrg 3830b8e80941Smrg fences[wait_count++] = fence->fence; 3831b8e80941Smrg } 3832b8e80941Smrg 3833b8e80941Smrg bool success = device->ws->fences_wait(device->ws, fences, wait_count, 3834b8e80941Smrg waitAll, timeout - radv_get_current_time()); 3835b8e80941Smrg 3836b8e80941Smrg free(fences); 3837b8e80941Smrg return success ? 
VK_SUCCESS : VK_TIMEOUT; 3838b8e80941Smrg } 3839b8e80941Smrg 3840b8e80941Smrg while(radv_get_current_time() <= timeout) { 3841b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3842b8e80941Smrg if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS) 3843b8e80941Smrg return VK_SUCCESS; 3844b8e80941Smrg } 3845b8e80941Smrg } 3846b8e80941Smrg return VK_TIMEOUT; 3847b8e80941Smrg } 3848b8e80941Smrg 3849b8e80941Smrg for (uint32_t i = 0; i < fenceCount; ++i) { 3850b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3851b8e80941Smrg bool expired = false; 3852b8e80941Smrg 3853b8e80941Smrg if (fence->temp_syncobj) { 3854b8e80941Smrg if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout)) 3855b8e80941Smrg return VK_TIMEOUT; 3856b8e80941Smrg continue; 3857b8e80941Smrg } 3858b8e80941Smrg 3859b8e80941Smrg if (fence->syncobj) { 3860b8e80941Smrg if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout)) 3861b8e80941Smrg return VK_TIMEOUT; 3862b8e80941Smrg continue; 3863b8e80941Smrg } 3864b8e80941Smrg 3865b8e80941Smrg if (fence->signalled) 3866b8e80941Smrg continue; 3867b8e80941Smrg 3868b8e80941Smrg if (fence->fence) { 3869b8e80941Smrg if (!fence->submitted) { 3870b8e80941Smrg while(radv_get_current_time() <= timeout && 3871b8e80941Smrg !fence->submitted) 3872b8e80941Smrg /* Do nothing */; 3873b8e80941Smrg 3874b8e80941Smrg if (!fence->submitted) 3875b8e80941Smrg return VK_TIMEOUT; 3876b8e80941Smrg 3877b8e80941Smrg /* Recheck as it may have been set by 3878b8e80941Smrg * submitting operations. 
*/ 3879b8e80941Smrg 3880b8e80941Smrg if (fence->signalled) 3881b8e80941Smrg continue; 3882b8e80941Smrg } 3883b8e80941Smrg 3884b8e80941Smrg expired = device->ws->fence_wait(device->ws, 3885b8e80941Smrg fence->fence, 3886b8e80941Smrg true, timeout); 3887b8e80941Smrg if (!expired) 3888b8e80941Smrg return VK_TIMEOUT; 3889b8e80941Smrg } 3890b8e80941Smrg 3891b8e80941Smrg if (fence->fence_wsi) { 3892b8e80941Smrg VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout); 3893b8e80941Smrg if (result != VK_SUCCESS) 3894b8e80941Smrg return result; 3895b8e80941Smrg } 3896b8e80941Smrg 3897b8e80941Smrg fence->signalled = true; 3898b8e80941Smrg } 3899b8e80941Smrg 3900b8e80941Smrg return VK_SUCCESS; 3901b8e80941Smrg} 3902b8e80941Smrg 3903b8e80941SmrgVkResult radv_ResetFences(VkDevice _device, 3904b8e80941Smrg uint32_t fenceCount, 3905b8e80941Smrg const VkFence *pFences) 3906b8e80941Smrg{ 3907b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3908b8e80941Smrg 3909b8e80941Smrg for (unsigned i = 0; i < fenceCount; ++i) { 3910b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); 3911b8e80941Smrg fence->submitted = fence->signalled = false; 3912b8e80941Smrg 3913b8e80941Smrg /* Per spec, we first restore the permanent payload, and then reset, so 3914b8e80941Smrg * having a temp syncobj should not skip resetting the permanent syncobj. 
*/ 3915b8e80941Smrg if (fence->temp_syncobj) { 3916b8e80941Smrg device->ws->destroy_syncobj(device->ws, fence->temp_syncobj); 3917b8e80941Smrg fence->temp_syncobj = 0; 3918b8e80941Smrg } 3919b8e80941Smrg 3920b8e80941Smrg if (fence->syncobj) { 3921b8e80941Smrg device->ws->reset_syncobj(device->ws, fence->syncobj); 3922b8e80941Smrg } 3923b8e80941Smrg } 3924b8e80941Smrg 3925b8e80941Smrg return VK_SUCCESS; 3926b8e80941Smrg} 3927b8e80941Smrg 3928b8e80941SmrgVkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence) 3929b8e80941Smrg{ 3930b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3931b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, _fence); 3932b8e80941Smrg 3933b8e80941Smrg if (fence->temp_syncobj) { 3934b8e80941Smrg bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0); 3935b8e80941Smrg return success ? VK_SUCCESS : VK_NOT_READY; 3936b8e80941Smrg } 3937b8e80941Smrg 3938b8e80941Smrg if (fence->syncobj) { 3939b8e80941Smrg bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0); 3940b8e80941Smrg return success ? 
VK_SUCCESS : VK_NOT_READY; 3941b8e80941Smrg } 3942b8e80941Smrg 3943b8e80941Smrg if (fence->signalled) 3944b8e80941Smrg return VK_SUCCESS; 3945b8e80941Smrg if (!fence->submitted) 3946b8e80941Smrg return VK_NOT_READY; 3947b8e80941Smrg if (fence->fence) { 3948b8e80941Smrg if (!device->ws->fence_wait(device->ws, fence->fence, false, 0)) 3949b8e80941Smrg return VK_NOT_READY; 3950b8e80941Smrg } 3951b8e80941Smrg if (fence->fence_wsi) { 3952b8e80941Smrg VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0); 3953b8e80941Smrg 3954b8e80941Smrg if (result != VK_SUCCESS) { 3955b8e80941Smrg if (result == VK_TIMEOUT) 3956b8e80941Smrg return VK_NOT_READY; 3957b8e80941Smrg return result; 3958b8e80941Smrg } 3959b8e80941Smrg } 3960b8e80941Smrg return VK_SUCCESS; 3961b8e80941Smrg} 3962b8e80941Smrg 3963b8e80941Smrg 3964b8e80941Smrg// Queue semaphore functions 3965b8e80941Smrg 3966b8e80941SmrgVkResult radv_CreateSemaphore( 3967b8e80941Smrg VkDevice _device, 3968b8e80941Smrg const VkSemaphoreCreateInfo* pCreateInfo, 3969b8e80941Smrg const VkAllocationCallbacks* pAllocator, 3970b8e80941Smrg VkSemaphore* pSemaphore) 3971b8e80941Smrg{ 3972b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 3973b8e80941Smrg const VkExportSemaphoreCreateInfo *export = 3974b8e80941Smrg vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); 3975b8e80941Smrg VkExternalSemaphoreHandleTypeFlags handleTypes = 3976b8e80941Smrg export ? 
export->handleTypes : 0; 3977b8e80941Smrg 3978b8e80941Smrg struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, 3979b8e80941Smrg sizeof(*sem), 8, 3980b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3981b8e80941Smrg if (!sem) 3982b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3983b8e80941Smrg 3984b8e80941Smrg sem->temp_syncobj = 0; 3985b8e80941Smrg /* create a syncobject if we are going to export this semaphore */ 3986b8e80941Smrg if (device->always_use_syncobj || handleTypes) { 3987b8e80941Smrg assert (device->physical_device->rad_info.has_syncobj); 3988b8e80941Smrg int ret = device->ws->create_syncobj(device->ws, &sem->syncobj); 3989b8e80941Smrg if (ret) { 3990b8e80941Smrg vk_free2(&device->alloc, pAllocator, sem); 3991b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3992b8e80941Smrg } 3993b8e80941Smrg sem->sem = NULL; 3994b8e80941Smrg } else { 3995b8e80941Smrg sem->sem = device->ws->create_sem(device->ws); 3996b8e80941Smrg if (!sem->sem) { 3997b8e80941Smrg vk_free2(&device->alloc, pAllocator, sem); 3998b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 3999b8e80941Smrg } 4000b8e80941Smrg sem->syncobj = 0; 4001b8e80941Smrg } 4002b8e80941Smrg 4003b8e80941Smrg *pSemaphore = radv_semaphore_to_handle(sem); 4004b8e80941Smrg return VK_SUCCESS; 4005b8e80941Smrg} 4006b8e80941Smrg 4007b8e80941Smrgvoid radv_DestroySemaphore( 4008b8e80941Smrg VkDevice _device, 4009b8e80941Smrg VkSemaphore _semaphore, 4010b8e80941Smrg const VkAllocationCallbacks* pAllocator) 4011b8e80941Smrg{ 4012b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4013b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore); 4014b8e80941Smrg if (!_semaphore) 4015b8e80941Smrg return; 4016b8e80941Smrg 4017b8e80941Smrg if (sem->syncobj) 4018b8e80941Smrg device->ws->destroy_syncobj(device->ws, sem->syncobj); 4019b8e80941Smrg else 4020b8e80941Smrg device->ws->destroy_sem(sem->sem); 4021b8e80941Smrg 
vk_free2(&device->alloc, pAllocator, sem); 4022b8e80941Smrg} 4023b8e80941Smrg 4024b8e80941SmrgVkResult radv_CreateEvent( 4025b8e80941Smrg VkDevice _device, 4026b8e80941Smrg const VkEventCreateInfo* pCreateInfo, 4027b8e80941Smrg const VkAllocationCallbacks* pAllocator, 4028b8e80941Smrg VkEvent* pEvent) 4029b8e80941Smrg{ 4030b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4031b8e80941Smrg struct radv_event *event = vk_alloc2(&device->alloc, pAllocator, 4032b8e80941Smrg sizeof(*event), 8, 4033b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 4034b8e80941Smrg 4035b8e80941Smrg if (!event) 4036b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 4037b8e80941Smrg 4038b8e80941Smrg event->bo = device->ws->buffer_create(device->ws, 8, 8, 4039b8e80941Smrg RADEON_DOMAIN_GTT, 4040b8e80941Smrg RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, 4041b8e80941Smrg RADV_BO_PRIORITY_FENCE); 4042b8e80941Smrg if (!event->bo) { 4043b8e80941Smrg vk_free2(&device->alloc, pAllocator, event); 4044b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 4045b8e80941Smrg } 4046b8e80941Smrg 4047b8e80941Smrg event->map = (uint64_t*)device->ws->buffer_map(event->bo); 4048b8e80941Smrg 4049b8e80941Smrg *pEvent = radv_event_to_handle(event); 4050b8e80941Smrg 4051b8e80941Smrg return VK_SUCCESS; 4052b8e80941Smrg} 4053b8e80941Smrg 4054b8e80941Smrgvoid radv_DestroyEvent( 4055b8e80941Smrg VkDevice _device, 4056b8e80941Smrg VkEvent _event, 4057b8e80941Smrg const VkAllocationCallbacks* pAllocator) 4058b8e80941Smrg{ 4059b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4060b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4061b8e80941Smrg 4062b8e80941Smrg if (!event) 4063b8e80941Smrg return; 4064b8e80941Smrg device->ws->buffer_destroy(event->bo); 4065b8e80941Smrg vk_free2(&device->alloc, pAllocator, event); 4066b8e80941Smrg} 4067b8e80941Smrg 4068b8e80941SmrgVkResult radv_GetEventStatus( 
4069b8e80941Smrg VkDevice _device, 4070b8e80941Smrg VkEvent _event) 4071b8e80941Smrg{ 4072b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4073b8e80941Smrg 4074b8e80941Smrg if (*event->map == 1) 4075b8e80941Smrg return VK_EVENT_SET; 4076b8e80941Smrg return VK_EVENT_RESET; 4077b8e80941Smrg} 4078b8e80941Smrg 4079b8e80941SmrgVkResult radv_SetEvent( 4080b8e80941Smrg VkDevice _device, 4081b8e80941Smrg VkEvent _event) 4082b8e80941Smrg{ 4083b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4084b8e80941Smrg *event->map = 1; 4085b8e80941Smrg 4086b8e80941Smrg return VK_SUCCESS; 4087b8e80941Smrg} 4088b8e80941Smrg 4089b8e80941SmrgVkResult radv_ResetEvent( 4090b8e80941Smrg VkDevice _device, 4091b8e80941Smrg VkEvent _event) 4092b8e80941Smrg{ 4093b8e80941Smrg RADV_FROM_HANDLE(radv_event, event, _event); 4094b8e80941Smrg *event->map = 0; 4095b8e80941Smrg 4096b8e80941Smrg return VK_SUCCESS; 4097b8e80941Smrg} 4098b8e80941Smrg 4099b8e80941SmrgVkResult radv_CreateBuffer( 4100b8e80941Smrg VkDevice _device, 4101b8e80941Smrg const VkBufferCreateInfo* pCreateInfo, 4102b8e80941Smrg const VkAllocationCallbacks* pAllocator, 4103b8e80941Smrg VkBuffer* pBuffer) 4104b8e80941Smrg{ 4105b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4106b8e80941Smrg struct radv_buffer *buffer; 4107b8e80941Smrg 4108b8e80941Smrg assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); 4109b8e80941Smrg 4110b8e80941Smrg buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, 4111b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 4112b8e80941Smrg if (buffer == NULL) 4113b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 4114b8e80941Smrg 4115b8e80941Smrg buffer->size = pCreateInfo->size; 4116b8e80941Smrg buffer->usage = pCreateInfo->usage; 4117b8e80941Smrg buffer->bo = NULL; 4118b8e80941Smrg buffer->offset = 0; 4119b8e80941Smrg buffer->flags = pCreateInfo->flags; 4120b8e80941Smrg 4121b8e80941Smrg buffer->shareable = 
vk_find_struct_const(pCreateInfo->pNext, 4122b8e80941Smrg EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL; 4123b8e80941Smrg 4124b8e80941Smrg if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { 4125b8e80941Smrg buffer->bo = device->ws->buffer_create(device->ws, 4126b8e80941Smrg align64(buffer->size, 4096), 4127b8e80941Smrg 4096, 0, RADEON_FLAG_VIRTUAL, 4128b8e80941Smrg RADV_BO_PRIORITY_VIRTUAL); 4129b8e80941Smrg if (!buffer->bo) { 4130b8e80941Smrg vk_free2(&device->alloc, pAllocator, buffer); 4131b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 4132b8e80941Smrg } 4133b8e80941Smrg } 4134b8e80941Smrg 4135b8e80941Smrg *pBuffer = radv_buffer_to_handle(buffer); 4136b8e80941Smrg 4137b8e80941Smrg return VK_SUCCESS; 4138b8e80941Smrg} 4139b8e80941Smrg 4140b8e80941Smrgvoid radv_DestroyBuffer( 4141b8e80941Smrg VkDevice _device, 4142b8e80941Smrg VkBuffer _buffer, 4143b8e80941Smrg const VkAllocationCallbacks* pAllocator) 4144b8e80941Smrg{ 4145b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4146b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 4147b8e80941Smrg 4148b8e80941Smrg if (!buffer) 4149b8e80941Smrg return; 4150b8e80941Smrg 4151b8e80941Smrg if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) 4152b8e80941Smrg device->ws->buffer_destroy(buffer->bo); 4153b8e80941Smrg 4154b8e80941Smrg vk_free2(&device->alloc, pAllocator, buffer); 4155b8e80941Smrg} 4156b8e80941Smrg 4157b8e80941SmrgVkDeviceAddress radv_GetBufferDeviceAddressEXT( 4158b8e80941Smrg VkDevice device, 4159b8e80941Smrg const VkBufferDeviceAddressInfoEXT* pInfo) 4160b8e80941Smrg{ 4161b8e80941Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); 4162b8e80941Smrg return radv_buffer_get_va(buffer->bo) + buffer->offset; 4163b8e80941Smrg} 4164b8e80941Smrg 4165b8e80941Smrg 4166b8e80941Smrgstatic inline unsigned 4167b8e80941Smrgsi_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil) 4168b8e80941Smrg{ 4169b8e80941Smrg if 
(stencil)
		return plane->surface.u.legacy.stencil_tiling_index[level];
	else
		return plane->surface.u.legacy.tiling_index[level];
}

/* Number of layers addressable through this view: the full depth extent for
 * 3D views, otherwise base layer plus layer count.
 */
static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
{
	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
}

/* Compute the CB_DCC_CONTROL register value for the given color view.
 * Returns 0 (DCC disabled) when the image has no DCC metadata.
 */
static uint32_t
radv_init_dcc_control_reg(struct radv_device *device,
			  struct radv_image_view *iview)
{
	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
	unsigned max_compressed_block_size;
	unsigned independent_64b_blocks;

	if (!radv_image_has_dcc(iview->image))
		return 0;

	/* MSAA surfaces get a smaller max uncompressed block size depending
	 * on bytes-per-element of plane 0.
	 */
	if (iview->image->info.samples > 1) {
		if (iview->image->planes[0].surface.bpe == 1)
			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
		else if (iview->image->planes[0].surface.bpe == 2)
			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
	}

	if (!device->physical_device->rad_info.has_dedicated_vram) {
		/* amdvlk: [min-compressed-block-size] should be set to 32 for
		 * dGPU and 64 for APU because all of our APUs to date use
		 * DIMMs which have a request granularity size of 64B while all
		 * other chips have a 32B request size.
		 */
		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
	}

	if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
				   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
		/* If this DCC image is potentially going to be used in texture
		 * fetches, we need some special settings.
		 */
		independent_64b_blocks = 1;
		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
	} else {
		/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
		 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
		 * big as possible for better compression state.
		 */
		independent_64b_blocks = 0;
		max_compressed_block_size = max_uncompressed_block_size;
	}

	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
}

/* Fill in the CB_* (color buffer) register state in 'cb' for a color
 * attachment view. Handles both the GFX9+ layout and the pre-GFX9 legacy
 * tiled layout, plus FMASK/CMASK/DCC metadata addresses.
 */
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	uint64_t va;
	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
	const struct radeon_surf *surf = &plane->surface;

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;

	/* Base addresses are stored as 256-byte-aligned (>> 8) values. */
	cb->cb_color_base = va >> 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		struct gfx9_surf_meta_flags meta;
		/* Alignment flags come from whichever metadata surface is in
		 * use (DCC if present, else CMASK).
		 */
		if (iview->image->dcc_offset)
			meta = surf->u.gfx9.dcc;
		else
			meta = surf->u.gfx9.cmask;

		cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
			S_028C74_RB_ALIGNED(meta.rb_aligned) |
			S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
		cb->cb_color_base |= surf->tile_swizzle;

		cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
	} else {
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

		cb->cb_color_base += level_info->offset >> 8;
		if (level_info->mode == RADEON_SURF_MODE_2D)
			cb->cb_color_base |= surf->tile_swizzle;

		/* TILE_MAX fields are in units of 8x8 tiles, minus one. */
		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (radv_image_has_fmask(iview->image)) {
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}
	}

	/* CMASK variables */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;

	/* DCC metadata address. */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;
	cb->cb_dcc_base |= surf->tile_swizzle;

	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(max_slice);

	if (iview->image->info.samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->info.samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (radv_image_has_fmask(iview->image)) {
		va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
	} else {
		/* Without FMASK, the FMASK address must mirror the color base. */
		cb->cb_color_fmask = cb->cb_color_base;
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (radv_image_has_fmask(iview->image)) {
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
		if (device->physical_device->rad_info.chip_class == SI) {
			unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
		}
	}

	if (radv_image_has_cmask(iview->image) &&
	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (radv_dcc_enabled(iview->image, iview->base_mip))
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

	/* This must be set for fast clear to work without FMASK. */
	if (!radv_image_has_fmask(iview->image) &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);

		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
		/* Non-zero plane ids divide by the plane's subsampling factors
		 * (relevant for multi-planar/YCbCr formats).
		 */
		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);

		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
			S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
			S_028C68_MIP0_HEIGHT(height - 1) |
			S_028C68_MAX_MIP(iview->image->info.levels - 1);
	}
}

/* Choose the DECOMPRESS_ON_N_ZPLANES value for a TC-compatible HTILE view.
 * Only valid to call when radv_image_is_tc_compat_htile() holds (asserted).
 */
static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device,
				 struct radv_image_view *iview)
{
	unsigned max_zplanes = 0;

	assert(radv_image_is_tc_compat_htile(iview->image));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* Default value for 32-bit depth surfaces. */
		max_zplanes = 4;

		if (iview->vk_format == VK_FORMAT_D16_UNORM &&
		    iview->image->info.samples > 1)
			max_zplanes = 2;

		max_zplanes = max_zplanes + 1;
	} else {
		if (iview->vk_format == VK_FORMAT_D16_UNORM) {
			/* Do not enable Z plane compression for 16-bit depth
			 * surfaces because isn't supported on GFX8. Only
			 * 32-bit depth surfaces are supported by the hardware.
			 * This allows to maintain shader compatibility and to
			 * reduce the number of depth decompressions.
			 */
			max_zplanes = 1;
		} else {
			if (iview->image->info.samples <= 1)
				max_zplanes = 5;
			else if (iview->image->info.samples <= 4)
				max_zplanes = 3;
			else
				max_zplanes = 2;
		}
	}

	return max_zplanes;
}

/* Fill in the DB_* (depth/stencil buffer) register state in 'ds' for a
 * depth/stencil attachment view, including HTILE setup when enabled.
 * Depth/stencil images are single-plane (asserted).
 */
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	const struct radv_image_plane *plane = &iview->image->planes[0];
	const struct radeon_surf *surf = &plane->surface;

	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

	memset(ds, 0, sizeof(*ds));
	/* Polygon-offset scaling depends on the depth format's bit depth. */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = surf->has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		assert(surf->u.gfx9.surf_offset == 0);
		s_offs += surf->u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1) |
			S_028038_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

		ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
		ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
		ds->db_depth_view |= S_028008_MIPID(level);

		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
					S_028038_ITERATE_FLUSH(1);
				ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (!surf->has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned) |
				S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
		}
	} else {
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

		if (stencil_only)
			level_info = &surf->u.legacy.stencil_level[level];

		z_offs += surf->u.legacy.level[level].offset;
		s_offs += surf->u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= CIK) {
			/* CIK+: derive tiling parameters from the tile mode
			 * arrays queried from the kernel.
			 */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = surf->u.legacy.tiling_index[level];
			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
			unsigned macro_index = surf->u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!surf->has_stencil &&
			    !radv_image_is_tc_compat_htile(iview->image))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
			}
		}
	}

	/* Read and write bases are identical; addresses are 256B-aligned. */
	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

/* vkCreateFramebuffer: allocates the framebuffer plus one trailing
 * radv_attachment_info per attachment, and precomputes CB/DS register
 * state for each attachment view. The framebuffer dimensions are clamped
 * to the smallest attachment.
 */
VkResult radv_CreateFramebuffer(
	VkDevice                                    _device,
	const VkFramebufferCreateInfo*              pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFramebuffer*                              pFramebuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_framebuffer *framebuffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

	size_t size = sizeof(*framebuffer) +
		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
				  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (framebuffer == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	framebuffer->attachment_count = pCreateInfo->attachmentCount;
	framebuffer->width = pCreateInfo->width;
	framebuffer->height = pCreateInfo->height;
	framebuffer->layers = pCreateInfo->layers;
	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
		VkImageView _iview = pCreateInfo->pAttachments[i];
		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
		framebuffer->attachments[i].attachment = iview;
		if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
		} else {
			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
		}
		framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
		framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
		framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
	}

	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
	return VK_SUCCESS;
}

/* vkDestroyFramebuffer: frees the framebuffer; NULL handle is a no-op. */
void radv_DestroyFramebuffer(
	VkDevice                                    _device,
	VkFramebuffer                               _fb,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (!fb)
		return;
	vk_free2(&device->alloc, pAllocator, fb);
}

/* Map a Vulkan sampler address mode to the SQ_TEX_WRAP hardware enum. */
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}

/* Map a Vulkan compare op to the SQ_TEX_DEPTH_COMPARE hardware enum. */
static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}

/* Map a Vulkan mag/min filter to the SQ_TEX_XY_FILTER enum; anisotropic
 * variants are chosen when max_ansio > 1. VK_FILTER_CUBIC_IMG is not
 * supported and falls through to the error path.
 */
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter");
		return 0;
	}
}

/* Map a Vulkan mipmap mode to the SQ_TEX_Z_FILTER enum. */
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

/* Map a Vulkan border color to the SQ_TEX_BORDER_COLOR enum; unknown
 * values (including custom border colors) fall back to 0.
 */
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}

/* Encode a max-anisotropy value (1x..16x) as the hardware's log2-style
 * MAX_ANISO_RATIO field: <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, else 4.
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

/* Map a VK_EXT_sampler_filter_minmax reduction mode to the hardware
 * SQ_IMG_FILTER_MODE enum; unknown values fall back to 0 (blend).
 */
static unsigned
radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
{
	switch (mode) {
	case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
		return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
	case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
		return V_008F30_SQ_IMG_FILTER_MODE_MIN;
	case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
		return V_008F30_SQ_IMG_FILTER_MODE_MAX;
	default:
		break;
	}
	return 0;
}

/* Effective max anisotropy for a sampler: a non-negative
 * device->force_aniso debug override wins; otherwise the app's value when
 * anisotropy is enabled and > 1, else 0 (disabled).
 */
static uint32_t
radv_get_max_anisotropy(struct radv_device *device,
			const VkSamplerCreateInfo *pCreateInfo)
{
	if (device->force_aniso >= 0)
		return device->force_aniso;

	if (pCreateInfo->anisotropyEnable &&
	    pCreateInfo->maxAnisotropy > 1.0f)
		return (uint32_t)pCreateInfo->maxAnisotropy;

	return 0;
}

/* Pack the four 32-bit hardware sampler state words (SQ_IMG_SAMP_WORD0-3)
 * from a VkSamplerCreateInfo, honoring the optional
 * VkSamplerReductionModeCreateInfoEXT chained struct.
 */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;

	const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
	if (sampler_reduction)
		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi) |
			     S_008F30_FILTER_MODE(filter_mode));
	/* LOD values are clamped to [0,15] and stored as 4.8 fixed point. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}

/* vkCreateSampler: allocates and initializes a sampler, resolving an
 * optional chained VkSamplerYcbcrConversionInfo into ycbcr_sampler.
 */
VkResult radv_CreateSampler(
	VkDevice                                    _device,
	const VkSamplerCreateInfo*                  pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSampler*                                  pSampler)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_sampler *sampler;

	const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_YCBCR_CONVERSION_INFO);

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

	sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sampler)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_init_sampler(device, sampler, pCreateInfo);

	sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
	*pSampler = radv_sampler_to_handle(sampler);

	return VK_SUCCESS;
}

/* vkDestroySampler: frees the sampler; NULL handle is a no-op. */
void radv_DestroySampler(
	VkDevice                                    _device,
	VkSampler                                   _sampler,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (!sampler)
		return;
	vk_free2(&device->alloc, pAllocator, sampler);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 *   - Loader interface v0 is incompatible with later versions. We don't
	 *     support it.
	 *
	 *   - In loader interface v1:
	 *       - The first ICD entrypoint called by the loader is
	 *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 *         entrypoint.
	 *       - The ICD must statically expose no other Vulkan symbol unless it is
	 *         linked with -Bsymbolic.
	 *       - Each dispatchable Vulkan handle created by the ICD must be
	 *         a pointer to a struct whose first member is VK_LOADER_DATA. The
	 *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 *         vkDestroySurfaceKHR(). The ICD must be capable of working with
	 *         such loader-managed surfaces.
	 *
	 *    - Loader interface v2 differs from v1 in:
	 *       - The first ICD entrypoint called by the loader is
	 *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 *         statically expose this entrypoint.
	 *
	 *    - Loader interface v3 differs from v2 in:
	 *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
	 *          because the loader no longer does so.
	 */
	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
	return VK_SUCCESS;
}

VkResult radv_GetMemoryFdKHR(VkDevice _device,
			     const VkMemoryGetFdInfoKHR *pGetFdInfo,
			     int *pFD)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

	assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

	/* At the moment, we support only the below handle types.
*/ 4952b8e80941Smrg assert(pGetFdInfo->handleType == 4953b8e80941Smrg VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 4954b8e80941Smrg pGetFdInfo->handleType == 4955b8e80941Smrg VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 4956b8e80941Smrg 4957b8e80941Smrg bool ret = radv_get_memory_fd(device, memory, pFD); 4958b8e80941Smrg if (ret == false) 4959b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 4960b8e80941Smrg return VK_SUCCESS; 4961b8e80941Smrg} 4962b8e80941Smrg 4963b8e80941SmrgVkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, 4964b8e80941Smrg VkExternalMemoryHandleTypeFlagBits handleType, 4965b8e80941Smrg int fd, 4966b8e80941Smrg VkMemoryFdPropertiesKHR *pMemoryFdProperties) 4967b8e80941Smrg{ 4968b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 4969b8e80941Smrg 4970b8e80941Smrg switch (handleType) { 4971b8e80941Smrg case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: 4972b8e80941Smrg pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1; 4973b8e80941Smrg return VK_SUCCESS; 4974b8e80941Smrg 4975b8e80941Smrg default: 4976b8e80941Smrg /* The valid usage section for this function says: 4977b8e80941Smrg * 4978b8e80941Smrg * "handleType must not be one of the handle types defined as 4979b8e80941Smrg * opaque." 4980b8e80941Smrg * 4981b8e80941Smrg * So opaque handle types fall into the default "unsupported" case. 
4982b8e80941Smrg */ 4983b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 4984b8e80941Smrg } 4985b8e80941Smrg} 4986b8e80941Smrg 4987b8e80941Smrgstatic VkResult radv_import_opaque_fd(struct radv_device *device, 4988b8e80941Smrg int fd, 4989b8e80941Smrg uint32_t *syncobj) 4990b8e80941Smrg{ 4991b8e80941Smrg uint32_t syncobj_handle = 0; 4992b8e80941Smrg int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle); 4993b8e80941Smrg if (ret != 0) 4994b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 4995b8e80941Smrg 4996b8e80941Smrg if (*syncobj) 4997b8e80941Smrg device->ws->destroy_syncobj(device->ws, *syncobj); 4998b8e80941Smrg 4999b8e80941Smrg *syncobj = syncobj_handle; 5000b8e80941Smrg close(fd); 5001b8e80941Smrg 5002b8e80941Smrg return VK_SUCCESS; 5003b8e80941Smrg} 5004b8e80941Smrg 5005b8e80941Smrgstatic VkResult radv_import_sync_fd(struct radv_device *device, 5006b8e80941Smrg int fd, 5007b8e80941Smrg uint32_t *syncobj) 5008b8e80941Smrg{ 5009b8e80941Smrg /* If we create a syncobj we do it locally so that if we have an error, we don't 5010b8e80941Smrg * leave a syncobj in an undetermined state in the fence. 
*/ 5011b8e80941Smrg uint32_t syncobj_handle = *syncobj; 5012b8e80941Smrg if (!syncobj_handle) { 5013b8e80941Smrg int ret = device->ws->create_syncobj(device->ws, &syncobj_handle); 5014b8e80941Smrg if (ret) { 5015b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5016b8e80941Smrg } 5017b8e80941Smrg } 5018b8e80941Smrg 5019b8e80941Smrg if (fd == -1) { 5020b8e80941Smrg device->ws->signal_syncobj(device->ws, syncobj_handle); 5021b8e80941Smrg } else { 5022b8e80941Smrg int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd); 5023b8e80941Smrg if (ret != 0) 5024b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5025b8e80941Smrg } 5026b8e80941Smrg 5027b8e80941Smrg *syncobj = syncobj_handle; 5028b8e80941Smrg if (fd != -1) 5029b8e80941Smrg close(fd); 5030b8e80941Smrg 5031b8e80941Smrg return VK_SUCCESS; 5032b8e80941Smrg} 5033b8e80941Smrg 5034b8e80941SmrgVkResult radv_ImportSemaphoreFdKHR(VkDevice _device, 5035b8e80941Smrg const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) 5036b8e80941Smrg{ 5037b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 5038b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); 5039b8e80941Smrg uint32_t *syncobj_dst = NULL; 5040b8e80941Smrg 5041b8e80941Smrg if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { 5042b8e80941Smrg syncobj_dst = &sem->temp_syncobj; 5043b8e80941Smrg } else { 5044b8e80941Smrg syncobj_dst = &sem->syncobj; 5045b8e80941Smrg } 5046b8e80941Smrg 5047b8e80941Smrg switch(pImportSemaphoreFdInfo->handleType) { 5048b8e80941Smrg case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: 5049b8e80941Smrg return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); 5050b8e80941Smrg case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: 5051b8e80941Smrg return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); 5052b8e80941Smrg default: 5053b8e80941Smrg 
unreachable("Unhandled semaphore handle type"); 5054b8e80941Smrg } 5055b8e80941Smrg} 5056b8e80941Smrg 5057b8e80941SmrgVkResult radv_GetSemaphoreFdKHR(VkDevice _device, 5058b8e80941Smrg const VkSemaphoreGetFdInfoKHR *pGetFdInfo, 5059b8e80941Smrg int *pFd) 5060b8e80941Smrg{ 5061b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 5062b8e80941Smrg RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore); 5063b8e80941Smrg int ret; 5064b8e80941Smrg uint32_t syncobj_handle; 5065b8e80941Smrg 5066b8e80941Smrg if (sem->temp_syncobj) 5067b8e80941Smrg syncobj_handle = sem->temp_syncobj; 5068b8e80941Smrg else 5069b8e80941Smrg syncobj_handle = sem->syncobj; 5070b8e80941Smrg 5071b8e80941Smrg switch(pGetFdInfo->handleType) { 5072b8e80941Smrg case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: 5073b8e80941Smrg ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); 5074b8e80941Smrg break; 5075b8e80941Smrg case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: 5076b8e80941Smrg ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); 5077b8e80941Smrg if (!ret) { 5078b8e80941Smrg if (sem->temp_syncobj) { 5079b8e80941Smrg close (sem->temp_syncobj); 5080b8e80941Smrg sem->temp_syncobj = 0; 5081b8e80941Smrg } else { 5082b8e80941Smrg device->ws->reset_syncobj(device->ws, syncobj_handle); 5083b8e80941Smrg } 5084b8e80941Smrg } 5085b8e80941Smrg break; 5086b8e80941Smrg default: 5087b8e80941Smrg unreachable("Unhandled semaphore handle type"); 5088b8e80941Smrg } 5089b8e80941Smrg 5090b8e80941Smrg if (ret) 5091b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5092b8e80941Smrg return VK_SUCCESS; 5093b8e80941Smrg} 5094b8e80941Smrg 5095b8e80941Smrgvoid radv_GetPhysicalDeviceExternalSemaphoreProperties( 5096b8e80941Smrg VkPhysicalDevice physicalDevice, 5097b8e80941Smrg const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, 5098b8e80941Smrg VkExternalSemaphoreProperties *pExternalSemaphoreProperties) 
5099b8e80941Smrg{ 5100b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 5101b8e80941Smrg 5102b8e80941Smrg /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */ 5103b8e80941Smrg if (pdevice->rad_info.has_syncobj_wait_for_submit && 5104b8e80941Smrg (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || 5105b8e80941Smrg pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) { 5106b8e80941Smrg pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; 5107b8e80941Smrg pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; 5108b8e80941Smrg pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | 5109b8e80941Smrg VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5110b8e80941Smrg } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { 5111b8e80941Smrg pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; 5112b8e80941Smrg pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; 5113b8e80941Smrg pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | 5114b8e80941Smrg VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5115b8e80941Smrg } else { 5116b8e80941Smrg pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; 5117b8e80941Smrg pExternalSemaphoreProperties->compatibleHandleTypes = 0; 5118b8e80941Smrg pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; 5119b8e80941Smrg } 5120b8e80941Smrg} 5121b8e80941Smrg 5122b8e80941SmrgVkResult radv_ImportFenceFdKHR(VkDevice _device, 5123b8e80941Smrg 
const VkImportFenceFdInfoKHR *pImportFenceFdInfo) 5124b8e80941Smrg{ 5125b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 5126b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence); 5127b8e80941Smrg uint32_t *syncobj_dst = NULL; 5128b8e80941Smrg 5129b8e80941Smrg 5130b8e80941Smrg if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) { 5131b8e80941Smrg syncobj_dst = &fence->temp_syncobj; 5132b8e80941Smrg } else { 5133b8e80941Smrg syncobj_dst = &fence->syncobj; 5134b8e80941Smrg } 5135b8e80941Smrg 5136b8e80941Smrg switch(pImportFenceFdInfo->handleType) { 5137b8e80941Smrg case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: 5138b8e80941Smrg return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst); 5139b8e80941Smrg case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: 5140b8e80941Smrg return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst); 5141b8e80941Smrg default: 5142b8e80941Smrg unreachable("Unhandled fence handle type"); 5143b8e80941Smrg } 5144b8e80941Smrg} 5145b8e80941Smrg 5146b8e80941SmrgVkResult radv_GetFenceFdKHR(VkDevice _device, 5147b8e80941Smrg const VkFenceGetFdInfoKHR *pGetFdInfo, 5148b8e80941Smrg int *pFd) 5149b8e80941Smrg{ 5150b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 5151b8e80941Smrg RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence); 5152b8e80941Smrg int ret; 5153b8e80941Smrg uint32_t syncobj_handle; 5154b8e80941Smrg 5155b8e80941Smrg if (fence->temp_syncobj) 5156b8e80941Smrg syncobj_handle = fence->temp_syncobj; 5157b8e80941Smrg else 5158b8e80941Smrg syncobj_handle = fence->syncobj; 5159b8e80941Smrg 5160b8e80941Smrg switch(pGetFdInfo->handleType) { 5161b8e80941Smrg case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: 5162b8e80941Smrg ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); 5163b8e80941Smrg break; 5164b8e80941Smrg case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: 5165b8e80941Smrg ret = device->ws->export_syncobj_to_sync_file(device->ws, 
syncobj_handle, pFd); 5166b8e80941Smrg if (!ret) { 5167b8e80941Smrg if (fence->temp_syncobj) { 5168b8e80941Smrg close (fence->temp_syncobj); 5169b8e80941Smrg fence->temp_syncobj = 0; 5170b8e80941Smrg } else { 5171b8e80941Smrg device->ws->reset_syncobj(device->ws, syncobj_handle); 5172b8e80941Smrg } 5173b8e80941Smrg } 5174b8e80941Smrg break; 5175b8e80941Smrg default: 5176b8e80941Smrg unreachable("Unhandled fence handle type"); 5177b8e80941Smrg } 5178b8e80941Smrg 5179b8e80941Smrg if (ret) 5180b8e80941Smrg return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); 5181b8e80941Smrg return VK_SUCCESS; 5182b8e80941Smrg} 5183b8e80941Smrg 5184b8e80941Smrgvoid radv_GetPhysicalDeviceExternalFenceProperties( 5185b8e80941Smrg VkPhysicalDevice physicalDevice, 5186b8e80941Smrg const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, 5187b8e80941Smrg VkExternalFenceProperties *pExternalFenceProperties) 5188b8e80941Smrg{ 5189b8e80941Smrg RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 5190b8e80941Smrg 5191b8e80941Smrg if (pdevice->rad_info.has_syncobj_wait_for_submit && 5192b8e80941Smrg (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT || 5193b8e80941Smrg pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) { 5194b8e80941Smrg pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; 5195b8e80941Smrg pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; 5196b8e80941Smrg pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | 5197b8e80941Smrg VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; 5198b8e80941Smrg } else { 5199b8e80941Smrg pExternalFenceProperties->exportFromImportedHandleTypes = 0; 5200b8e80941Smrg pExternalFenceProperties->compatibleHandleTypes = 0; 5201b8e80941Smrg 
pExternalFenceProperties->externalFenceFeatures = 0; 5202b8e80941Smrg } 5203b8e80941Smrg} 5204b8e80941Smrg 5205b8e80941SmrgVkResult 5206b8e80941Smrgradv_CreateDebugReportCallbackEXT(VkInstance _instance, 5207b8e80941Smrg const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, 5208b8e80941Smrg const VkAllocationCallbacks* pAllocator, 5209b8e80941Smrg VkDebugReportCallbackEXT* pCallback) 5210b8e80941Smrg{ 5211b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 5212b8e80941Smrg return vk_create_debug_report_callback(&instance->debug_report_callbacks, 5213b8e80941Smrg pCreateInfo, pAllocator, &instance->alloc, 5214b8e80941Smrg pCallback); 5215b8e80941Smrg} 5216b8e80941Smrg 5217b8e80941Smrgvoid 5218b8e80941Smrgradv_DestroyDebugReportCallbackEXT(VkInstance _instance, 5219b8e80941Smrg VkDebugReportCallbackEXT _callback, 5220b8e80941Smrg const VkAllocationCallbacks* pAllocator) 5221b8e80941Smrg{ 5222b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 5223b8e80941Smrg vk_destroy_debug_report_callback(&instance->debug_report_callbacks, 5224b8e80941Smrg _callback, pAllocator, &instance->alloc); 5225b8e80941Smrg} 5226b8e80941Smrg 5227b8e80941Smrgvoid 5228b8e80941Smrgradv_DebugReportMessageEXT(VkInstance _instance, 5229b8e80941Smrg VkDebugReportFlagsEXT flags, 5230b8e80941Smrg VkDebugReportObjectTypeEXT objectType, 5231b8e80941Smrg uint64_t object, 5232b8e80941Smrg size_t location, 5233b8e80941Smrg int32_t messageCode, 5234b8e80941Smrg const char* pLayerPrefix, 5235b8e80941Smrg const char* pMessage) 5236b8e80941Smrg{ 5237b8e80941Smrg RADV_FROM_HANDLE(radv_instance, instance, _instance); 5238b8e80941Smrg vk_debug_report(&instance->debug_report_callbacks, flags, objectType, 5239b8e80941Smrg object, location, messageCode, pLayerPrefix, pMessage); 5240b8e80941Smrg} 5241b8e80941Smrg 5242b8e80941Smrgvoid 5243b8e80941Smrgradv_GetDeviceGroupPeerMemoryFeatures( 5244b8e80941Smrg VkDevice device, 5245b8e80941Smrg uint32_t heapIndex, 5246b8e80941Smrg uint32_t 
localDeviceIndex, 5247b8e80941Smrg uint32_t remoteDeviceIndex, 5248b8e80941Smrg VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) 5249b8e80941Smrg{ 5250b8e80941Smrg assert(localDeviceIndex == remoteDeviceIndex); 5251b8e80941Smrg 5252b8e80941Smrg *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | 5253b8e80941Smrg VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | 5254b8e80941Smrg VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | 5255b8e80941Smrg VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; 5256b8e80941Smrg} 5257b8e80941Smrg 5258b8e80941Smrgstatic const VkTimeDomainEXT radv_time_domains[] = { 5259b8e80941Smrg VK_TIME_DOMAIN_DEVICE_EXT, 5260b8e80941Smrg VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT, 5261b8e80941Smrg VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT, 5262b8e80941Smrg}; 5263b8e80941Smrg 5264b8e80941SmrgVkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT( 5265b8e80941Smrg VkPhysicalDevice physicalDevice, 5266b8e80941Smrg uint32_t *pTimeDomainCount, 5267b8e80941Smrg VkTimeDomainEXT *pTimeDomains) 5268b8e80941Smrg{ 5269b8e80941Smrg int d; 5270b8e80941Smrg VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount); 5271b8e80941Smrg 5272b8e80941Smrg for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) { 5273b8e80941Smrg vk_outarray_append(&out, i) { 5274b8e80941Smrg *i = radv_time_domains[d]; 5275b8e80941Smrg } 5276b8e80941Smrg } 5277b8e80941Smrg 5278b8e80941Smrg return vk_outarray_status(&out); 5279b8e80941Smrg} 5280b8e80941Smrg 5281b8e80941Smrgstatic uint64_t 5282b8e80941Smrgradv_clock_gettime(clockid_t clock_id) 5283b8e80941Smrg{ 5284b8e80941Smrg struct timespec current; 5285b8e80941Smrg int ret; 5286b8e80941Smrg 5287b8e80941Smrg ret = clock_gettime(clock_id, ¤t); 5288b8e80941Smrg if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW) 5289b8e80941Smrg ret = clock_gettime(CLOCK_MONOTONIC, ¤t); 5290b8e80941Smrg if (ret < 0) 5291b8e80941Smrg return 0; 5292b8e80941Smrg 5293b8e80941Smrg return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec; 5294b8e80941Smrg} 5295b8e80941Smrg 
5296b8e80941SmrgVkResult radv_GetCalibratedTimestampsEXT( 5297b8e80941Smrg VkDevice _device, 5298b8e80941Smrg uint32_t timestampCount, 5299b8e80941Smrg const VkCalibratedTimestampInfoEXT *pTimestampInfos, 5300b8e80941Smrg uint64_t *pTimestamps, 5301b8e80941Smrg uint64_t *pMaxDeviation) 5302b8e80941Smrg{ 5303b8e80941Smrg RADV_FROM_HANDLE(radv_device, device, _device); 5304b8e80941Smrg uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq; 5305b8e80941Smrg int d; 5306b8e80941Smrg uint64_t begin, end; 5307b8e80941Smrg uint64_t max_clock_period = 0; 5308b8e80941Smrg 5309b8e80941Smrg begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW); 5310b8e80941Smrg 5311b8e80941Smrg for (d = 0; d < timestampCount; d++) { 5312b8e80941Smrg switch (pTimestampInfos[d].timeDomain) { 5313b8e80941Smrg case VK_TIME_DOMAIN_DEVICE_EXT: 5314b8e80941Smrg pTimestamps[d] = device->ws->query_value(device->ws, 5315b8e80941Smrg RADEON_TIMESTAMP); 5316b8e80941Smrg uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq); 5317b8e80941Smrg max_clock_period = MAX2(max_clock_period, device_period); 5318b8e80941Smrg break; 5319b8e80941Smrg case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT: 5320b8e80941Smrg pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC); 5321b8e80941Smrg max_clock_period = MAX2(max_clock_period, 1); 5322b8e80941Smrg break; 5323b8e80941Smrg 5324b8e80941Smrg case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT: 5325b8e80941Smrg pTimestamps[d] = begin; 5326b8e80941Smrg break; 5327b8e80941Smrg default: 5328b8e80941Smrg pTimestamps[d] = 0; 5329b8e80941Smrg break; 5330b8e80941Smrg } 5331b8e80941Smrg } 5332b8e80941Smrg 5333b8e80941Smrg end = radv_clock_gettime(CLOCK_MONOTONIC_RAW); 5334b8e80941Smrg 5335b8e80941Smrg /* 5336b8e80941Smrg * The maximum deviation is the sum of the interval over which we 5337b8e80941Smrg * perform the sampling and the maximum period of any sampled 5338b8e80941Smrg * clock. 
That's because the maximum skew between any two sampled 5339b8e80941Smrg * clock edges is when the sampled clock with the largest period is 5340b8e80941Smrg * sampled at the end of that period but right at the beginning of the 5341b8e80941Smrg * sampling interval and some other clock is sampled right at the 5342b8e80941Smrg * begining of its sampling period and right at the end of the 5343b8e80941Smrg * sampling interval. Let's assume the GPU has the longest clock 5344b8e80941Smrg * period and that the application is sampling GPU and monotonic: 5345b8e80941Smrg * 5346b8e80941Smrg * s e 5347b8e80941Smrg * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f 5348b8e80941Smrg * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 5349b8e80941Smrg * 5350b8e80941Smrg * g 5351b8e80941Smrg * 0 1 2 3 5352b8e80941Smrg * GPU -----_____-----_____-----_____-----_____ 5353b8e80941Smrg * 5354b8e80941Smrg * m 5355b8e80941Smrg * x y z 0 1 2 3 4 5 6 7 8 9 a b c 5356b8e80941Smrg * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 5357b8e80941Smrg * 5358b8e80941Smrg * Interval <-----------------> 5359b8e80941Smrg * Deviation <--------------------------> 5360b8e80941Smrg * 5361b8e80941Smrg * s = read(raw) 2 5362b8e80941Smrg * g = read(GPU) 1 5363b8e80941Smrg * m = read(monotonic) 2 5364b8e80941Smrg * e = read(raw) b 5365b8e80941Smrg * 5366b8e80941Smrg * We round the sample interval up by one tick to cover sampling error 5367b8e80941Smrg * in the interval clock 5368b8e80941Smrg */ 5369b8e80941Smrg 5370b8e80941Smrg uint64_t sample_interval = end - begin + 1; 5371b8e80941Smrg 5372b8e80941Smrg *pMaxDeviation = sample_interval + max_clock_period; 5373b8e80941Smrg 5374b8e80941Smrg return VK_SUCCESS; 5375b8e80941Smrg} 5376