anv_device.c revision 01e04c3f
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <drm_fourcc.h>

#include "anv_private.h"
#include "util/strtod.h"
#include "util/debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_string.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "common/gen_defines.h"

#include "genxml/gen7_pack.h"

static void
compiler_debug_log(void *data, const char *fmt, ...)
{ }

static void
compiler_perf_log(void *data, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (unlikely(INTEL_DEBUG & DEBUG_PERF))
      intel_logd_v(fmt, args);

   va_end(args);
}

static VkResult
anv_compute_heap_size(int fd, uint64_t gtt_size, uint64_t *heap_size)
{
   /* Query the total ram from the system */
   struct sysinfo info;
   sysinfo(&info);

   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;

   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   /* We also want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   uint64_t available_gtt = gtt_size * 3 / 4;

   *heap_size = MIN2(available_ram, available_gtt);

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   uint64_t gtt_size;
   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
                                 &gtt_size) == -1) {
      /* If, for whatever reason, we can't actually get the GTT size from the
       * kernel (too old?) fall back to the aperture size.
       */
      anv_perf_warn(NULL, NULL,
                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");

      if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
         return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
                          "failed to get aperture size: %m");
      }
   }

   device->supports_48bit_addresses = (device->info.gen >= 8) &&
      gtt_size > (4ULL << 30 /* GiB */);

   uint64_t heap_size = 0;
   VkResult result = anv_compute_heap_size(fd, gtt_size, &heap_size);
   if (result != VK_SUCCESS)
      return result;

   if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail.  Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      intel_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB "
                 "but 48-bit addresses are not supported",
                 __FILE__, __LINE__);
      heap_size = 2ull << 30;
   }

   if (heap_size <= 3ull * (1ull << 30)) {
      /* In this case, everything fits nicely into the 32-bit address space,
       * so there's no need for supporting 48bit addresses on client-allocated
       * memory objects.
       */
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = heap_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = false,
      };
   } else {
      /* Not everything will fit nicely into a 32-bit address space.  In this
       * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
       * larger 48-bit heap.  If we're in this case, then we have a total heap
       * size larger than 3GiB which most likely means they have 8 GiB of
       * video memory and so carving off 1 GiB for the 32-bit heap should be
       * reasonable.
       */
      const uint64_t heap_size_32bit = 1ull << 30;
      const uint64_t heap_size_48bit = heap_size - heap_size_32bit;

      assert(device->supports_48bit_addresses);

      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = heap_size_48bit,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = heap_size_32bit,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = false,
      };
   }

   uint32_t type_count = 0;
   for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
      uint32_t valid_buffer_usage = ~0;

      /* There appears to be a hardware issue in the VF cache where it only
       * considers the bottom 32 bits of memory addresses.  If you happen to
       * have two vertex buffers which get placed exactly 4 GiB apart and use
       * them in back-to-back draw calls, you can get collisions.  In order to
       * solve this problem, we require vertex and index buffers be bound to
       * memory allocated out of the 32-bit heap.
       */
      if (device->memory.heaps[heap].supports_48bit_addresses) {
         valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
                                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
      }

      if (device->info.has_llc) {
         /* Big core GPUs share LLC with the CPU and thus one memory type can be
          * both cached and coherent at the same time.
185 */ 186 device->memory.types[type_count++] = (struct anv_memory_type) { 187 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 188 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 189 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 190 VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 191 .heapIndex = heap, 192 .valid_buffer_usage = valid_buffer_usage, 193 }; 194 } else { 195 /* The spec requires that we expose a host-visible, coherent memory 196 * type, but Atom GPUs don't share LLC. Thus we offer two memory types 197 * to give the application a choice between cached, but not coherent and 198 * coherent but uncached (WC though). 199 */ 200 device->memory.types[type_count++] = (struct anv_memory_type) { 201 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 202 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 203 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 204 .heapIndex = heap, 205 .valid_buffer_usage = valid_buffer_usage, 206 }; 207 device->memory.types[type_count++] = (struct anv_memory_type) { 208 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 209 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 210 VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 211 .heapIndex = heap, 212 .valid_buffer_usage = valid_buffer_usage, 213 }; 214 } 215 } 216 device->memory.type_count = type_count; 217 218 return VK_SUCCESS; 219} 220 221static VkResult 222anv_physical_device_init_uuids(struct anv_physical_device *device) 223{ 224 const struct build_id_note *note = 225 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids); 226 if (!note) { 227 return vk_errorf(device->instance, device, 228 VK_ERROR_INITIALIZATION_FAILED, 229 "Failed to find build-id"); 230 } 231 232 unsigned build_id_len = build_id_length(note); 233 if (build_id_len < 20) { 234 return vk_errorf(device->instance, device, 235 VK_ERROR_INITIALIZATION_FAILED, 236 "build-id too short. It needs to be a SHA"); 237 } 238 239 memcpy(device->driver_build_sha1, build_id_data(note), 20); 240 241 struct mesa_sha1 sha1_ctx; 242 uint8_t sha1[20]; 243 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); 244 245 /* The pipeline cache UUID is used for determining when a pipeline cache is 246 * invalid. It needs both a driver build and the PCI ID of the device. 247 */ 248 _mesa_sha1_init(&sha1_ctx); 249 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); 250 _mesa_sha1_update(&sha1_ctx, &device->chipset_id, 251 sizeof(device->chipset_id)); 252 _mesa_sha1_final(&sha1_ctx, sha1); 253 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); 254 255 /* The driver UUID is used for determining sharability of images and memory 256 * between two Vulkan instances in separate processes. People who want to 257 * share memory need to also check the device UUID (below) so all this 258 * needs to be is the build-id. 259 */ 260 memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE); 261 262 /* The device UUID uniquely identifies the given device within the machine. 263 * Since we never have more than one device, this doesn't need to be a real 264 * UUID. However, on the off-chance that someone tries to use this to 265 * cache pre-tiled images or something of the like, we use the PCI ID and 266 * some bits of ISL info to ensure that this is safe. 
267 */ 268 _mesa_sha1_init(&sha1_ctx); 269 _mesa_sha1_update(&sha1_ctx, &device->chipset_id, 270 sizeof(device->chipset_id)); 271 _mesa_sha1_update(&sha1_ctx, &device->isl_dev.has_bit6_swizzling, 272 sizeof(device->isl_dev.has_bit6_swizzling)); 273 _mesa_sha1_final(&sha1_ctx, sha1); 274 memcpy(device->device_uuid, sha1, VK_UUID_SIZE); 275 276 return VK_SUCCESS; 277} 278 279static void 280anv_physical_device_init_disk_cache(struct anv_physical_device *device) 281{ 282#ifdef ENABLE_SHADER_CACHE 283 char renderer[10]; 284 MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x", 285 device->chipset_id); 286 assert(len == sizeof(renderer) - 2); 287 288 char timestamp[41]; 289 _mesa_sha1_format(timestamp, device->driver_build_sha1); 290 291 const uint64_t driver_flags = 292 brw_get_compiler_config_value(device->compiler); 293 device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); 294#else 295 device->disk_cache = NULL; 296#endif 297} 298 299static void 300anv_physical_device_free_disk_cache(struct anv_physical_device *device) 301{ 302#ifdef ENABLE_SHADER_CACHE 303 if (device->disk_cache) 304 disk_cache_destroy(device->disk_cache); 305#else 306 assert(device->disk_cache == NULL); 307#endif 308} 309 310static VkResult 311anv_physical_device_init(struct anv_physical_device *device, 312 struct anv_instance *instance, 313 drmDevicePtr drm_device) 314{ 315 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY]; 316 const char *path = drm_device->nodes[DRM_NODE_RENDER]; 317 VkResult result; 318 int fd; 319 int master_fd = -1; 320 321 brw_process_intel_debug_variable(); 322 323 fd = open(path, O_RDWR | O_CLOEXEC); 324 if (fd < 0) 325 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); 326 327 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 328 device->instance = instance; 329 330 assert(strlen(path) < ARRAY_SIZE(device->path)); 331 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path); 332 333 device->no_hw = getenv("INTEL_NO_HW") != NULL; 334 335 const int pci_id_override = gen_get_pci_device_id_override(); 336 if (pci_id_override < 0) { 337 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); 338 if (!device->chipset_id) { 339 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); 340 goto fail; 341 } 342 } else { 343 device->chipset_id = pci_id_override; 344 device->no_hw = true; 345 } 346 347 device->pci_info.domain = drm_device->businfo.pci->domain; 348 device->pci_info.bus = drm_device->businfo.pci->bus; 349 device->pci_info.device = drm_device->businfo.pci->dev; 350 device->pci_info.function = drm_device->businfo.pci->func; 351 352 device->name = gen_get_device_name(device->chipset_id); 353 if (!gen_get_device_info(device->chipset_id, &device->info)) { 354 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); 355 goto fail; 356 } 357 358 if (device->info.is_haswell) { 359 intel_logw("Haswell Vulkan support is incomplete"); 360 } else if (device->info.gen == 7 && !device->info.is_baytrail) { 361 intel_logw("Ivy Bridge Vulkan support is incomplete"); 362 } else if (device->info.gen == 7 && device->info.is_baytrail) { 363 intel_logw("Bay Trail Vulkan support is incomplete"); 364 } else if (device->info.gen >= 8 && device->info.gen <= 10) { 365 /* Gen8-10 fully supported */ 366 } else if (device->info.gen == 11) { 367 intel_logw("Vulkan is not yet fully supported on gen11."); 368 } else { 369 result = vk_errorf(device->instance, device, 370 VK_ERROR_INCOMPATIBLE_DRIVER, 371 "Vulkan not yet supported on %s", device->name); 372 goto fail; 373 } 374 
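   /* Only Gen7 hardware goes through the kernel's batch-buffer command
    * parser, so the version query below is skipped on newer gens and the
    * field is left at -1 there.
    */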
   device->cmd_parser_version = -1;
   if (device->info.gen == 7) {
      device->cmd_parser_version =
         anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
      if (device->cmd_parser_version == -1) {
         result = vk_errorf(device->instance, device,
                            VK_ERROR_INITIALIZATION_FAILED,
                            "failed to get command parser version");
         goto fail;
      }
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      goto fail;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      goto fail;
   }

   if (!device->info.has_llc &&
       anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      goto fail;
   }

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail;

   device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
   device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
   device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
   device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY);
   device->has_syncobj_wait = device->has_syncobj &&
                              anv_gem_supports_syncobj_wait(fd);
   device->has_context_priority = anv_gem_has_context_priority(fd);

   device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
      && device->supports_48bit_addresses;

   device->has_context_isolation =
      anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);

   bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);

   /* Starting with Gen10, the timestamp frequency of the command streamer may
    * vary from one part to another.  We can query the value from the kernel.
    */
   if (device->info.gen >= 10) {
      int timestamp_frequency =
         anv_gem_get_param(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY);

      if (timestamp_frequency < 0)
         intel_logw("Kernel 4.16-rc1+ required to properly query CS timestamp frequency");
      else
         device->info.timestamp_frequency = timestamp_frequency;
   }

   /* GENs prior to 8 do not support EU/Subslice info */
   if (device->info.gen >= 8) {
      device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
      device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);

      /* Without this information, we cannot get the right Braswell
       * brand strings, and we have to use conservative numbers for GPGPU on
       * many platforms, but otherwise, things will just work.
       */
      if (device->subslice_total < 1 || device->eu_total < 1) {
         intel_logw("Kernel 4.1 required to properly query GPU properties");
      }
   } else if (device->info.gen == 7) {
      device->subslice_total = 1 << (device->info.gt - 1);
   }

   if (device->info.is_cherryview &&
       device->subslice_total > 0 && device->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * num threads per EU */
      uint32_t max_cs_threads =
         device->eu_total / device->subslice_total * device->info.num_thread_per_eu;

      /* Fuse configurations may give more threads than expected, never less.
*/ 465 if (max_cs_threads > device->info.max_cs_threads) 466 device->info.max_cs_threads = max_cs_threads; 467 } 468 469 device->compiler = brw_compiler_create(NULL, &device->info); 470 if (device->compiler == NULL) { 471 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 472 goto fail; 473 } 474 device->compiler->shader_debug_log = compiler_debug_log; 475 device->compiler->shader_perf_log = compiler_perf_log; 476 device->compiler->supports_pull_constants = false; 477 device->compiler->constant_buffer_0_is_relative = 478 device->info.gen < 8 || !device->has_context_isolation; 479 device->compiler->supports_shader_constants = true; 480 481 isl_device_init(&device->isl_dev, &device->info, swizzled); 482 483 result = anv_physical_device_init_uuids(device); 484 if (result != VK_SUCCESS) 485 goto fail; 486 487 anv_physical_device_init_disk_cache(device); 488 489 if (instance->enabled_extensions.KHR_display) { 490 master_fd = open(primary_path, O_RDWR | O_CLOEXEC); 491 if (master_fd >= 0) { 492 /* prod the device with a GETPARAM call which will fail if 493 * we don't have permission to even render on this device 494 */ 495 if (anv_gem_get_param(master_fd, I915_PARAM_CHIPSET_ID) == 0) { 496 close(master_fd); 497 master_fd = -1; 498 } 499 } 500 } 501 device->master_fd = master_fd; 502 503 result = anv_init_wsi(device); 504 if (result != VK_SUCCESS) { 505 ralloc_free(device->compiler); 506 anv_physical_device_free_disk_cache(device); 507 goto fail; 508 } 509 510 anv_physical_device_get_supported_extensions(device, 511 &device->supported_extensions); 512 513 514 device->local_fd = fd; 515 516 return VK_SUCCESS; 517 518fail: 519 close(fd); 520 if (master_fd != -1) 521 close(master_fd); 522 return result; 523} 524 525static void 526anv_physical_device_finish(struct anv_physical_device *device) 527{ 528 anv_finish_wsi(device); 529 anv_physical_device_free_disk_cache(device); 530 ralloc_free(device->compiler); 531 close(device->local_fd); 532 if (device->master_fd >= 0) 533 close(device->master_fd); 534} 535 536static void * 537default_alloc_func(void *pUserData, size_t size, size_t align, 538 VkSystemAllocationScope allocationScope) 539{ 540 return malloc(size); 541} 542 543static void * 544default_realloc_func(void *pUserData, void *pOriginal, size_t size, 545 size_t align, VkSystemAllocationScope allocationScope) 546{ 547 return realloc(pOriginal, size); 548} 549 550static void 551default_free_func(void *pUserData, void *pMemory) 552{ 553 free(pMemory); 554} 555 556static const VkAllocationCallbacks default_alloc = { 557 .pUserData = NULL, 558 .pfnAllocation = default_alloc_func, 559 .pfnReallocation = default_realloc_func, 560 .pfnFree = default_free_func, 561}; 562 563VkResult anv_EnumerateInstanceExtensionProperties( 564 const char* pLayerName, 565 uint32_t* pPropertyCount, 566 VkExtensionProperties* pProperties) 567{ 568 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 569 570 for (int i = 0; i < ANV_INSTANCE_EXTENSION_COUNT; i++) { 571 if (anv_instance_extensions_supported.extensions[i]) { 572 vk_outarray_append(&out, prop) { 573 *prop = anv_instance_extensions[i]; 574 } 575 } 576 } 577 578 return vk_outarray_status(&out); 579} 580 581VkResult anv_CreateInstance( 582 const VkInstanceCreateInfo* pCreateInfo, 583 const VkAllocationCallbacks* pAllocator, 584 VkInstance* pInstance) 585{ 586 struct anv_instance *instance; 587 VkResult result; 588 589 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 590 591 struct anv_instance_extension_table enabled_extensions = {}; 592 for 
(uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 593 int idx; 594 for (idx = 0; idx < ANV_INSTANCE_EXTENSION_COUNT; idx++) { 595 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], 596 anv_instance_extensions[idx].extensionName) == 0) 597 break; 598 } 599 600 if (idx >= ANV_INSTANCE_EXTENSION_COUNT) 601 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 602 603 if (!anv_instance_extensions_supported.extensions[idx]) 604 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 605 606 enabled_extensions.extensions[idx] = true; 607 } 608 609 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, 610 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 611 if (!instance) 612 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 613 614 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 615 616 if (pAllocator) 617 instance->alloc = *pAllocator; 618 else 619 instance->alloc = default_alloc; 620 621 instance->app_info = (struct anv_app_info) { .api_version = 0 }; 622 if (pCreateInfo->pApplicationInfo) { 623 const VkApplicationInfo *app = pCreateInfo->pApplicationInfo; 624 625 instance->app_info.app_name = 626 vk_strdup(&instance->alloc, app->pApplicationName, 627 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 628 instance->app_info.app_version = app->applicationVersion; 629 630 instance->app_info.engine_name = 631 vk_strdup(&instance->alloc, app->pEngineName, 632 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 633 instance->app_info.engine_version = app->engineVersion; 634 635 instance->app_info.api_version = app->apiVersion; 636 } 637 638 if (instance->app_info.api_version == 0) 639 instance->app_info.api_version = VK_API_VERSION_1_0; 640 641 instance->enabled_extensions = enabled_extensions; 642 643 for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) { 644 /* Vulkan requires that entrypoints for extensions which have not been 645 * enabled must not be advertised. 646 */ 647 if (!anv_instance_entrypoint_is_enabled(i, instance->app_info.api_version, 648 &instance->enabled_extensions)) { 649 instance->dispatch.entrypoints[i] = NULL; 650 } else { 651 instance->dispatch.entrypoints[i] = 652 anv_instance_dispatch_table.entrypoints[i]; 653 } 654 } 655 656 for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) { 657 /* Vulkan requires that entrypoints for extensions which have not been 658 * enabled must not be advertised. 659 */ 660 if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version, 661 &instance->enabled_extensions, NULL)) { 662 instance->device_dispatch.entrypoints[i] = NULL; 663 } else { 664 instance->device_dispatch.entrypoints[i] = 665 anv_device_dispatch_table.entrypoints[i]; 666 } 667 } 668 669 instance->physicalDeviceCount = -1; 670 671 result = vk_debug_report_instance_init(&instance->debug_report_callbacks); 672 if (result != VK_SUCCESS) { 673 vk_free2(&default_alloc, pAllocator, instance); 674 return vk_error(result); 675 } 676 677 instance->pipeline_cache_enabled = 678 env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); 679 680 _mesa_locale_init(); 681 682 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 683 684 *pInstance = anv_instance_to_handle(instance); 685 686 return VK_SUCCESS; 687} 688 689void anv_DestroyInstance( 690 VkInstance _instance, 691 const VkAllocationCallbacks* pAllocator) 692{ 693 ANV_FROM_HANDLE(anv_instance, instance, _instance); 694 695 if (!instance) 696 return; 697 698 if (instance->physicalDeviceCount > 0) { 699 /* We support at most one physical device. 
*/ 700 assert(instance->physicalDeviceCount == 1); 701 anv_physical_device_finish(&instance->physicalDevice); 702 } 703 704 vk_free(&instance->alloc, (char *)instance->app_info.app_name); 705 vk_free(&instance->alloc, (char *)instance->app_info.engine_name); 706 707 VG(VALGRIND_DESTROY_MEMPOOL(instance)); 708 709 vk_debug_report_instance_destroy(&instance->debug_report_callbacks); 710 711 _mesa_locale_fini(); 712 713 vk_free(&instance->alloc, instance); 714} 715 716static VkResult 717anv_enumerate_devices(struct anv_instance *instance) 718{ 719 /* TODO: Check for more devices ? */ 720 drmDevicePtr devices[8]; 721 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; 722 int max_devices; 723 724 instance->physicalDeviceCount = 0; 725 726 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); 727 if (max_devices < 1) 728 return VK_ERROR_INCOMPATIBLE_DRIVER; 729 730 for (unsigned i = 0; i < (unsigned)max_devices; i++) { 731 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && 732 devices[i]->bustype == DRM_BUS_PCI && 733 devices[i]->deviceinfo.pci->vendor_id == 0x8086) { 734 735 result = anv_physical_device_init(&instance->physicalDevice, 736 instance, devices[i]); 737 if (result != VK_ERROR_INCOMPATIBLE_DRIVER) 738 break; 739 } 740 } 741 drmFreeDevices(devices, max_devices); 742 743 if (result == VK_SUCCESS) 744 instance->physicalDeviceCount = 1; 745 746 return result; 747} 748 749static VkResult 750anv_instance_ensure_physical_device(struct anv_instance *instance) 751{ 752 if (instance->physicalDeviceCount < 0) { 753 VkResult result = anv_enumerate_devices(instance); 754 if (result != VK_SUCCESS && 755 result != VK_ERROR_INCOMPATIBLE_DRIVER) 756 return result; 757 } 758 759 return VK_SUCCESS; 760} 761 762VkResult anv_EnumeratePhysicalDevices( 763 VkInstance _instance, 764 uint32_t* pPhysicalDeviceCount, 765 VkPhysicalDevice* pPhysicalDevices) 766{ 767 ANV_FROM_HANDLE(anv_instance, instance, _instance); 768 VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount); 769 770 VkResult result = anv_instance_ensure_physical_device(instance); 771 if (result != VK_SUCCESS) 772 return result; 773 774 if (instance->physicalDeviceCount == 0) 775 return VK_SUCCESS; 776 777 assert(instance->physicalDeviceCount == 1); 778 vk_outarray_append(&out, i) { 779 *i = anv_physical_device_to_handle(&instance->physicalDevice); 780 } 781 782 return vk_outarray_status(&out); 783} 784 785VkResult anv_EnumeratePhysicalDeviceGroups( 786 VkInstance _instance, 787 uint32_t* pPhysicalDeviceGroupCount, 788 VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) 789{ 790 ANV_FROM_HANDLE(anv_instance, instance, _instance); 791 VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties, 792 pPhysicalDeviceGroupCount); 793 794 VkResult result = anv_instance_ensure_physical_device(instance); 795 if (result != VK_SUCCESS) 796 return result; 797 798 if (instance->physicalDeviceCount == 0) 799 return VK_SUCCESS; 800 801 assert(instance->physicalDeviceCount == 1); 802 803 vk_outarray_append(&out, p) { 804 p->physicalDeviceCount = 1; 805 memset(p->physicalDevices, 0, sizeof(p->physicalDevices)); 806 p->physicalDevices[0] = 807 anv_physical_device_to_handle(&instance->physicalDevice); 808 p->subsetAllocation = VK_FALSE; 809 810 vk_foreach_struct(ext, p->pNext) 811 anv_debug_ignored_stype(ext->sType); 812 } 813 814 return vk_outarray_status(&out); 815} 816 817void anv_GetPhysicalDeviceFeatures( 818 VkPhysicalDevice physicalDevice, 819 VkPhysicalDeviceFeatures* pFeatures) 820{ 821 ANV_FROM_HANDLE(anv_physical_device, 
pdevice, physicalDevice); 822 823 *pFeatures = (VkPhysicalDeviceFeatures) { 824 .robustBufferAccess = true, 825 .fullDrawIndexUint32 = true, 826 .imageCubeArray = true, 827 .independentBlend = true, 828 .geometryShader = true, 829 .tessellationShader = true, 830 .sampleRateShading = true, 831 .dualSrcBlend = true, 832 .logicOp = true, 833 .multiDrawIndirect = true, 834 .drawIndirectFirstInstance = true, 835 .depthClamp = true, 836 .depthBiasClamp = true, 837 .fillModeNonSolid = true, 838 .depthBounds = false, 839 .wideLines = true, 840 .largePoints = true, 841 .alphaToOne = true, 842 .multiViewport = true, 843 .samplerAnisotropy = true, 844 .textureCompressionETC2 = pdevice->info.gen >= 8 || 845 pdevice->info.is_baytrail, 846 .textureCompressionASTC_LDR = pdevice->info.gen >= 9, /* FINISHME CHV */ 847 .textureCompressionBC = true, 848 .occlusionQueryPrecise = true, 849 .pipelineStatisticsQuery = true, 850 .fragmentStoresAndAtomics = true, 851 .shaderTessellationAndGeometryPointSize = true, 852 .shaderImageGatherExtended = true, 853 .shaderStorageImageExtendedFormats = true, 854 .shaderStorageImageMultisample = false, 855 .shaderStorageImageReadWithoutFormat = false, 856 .shaderStorageImageWriteWithoutFormat = true, 857 .shaderUniformBufferArrayDynamicIndexing = true, 858 .shaderSampledImageArrayDynamicIndexing = true, 859 .shaderStorageBufferArrayDynamicIndexing = true, 860 .shaderStorageImageArrayDynamicIndexing = true, 861 .shaderClipDistance = true, 862 .shaderCullDistance = true, 863 .shaderFloat64 = pdevice->info.gen >= 8 && 864 pdevice->info.has_64bit_types, 865 .shaderInt64 = pdevice->info.gen >= 8 && 866 pdevice->info.has_64bit_types, 867 .shaderInt16 = pdevice->info.gen >= 8, 868 .shaderResourceMinLod = false, 869 .variableMultisampleRate = true, 870 .inheritedQueries = true, 871 }; 872 873 /* We can't do image stores in vec4 shaders */ 874 pFeatures->vertexPipelineStoresAndAtomics = 875 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] && 876 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY]; 877 878 struct anv_app_info *app_info = &pdevice->instance->app_info; 879 880 /* The new DOOM and Wolfenstein games require depthBounds without 881 * checking for it. They seem to run fine without it so just claim it's 882 * there and accept the consequences. 
883 */ 884 if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0) 885 pFeatures->depthBounds = true; 886} 887 888void anv_GetPhysicalDeviceFeatures2( 889 VkPhysicalDevice physicalDevice, 890 VkPhysicalDeviceFeatures2* pFeatures) 891{ 892 anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); 893 894 vk_foreach_struct(ext, pFeatures->pNext) { 895 switch (ext->sType) { 896 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { 897 VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; 898 features->protectedMemory = VK_FALSE; 899 break; 900 } 901 902 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { 903 VkPhysicalDeviceMultiviewFeatures *features = 904 (VkPhysicalDeviceMultiviewFeatures *)ext; 905 features->multiview = true; 906 features->multiviewGeometryShader = true; 907 features->multiviewTessellationShader = true; 908 break; 909 } 910 911 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { 912 VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; 913 features->variablePointersStorageBuffer = true; 914 features->variablePointers = true; 915 break; 916 } 917 918 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { 919 VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = 920 (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext; 921 features->samplerYcbcrConversion = true; 922 break; 923 } 924 925 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: { 926 VkPhysicalDeviceShaderDrawParameterFeatures *features = (void *)ext; 927 features->shaderDrawParameters = true; 928 break; 929 } 930 931 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: { 932 VkPhysicalDevice16BitStorageFeaturesKHR *features = 933 (VkPhysicalDevice16BitStorageFeaturesKHR *)ext; 934 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); 935 936 features->storageBuffer16BitAccess = pdevice->info.gen >= 8; 937 features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8; 938 features->storagePushConstant16 = pdevice->info.gen >= 8; 939 features->storageInputOutput16 = false; 940 break; 941 } 942 943 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { 944 VkPhysicalDevice8BitStorageFeaturesKHR *features = 945 (VkPhysicalDevice8BitStorageFeaturesKHR *)ext; 946 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); 947 948 features->storageBuffer8BitAccess = pdevice->info.gen >= 8; 949 features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8; 950 features->storagePushConstant8 = pdevice->info.gen >= 8; 951 break; 952 } 953 954 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { 955 VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = 956 (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; 957 features->vertexAttributeInstanceRateDivisor = VK_TRUE; 958 features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE; 959 break; 960 } 961 962 default: 963 anv_debug_ignored_stype(ext->sType); 964 break; 965 } 966 } 967} 968 969void anv_GetPhysicalDeviceProperties( 970 VkPhysicalDevice physicalDevice, 971 VkPhysicalDeviceProperties* pProperties) 972{ 973 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); 974 const struct gen_device_info *devinfo = &pdevice->info; 975 976 /* See assertions made when programming the buffer surface state. */ 977 const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ? 
978 (1ul << 30) : (1ul << 27); 979 980 const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ? 981 128 : 16; 982 983 const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES; 984 985 VkSampleCountFlags sample_counts = 986 isl_device_get_sample_counts(&pdevice->isl_dev); 987 988 989 VkPhysicalDeviceLimits limits = { 990 .maxImageDimension1D = (1 << 14), 991 .maxImageDimension2D = (1 << 14), 992 .maxImageDimension3D = (1 << 11), 993 .maxImageDimensionCube = (1 << 14), 994 .maxImageArrayLayers = (1 << 11), 995 .maxTexelBufferElements = 128 * 1024 * 1024, 996 .maxUniformBufferRange = (1ul << 27), 997 .maxStorageBufferRange = max_raw_buffer_sz, 998 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, 999 .maxMemoryAllocationCount = UINT32_MAX, 1000 .maxSamplerAllocationCount = 64 * 1024, 1001 .bufferImageGranularity = 64, /* A cache line */ 1002 .sparseAddressSpaceSize = 0, 1003 .maxBoundDescriptorSets = MAX_SETS, 1004 .maxPerStageDescriptorSamplers = max_samplers, 1005 .maxPerStageDescriptorUniformBuffers = 64, 1006 .maxPerStageDescriptorStorageBuffers = 64, 1007 .maxPerStageDescriptorSampledImages = max_samplers, 1008 .maxPerStageDescriptorStorageImages = max_images, 1009 .maxPerStageDescriptorInputAttachments = 64, 1010 .maxPerStageResources = 250, 1011 .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ 1012 .maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */ 1013 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, 1014 .maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */ 1015 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, 1016 .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */ 1017 .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ 1018 .maxDescriptorSetInputAttachments = 256, 1019 .maxVertexInputAttributes = MAX_VBS, 1020 .maxVertexInputBindings = MAX_VBS, 1021 .maxVertexInputAttributeOffset = 2047, 1022 .maxVertexInputBindingStride = 2048, 1023 .maxVertexOutputComponents = 128, 1024 .maxTessellationGenerationLevel = 64, 1025 .maxTessellationPatchSize = 32, 1026 .maxTessellationControlPerVertexInputComponents = 128, 1027 .maxTessellationControlPerVertexOutputComponents = 128, 1028 .maxTessellationControlPerPatchOutputComponents = 128, 1029 .maxTessellationControlTotalOutputComponents = 2048, 1030 .maxTessellationEvaluationInputComponents = 128, 1031 .maxTessellationEvaluationOutputComponents = 128, 1032 .maxGeometryShaderInvocations = 32, 1033 .maxGeometryInputComponents = 64, 1034 .maxGeometryOutputComponents = 128, 1035 .maxGeometryOutputVertices = 256, 1036 .maxGeometryTotalOutputComponents = 1024, 1037 .maxFragmentInputComponents = 112, /* 128 components - (POS, PSIZ, CLIP_DIST0, CLIP_DIST1) */ 1038 .maxFragmentOutputAttachments = 8, 1039 .maxFragmentDualSrcAttachments = 1, 1040 .maxFragmentCombinedOutputResources = 8, 1041 .maxComputeSharedMemorySize = 32768, 1042 .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, 1043 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, 1044 .maxComputeWorkGroupSize = { 1045 16 * devinfo->max_cs_threads, 1046 16 * devinfo->max_cs_threads, 1047 16 * devinfo->max_cs_threads, 1048 }, 1049 .subPixelPrecisionBits = 4 /* FIXME */, 1050 .subTexelPrecisionBits = 4 /* FIXME */, 1051 .mipmapPrecisionBits = 4 /* FIXME 
*/, 1052 .maxDrawIndexedIndexValue = UINT32_MAX, 1053 .maxDrawIndirectCount = UINT32_MAX, 1054 .maxSamplerLodBias = 16, 1055 .maxSamplerAnisotropy = 16, 1056 .maxViewports = MAX_VIEWPORTS, 1057 .maxViewportDimensions = { (1 << 14), (1 << 14) }, 1058 .viewportBoundsRange = { INT16_MIN, INT16_MAX }, 1059 .viewportSubPixelBits = 13, /* We take a float? */ 1060 .minMemoryMapAlignment = 4096, /* A page */ 1061 .minTexelBufferOffsetAlignment = 1, 1062 /* We need 16 for UBO block reads to work and 32 for push UBOs */ 1063 .minUniformBufferOffsetAlignment = 32, 1064 .minStorageBufferOffsetAlignment = 4, 1065 .minTexelOffset = -8, 1066 .maxTexelOffset = 7, 1067 .minTexelGatherOffset = -32, 1068 .maxTexelGatherOffset = 31, 1069 .minInterpolationOffset = -0.5, 1070 .maxInterpolationOffset = 0.4375, 1071 .subPixelInterpolationOffsetBits = 4, 1072 .maxFramebufferWidth = (1 << 14), 1073 .maxFramebufferHeight = (1 << 14), 1074 .maxFramebufferLayers = (1 << 11), 1075 .framebufferColorSampleCounts = sample_counts, 1076 .framebufferDepthSampleCounts = sample_counts, 1077 .framebufferStencilSampleCounts = sample_counts, 1078 .framebufferNoAttachmentsSampleCounts = sample_counts, 1079 .maxColorAttachments = MAX_RTS, 1080 .sampledImageColorSampleCounts = sample_counts, 1081 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, 1082 .sampledImageDepthSampleCounts = sample_counts, 1083 .sampledImageStencilSampleCounts = sample_counts, 1084 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, 1085 .maxSampleMaskWords = 1, 1086 .timestampComputeAndGraphics = false, 1087 .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, 1088 .maxClipDistances = 8, 1089 .maxCullDistances = 8, 1090 .maxCombinedClipAndCullDistances = 8, 1091 .discreteQueuePriorities = 2, 1092 .pointSizeRange = { 0.125, 255.875 }, 1093 .lineWidthRange = { 0.0, 7.9921875 }, 1094 .pointSizeGranularity = (1.0 / 8.0), 1095 .lineWidthGranularity = (1.0 / 128.0), 1096 .strictLines = false, /* FINISHME */ 1097 .standardSampleLocations = true, 1098 .optimalBufferCopyOffsetAlignment = 128, 1099 .optimalBufferCopyRowPitchAlignment = 128, 1100 .nonCoherentAtomSize = 64, 1101 }; 1102 1103 *pProperties = (VkPhysicalDeviceProperties) { 1104 .apiVersion = anv_physical_device_api_version(pdevice), 1105 .driverVersion = vk_get_driver_version(), 1106 .vendorID = 0x8086, 1107 .deviceID = pdevice->chipset_id, 1108 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, 1109 .limits = limits, 1110 .sparseProperties = {0}, /* Broadwell doesn't do sparse. 
*/ 1111 }; 1112 1113 snprintf(pProperties->deviceName, sizeof(pProperties->deviceName), 1114 "%s", pdevice->name); 1115 memcpy(pProperties->pipelineCacheUUID, 1116 pdevice->pipeline_cache_uuid, VK_UUID_SIZE); 1117} 1118 1119void anv_GetPhysicalDeviceProperties2( 1120 VkPhysicalDevice physicalDevice, 1121 VkPhysicalDeviceProperties2* pProperties) 1122{ 1123 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); 1124 1125 anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); 1126 1127 vk_foreach_struct(ext, pProperties->pNext) { 1128 switch (ext->sType) { 1129 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { 1130 VkPhysicalDevicePushDescriptorPropertiesKHR *properties = 1131 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; 1132 1133 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; 1134 break; 1135 } 1136 1137 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: { 1138 VkPhysicalDeviceDriverPropertiesKHR *driver_props = 1139 (VkPhysicalDeviceDriverPropertiesKHR *) ext; 1140 1141 driver_props->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR; 1142 util_snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, 1143 "Intel open-source Mesa driver"); 1144 1145 util_snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR, 1146 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); 1147 1148 driver_props->conformanceVersion = (VkConformanceVersionKHR) { 1149 .major = 1, 1150 .minor = 1, 1151 .subminor = 2, 1152 .patch = 0, 1153 }; 1154 break; 1155 } 1156 1157 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { 1158 VkPhysicalDeviceIDProperties *id_props = 1159 (VkPhysicalDeviceIDProperties *)ext; 1160 memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); 1161 memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); 1162 /* The LUID is for Windows. */ 1163 id_props->deviceLUIDValid = false; 1164 break; 1165 } 1166 1167 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { 1168 VkPhysicalDeviceMaintenance3Properties *props = 1169 (VkPhysicalDeviceMaintenance3Properties *)ext; 1170 /* This value doesn't matter for us today as our per-stage 1171 * descriptors are the real limit. 
1172 */ 1173 props->maxPerSetDescriptors = 1024; 1174 props->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE; 1175 break; 1176 } 1177 1178 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { 1179 VkPhysicalDeviceMultiviewProperties *properties = 1180 (VkPhysicalDeviceMultiviewProperties *)ext; 1181 properties->maxMultiviewViewCount = 16; 1182 properties->maxMultiviewInstanceIndex = UINT32_MAX / 16; 1183 break; 1184 } 1185 1186 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { 1187 VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = 1188 (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; 1189 properties->pciDomain = pdevice->pci_info.domain; 1190 properties->pciBus = pdevice->pci_info.bus; 1191 properties->pciDevice = pdevice->pci_info.device; 1192 properties->pciFunction = pdevice->pci_info.function; 1193 break; 1194 } 1195 1196 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { 1197 VkPhysicalDevicePointClippingProperties *properties = 1198 (VkPhysicalDevicePointClippingProperties *) ext; 1199 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; 1200 anv_finishme("Implement pop-free point clipping"); 1201 break; 1202 } 1203 1204 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { 1205 VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties = 1206 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext; 1207 properties->filterMinmaxImageComponentMapping = pdevice->info.gen >= 9; 1208 properties->filterMinmaxSingleComponentFormats = true; 1209 break; 1210 } 1211 1212 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { 1213 VkPhysicalDeviceSubgroupProperties *properties = (void *)ext; 1214 1215 properties->subgroupSize = BRW_SUBGROUP_SIZE; 1216 1217 VkShaderStageFlags scalar_stages = 0; 1218 for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { 1219 if (pdevice->compiler->scalar_stage[stage]) 1220 scalar_stages |= mesa_to_vk_shader_stage(stage); 1221 } 1222 properties->supportedStages = scalar_stages; 1223 1224 properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | 1225 VK_SUBGROUP_FEATURE_VOTE_BIT | 1226 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | 1227 VK_SUBGROUP_FEATURE_BALLOT_BIT | 1228 VK_SUBGROUP_FEATURE_SHUFFLE_BIT | 1229 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | 1230 VK_SUBGROUP_FEATURE_CLUSTERED_BIT | 1231 VK_SUBGROUP_FEATURE_QUAD_BIT; 1232 properties->quadOperationsInAllStages = VK_TRUE; 1233 break; 1234 } 1235 1236 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { 1237 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props = 1238 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; 1239 /* We have to restrict this a bit for multiview */ 1240 props->maxVertexAttribDivisor = UINT32_MAX / 16; 1241 break; 1242 } 1243 1244 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { 1245 VkPhysicalDeviceProtectedMemoryProperties *props = 1246 (VkPhysicalDeviceProtectedMemoryProperties *)ext; 1247 props->protectedNoFault = false; 1248 break; 1249 } 1250 1251 default: 1252 anv_debug_ignored_stype(ext->sType); 1253 break; 1254 } 1255 } 1256} 1257 1258/* We support exactly one queue family. 
*/ 1259static const VkQueueFamilyProperties 1260anv_queue_family_properties = { 1261 .queueFlags = VK_QUEUE_GRAPHICS_BIT | 1262 VK_QUEUE_COMPUTE_BIT | 1263 VK_QUEUE_TRANSFER_BIT, 1264 .queueCount = 1, 1265 .timestampValidBits = 36, /* XXX: Real value here */ 1266 .minImageTransferGranularity = { 1, 1, 1 }, 1267}; 1268 1269void anv_GetPhysicalDeviceQueueFamilyProperties( 1270 VkPhysicalDevice physicalDevice, 1271 uint32_t* pCount, 1272 VkQueueFamilyProperties* pQueueFamilyProperties) 1273{ 1274 VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount); 1275 1276 vk_outarray_append(&out, p) { 1277 *p = anv_queue_family_properties; 1278 } 1279} 1280 1281void anv_GetPhysicalDeviceQueueFamilyProperties2( 1282 VkPhysicalDevice physicalDevice, 1283 uint32_t* pQueueFamilyPropertyCount, 1284 VkQueueFamilyProperties2* pQueueFamilyProperties) 1285{ 1286 1287 VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); 1288 1289 vk_outarray_append(&out, p) { 1290 p->queueFamilyProperties = anv_queue_family_properties; 1291 1292 vk_foreach_struct(s, p->pNext) { 1293 anv_debug_ignored_stype(s->sType); 1294 } 1295 } 1296} 1297 1298void anv_GetPhysicalDeviceMemoryProperties( 1299 VkPhysicalDevice physicalDevice, 1300 VkPhysicalDeviceMemoryProperties* pMemoryProperties) 1301{ 1302 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); 1303 1304 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count; 1305 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) { 1306 pMemoryProperties->memoryTypes[i] = (VkMemoryType) { 1307 .propertyFlags = physical_device->memory.types[i].propertyFlags, 1308 .heapIndex = physical_device->memory.types[i].heapIndex, 1309 }; 1310 } 1311 1312 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count; 1313 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) { 1314 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) { 1315 .size = physical_device->memory.heaps[i].size, 1316 .flags = physical_device->memory.heaps[i].flags, 1317 }; 1318 } 1319} 1320 1321void anv_GetPhysicalDeviceMemoryProperties2( 1322 VkPhysicalDevice physicalDevice, 1323 VkPhysicalDeviceMemoryProperties2* pMemoryProperties) 1324{ 1325 anv_GetPhysicalDeviceMemoryProperties(physicalDevice, 1326 &pMemoryProperties->memoryProperties); 1327 1328 vk_foreach_struct(ext, pMemoryProperties->pNext) { 1329 switch (ext->sType) { 1330 default: 1331 anv_debug_ignored_stype(ext->sType); 1332 break; 1333 } 1334 } 1335} 1336 1337void 1338anv_GetDeviceGroupPeerMemoryFeatures( 1339 VkDevice device, 1340 uint32_t heapIndex, 1341 uint32_t localDeviceIndex, 1342 uint32_t remoteDeviceIndex, 1343 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) 1344{ 1345 assert(localDeviceIndex == 0 && remoteDeviceIndex == 0); 1346 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | 1347 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | 1348 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | 1349 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; 1350} 1351 1352PFN_vkVoidFunction anv_GetInstanceProcAddr( 1353 VkInstance _instance, 1354 const char* pName) 1355{ 1356 ANV_FROM_HANDLE(anv_instance, instance, _instance); 1357 1358 /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly 1359 * when we have to return valid function pointers, NULL, or it's left 1360 * undefined. See the table for exact details. 
1361 */ 1362 if (pName == NULL) 1363 return NULL; 1364 1365#define LOOKUP_ANV_ENTRYPOINT(entrypoint) \ 1366 if (strcmp(pName, "vk" #entrypoint) == 0) \ 1367 return (PFN_vkVoidFunction)anv_##entrypoint 1368 1369 LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceExtensionProperties); 1370 LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceLayerProperties); 1371 LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceVersion); 1372 LOOKUP_ANV_ENTRYPOINT(CreateInstance); 1373 1374#undef LOOKUP_ANV_ENTRYPOINT 1375 1376 if (instance == NULL) 1377 return NULL; 1378 1379 int idx = anv_get_instance_entrypoint_index(pName); 1380 if (idx >= 0) 1381 return instance->dispatch.entrypoints[idx]; 1382 1383 idx = anv_get_device_entrypoint_index(pName); 1384 if (idx >= 0) 1385 return instance->device_dispatch.entrypoints[idx]; 1386 1387 return NULL; 1388} 1389 1390/* With version 1+ of the loader interface the ICD should expose 1391 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. 1392 */ 1393PUBLIC 1394VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 1395 VkInstance instance, 1396 const char* pName); 1397 1398PUBLIC 1399VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 1400 VkInstance instance, 1401 const char* pName) 1402{ 1403 return anv_GetInstanceProcAddr(instance, pName); 1404} 1405 1406PFN_vkVoidFunction anv_GetDeviceProcAddr( 1407 VkDevice _device, 1408 const char* pName) 1409{ 1410 ANV_FROM_HANDLE(anv_device, device, _device); 1411 1412 if (!device || !pName) 1413 return NULL; 1414 1415 int idx = anv_get_device_entrypoint_index(pName); 1416 if (idx < 0) 1417 return NULL; 1418 1419 return device->dispatch.entrypoints[idx]; 1420} 1421 1422VkResult 1423anv_CreateDebugReportCallbackEXT(VkInstance _instance, 1424 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, 1425 const VkAllocationCallbacks* pAllocator, 1426 VkDebugReportCallbackEXT* pCallback) 1427{ 1428 ANV_FROM_HANDLE(anv_instance, instance, _instance); 1429 return vk_create_debug_report_callback(&instance->debug_report_callbacks, 1430 pCreateInfo, pAllocator, &instance->alloc, 1431 pCallback); 1432} 1433 1434void 1435anv_DestroyDebugReportCallbackEXT(VkInstance _instance, 1436 VkDebugReportCallbackEXT _callback, 1437 const VkAllocationCallbacks* pAllocator) 1438{ 1439 ANV_FROM_HANDLE(anv_instance, instance, _instance); 1440 vk_destroy_debug_report_callback(&instance->debug_report_callbacks, 1441 _callback, pAllocator, &instance->alloc); 1442} 1443 1444void 1445anv_DebugReportMessageEXT(VkInstance _instance, 1446 VkDebugReportFlagsEXT flags, 1447 VkDebugReportObjectTypeEXT objectType, 1448 uint64_t object, 1449 size_t location, 1450 int32_t messageCode, 1451 const char* pLayerPrefix, 1452 const char* pMessage) 1453{ 1454 ANV_FROM_HANDLE(anv_instance, instance, _instance); 1455 vk_debug_report(&instance->debug_report_callbacks, flags, objectType, 1456 object, location, messageCode, pLayerPrefix, pMessage); 1457} 1458 1459static void 1460anv_queue_init(struct anv_device *device, struct anv_queue *queue) 1461{ 1462 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 1463 queue->device = device; 1464 queue->flags = 0; 1465} 1466 1467static void 1468anv_queue_finish(struct anv_queue *queue) 1469{ 1470} 1471 1472static struct anv_state 1473anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) 1474{ 1475 struct anv_state state; 1476 1477 state = anv_state_pool_alloc(pool, size, align); 1478 memcpy(state.map, p, size); 1479 1480 anv_state_flush(pool->block_pool.device, state); 
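   /* The flush above only matters on non-LLC platforms, where
    * anv_state_flush() clflushes the range so the GPU sees the freshly
    * written data; with a shared last-level cache it is a no-op.
    */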
1481 1482 return state; 1483} 1484 1485struct gen8_border_color { 1486 union { 1487 float float32[4]; 1488 uint32_t uint32[4]; 1489 }; 1490 /* Pad out to 64 bytes */ 1491 uint32_t _pad[12]; 1492}; 1493 1494static void 1495anv_device_init_border_colors(struct anv_device *device) 1496{ 1497 static const struct gen8_border_color border_colors[] = { 1498 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, 1499 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, 1500 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, 1501 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, 1502 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, 1503 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, 1504 }; 1505 1506 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, 1507 sizeof(border_colors), 64, 1508 border_colors); 1509} 1510 1511static void 1512anv_device_init_trivial_batch(struct anv_device *device) 1513{ 1514 anv_bo_init_new(&device->trivial_batch_bo, device, 4096); 1515 1516 if (device->instance->physicalDevice.has_exec_async) 1517 device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC; 1518 1519 if (device->instance->physicalDevice.use_softpin) 1520 device->trivial_batch_bo.flags |= EXEC_OBJECT_PINNED; 1521 1522 anv_vma_alloc(device, &device->trivial_batch_bo); 1523 1524 void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle, 1525 0, 4096, 0); 1526 1527 struct anv_batch batch = { 1528 .start = map, 1529 .next = map, 1530 .end = map + 4096, 1531 }; 1532 1533 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); 1534 anv_batch_emit(&batch, GEN7_MI_NOOP, noop); 1535 1536 if (!device->info.has_llc) 1537 gen_clflush_range(map, batch.next - map); 1538 1539 anv_gem_munmap(map, device->trivial_batch_bo.size); 1540} 1541 1542VkResult anv_EnumerateDeviceExtensionProperties( 1543 VkPhysicalDevice physicalDevice, 1544 const char* pLayerName, 1545 uint32_t* pPropertyCount, 1546 VkExtensionProperties* pProperties) 1547{ 1548 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); 1549 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); 1550 1551 for (int i = 0; i < ANV_DEVICE_EXTENSION_COUNT; i++) { 1552 if (device->supported_extensions.extensions[i]) { 1553 vk_outarray_append(&out, prop) { 1554 *prop = anv_device_extensions[i]; 1555 } 1556 } 1557 } 1558 1559 return vk_outarray_status(&out); 1560} 1561 1562static void 1563anv_device_init_dispatch(struct anv_device *device) 1564{ 1565 const struct anv_device_dispatch_table *genX_table; 1566 switch (device->info.gen) { 1567 case 11: 1568 genX_table = &gen11_device_dispatch_table; 1569 break; 1570 case 10: 1571 genX_table = &gen10_device_dispatch_table; 1572 break; 1573 case 9: 1574 genX_table = &gen9_device_dispatch_table; 1575 break; 1576 case 8: 1577 genX_table = &gen8_device_dispatch_table; 1578 break; 1579 case 7: 1580 if (device->info.is_haswell) 1581 genX_table = &gen75_device_dispatch_table; 1582 else 1583 genX_table = &gen7_device_dispatch_table; 1584 break; 1585 default: 1586 unreachable("unsupported gen\n"); 1587 } 1588 1589 for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) { 1590 /* Vulkan requires that entrypoints for extensions which have not been 1591 * enabled must not be advertised. 
1592 */ 1593 if (!anv_device_entrypoint_is_enabled(i, device->instance->app_info.api_version, 1594 &device->instance->enabled_extensions, 1595 &device->enabled_extensions)) { 1596 device->dispatch.entrypoints[i] = NULL; 1597 } else if (genX_table->entrypoints[i]) { 1598 device->dispatch.entrypoints[i] = genX_table->entrypoints[i]; 1599 } else { 1600 device->dispatch.entrypoints[i] = 1601 anv_device_dispatch_table.entrypoints[i]; 1602 } 1603 } 1604} 1605 1606static int 1607vk_priority_to_gen(int priority) 1608{ 1609 switch (priority) { 1610 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: 1611 return GEN_CONTEXT_LOW_PRIORITY; 1612 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: 1613 return GEN_CONTEXT_MEDIUM_PRIORITY; 1614 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: 1615 return GEN_CONTEXT_HIGH_PRIORITY; 1616 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT: 1617 return GEN_CONTEXT_REALTIME_PRIORITY; 1618 default: 1619 unreachable("Invalid priority"); 1620 } 1621} 1622 1623static void 1624anv_device_init_hiz_clear_value_bo(struct anv_device *device) 1625{ 1626 anv_bo_init_new(&device->hiz_clear_bo, device, 4096); 1627 1628 if (device->instance->physicalDevice.has_exec_async) 1629 device->hiz_clear_bo.flags |= EXEC_OBJECT_ASYNC; 1630 1631 if (device->instance->physicalDevice.use_softpin) 1632 device->hiz_clear_bo.flags |= EXEC_OBJECT_PINNED; 1633 1634 anv_vma_alloc(device, &device->hiz_clear_bo); 1635 1636 uint32_t *map = anv_gem_mmap(device, device->hiz_clear_bo.gem_handle, 1637 0, 4096, 0); 1638 1639 union isl_color_value hiz_clear = { .u32 = { 0, } }; 1640 hiz_clear.f32[0] = ANV_HZ_FC_VAL; 1641 1642 memcpy(map, hiz_clear.u32, sizeof(hiz_clear.u32)); 1643 anv_gem_munmap(map, device->hiz_clear_bo.size); 1644} 1645 1646VkResult anv_CreateDevice( 1647 VkPhysicalDevice physicalDevice, 1648 const VkDeviceCreateInfo* pCreateInfo, 1649 const VkAllocationCallbacks* pAllocator, 1650 VkDevice* pDevice) 1651{ 1652 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); 1653 VkResult result; 1654 struct anv_device *device; 1655 1656 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); 1657 1658 struct anv_device_extension_table enabled_extensions = { }; 1659 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 1660 int idx; 1661 for (idx = 0; idx < ANV_DEVICE_EXTENSION_COUNT; idx++) { 1662 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], 1663 anv_device_extensions[idx].extensionName) == 0) 1664 break; 1665 } 1666 1667 if (idx >= ANV_DEVICE_EXTENSION_COUNT) 1668 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 1669 1670 if (!physical_device->supported_extensions.extensions[idx]) 1671 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 1672 1673 enabled_extensions.extensions[idx] = true; 1674 } 1675 1676 /* Check enabled features */ 1677 if (pCreateInfo->pEnabledFeatures) { 1678 VkPhysicalDeviceFeatures supported_features; 1679 anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); 1680 VkBool32 *supported_feature = (VkBool32 *)&supported_features; 1681 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; 1682 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); 1683 for (uint32_t i = 0; i < num_features; i++) { 1684 if (enabled_feature[i] && !supported_feature[i]) 1685 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); 1686 } 1687 } 1688 1689 /* Check requested queues and fail if we are requested to create any 1690 * queues with flags we don't support. 
1691 */ 1692 assert(pCreateInfo->queueCreateInfoCount > 0); 1693 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 1694 if (pCreateInfo->pQueueCreateInfos[i].flags != 0) 1695 return vk_error(VK_ERROR_INITIALIZATION_FAILED); 1696 } 1697 1698 /* Check if client specified queue priority. */ 1699 const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority = 1700 vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext, 1701 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT); 1702 1703 VkQueueGlobalPriorityEXT priority = 1704 queue_priority ? queue_priority->globalPriority : 1705 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT; 1706 1707 device = vk_alloc2(&physical_device->instance->alloc, pAllocator, 1708 sizeof(*device), 8, 1709 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1710 if (!device) 1711 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1712 1713 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 1714 device->instance = physical_device->instance; 1715 device->chipset_id = physical_device->chipset_id; 1716 device->no_hw = physical_device->no_hw; 1717 device->_lost = false; 1718 1719 if (pAllocator) 1720 device->alloc = *pAllocator; 1721 else 1722 device->alloc = physical_device->instance->alloc; 1723 1724 /* XXX(chadv): Can we dup() physicalDevice->fd here? */ 1725 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); 1726 if (device->fd == -1) { 1727 result = vk_error(VK_ERROR_INITIALIZATION_FAILED); 1728 goto fail_device; 1729 } 1730 1731 device->context_id = anv_gem_create_context(device); 1732 if (device->context_id == -1) { 1733 result = vk_error(VK_ERROR_INITIALIZATION_FAILED); 1734 goto fail_fd; 1735 } 1736 1737 if (physical_device->use_softpin) { 1738 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) { 1739 result = vk_error(VK_ERROR_INITIALIZATION_FAILED); 1740 goto fail_fd; 1741 } 1742 1743 /* keep the page with address zero out of the allocator */ 1744 util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE); 1745 device->vma_lo_available = 1746 physical_device->memory.heaps[physical_device->memory.heap_count - 1].size; 1747 1748 /* Leave the last 4GiB out of the high vma range, so that no state base 1749 * address + size can overflow 48 bits. For more information see the 1750 * comment about Wa32bitGeneralStateOffset in anv_allocator.c 1751 */ 1752 util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, 1753 HIGH_HEAP_SIZE); 1754 device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 : 1755 physical_device->memory.heaps[0].size; 1756 } 1757 1758 /* As per spec, the driver implementation may deny requests to acquire 1759 * a priority above the default priority (MEDIUM) if the caller does not 1760 * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT 1761 * is returned. 1762 */ 1763 if (physical_device->has_context_priority) { 1764 int err = anv_gem_set_context_param(device->fd, device->context_id, 1765 I915_CONTEXT_PARAM_PRIORITY, 1766 vk_priority_to_gen(priority)); 1767 if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) { 1768 result = vk_error(VK_ERROR_NOT_PERMITTED_EXT); 1769 goto fail_fd; 1770 } 1771 } 1772 1773 device->info = physical_device->info; 1774 device->isl_dev = physical_device->isl_dev; 1775 1776 /* On Broadwell and later, we can use batch chaining to more efficiently 1777 * implement growing command buffers. Prior to Haswell, the kernel 1778 * command parser gets in the way and we have to fall back to growing 1779 * the batch. 
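    * (With chaining, a full batch simply gets a fresh BO linked onto its
    * end with MI_BATCH_BUFFER_START; without it, the batch BO has to be
    * reallocated at a larger size and the recorded commands copied over.)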
 */
   device->can_chain_batches = device->info.gen >= 8;

   device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
      pCreateInfo->pEnabledFeatures->robustBufferAccess;
   device->enabled_extensions = enabled_extensions;

   anv_device_init_dispatch(device);

   if (pthread_mutex_init(&device->mutex, NULL) != 0) {
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_context_id;
   }

   pthread_condattr_t condattr;
   if (pthread_condattr_init(&condattr) != 0) {
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_mutex;
   }
   if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_mutex;
   }
   /* Create queue_submit with the CLOCK_MONOTONIC attribute so that timed
    * waits on it are unaffected by wall-clock adjustments.
    */
   if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_mutex;
   }
   pthread_condattr_destroy(&condattr);

   uint64_t bo_flags =
      (physical_device->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
      (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0) |
      (physical_device->use_softpin ? EXEC_OBJECT_PINNED : 0);

   anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);

   result = anv_bo_cache_init(&device->bo_cache);
   if (result != VK_SUCCESS)
      goto fail_batch_bo_pool;

   if (!physical_device->use_softpin)
      bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

   result = anv_state_pool_init(&device->dynamic_state_pool, device,
                                DYNAMIC_STATE_POOL_MIN_ADDRESS,
                                16384,
                                bo_flags);
   if (result != VK_SUCCESS)
      goto fail_bo_cache;

   result = anv_state_pool_init(&device->instruction_state_pool, device,
                                INSTRUCTION_STATE_POOL_MIN_ADDRESS,
                                16384,
                                bo_flags);
   if (result != VK_SUCCESS)
      goto fail_dynamic_state_pool;

   result = anv_state_pool_init(&device->surface_state_pool, device,
                                SURFACE_STATE_POOL_MIN_ADDRESS,
                                4096,
                                bo_flags);
   if (result != VK_SUCCESS)
      goto fail_instruction_state_pool;

   if (physical_device->use_softpin) {
      result = anv_state_pool_init(&device->binding_table_pool, device,
                                   BINDING_TABLE_POOL_MIN_ADDRESS,
                                   4096,
                                   bo_flags);
      if (result != VK_SUCCESS)
         goto fail_surface_state_pool;
   }

   result = anv_bo_init_new(&device->workaround_bo, device, 1024);
   if (result != VK_SUCCESS)
      goto fail_binding_table_pool;

   if (physical_device->use_softpin)
      device->workaround_bo.flags |= EXEC_OBJECT_PINNED;

   if (!anv_vma_alloc(device, &device->workaround_bo)) {
      /* anv_vma_alloc() doesn't produce a VkResult, so report address-space
       * exhaustion explicitly instead of returning a stale VK_SUCCESS.
       */
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail_workaround_bo;
   }

   anv_device_init_trivial_batch(device);

   if (device->info.gen >= 10)
      anv_device_init_hiz_clear_value_bo(device);

   anv_scratch_pool_init(device, &device->scratch_pool);

   anv_queue_init(device, &device->queue);

   switch (device->info.gen) {
   case 7:
      if (!device->info.is_haswell)
         result = gen7_init_device_state(device);
      else
         result = gen75_init_device_state(device);
      break;
   case 8:
      result = gen8_init_device_state(device);
      break;
   case 9:
      result = gen9_init_device_state(device);
      break;
   case 10:
      result = gen10_init_device_state(device);
      break;
   case 11:
      result =
gen11_init_device_state(device); 1893 break; 1894 default: 1895 /* Shouldn't get here as we don't create physical devices for any other 1896 * gens. */ 1897 unreachable("unhandled gen"); 1898 } 1899 if (result != VK_SUCCESS) 1900 goto fail_workaround_bo; 1901 1902 anv_pipeline_cache_init(&device->default_pipeline_cache, device, true); 1903 1904 anv_device_init_blorp(device); 1905 1906 anv_device_init_border_colors(device); 1907 1908 *pDevice = anv_device_to_handle(device); 1909 1910 return VK_SUCCESS; 1911 1912 fail_workaround_bo: 1913 anv_queue_finish(&device->queue); 1914 anv_scratch_pool_finish(device, &device->scratch_pool); 1915 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); 1916 anv_gem_close(device, device->workaround_bo.gem_handle); 1917 fail_binding_table_pool: 1918 if (physical_device->use_softpin) 1919 anv_state_pool_finish(&device->binding_table_pool); 1920 fail_surface_state_pool: 1921 anv_state_pool_finish(&device->surface_state_pool); 1922 fail_instruction_state_pool: 1923 anv_state_pool_finish(&device->instruction_state_pool); 1924 fail_dynamic_state_pool: 1925 anv_state_pool_finish(&device->dynamic_state_pool); 1926 fail_bo_cache: 1927 anv_bo_cache_finish(&device->bo_cache); 1928 fail_batch_bo_pool: 1929 anv_bo_pool_finish(&device->batch_bo_pool); 1930 pthread_cond_destroy(&device->queue_submit); 1931 fail_mutex: 1932 pthread_mutex_destroy(&device->mutex); 1933 fail_context_id: 1934 anv_gem_destroy_context(device, device->context_id); 1935 fail_fd: 1936 close(device->fd); 1937 fail_device: 1938 vk_free(&device->alloc, device); 1939 1940 return result; 1941} 1942 1943void anv_DestroyDevice( 1944 VkDevice _device, 1945 const VkAllocationCallbacks* pAllocator) 1946{ 1947 ANV_FROM_HANDLE(anv_device, device, _device); 1948 struct anv_physical_device *physical_device; 1949 1950 if (!device) 1951 return; 1952 1953 physical_device = &device->instance->physicalDevice; 1954 1955 anv_device_finish_blorp(device); 1956 1957 anv_pipeline_cache_finish(&device->default_pipeline_cache); 1958 1959 anv_queue_finish(&device->queue); 1960 1961#ifdef HAVE_VALGRIND 1962 /* We only need to free these to prevent valgrind errors. The backing 1963 * BO will go away in a couple of lines so we don't actually leak. 
1964 */ 1965 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); 1966#endif 1967 1968 anv_scratch_pool_finish(device, &device->scratch_pool); 1969 1970 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); 1971 anv_vma_free(device, &device->workaround_bo); 1972 anv_gem_close(device, device->workaround_bo.gem_handle); 1973 1974 anv_vma_free(device, &device->trivial_batch_bo); 1975 anv_gem_close(device, device->trivial_batch_bo.gem_handle); 1976 if (device->info.gen >= 10) 1977 anv_gem_close(device, device->hiz_clear_bo.gem_handle); 1978 1979 if (physical_device->use_softpin) 1980 anv_state_pool_finish(&device->binding_table_pool); 1981 anv_state_pool_finish(&device->surface_state_pool); 1982 anv_state_pool_finish(&device->instruction_state_pool); 1983 anv_state_pool_finish(&device->dynamic_state_pool); 1984 1985 anv_bo_cache_finish(&device->bo_cache); 1986 1987 anv_bo_pool_finish(&device->batch_bo_pool); 1988 1989 pthread_cond_destroy(&device->queue_submit); 1990 pthread_mutex_destroy(&device->mutex); 1991 1992 anv_gem_destroy_context(device, device->context_id); 1993 1994 close(device->fd); 1995 1996 vk_free(&device->alloc, device); 1997} 1998 1999VkResult anv_EnumerateInstanceLayerProperties( 2000 uint32_t* pPropertyCount, 2001 VkLayerProperties* pProperties) 2002{ 2003 if (pProperties == NULL) { 2004 *pPropertyCount = 0; 2005 return VK_SUCCESS; 2006 } 2007 2008 /* None supported at this time */ 2009 return vk_error(VK_ERROR_LAYER_NOT_PRESENT); 2010} 2011 2012VkResult anv_EnumerateDeviceLayerProperties( 2013 VkPhysicalDevice physicalDevice, 2014 uint32_t* pPropertyCount, 2015 VkLayerProperties* pProperties) 2016{ 2017 if (pProperties == NULL) { 2018 *pPropertyCount = 0; 2019 return VK_SUCCESS; 2020 } 2021 2022 /* None supported at this time */ 2023 return vk_error(VK_ERROR_LAYER_NOT_PRESENT); 2024} 2025 2026void anv_GetDeviceQueue( 2027 VkDevice _device, 2028 uint32_t queueNodeIndex, 2029 uint32_t queueIndex, 2030 VkQueue* pQueue) 2031{ 2032 ANV_FROM_HANDLE(anv_device, device, _device); 2033 2034 assert(queueIndex == 0); 2035 2036 *pQueue = anv_queue_to_handle(&device->queue); 2037} 2038 2039void anv_GetDeviceQueue2( 2040 VkDevice _device, 2041 const VkDeviceQueueInfo2* pQueueInfo, 2042 VkQueue* pQueue) 2043{ 2044 ANV_FROM_HANDLE(anv_device, device, _device); 2045 2046 assert(pQueueInfo->queueIndex == 0); 2047 2048 if (pQueueInfo->flags == device->queue.flags) 2049 *pQueue = anv_queue_to_handle(&device->queue); 2050 else 2051 *pQueue = NULL; 2052} 2053 2054VkResult 2055_anv_device_set_lost(struct anv_device *device, 2056 const char *file, int line, 2057 const char *msg, ...) 2058{ 2059 VkResult err; 2060 va_list ap; 2061 2062 device->_lost = true; 2063 2064 va_start(ap, msg); 2065 err = __vk_errorv(device->instance, device, 2066 VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, 2067 VK_ERROR_DEVICE_LOST, file, line, msg, ap); 2068 va_end(ap); 2069 2070 if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false)) 2071 abort(); 2072 2073 return err; 2074} 2075 2076VkResult 2077anv_device_query_status(struct anv_device *device) 2078{ 2079 /* This isn't likely as most of the callers of this function already check 2080 * for it. However, it doesn't hurt to check and it potentially lets us 2081 * avoid an ioctl. 2082 */ 2083 if (anv_device_is_lost(device)) 2084 return VK_ERROR_DEVICE_LOST; 2085 2086 uint32_t active, pending; 2087 int ret = anv_gem_gpu_get_reset_stats(device, &active, &pending); 2088 if (ret == -1) { 2089 /* We don't know the real error. 
*/ 2090 return anv_device_set_lost(device, "get_reset_stats failed: %m"); 2091 } 2092 2093 if (active) { 2094 return anv_device_set_lost(device, "GPU hung on one of our command buffers"); 2095 } else if (pending) { 2096 return anv_device_set_lost(device, "GPU hung with commands in-flight"); 2097 } 2098 2099 return VK_SUCCESS; 2100} 2101 2102VkResult 2103anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo) 2104{ 2105 /* Note: This only returns whether or not the BO is in use by an i915 GPU. 2106 * Other usages of the BO (such as on different hardware) will not be 2107 * flagged as "busy" by this ioctl. Use with care. 2108 */ 2109 int ret = anv_gem_busy(device, bo->gem_handle); 2110 if (ret == 1) { 2111 return VK_NOT_READY; 2112 } else if (ret == -1) { 2113 /* We don't know the real error. */ 2114 return anv_device_set_lost(device, "gem wait failed: %m"); 2115 } 2116 2117 /* Query for device status after the busy call. If the BO we're checking 2118 * got caught in a GPU hang we don't want to return VK_SUCCESS to the 2119 * client because it clearly doesn't have valid data. Yes, this most 2120 * likely means an ioctl, but we just did an ioctl to query the busy status 2121 * so it's no great loss. 2122 */ 2123 return anv_device_query_status(device); 2124} 2125 2126VkResult 2127anv_device_wait(struct anv_device *device, struct anv_bo *bo, 2128 int64_t timeout) 2129{ 2130 int ret = anv_gem_wait(device, bo->gem_handle, &timeout); 2131 if (ret == -1 && errno == ETIME) { 2132 return VK_TIMEOUT; 2133 } else if (ret == -1) { 2134 /* We don't know the real error. */ 2135 return anv_device_set_lost(device, "gem wait failed: %m"); 2136 } 2137 2138 /* Query for device status after the wait. If the BO we're waiting on got 2139 * caught in a GPU hang we don't want to return VK_SUCCESS to the client 2140 * because it clearly doesn't have valid data. Yes, this most likely means 2141 * an ioctl, but we just did an ioctl to wait so it's no great loss. 
2142 */ 2143 return anv_device_query_status(device); 2144} 2145 2146VkResult anv_DeviceWaitIdle( 2147 VkDevice _device) 2148{ 2149 ANV_FROM_HANDLE(anv_device, device, _device); 2150 if (anv_device_is_lost(device)) 2151 return VK_ERROR_DEVICE_LOST; 2152 2153 struct anv_batch batch; 2154 2155 uint32_t cmds[8]; 2156 batch.start = batch.next = cmds; 2157 batch.end = (void *) cmds + sizeof(cmds); 2158 2159 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); 2160 anv_batch_emit(&batch, GEN7_MI_NOOP, noop); 2161 2162 return anv_device_submit_simple_batch(device, &batch); 2163} 2164 2165bool 2166anv_vma_alloc(struct anv_device *device, struct anv_bo *bo) 2167{ 2168 if (!(bo->flags & EXEC_OBJECT_PINNED)) 2169 return true; 2170 2171 pthread_mutex_lock(&device->vma_mutex); 2172 2173 bo->offset = 0; 2174 2175 if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS && 2176 device->vma_hi_available >= bo->size) { 2177 uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096); 2178 if (addr) { 2179 bo->offset = gen_canonical_address(addr); 2180 assert(addr == gen_48b_address(bo->offset)); 2181 device->vma_hi_available -= bo->size; 2182 } 2183 } 2184 2185 if (bo->offset == 0 && device->vma_lo_available >= bo->size) { 2186 uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096); 2187 if (addr) { 2188 bo->offset = gen_canonical_address(addr); 2189 assert(addr == gen_48b_address(bo->offset)); 2190 device->vma_lo_available -= bo->size; 2191 } 2192 } 2193 2194 pthread_mutex_unlock(&device->vma_mutex); 2195 2196 return bo->offset != 0; 2197} 2198 2199void 2200anv_vma_free(struct anv_device *device, struct anv_bo *bo) 2201{ 2202 if (!(bo->flags & EXEC_OBJECT_PINNED)) 2203 return; 2204 2205 const uint64_t addr_48b = gen_48b_address(bo->offset); 2206 2207 pthread_mutex_lock(&device->vma_mutex); 2208 2209 if (addr_48b >= LOW_HEAP_MIN_ADDRESS && 2210 addr_48b <= LOW_HEAP_MAX_ADDRESS) { 2211 util_vma_heap_free(&device->vma_lo, addr_48b, bo->size); 2212 device->vma_lo_available += bo->size; 2213 } else { 2214 assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS && 2215 addr_48b <= HIGH_HEAP_MAX_ADDRESS); 2216 util_vma_heap_free(&device->vma_hi, addr_48b, bo->size); 2217 device->vma_hi_available += bo->size; 2218 } 2219 2220 pthread_mutex_unlock(&device->vma_mutex); 2221 2222 bo->offset = 0; 2223} 2224 2225VkResult 2226anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) 2227{ 2228 uint32_t gem_handle = anv_gem_create(device, size); 2229 if (!gem_handle) 2230 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); 2231 2232 anv_bo_init(bo, gem_handle, size); 2233 2234 return VK_SUCCESS; 2235} 2236 2237VkResult anv_AllocateMemory( 2238 VkDevice _device, 2239 const VkMemoryAllocateInfo* pAllocateInfo, 2240 const VkAllocationCallbacks* pAllocator, 2241 VkDeviceMemory* pMem) 2242{ 2243 ANV_FROM_HANDLE(anv_device, device, _device); 2244 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 2245 struct anv_device_memory *mem; 2246 VkResult result = VK_SUCCESS; 2247 2248 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); 2249 2250 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */ 2251 assert(pAllocateInfo->allocationSize > 0); 2252 2253 if (pAllocateInfo->allocationSize > MAX_MEMORY_ALLOCATION_SIZE) 2254 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2255 2256 /* FINISHME: Fail if allocation request exceeds heap size. 
*/ 2257 2258 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, 2259 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2260 if (mem == NULL) 2261 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 2262 2263 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count); 2264 mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex]; 2265 mem->map = NULL; 2266 mem->map_size = 0; 2267 2268 uint64_t bo_flags = 0; 2269 2270 assert(mem->type->heapIndex < pdevice->memory.heap_count); 2271 if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses) 2272 bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2273 2274 const struct wsi_memory_allocate_info *wsi_info = 2275 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA); 2276 if (wsi_info && wsi_info->implicit_sync) { 2277 /* We need to set the WRITE flag on window system buffers so that GEM 2278 * will know we're writing to them and synchronize uses on other rings 2279 * (eg if the display server uses the blitter ring). 2280 */ 2281 bo_flags |= EXEC_OBJECT_WRITE; 2282 } else if (pdevice->has_exec_async) { 2283 bo_flags |= EXEC_OBJECT_ASYNC; 2284 } 2285 2286 if (pdevice->use_softpin) 2287 bo_flags |= EXEC_OBJECT_PINNED; 2288 2289 const VkImportMemoryFdInfoKHR *fd_info = 2290 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); 2291 2292 /* The Vulkan spec permits handleType to be 0, in which case the struct is 2293 * ignored. 2294 */ 2295 if (fd_info && fd_info->handleType) { 2296 /* At the moment, we support only the below handle types. */ 2297 assert(fd_info->handleType == 2298 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 2299 fd_info->handleType == 2300 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 2301 2302 result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd, 2303 bo_flags | ANV_BO_EXTERNAL, &mem->bo); 2304 if (result != VK_SUCCESS) 2305 goto fail; 2306 2307 VkDeviceSize aligned_alloc_size = 2308 align_u64(pAllocateInfo->allocationSize, 4096); 2309 2310 /* For security purposes, we reject importing the bo if it's smaller 2311 * than the requested allocation size. This prevents a malicious client 2312 * from passing a buffer to a trusted client, lying about the size, and 2313 * telling the trusted client to try and texture from an image that goes 2314 * out-of-bounds. This sort of thing could lead to GPU hangs or worse 2315 * in the trusted client. The trusted client can protect itself against 2316 * this sort of attack but only if it can trust the buffer size. 2317 */ 2318 if (mem->bo->size < aligned_alloc_size) { 2319 result = vk_errorf(device->instance, device, 2320 VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, 2321 "aligned allocationSize too large for " 2322 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: " 2323 "%"PRIu64"B > %"PRIu64"B", 2324 aligned_alloc_size, mem->bo->size); 2325 anv_bo_cache_release(device, &device->bo_cache, mem->bo); 2326 goto fail; 2327 } 2328 2329 /* From the Vulkan spec: 2330 * 2331 * "Importing memory from a file descriptor transfers ownership of 2332 * the file descriptor from the application to the Vulkan 2333 * implementation. The application must not perform any operations on 2334 * the file descriptor after a successful import." 2335 * 2336 * If the import fails, we leave the file descriptor open. 
2337 */ 2338 close(fd_info->fd); 2339 } else { 2340 const VkExportMemoryAllocateInfoKHR *fd_info = 2341 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); 2342 if (fd_info && fd_info->handleTypes) 2343 bo_flags |= ANV_BO_EXTERNAL; 2344 2345 result = anv_bo_cache_alloc(device, &device->bo_cache, 2346 pAllocateInfo->allocationSize, bo_flags, 2347 &mem->bo); 2348 if (result != VK_SUCCESS) 2349 goto fail; 2350 2351 const VkMemoryDedicatedAllocateInfoKHR *dedicated_info = 2352 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); 2353 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { 2354 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); 2355 2356 /* Some legacy (non-modifiers) consumers need the tiling to be set on 2357 * the BO. In this case, we have a dedicated allocation. 2358 */ 2359 if (image->needs_set_tiling) { 2360 const uint32_t i915_tiling = 2361 isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); 2362 int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, 2363 image->planes[0].surface.isl.row_pitch_B, 2364 i915_tiling); 2365 if (ret) { 2366 anv_bo_cache_release(device, &device->bo_cache, mem->bo); 2367 return vk_errorf(device->instance, NULL, 2368 VK_ERROR_OUT_OF_DEVICE_MEMORY, 2369 "failed to set BO tiling: %m"); 2370 } 2371 } 2372 } 2373 } 2374 2375 *pMem = anv_device_memory_to_handle(mem); 2376 2377 return VK_SUCCESS; 2378 2379 fail: 2380 vk_free2(&device->alloc, pAllocator, mem); 2381 2382 return result; 2383} 2384 2385VkResult anv_GetMemoryFdKHR( 2386 VkDevice device_h, 2387 const VkMemoryGetFdInfoKHR* pGetFdInfo, 2388 int* pFd) 2389{ 2390 ANV_FROM_HANDLE(anv_device, dev, device_h); 2391 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory); 2392 2393 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); 2394 2395 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 2396 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 2397 2398 return anv_bo_cache_export(dev, &dev->bo_cache, mem->bo, pFd); 2399} 2400 2401VkResult anv_GetMemoryFdPropertiesKHR( 2402 VkDevice _device, 2403 VkExternalMemoryHandleTypeFlagBitsKHR handleType, 2404 int fd, 2405 VkMemoryFdPropertiesKHR* pMemoryFdProperties) 2406{ 2407 ANV_FROM_HANDLE(anv_device, device, _device); 2408 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 2409 2410 switch (handleType) { 2411 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: 2412 /* dma-buf can be imported as any memory type */ 2413 pMemoryFdProperties->memoryTypeBits = 2414 (1 << pdevice->memory.type_count) - 1; 2415 return VK_SUCCESS; 2416 2417 default: 2418 /* The valid usage section for this function says: 2419 * 2420 * "handleType must not be one of the handle types defined as 2421 * opaque." 2422 * 2423 * So opaque handle types fall into the default "unsupported" case. 
2424 */ 2425 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); 2426 } 2427} 2428 2429void anv_FreeMemory( 2430 VkDevice _device, 2431 VkDeviceMemory _mem, 2432 const VkAllocationCallbacks* pAllocator) 2433{ 2434 ANV_FROM_HANDLE(anv_device, device, _device); 2435 ANV_FROM_HANDLE(anv_device_memory, mem, _mem); 2436 2437 if (mem == NULL) 2438 return; 2439 2440 if (mem->map) 2441 anv_UnmapMemory(_device, _mem); 2442 2443 anv_bo_cache_release(device, &device->bo_cache, mem->bo); 2444 2445 vk_free2(&device->alloc, pAllocator, mem); 2446} 2447 2448VkResult anv_MapMemory( 2449 VkDevice _device, 2450 VkDeviceMemory _memory, 2451 VkDeviceSize offset, 2452 VkDeviceSize size, 2453 VkMemoryMapFlags flags, 2454 void** ppData) 2455{ 2456 ANV_FROM_HANDLE(anv_device, device, _device); 2457 ANV_FROM_HANDLE(anv_device_memory, mem, _memory); 2458 2459 if (mem == NULL) { 2460 *ppData = NULL; 2461 return VK_SUCCESS; 2462 } 2463 2464 if (size == VK_WHOLE_SIZE) 2465 size = mem->bo->size - offset; 2466 2467 /* From the Vulkan spec version 1.0.32 docs for MapMemory: 2468 * 2469 * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 2470 * assert(size != 0); 2471 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or 2472 * equal to the size of the memory minus offset 2473 */ 2474 assert(size > 0); 2475 assert(offset + size <= mem->bo->size); 2476 2477 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only 2478 * takes a VkDeviceMemory pointer, it seems like only one map of the memory 2479 * at a time is valid. We could just mmap up front and return an offset 2480 * pointer here, but that may exhaust virtual memory on 32 bit 2481 * userspace. */ 2482 2483 uint32_t gem_flags = 0; 2484 2485 if (!device->info.has_llc && 2486 (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) 2487 gem_flags |= I915_MMAP_WC; 2488 2489 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. 
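    * For example, a request with offset = 5000 and size = 100 becomes
    * map_offset = 4096 and map_size = 4096 (1004 rounded up to a whole
    * page), and the pointer returned to the caller is map + 904.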
*/ 2490 uint64_t map_offset = offset & ~4095ull; 2491 assert(offset >= map_offset); 2492 uint64_t map_size = (offset + size) - map_offset; 2493 2494 /* Let's map whole pages */ 2495 map_size = align_u64(map_size, 4096); 2496 2497 void *map = anv_gem_mmap(device, mem->bo->gem_handle, 2498 map_offset, map_size, gem_flags); 2499 if (map == MAP_FAILED) 2500 return vk_error(VK_ERROR_MEMORY_MAP_FAILED); 2501 2502 mem->map = map; 2503 mem->map_size = map_size; 2504 2505 *ppData = mem->map + (offset - map_offset); 2506 2507 return VK_SUCCESS; 2508} 2509 2510void anv_UnmapMemory( 2511 VkDevice _device, 2512 VkDeviceMemory _memory) 2513{ 2514 ANV_FROM_HANDLE(anv_device_memory, mem, _memory); 2515 2516 if (mem == NULL) 2517 return; 2518 2519 anv_gem_munmap(mem->map, mem->map_size); 2520 2521 mem->map = NULL; 2522 mem->map_size = 0; 2523} 2524 2525static void 2526clflush_mapped_ranges(struct anv_device *device, 2527 uint32_t count, 2528 const VkMappedMemoryRange *ranges) 2529{ 2530 for (uint32_t i = 0; i < count; i++) { 2531 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); 2532 if (ranges[i].offset >= mem->map_size) 2533 continue; 2534 2535 gen_clflush_range(mem->map + ranges[i].offset, 2536 MIN2(ranges[i].size, mem->map_size - ranges[i].offset)); 2537 } 2538} 2539 2540VkResult anv_FlushMappedMemoryRanges( 2541 VkDevice _device, 2542 uint32_t memoryRangeCount, 2543 const VkMappedMemoryRange* pMemoryRanges) 2544{ 2545 ANV_FROM_HANDLE(anv_device, device, _device); 2546 2547 if (device->info.has_llc) 2548 return VK_SUCCESS; 2549 2550 /* Make sure the writes we're flushing have landed. */ 2551 __builtin_ia32_mfence(); 2552 2553 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); 2554 2555 return VK_SUCCESS; 2556} 2557 2558VkResult anv_InvalidateMappedMemoryRanges( 2559 VkDevice _device, 2560 uint32_t memoryRangeCount, 2561 const VkMappedMemoryRange* pMemoryRanges) 2562{ 2563 ANV_FROM_HANDLE(anv_device, device, _device); 2564 2565 if (device->info.has_llc) 2566 return VK_SUCCESS; 2567 2568 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); 2569 2570 /* Make sure no reads get moved up above the invalidate. */ 2571 __builtin_ia32_mfence(); 2572 2573 return VK_SUCCESS; 2574} 2575 2576void anv_GetBufferMemoryRequirements( 2577 VkDevice _device, 2578 VkBuffer _buffer, 2579 VkMemoryRequirements* pMemoryRequirements) 2580{ 2581 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); 2582 ANV_FROM_HANDLE(anv_device, device, _device); 2583 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 2584 2585 /* The Vulkan spec (git aaed022) says: 2586 * 2587 * memoryTypeBits is a bitfield and contains one bit set for every 2588 * supported memory type for the resource. The bit `1<<i` is set if and 2589 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties 2590 * structure for the physical device is supported. 
 */
   uint32_t memory_types = 0;
   for (uint32_t i = 0; i < pdevice->memory.type_count; i++) {
      uint32_t valid_usage = pdevice->memory.types[i].valid_buffer_usage;
      if ((valid_usage & buffer->usage) == buffer->usage)
         memory_types |= (1u << i);
   }

   /* Base alignment requirement of a cache line */
   uint32_t alignment = 16;

   /* We need an alignment of 32 for pushing UBOs */
   if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
      alignment = MAX2(alignment, 32);

   pMemoryRequirements->size = buffer->size;
   pMemoryRequirements->alignment = alignment;

   /* Storage and uniform buffers should have their size aligned to
    * 32 bits to avoid boundary checks when the last DWord is not complete.
    * This ensures that no internal padding is needed for 16-bit types.
    */
   if (device->robust_buffer_access &&
       (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
        buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
      pMemoryRequirements->size = align_u64(buffer->size, 4);

   pMemoryRequirements->memoryTypeBits = memory_types;
}

void anv_GetBufferMemoryRequirements2(
    VkDevice                                    _device,
    const VkBufferMemoryRequirementsInfo2*      pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   anv_GetBufferMemoryRequirements(_device, pInfo->buffer,
                                   &pMemoryRequirements->memoryRequirements);

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *requirements = (void *)ext;
         requirements->prefersDedicatedAllocation = VK_FALSE;
         requirements->requiresDedicatedAllocation = VK_FALSE;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

void anv_GetImageMemoryRequirements(
    VkDevice                                    _device,
    VkImage                                     _image,
    VkMemoryRequirements*                       pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_image, image, _image);
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   /* The Vulkan spec (git aaed022) says:
    *
    *    memoryTypeBits is a bitfield and contains one bit set for every
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    *
    * All types are currently supported for images.
2663 */ 2664 uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; 2665 2666 pMemoryRequirements->size = image->size; 2667 pMemoryRequirements->alignment = image->alignment; 2668 pMemoryRequirements->memoryTypeBits = memory_types; 2669} 2670 2671void anv_GetImageMemoryRequirements2( 2672 VkDevice _device, 2673 const VkImageMemoryRequirementsInfo2* pInfo, 2674 VkMemoryRequirements2* pMemoryRequirements) 2675{ 2676 ANV_FROM_HANDLE(anv_device, device, _device); 2677 ANV_FROM_HANDLE(anv_image, image, pInfo->image); 2678 2679 anv_GetImageMemoryRequirements(_device, pInfo->image, 2680 &pMemoryRequirements->memoryRequirements); 2681 2682 vk_foreach_struct_const(ext, pInfo->pNext) { 2683 switch (ext->sType) { 2684 case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: { 2685 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 2686 const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs = 2687 (const VkImagePlaneMemoryRequirementsInfoKHR *) ext; 2688 uint32_t plane = anv_image_aspect_to_plane(image->aspects, 2689 plane_reqs->planeAspect); 2690 2691 assert(image->planes[plane].offset == 0); 2692 2693 /* The Vulkan spec (git aaed022) says: 2694 * 2695 * memoryTypeBits is a bitfield and contains one bit set for every 2696 * supported memory type for the resource. The bit `1<<i` is set 2697 * if and only if the memory type `i` in the 2698 * VkPhysicalDeviceMemoryProperties structure for the physical 2699 * device is supported. 2700 * 2701 * All types are currently supported for images. 2702 */ 2703 pMemoryRequirements->memoryRequirements.memoryTypeBits = 2704 (1ull << pdevice->memory.type_count) - 1; 2705 2706 pMemoryRequirements->memoryRequirements.size = image->planes[plane].size; 2707 pMemoryRequirements->memoryRequirements.alignment = 2708 image->planes[plane].alignment; 2709 break; 2710 } 2711 2712 default: 2713 anv_debug_ignored_stype(ext->sType); 2714 break; 2715 } 2716 } 2717 2718 vk_foreach_struct(ext, pMemoryRequirements->pNext) { 2719 switch (ext->sType) { 2720 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 2721 VkMemoryDedicatedRequirements *requirements = (void *)ext; 2722 if (image->needs_set_tiling) { 2723 /* If we need to set the tiling for external consumers, we need a 2724 * dedicated allocation. 2725 * 2726 * See also anv_AllocateMemory. 
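             * (That is where the tiling actually gets applied: the
             * dedicated-allocation path there calls anv_gem_set_tiling()
             * on the newly allocated BO.)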
2727 */ 2728 requirements->prefersDedicatedAllocation = VK_TRUE; 2729 requirements->requiresDedicatedAllocation = VK_TRUE; 2730 } else { 2731 requirements->prefersDedicatedAllocation = VK_FALSE; 2732 requirements->requiresDedicatedAllocation = VK_FALSE; 2733 } 2734 break; 2735 } 2736 2737 default: 2738 anv_debug_ignored_stype(ext->sType); 2739 break; 2740 } 2741 } 2742} 2743 2744void anv_GetImageSparseMemoryRequirements( 2745 VkDevice device, 2746 VkImage image, 2747 uint32_t* pSparseMemoryRequirementCount, 2748 VkSparseImageMemoryRequirements* pSparseMemoryRequirements) 2749{ 2750 *pSparseMemoryRequirementCount = 0; 2751} 2752 2753void anv_GetImageSparseMemoryRequirements2( 2754 VkDevice device, 2755 const VkImageSparseMemoryRequirementsInfo2* pInfo, 2756 uint32_t* pSparseMemoryRequirementCount, 2757 VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) 2758{ 2759 *pSparseMemoryRequirementCount = 0; 2760} 2761 2762void anv_GetDeviceMemoryCommitment( 2763 VkDevice device, 2764 VkDeviceMemory memory, 2765 VkDeviceSize* pCommittedMemoryInBytes) 2766{ 2767 *pCommittedMemoryInBytes = 0; 2768} 2769 2770static void 2771anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo) 2772{ 2773 ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory); 2774 ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer); 2775 2776 assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO); 2777 2778 if (mem) { 2779 assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage); 2780 buffer->address = (struct anv_address) { 2781 .bo = mem->bo, 2782 .offset = pBindInfo->memoryOffset, 2783 }; 2784 } else { 2785 buffer->address = ANV_NULL_ADDRESS; 2786 } 2787} 2788 2789VkResult anv_BindBufferMemory( 2790 VkDevice device, 2791 VkBuffer buffer, 2792 VkDeviceMemory memory, 2793 VkDeviceSize memoryOffset) 2794{ 2795 anv_bind_buffer_memory( 2796 &(VkBindBufferMemoryInfo) { 2797 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 2798 .buffer = buffer, 2799 .memory = memory, 2800 .memoryOffset = memoryOffset, 2801 }); 2802 2803 return VK_SUCCESS; 2804} 2805 2806VkResult anv_BindBufferMemory2( 2807 VkDevice device, 2808 uint32_t bindInfoCount, 2809 const VkBindBufferMemoryInfo* pBindInfos) 2810{ 2811 for (uint32_t i = 0; i < bindInfoCount; i++) 2812 anv_bind_buffer_memory(&pBindInfos[i]); 2813 2814 return VK_SUCCESS; 2815} 2816 2817VkResult anv_QueueBindSparse( 2818 VkQueue _queue, 2819 uint32_t bindInfoCount, 2820 const VkBindSparseInfo* pBindInfo, 2821 VkFence fence) 2822{ 2823 ANV_FROM_HANDLE(anv_queue, queue, _queue); 2824 if (anv_device_is_lost(queue->device)) 2825 return VK_ERROR_DEVICE_LOST; 2826 2827 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); 2828} 2829 2830// Event functions 2831 2832VkResult anv_CreateEvent( 2833 VkDevice _device, 2834 const VkEventCreateInfo* pCreateInfo, 2835 const VkAllocationCallbacks* pAllocator, 2836 VkEvent* pEvent) 2837{ 2838 ANV_FROM_HANDLE(anv_device, device, _device); 2839 struct anv_state state; 2840 struct anv_event *event; 2841 2842 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); 2843 2844 state = anv_state_pool_alloc(&device->dynamic_state_pool, 2845 sizeof(*event), 8); 2846 event = state.map; 2847 event->state = state; 2848 event->semaphore = VK_EVENT_RESET; 2849 2850 if (!device->info.has_llc) { 2851 /* Make sure the writes we're flushing have landed. 
*/ 2852 __builtin_ia32_mfence(); 2853 __builtin_ia32_clflush(event); 2854 } 2855 2856 *pEvent = anv_event_to_handle(event); 2857 2858 return VK_SUCCESS; 2859} 2860 2861void anv_DestroyEvent( 2862 VkDevice _device, 2863 VkEvent _event, 2864 const VkAllocationCallbacks* pAllocator) 2865{ 2866 ANV_FROM_HANDLE(anv_device, device, _device); 2867 ANV_FROM_HANDLE(anv_event, event, _event); 2868 2869 if (!event) 2870 return; 2871 2872 anv_state_pool_free(&device->dynamic_state_pool, event->state); 2873} 2874 2875VkResult anv_GetEventStatus( 2876 VkDevice _device, 2877 VkEvent _event) 2878{ 2879 ANV_FROM_HANDLE(anv_device, device, _device); 2880 ANV_FROM_HANDLE(anv_event, event, _event); 2881 2882 if (anv_device_is_lost(device)) 2883 return VK_ERROR_DEVICE_LOST; 2884 2885 if (!device->info.has_llc) { 2886 /* Invalidate read cache before reading event written by GPU. */ 2887 __builtin_ia32_clflush(event); 2888 __builtin_ia32_mfence(); 2889 2890 } 2891 2892 return event->semaphore; 2893} 2894 2895VkResult anv_SetEvent( 2896 VkDevice _device, 2897 VkEvent _event) 2898{ 2899 ANV_FROM_HANDLE(anv_device, device, _device); 2900 ANV_FROM_HANDLE(anv_event, event, _event); 2901 2902 event->semaphore = VK_EVENT_SET; 2903 2904 if (!device->info.has_llc) { 2905 /* Make sure the writes we're flushing have landed. */ 2906 __builtin_ia32_mfence(); 2907 __builtin_ia32_clflush(event); 2908 } 2909 2910 return VK_SUCCESS; 2911} 2912 2913VkResult anv_ResetEvent( 2914 VkDevice _device, 2915 VkEvent _event) 2916{ 2917 ANV_FROM_HANDLE(anv_device, device, _device); 2918 ANV_FROM_HANDLE(anv_event, event, _event); 2919 2920 event->semaphore = VK_EVENT_RESET; 2921 2922 if (!device->info.has_llc) { 2923 /* Make sure the writes we're flushing have landed. */ 2924 __builtin_ia32_mfence(); 2925 __builtin_ia32_clflush(event); 2926 } 2927 2928 return VK_SUCCESS; 2929} 2930 2931// Buffer functions 2932 2933VkResult anv_CreateBuffer( 2934 VkDevice _device, 2935 const VkBufferCreateInfo* pCreateInfo, 2936 const VkAllocationCallbacks* pAllocator, 2937 VkBuffer* pBuffer) 2938{ 2939 ANV_FROM_HANDLE(anv_device, device, _device); 2940 struct anv_buffer *buffer; 2941 2942 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); 2943 2944 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, 2945 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2946 if (buffer == NULL) 2947 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 2948 2949 buffer->size = pCreateInfo->size; 2950 buffer->usage = pCreateInfo->usage; 2951 buffer->address = ANV_NULL_ADDRESS; 2952 2953 *pBuffer = anv_buffer_to_handle(buffer); 2954 2955 return VK_SUCCESS; 2956} 2957 2958void anv_DestroyBuffer( 2959 VkDevice _device, 2960 VkBuffer _buffer, 2961 const VkAllocationCallbacks* pAllocator) 2962{ 2963 ANV_FROM_HANDLE(anv_device, device, _device); 2964 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); 2965 2966 if (!buffer) 2967 return; 2968 2969 vk_free2(&device->alloc, pAllocator, buffer); 2970} 2971 2972void 2973anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, 2974 enum isl_format format, 2975 struct anv_address address, 2976 uint32_t range, uint32_t stride) 2977{ 2978 isl_buffer_fill_state(&device->isl_dev, state.map, 2979 .address = anv_address_physical(address), 2980 .mocs = device->default_mocs, 2981 .size_B = range, 2982 .format = format, 2983 .stride_B = stride); 2984 2985 anv_state_flush(device, state); 2986} 2987 2988void anv_DestroySampler( 2989 VkDevice _device, 2990 VkSampler _sampler, 2991 const VkAllocationCallbacks* pAllocator) 
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_free2(&device->alloc, pAllocator, sampler);
}

VkResult anv_CreateFramebuffer(
    VkDevice                                    _device,
    const VkFramebufferCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkFramebuffer*                              pFramebuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer) +
                 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
   framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (framebuffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      VkImageView _iview = pCreateInfo->pAttachments[i];
      framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
   }

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   *pFramebuffer = anv_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;
}

void anv_DestroyFramebuffer(
    VkDevice                                    _device,
    VkFramebuffer                               _fb,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_free2(&device->alloc, pAllocator, fb);
}

static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
   VkPhysicalDevice                             physicalDevice,
   uint32_t                                     *pTimeDomainCount,
   VkTimeDomainEXT                              *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append(&out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

/* MMIO offset of the render engine TIMESTAMP register, read below through
 * the I915_REG_READ ioctl wrapper.
 */
#define TIMESTAMP 0x2358

VkResult anv_GetCalibratedTimestampsEXT(
   VkDevice                                     _device,
   uint32_t                                     timestampCount,
   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
   uint64_t                                     *pTimestamps,
   uint64_t                                     *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info.timestamp_frequency;
   int ret;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         ret = anv_gem_reg_read(device, TIMESTAMP | 1,
                                &pTimestamps[d]);

         if (ret != 0) {
            return anv_device_set_lost(device, "Failed 
to read the TIMESTAMP " 3113 "register: %m"); 3114 } 3115 uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency); 3116 max_clock_period = MAX2(max_clock_period, device_period); 3117 break; 3118 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT: 3119 pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC); 3120 max_clock_period = MAX2(max_clock_period, 1); 3121 break; 3122 3123 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT: 3124 pTimestamps[d] = begin; 3125 break; 3126 default: 3127 pTimestamps[d] = 0; 3128 break; 3129 } 3130 } 3131 3132 end = anv_clock_gettime(CLOCK_MONOTONIC_RAW); 3133 3134 /* 3135 * The maximum deviation is the sum of the interval over which we 3136 * perform the sampling and the maximum period of any sampled 3137 * clock. That's because the maximum skew between any two sampled 3138 * clock edges is when the sampled clock with the largest period is 3139 * sampled at the end of that period but right at the beginning of the 3140 * sampling interval and some other clock is sampled right at the 3141 * begining of its sampling period and right at the end of the 3142 * sampling interval. Let's assume the GPU has the longest clock 3143 * period and that the application is sampling GPU and monotonic: 3144 * 3145 * s e 3146 * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f 3147 * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 3148 * 3149 * g 3150 * 0 1 2 3 3151 * GPU -----_____-----_____-----_____-----_____ 3152 * 3153 * m 3154 * x y z 0 1 2 3 4 5 6 7 8 9 a b c 3155 * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- 3156 * 3157 * Interval <-----------------> 3158 * Deviation <--------------------------> 3159 * 3160 * s = read(raw) 2 3161 * g = read(GPU) 1 3162 * m = read(monotonic) 2 3163 * e = read(raw) b 3164 * 3165 * We round the sample interval up by one tick to cover sampling error 3166 * in the interval clock 3167 */ 3168 3169 uint64_t sample_interval = end - begin + 1; 3170 3171 *pMaxDeviation = sample_interval + max_clock_period; 3172 3173 return VK_SUCCESS; 3174} 3175 3176/* vk_icd.h does not declare this function, so we declare it here to 3177 * suppress Wmissing-prototypes. 3178 */ 3179PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 3180vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); 3181 3182PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 3183vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) 3184{ 3185 /* For the full details on loader interface versioning, see 3186 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. 3187 * What follows is a condensed summary, to help you navigate the large and 3188 * confusing official doc. 3189 * 3190 * - Loader interface v0 is incompatible with later versions. We don't 3191 * support it. 3192 * 3193 * - In loader interface v1: 3194 * - The first ICD entrypoint called by the loader is 3195 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this 3196 * entrypoint. 3197 * - The ICD must statically expose no other Vulkan symbol unless it is 3198 * linked with -Bsymbolic. 3199 * - Each dispatchable Vulkan handle created by the ICD must be 3200 * a pointer to a struct whose first member is VK_LOADER_DATA. The 3201 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. 3202 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and 3203 * vkDestroySurfaceKHR(). The ICD must be capable of working with 3204 * such loader-managed surfaces. 
    *
    * - Loader interface v2 differs from v1 in:
    *    - The first ICD entrypoint called by the loader is
    *      vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *      statically expose this entrypoint.
    *
    * - Loader interface v3 differs from v2 in:
    *    - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *      vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *      because the loader no longer does so.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
   return VK_SUCCESS;
}