1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#include <stdbool.h>
29#include <string.h>
30#include <unistd.h>
31#include <fcntl.h>
32#include "radv_debug.h"
33#include "radv_private.h"
34#include "radv_shader.h"
35#include "radv_cs.h"
36#include "util/disk_cache.h"
37#include "util/strtod.h"
38#include "vk_util.h"
39#include <xf86drm.h>
40#include <amdgpu.h>
41#include <amdgpu_drm.h>
42#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43#include "ac_llvm_util.h"
44#include "vk_format.h"
45#include "sid.h"
46#include "git_sha1.h"
47#include "gfx9d.h"
48#include "util/build_id.h"
49#include "util/debug.h"
50#include "util/mesa-sha1.h"
51#include "compiler/glsl_types.h"
52#include "util/xmlpool.h"
53
54static int
55radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
56{
57	struct mesa_sha1 ctx;
58	unsigned char sha1[20];
59	unsigned ptr_size = sizeof(void*);
60
61	memset(uuid, 0, VK_UUID_SIZE);
62	_mesa_sha1_init(&ctx);
63
64	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
65	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
66		return -1;
67
68	_mesa_sha1_update(&ctx, &family, sizeof(family));
69	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
70	_mesa_sha1_final(&ctx, sha1);
71
72	memcpy(uuid, sha1, VK_UUID_SIZE);
73	return 0;
74}
75
/* Fill 'uuid' (VK_UUID_SIZE bytes) with the driver UUID via the shared
 * amd/common helper; used by external-memory capability queries.
 */
static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
81
/* Fill 'uuid' (VK_UUID_SIZE bytes) with a UUID derived from the GPU's
 * identity in 'info' via the shared amd/common helper.
 */
static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
87
88static void
89radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
90{
91	const char *chip_string;
92
93	switch (family) {
94	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
95	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
96	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
97	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
98	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
99	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
100	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
101	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
102	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
103	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
104	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
105	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
106	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
107	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
108	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
109	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
110	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
111	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
112	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
113	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
114	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
115	case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
116	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
117	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
118	default: chip_string = "AMD RADV unknown"; break;
119	}
120
121	snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
122}
123
/* CPU-visible VRAM size in bytes; clamped to the total VRAM size since
 * the visible aperture (BAR) can be reported larger than actual VRAM.
 */
static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}
129
/* Size in bytes of the VRAM that is NOT CPU-visible (total minus the
 * visible aperture).  May be zero when all VRAM is visible.
 */
static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}
135
/* Populate device->memory_properties with the VRAM / visible-VRAM / GTT
 * heaps and the memory types that map onto them.
 *
 * NOTE(review): the order in which the types are appended fixes their
 * Vulkan memoryTypeIndex values (mirrored in device->mem_type_indices),
 * so it must not be reordered without auditing RADV_MEM_TYPE_* users.
 */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	/* Heap for VRAM that the CPU cannot map (may not exist). */
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	/* Heap for the CPU-visible part of VRAM (BAR). */
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	/* GTT (system memory mapped into the GPU); counted as device-local
	 * on APUs, which have no dedicated VRAM. */
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	/* Type 0: invisible VRAM, device-local only. */
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	/* Write-combined GTT: host-visible/coherent but not host-cached. */
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	/* Visible VRAM: device-local and host-mappable. */
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	/* Cached GTT: for CPU readbacks. */
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}
205
206static void
207radv_handle_env_var_force_family(struct radv_physical_device *device)
208{
209	const char *family = getenv("RADV_FORCE_FAMILY");
210	unsigned i;
211
212	if (!family)
213		return;
214
215	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
216		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
217			/* Override family and chip_class. */
218			device->rad_info.family = i;
219
220			if (i >= CHIP_VEGA10)
221				device->rad_info.chip_class = GFX9;
222			else if (i >= CHIP_TONGA)
223				device->rad_info.chip_class = VI;
224			else if (i >= CHIP_BONAIRE)
225				device->rad_info.chip_class = CIK;
226			else
227				device->rad_info.chip_class = SI;
228
229			return;
230		}
231	}
232
233	fprintf(stderr, "radv: Unknown family: %s\n", family);
234	exit(1);
235}
236
237static VkResult
238radv_physical_device_init(struct radv_physical_device *device,
239			  struct radv_instance *instance,
240			  drmDevicePtr drm_device)
241{
242	const char *path = drm_device->nodes[DRM_NODE_RENDER];
243	VkResult result;
244	drmVersionPtr version;
245	int fd;
246	int master_fd = -1;
247
248	fd = open(path, O_RDWR | O_CLOEXEC);
249	if (fd < 0) {
250		if (instance->debug_flags & RADV_DEBUG_STARTUP)
251			radv_logi("Could not open device '%s'", path);
252
253		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
254	}
255
256	version = drmGetVersion(fd);
257	if (!version) {
258		close(fd);
259
260		if (instance->debug_flags & RADV_DEBUG_STARTUP)
261			radv_logi("Could not get the kernel driver version for device '%s'", path);
262
263		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
264				 "failed to get version %s: %m", path);
265	}
266
267	if (strcmp(version->name, "amdgpu")) {
268		drmFreeVersion(version);
269		close(fd);
270
271		if (instance->debug_flags & RADV_DEBUG_STARTUP)
272			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
273
274		return VK_ERROR_INCOMPATIBLE_DRIVER;
275	}
276	drmFreeVersion(version);
277
278	if (instance->debug_flags & RADV_DEBUG_STARTUP)
279			radv_logi("Found compatible device '%s'.", path);
280
281	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
282	device->instance = instance;
283
284	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
285					       instance->perftest_flags);
286	if (!device->ws) {
287		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
288		goto fail;
289	}
290
291	if (instance->enabled_extensions.KHR_display) {
292		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
293		if (master_fd >= 0) {
294			uint32_t accel_working = 0;
295			struct drm_amdgpu_info request = {
296				.return_pointer = (uintptr_t)&accel_working,
297				.return_size = sizeof(accel_working),
298				.query = AMDGPU_INFO_ACCEL_WORKING
299			};
300
301			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
302				close(master_fd);
303				master_fd = -1;
304			}
305		}
306	}
307
308	device->master_fd = master_fd;
309	device->local_fd = fd;
310	device->ws->query_info(device->ws, &device->rad_info);
311
312	radv_handle_env_var_force_family(device);
313
314	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
315
316	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
317		device->ws->destroy(device->ws);
318		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
319				   "cannot generate UUID");
320		goto fail;
321	}
322
323	/* These flags affect shader compilation. */
324	uint64_t shader_env_flags =
325		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
326		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
327
328	/* The gpu id is already embedded in the uuid so we just pass "radv"
329	 * when creating the cache.
330	 */
331	char buf[VK_UUID_SIZE * 2 + 1];
332	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
333	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
334
335	if (device->rad_info.chip_class < VI ||
336	    device->rad_info.chip_class > GFX9)
337		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
338
339	radv_get_driver_uuid(&device->driver_uuid);
340	radv_get_device_uuid(&device->rad_info, &device->device_uuid);
341
342	if (device->rad_info.family == CHIP_STONEY ||
343	    device->rad_info.chip_class >= GFX9) {
344		device->has_rbplus = true;
345		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
346					 device->rad_info.family == CHIP_VEGA12 ||
347		                         device->rad_info.family == CHIP_RAVEN ||
348		                         device->rad_info.family == CHIP_RAVEN2;
349	}
350
351	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
352	 * on SI.
353	 */
354	device->has_clear_state = device->rad_info.chip_class >= CIK;
355
356	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
357
358	/* Vega10/Raven need a special workaround for a hardware bug. */
359	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
360				  device->rad_info.family == CHIP_RAVEN;
361
362	/* Out-of-order primitive rasterization. */
363	device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
364					device->rad_info.max_se >= 2;
365	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
366					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
367
368	device->dcc_msaa_allowed =
369		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
370
371	/* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
372	device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
373				       (device->rad_info.chip_class >= VI &&
374				        device->rad_info.me_fw_feature >= 41);
375
376	radv_physical_device_init_mem_types(device);
377	radv_fill_device_extension_table(device, &device->supported_extensions);
378
379	device->bus_info = *drm_device->businfo.pci;
380
381	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
382		ac_print_gpu_info(&device->rad_info);
383
384	/* The WSI is structured as a layer on top of the driver, so this has
385	 * to be the last part of initialization (at least until we get other
386	 * semi-layers).
387	 */
388	result = radv_init_wsi(device);
389	if (result != VK_SUCCESS) {
390		device->ws->destroy(device->ws);
391		vk_error(instance, result);
392		goto fail;
393	}
394
395	return VK_SUCCESS;
396
397fail:
398	close(fd);
399	if (master_fd != -1)
400		close(master_fd);
401	return result;
402}
403
/* Tear down a physical device initialized by radv_physical_device_init.
 * WSI is destroyed first since it layers on top of the winsys; the fds
 * opened at init time are closed last.
 */
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}
414
415static void *
416default_alloc_func(void *pUserData, size_t size, size_t align,
417                   VkSystemAllocationScope allocationScope)
418{
419	return malloc(size);
420}
421
422static void *
423default_realloc_func(void *pUserData, void *pOriginal, size_t size,
424                     size_t align, VkSystemAllocationScope allocationScope)
425{
426	return realloc(pOriginal, size);
427}
428
/* Default VkAllocationCallbacks free hook: plain free. */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;

	free(pMemory);
}
434
/* Allocator used when the application passes no VkAllocationCallbacks:
 * thin wrappers around the C heap. */
static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
441
/* Names accepted in the RADV_DEBUG environment variable, mapped to their
 * RADV_DEBUG_* flags.  The table is NULL-terminated for
 * parse_debug_string(). */
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
	{NULL, 0}
};
470
/* Return the RADV_DEBUG option name for debug-flag bit index 'id'.
 * 'id' must index a real entry, not the trailing {NULL, 0} sentinel.
 */
const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}
477
/* Names accepted in the RADV_PERFTEST environment variable, mapped to
 * their RADV_PERFTEST_* flags.  NULL-terminated for parse_debug_string(). */
static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{NULL, 0}
};
486
/* Return the RADV_PERFTEST option name for perftest-flag bit index 'id'.
 * 'id' must index a real entry, not the trailing {NULL, 0} sentinel.
 */
const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}
493
494static void
495radv_handle_per_app_options(struct radv_instance *instance,
496			    const VkApplicationInfo *info)
497{
498	const char *name = info ? info->pApplicationName : NULL;
499
500	if (!name)
501		return;
502
503	if (!strcmp(name, "Talos - Linux - 32bit") ||
504	    !strcmp(name, "Talos - Linux - 64bit")) {
505		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
506			/* Force enable LLVM sisched for Talos because it looks
507			 * safe and it gives few more FPS.
508			 */
509			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
510		}
511	} else if (!strcmp(name, "DOOM_VFR")) {
512		/* Work around a Doom VFR game bug */
513		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
514	} else if (!strcmp(name, "MonsterHunterWorld.exe")) {
515		/* Workaround for a WaW hazard when LLVM moves/merges
516		 * load/store memory operations.
517		 * See https://reviews.llvm.org/D61313
518		 */
519		if (HAVE_LLVM < 0x900)
520			instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT;
521	}
522}
523
524static int radv_get_instance_extension_index(const char *name)
525{
526	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
527		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
528			return i;
529	}
530	return -1;
531}
532
/* driconf schema for the options radv understands (adaptive sync and the
 * X11 minImageCount override); parsed by radv_init_dri_options(). */
static const char radv_dri_options_xml[] =
DRI_CONF_BEGIN
	DRI_CONF_SECTION_PERFORMANCE
		DRI_CONF_ADAPTIVE_SYNC("true")
		DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
	DRI_CONF_SECTION_END
DRI_CONF_END;
540
/* Load driconf defaults and the user's configuration files for the
 * "radv" driver into the instance's DRI option caches.
 */
static void  radv_init_dri_options(struct radv_instance *instance)
{
	driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml);
	driParseConfigFiles(&instance->dri_options,
	                    &instance->available_dri_options,
	                    0, "radv", NULL);
}
548
549VkResult radv_CreateInstance(
550	const VkInstanceCreateInfo*                 pCreateInfo,
551	const VkAllocationCallbacks*                pAllocator,
552	VkInstance*                                 pInstance)
553{
554	struct radv_instance *instance;
555	VkResult result;
556
557	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
558
559	uint32_t client_version;
560	if (pCreateInfo->pApplicationInfo &&
561	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
562		client_version = pCreateInfo->pApplicationInfo->apiVersion;
563	} else {
564		client_version = VK_API_VERSION_1_0;
565	}
566
567	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
568			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
569	if (!instance)
570		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
571
572	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
573
574	if (pAllocator)
575		instance->alloc = *pAllocator;
576	else
577		instance->alloc = default_alloc;
578
579	instance->apiVersion = client_version;
580	instance->physicalDeviceCount = -1;
581
582	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
583						   radv_debug_options);
584
585	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
586						   radv_perftest_options);
587
588
589	if (instance->debug_flags & RADV_DEBUG_STARTUP)
590		radv_logi("Created an instance");
591
592	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
593		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
594		int index = radv_get_instance_extension_index(ext_name);
595
596		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
597			vk_free2(&default_alloc, pAllocator, instance);
598			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
599		}
600
601		instance->enabled_extensions.extensions[index] = true;
602	}
603
604	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
605	if (result != VK_SUCCESS) {
606		vk_free2(&default_alloc, pAllocator, instance);
607		return vk_error(instance, result);
608	}
609
610	_mesa_locale_init();
611	glsl_type_singleton_init_or_ref();
612
613	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
614
615	radv_init_dri_options(instance);
616	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
617
618	*pInstance = radv_instance_to_handle(instance);
619
620	return VK_SUCCESS;
621}
622
623void radv_DestroyInstance(
624	VkInstance                                  _instance,
625	const VkAllocationCallbacks*                pAllocator)
626{
627	RADV_FROM_HANDLE(radv_instance, instance, _instance);
628
629	if (!instance)
630		return;
631
632	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
633		radv_physical_device_finish(instance->physicalDevices + i);
634	}
635
636	VG(VALGRIND_DESTROY_MEMPOOL(instance));
637
638	glsl_type_singleton_decref();
639	_mesa_locale_fini();
640
641	driDestroyOptionCache(&instance->dri_options);
642	driDestroyOptionInfo(&instance->available_dri_options);
643
644	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
645
646	vk_free(&instance->alloc, instance);
647}
648
/* Scan the system's DRM nodes and initialize a radv_physical_device for
 * every AMD GPU with a render node.  Sets instance->physicalDeviceCount.
 *
 * Returns VK_ERROR_INCOMPATIBLE_DRIVER when no usable device exists;
 * callers treat that as "zero devices" rather than a hard failure.
 *
 * NOTE(review): 'result' holds the outcome of the LAST attempted device
 * init, so a trailing incompatible node can mask earlier successes —
 * benign today because callers also consult physicalDeviceCount, but
 * worth confirming if new callers appear.
 */
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		/* Only consider AMD PCI devices that expose a render node. */
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
			                                   instance->physicalDeviceCount,
			                                   instance,
			                                   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}
686
687VkResult radv_EnumeratePhysicalDevices(
688	VkInstance                                  _instance,
689	uint32_t*                                   pPhysicalDeviceCount,
690	VkPhysicalDevice*                           pPhysicalDevices)
691{
692	RADV_FROM_HANDLE(radv_instance, instance, _instance);
693	VkResult result;
694
695	if (instance->physicalDeviceCount < 0) {
696		result = radv_enumerate_devices(instance);
697		if (result != VK_SUCCESS &&
698		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
699			return result;
700	}
701
702	if (!pPhysicalDevices) {
703		*pPhysicalDeviceCount = instance->physicalDeviceCount;
704	} else {
705		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
706		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
707			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
708	}
709
710	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
711	                                                             : VK_SUCCESS;
712}
713
/* vkEnumeratePhysicalDeviceGroups: radv exposes each physical device as
 * its own single-member group.  Uses the same two-call idiom and lazy
 * enumeration as radv_EnumeratePhysicalDevices.
 */
VkResult radv_EnumeratePhysicalDeviceGroups(
    VkInstance                                  _instance,
    uint32_t*                                   pPhysicalDeviceGroupCount,
    VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	/* physicalDeviceCount < 0 means we have not enumerated yet. */
	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		/* First call: report the number of groups (== devices). */
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		/* Second call: one single-device group per physical device. */
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                                  : VK_SUCCESS;
}
742
/* vkGetPhysicalDeviceFeatures: report the core Vulkan 1.0 features radv
 * supports.  Most are unconditionally true; a few depend on the chip
 * generation (VI+ for multisampled storage images, GFX9+ for int16).
 */
void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	/* Zero first (also clears padding); the literal below then sets
	 * every named member explicitly. */
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess                       = true,
		.fullDrawIndexUint32                      = true,
		.imageCubeArray                           = true,
		.independentBlend                         = true,
		.geometryShader                           = true,
		.tessellationShader                       = true,
		.sampleRateShading                        = true,
		.dualSrcBlend                             = true,
		.logicOp                                  = true,
		.multiDrawIndirect                        = true,
		.drawIndirectFirstInstance                = true,
		.depthClamp                               = true,
		.depthBiasClamp                           = true,
		.fillModeNonSolid                         = true,
		.depthBounds                              = true,
		.wideLines                                = true,
		.largePoints                              = true,
		.alphaToOne                               = true,
		.multiViewport                            = true,
		.samplerAnisotropy                        = true,
		.textureCompressionETC2                   = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR               = false,
		.textureCompressionBC                     = true,
		.occlusionQueryPrecise                    = true,
		.pipelineStatisticsQuery                  = true,
		.vertexPipelineStoresAndAtomics           = true,
		.fragmentStoresAndAtomics                 = true,
		.shaderTessellationAndGeometryPointSize   = true,
		.shaderImageGatherExtended                = true,
		.shaderStorageImageExtendedFormats        = true,
		.shaderStorageImageMultisample            = pdevice->rad_info.chip_class >= VI,
		.shaderUniformBufferArrayDynamicIndexing  = true,
		.shaderSampledImageArrayDynamicIndexing   = true,
		.shaderStorageBufferArrayDynamicIndexing  = true,
		.shaderStorageImageArrayDynamicIndexing   = true,
		.shaderStorageImageReadWithoutFormat      = true,
		.shaderStorageImageWriteWithoutFormat     = true,
		.shaderClipDistance                       = true,
		.shaderCullDistance                       = true,
		.shaderFloat64                            = true,
		.shaderInt64                              = true,
		.shaderInt16                              = pdevice->rad_info.chip_class >= GFX9,
		.sparseBinding                            = true,
		.variableMultisampleRate                  = true,
		.inheritedQueries                         = true,
	};
}
798
799void radv_GetPhysicalDeviceFeatures2(
800	VkPhysicalDevice                            physicalDevice,
801	VkPhysicalDeviceFeatures2                  *pFeatures)
802{
803	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
804	vk_foreach_struct(ext, pFeatures->pNext) {
805		switch (ext->sType) {
806		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
807			VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
808			features->variablePointersStorageBuffer = true;
809			features->variablePointers = true;
810			break;
811		}
812		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
813			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
814			features->multiview = true;
815			features->multiviewGeometryShader = true;
816			features->multiviewTessellationShader = true;
817			break;
818		}
819		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
820			VkPhysicalDeviceShaderDrawParametersFeatures *features =
821			    (VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
822			features->shaderDrawParameters = true;
823			break;
824		}
825		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
826			VkPhysicalDeviceProtectedMemoryFeatures *features =
827			    (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
828			features->protectedMemory = false;
829			break;
830		}
831		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
832			VkPhysicalDevice16BitStorageFeatures *features =
833			    (VkPhysicalDevice16BitStorageFeatures*)ext;
834			bool enabled = pdevice->rad_info.chip_class >= VI;
835			features->storageBuffer16BitAccess = enabled;
836			features->uniformAndStorageBuffer16BitAccess = enabled;
837			features->storagePushConstant16 = enabled;
838			features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
839			break;
840		}
841		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
842			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
843			    (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
844			features->samplerYcbcrConversion = true;
845			break;
846		}
847		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
848			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
849				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
850			features->shaderInputAttachmentArrayDynamicIndexing = true;
851			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
852			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
853			features->shaderUniformBufferArrayNonUniformIndexing = true;
854			features->shaderSampledImageArrayNonUniformIndexing = true;
855			features->shaderStorageBufferArrayNonUniformIndexing = true;
856			features->shaderStorageImageArrayNonUniformIndexing = true;
857			features->shaderInputAttachmentArrayNonUniformIndexing = true;
858			features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
859			features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
860			features->descriptorBindingUniformBufferUpdateAfterBind = true;
861			features->descriptorBindingSampledImageUpdateAfterBind = true;
862			features->descriptorBindingStorageImageUpdateAfterBind = true;
863			features->descriptorBindingStorageBufferUpdateAfterBind = true;
864			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
865			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
866			features->descriptorBindingUpdateUnusedWhilePending = true;
867			features->descriptorBindingPartiallyBound = true;
868			features->descriptorBindingVariableDescriptorCount = true;
869			features->runtimeDescriptorArray = true;
870			break;
871		}
872		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
873			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
874				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
875			features->conditionalRendering = true;
876			features->inheritedConditionalRendering = false;
877			break;
878		}
879		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
880			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
881				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
882			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
883			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
884			break;
885		}
886		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
887			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
888				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
889			features->transformFeedback = true;
890			features->geometryStreams = true;
891			break;
892		}
893		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
894			VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
895				(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
896			features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK;
897			break;
898		}
899		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
900			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
901				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
902			features->memoryPriority = VK_TRUE;
903			break;
904		}
905		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
906			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
907				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
908			features->bufferDeviceAddress = true;
909			features->bufferDeviceAddressCaptureReplay = false;
910			features->bufferDeviceAddressMultiDevice = false;
911			break;
912		}
913		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
914			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
915				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
916			features->depthClipEnable = true;
917			break;
918		}
919		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
920			VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
921				(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
922			features->hostQueryReset = true;
923			break;
924		}
925		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
926			VkPhysicalDevice8BitStorageFeaturesKHR *features =
927			    (VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
928			bool enabled = pdevice->rad_info.chip_class >= VI;
929			features->storageBuffer8BitAccess = enabled;
930			features->uniformAndStorageBuffer8BitAccess = enabled;
931			features->storagePushConstant8 = enabled;
932			break;
933		}
934		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
935			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
936				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
937			features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800;
938			features->shaderInt8 = true;
939			break;
940		}
941		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
942			VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
943				(VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
944			/* TODO: Enable this once the driver supports 64-bit
945			 * compare&swap atomic operations.
946			 */
947			features->shaderBufferInt64Atomics = false;
948			features->shaderSharedInt64Atomics = false;
949			break;
950		}
951		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
952			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
953				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
954
955			features->inlineUniformBlock = true;
956			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
957			break;
958		}
959		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
960			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
961				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
962			features->computeDerivativeGroupQuads = false;
963			features->computeDerivativeGroupLinear = true;
964			break;
965		}
966		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
967			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
968				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
969			features->ycbcrImageArrays = true;
970			break;
971		}
972		default:
973			break;
974		}
975	}
976	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
977}
978
979void radv_GetPhysicalDeviceProperties(
980	VkPhysicalDevice                            physicalDevice,
981	VkPhysicalDeviceProperties*                 pProperties)
982{
983	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
984	VkSampleCountFlags sample_counts = 0xf;
985
986	/* make sure that the entire descriptor set is addressable with a signed
987	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
988	 * be at most 2 GiB. the combined image & samples object count as one of
989	 * both. This limit is for the pipeline layout, not for the set layout, but
990	 * there is no set limit, so we just set a pipeline limit. I don't think
991	 * any app is going to hit this soon. */
992	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
993	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
994	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
995	           32 /* sampler, largest when combined with image */ +
996	           64 /* sampled image */ +
997	           64 /* storage image */);
998
999	VkPhysicalDeviceLimits limits = {
1000		.maxImageDimension1D                      = (1 << 14),
1001		.maxImageDimension2D                      = (1 << 14),
1002		.maxImageDimension3D                      = (1 << 11),
1003		.maxImageDimensionCube                    = (1 << 14),
1004		.maxImageArrayLayers                      = (1 << 11),
1005		.maxTexelBufferElements                   = 128 * 1024 * 1024,
1006		.maxUniformBufferRange                    = UINT32_MAX,
1007		.maxStorageBufferRange                    = UINT32_MAX,
1008		.maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1009		.maxMemoryAllocationCount                 = UINT32_MAX,
1010		.maxSamplerAllocationCount                = 64 * 1024,
1011		.bufferImageGranularity                   = 64, /* A cache line */
1012		.sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
1013		.maxBoundDescriptorSets                   = MAX_SETS,
1014		.maxPerStageDescriptorSamplers            = max_descriptor_set_size,
1015		.maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
1016		.maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
1017		.maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
1018		.maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
1019		.maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
1020		.maxPerStageResources                     = max_descriptor_set_size,
1021		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
1022		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
1023		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
1024		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
1025		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
1026		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
1027		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
1028		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
1029		.maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
1030		.maxVertexInputBindings                   = MAX_VBS,
1031		.maxVertexInputAttributeOffset            = 2047,
1032		.maxVertexInputBindingStride              = 2048,
1033		.maxVertexOutputComponents                = 128,
1034		.maxTessellationGenerationLevel           = 64,
1035		.maxTessellationPatchSize                 = 32,
1036		.maxTessellationControlPerVertexInputComponents = 128,
1037		.maxTessellationControlPerVertexOutputComponents = 128,
1038		.maxTessellationControlPerPatchOutputComponents = 120,
1039		.maxTessellationControlTotalOutputComponents = 4096,
1040		.maxTessellationEvaluationInputComponents = 128,
1041		.maxTessellationEvaluationOutputComponents = 128,
1042		.maxGeometryShaderInvocations             = 127,
1043		.maxGeometryInputComponents               = 64,
1044		.maxGeometryOutputComponents              = 128,
1045		.maxGeometryOutputVertices                = 256,
1046		.maxGeometryTotalOutputComponents         = 1024,
1047		.maxFragmentInputComponents               = 128,
1048		.maxFragmentOutputAttachments             = 8,
1049		.maxFragmentDualSrcAttachments            = 1,
1050		.maxFragmentCombinedOutputResources       = 8,
1051		.maxComputeSharedMemorySize               = 32768,
1052		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1053		.maxComputeWorkGroupInvocations           = 2048,
1054		.maxComputeWorkGroupSize = {
1055			2048,
1056			2048,
1057			2048
1058		},
1059		.subPixelPrecisionBits                    = 8,
1060		.subTexelPrecisionBits                    = 8,
1061		.mipmapPrecisionBits                      = 8,
1062		.maxDrawIndexedIndexValue                 = UINT32_MAX,
1063		.maxDrawIndirectCount                     = UINT32_MAX,
1064		.maxSamplerLodBias                        = 16,
1065		.maxSamplerAnisotropy                     = 16,
1066		.maxViewports                             = MAX_VIEWPORTS,
1067		.maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1068		.viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1069		.viewportSubPixelBits                     = 8,
1070		.minMemoryMapAlignment                    = 4096, /* A page */
1071		.minTexelBufferOffsetAlignment            = 1,
1072		.minUniformBufferOffsetAlignment          = 4,
1073		.minStorageBufferOffsetAlignment          = 4,
1074		.minTexelOffset                           = -32,
1075		.maxTexelOffset                           = 31,
1076		.minTexelGatherOffset                     = -32,
1077		.maxTexelGatherOffset                     = 31,
1078		.minInterpolationOffset                   = -2,
1079		.maxInterpolationOffset                   = 2,
1080		.subPixelInterpolationOffsetBits          = 8,
1081		.maxFramebufferWidth                      = (1 << 14),
1082		.maxFramebufferHeight                     = (1 << 14),
1083		.maxFramebufferLayers                     = (1 << 10),
1084		.framebufferColorSampleCounts             = sample_counts,
1085		.framebufferDepthSampleCounts             = sample_counts,
1086		.framebufferStencilSampleCounts           = sample_counts,
1087		.framebufferNoAttachmentsSampleCounts     = sample_counts,
1088		.maxColorAttachments                      = MAX_RTS,
1089		.sampledImageColorSampleCounts            = sample_counts,
1090		.sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
1091		.sampledImageDepthSampleCounts            = sample_counts,
1092		.sampledImageStencilSampleCounts          = sample_counts,
1093		.storageImageSampleCounts                 = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
1094		.maxSampleMaskWords                       = 1,
1095		.timestampComputeAndGraphics              = true,
1096		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1097		.maxClipDistances                         = 8,
1098		.maxCullDistances                         = 8,
1099		.maxCombinedClipAndCullDistances          = 8,
1100		.discreteQueuePriorities                  = 2,
1101		.pointSizeRange                           = { 0.0, 8192.0 },
1102		.lineWidthRange                           = { 0.0, 7.9921875 },
1103		.pointSizeGranularity                     = (1.0 / 8.0),
1104		.lineWidthGranularity                     = (1.0 / 128.0),
1105		.strictLines                              = false, /* FINISHME */
1106		.standardSampleLocations                  = true,
1107		.optimalBufferCopyOffsetAlignment         = 128,
1108		.optimalBufferCopyRowPitchAlignment       = 128,
1109		.nonCoherentAtomSize                      = 64,
1110	};
1111
1112	*pProperties = (VkPhysicalDeviceProperties) {
1113		.apiVersion = radv_physical_device_api_version(pdevice),
1114		.driverVersion = vk_get_driver_version(),
1115		.vendorID = ATI_VENDOR_ID,
1116		.deviceID = pdevice->rad_info.pci_id,
1117		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1118		.limits = limits,
1119		.sparseProperties = {0},
1120	};
1121
1122	strcpy(pProperties->deviceName, pdevice->name);
1123	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1124}
1125
/* vkGetPhysicalDeviceProperties2 entry point: fills the core properties,
 * then walks the caller-provided pNext chain and fills every extension
 * property struct RADV knows about.  Unknown sTypes are silently skipped,
 * as the Vulkan spec requires.  The reported values are ABI: do not change
 * them without checking the corresponding extension spec.
 */
void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2                *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			/* LUIDs are a Windows concept; not supported here. */
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
			VkPhysicalDevicePointClippingProperties *properties =
			    (VkPhysicalDevicePointClippingProperties*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
			break;
		}
		case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
			    (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
			    (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			/* Imported host pointers must be page-aligned. */
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
			    (VkPhysicalDeviceSubgroupProperties*)ext;
			/* One wavefront = 64 lanes on all supported GCN parts. */
			properties->subgroupSize = 64;
			properties->supportedStages = VK_SHADER_STAGE_ALL;
			properties->supportedOperations =
							VK_SUBGROUP_FEATURE_BASIC_BIT |
							VK_SUBGROUP_FEATURE_BALLOT_BIT |
							VK_SUBGROUP_FEATURE_QUAD_BIT |
							VK_SUBGROUP_FEATURE_VOTE_BIT;
			/* Arithmetic/shuffle subgroup ops need GFX8+ (VI). */
			if (pdevice->rad_info.chip_class >= VI) {
				properties->supportedOperations |=
							VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
							VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
							VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
			}
			properties->quadOperationsInAllStages = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
			    (VkPhysicalDeviceMaintenance3Properties*)ext;
			/* Make sure everything is addressable by a signed 32-bit int, and
			 * our largest descriptors are 96 bytes. */
			properties->maxPerSetDescriptors = (1ull << 31) / 96;
			/* Our buffer size fields allow only this much */
			properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
			VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
			/* GFX6-8 only support single channel min/max filter. */
			properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
			properties->filterMinmaxSingleComponentFormats = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.num_good_cu_per_sh;
			properties->simdPerComputeUnit = 4;
			/* These chips run 8 wavefronts per SIMD; the rest run 10. */
			properties->wavefrontsPerSimd =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ||
				pdevice->rad_info.family == CHIP_POLARIS10 ||
				pdevice->rad_info.family == CHIP_POLARIS11 ||
				pdevice->rad_info.family == CHIP_POLARIS12 ||
				pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
			properties->minSgprAllocation =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;
			properties->maxSgprAllocation =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;

			/* VGPR. */
			properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
			properties->minVgprAllocation = 4;
			properties->maxVgprAllocation = 256;
			properties->vgprAllocationGranularity = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
			VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
				(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
			properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
			/* NonUniformIndexingNative = false: the compiler inserts
			 * waterfall loops for divergent descriptor indices. */
			properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
			properties->shaderSampledImageArrayNonUniformIndexingNative = false;
			properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
			properties->shaderStorageImageArrayNonUniformIndexingNative = false;
			properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
			properties->robustBufferAccessUpdateAfterBind = false;
			properties->quadDivergentImplicitLod = false;

			/* Same 2 GiB addressability budget as the core limits,
			 * minus space reserved for inline uniform blocks. */
			size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
				MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
			          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
			           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
			           32 /* sampler, largest when combined with image */ +
			           64 /* sampled image */ +
			           64 /* storage image */);
			properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
			properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			properties->protectedNoFault = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = VK_FALSE;
			properties->conservativePointAndLineRasterization = VK_FALSE;
			properties->degenerateTrianglesRasterized = VK_FALSE;
			properties->degenerateLinesRasterized = VK_FALSE;
			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
			VkPhysicalDeviceDriverPropertiesKHR *driver_props =
				(VkPhysicalDeviceDriverPropertiesKHR *) ext;

			driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
			/* Zero-fill first so the fixed-size strings have no
			 * trailing garbage. */
			memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
			strcpy(driver_props->driverName, "radv");

			memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
			snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
				"Mesa " PACKAGE_VERSION MESA_GIT_SHA1
				" (LLVM " MESA_LLVM_VERSION_STRING ")");

			/* CTS version this driver claims conformance to. */
			driver_props->conformanceVersion = (VkConformanceVersionKHR) {
				.major = 1,
				.minor = 1,
				.subminor = 2,
				.patch = 0,
			};
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
			properties->maxTransformFeedbackStreamDataSize = 512;
			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
			properties->maxTransformFeedbackBufferDataStride = 512;
			properties->transformFeedbackQueries = true;
			properties->transformFeedbackStreamsLinesTriangles = false;
			properties->transformFeedbackRasterizationStreamSelect = false;
			properties->transformFeedbackDraw = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
			VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
				(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

			props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
			props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			break;
		}
		default:
			break;
		}
	}
}
1371
1372static void radv_get_physical_device_queue_family_properties(
1373	struct radv_physical_device*                pdevice,
1374	uint32_t*                                   pCount,
1375	VkQueueFamilyProperties**                    pQueueFamilyProperties)
1376{
1377	int num_queue_families = 1;
1378	int idx;
1379	if (pdevice->rad_info.num_compute_rings > 0 &&
1380	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1381		num_queue_families++;
1382
1383	if (pQueueFamilyProperties == NULL) {
1384		*pCount = num_queue_families;
1385		return;
1386	}
1387
1388	if (!*pCount)
1389		return;
1390
1391	idx = 0;
1392	if (*pCount >= 1) {
1393		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1394			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
1395			              VK_QUEUE_COMPUTE_BIT |
1396			              VK_QUEUE_TRANSFER_BIT |
1397			              VK_QUEUE_SPARSE_BINDING_BIT,
1398			.queueCount = 1,
1399			.timestampValidBits = 64,
1400			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1401		};
1402		idx++;
1403	}
1404
1405	if (pdevice->rad_info.num_compute_rings > 0 &&
1406	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1407		if (*pCount > idx) {
1408			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1409				.queueFlags = VK_QUEUE_COMPUTE_BIT |
1410				              VK_QUEUE_TRANSFER_BIT |
1411				              VK_QUEUE_SPARSE_BINDING_BIT,
1412				.queueCount = pdevice->rad_info.num_compute_rings,
1413				.timestampValidBits = 64,
1414				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1415			};
1416			idx++;
1417		}
1418	}
1419	*pCount = idx;
1420}
1421
1422void radv_GetPhysicalDeviceQueueFamilyProperties(
1423	VkPhysicalDevice                            physicalDevice,
1424	uint32_t*                                   pCount,
1425	VkQueueFamilyProperties*                    pQueueFamilyProperties)
1426{
1427	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1428	if (!pQueueFamilyProperties) {
1429		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1430		return;
1431	}
1432	VkQueueFamilyProperties *properties[] = {
1433		pQueueFamilyProperties + 0,
1434		pQueueFamilyProperties + 1,
1435		pQueueFamilyProperties + 2,
1436	};
1437	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1438	assert(*pCount <= 3);
1439}
1440
1441void radv_GetPhysicalDeviceQueueFamilyProperties2(
1442	VkPhysicalDevice                            physicalDevice,
1443	uint32_t*                                   pCount,
1444	VkQueueFamilyProperties2                   *pQueueFamilyProperties)
1445{
1446	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1447	if (!pQueueFamilyProperties) {
1448		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1449		return;
1450	}
1451	VkQueueFamilyProperties *properties[] = {
1452		&pQueueFamilyProperties[0].queueFamilyProperties,
1453		&pQueueFamilyProperties[1].queueFamilyProperties,
1454		&pQueueFamilyProperties[2].queueFamilyProperties,
1455	};
1456	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1457	assert(*pCount <= 3);
1458}
1459
1460void radv_GetPhysicalDeviceMemoryProperties(
1461	VkPhysicalDevice                            physicalDevice,
1462	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
1463{
1464	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1465
1466	*pMemoryProperties = physical_device->memory_properties;
1467}
1468
1469static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
	/* Fill in the VK_EXT_memory_budget output structure with a per-heap
	 * budget and the amount this process has allocated from each heap.
	 */
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	uint64_t gtt_size = device->rad_info.gart_size;
	uint64_t heap_budget, heap_usage;

	/* For all memory heaps, the computation of budget is as follow:
	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
	 *
	 * The Vulkan spec 1.1.97 says that the budget should include any
	 * currently allocated device memory.
	 *
	 * Note that the application heap usages are not really accurate (eg.
	 * in presence of shared buffers).
	 */
	/* NOTE(review): "i" is signed while memoryTypeCount is uint32_t;
	 * harmless here but triggers -Wsign-compare.
	 */
	for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
		uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;

		/* Budgets are tracked per memory *type*; each type writes the
		 * entry of the heap it lives in.
		 */
		switch (device->mem_type_indices[i]) {
		case RADV_MEM_TYPE_VRAM:
			/* App usage from the winsys, global usage from the kernel. */
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM);

			heap_budget = vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
			/* CPU-visible VRAM is budgeted against the visible window only. */
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM_VIS);

			heap_budget = visible_vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_GTT);

			heap_budget = gtt_size -
				device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
			break;
		default:
			/* Other memory types (e.g. cached GTT) share a heap whose
			 * budget is already reported by one of the cases above.
			 */
			break;
		}
	}

	/* The heapBudget and heapUsage values must be zero for array elements
	 * greater than or equal to
	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
	 */
	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
		memoryBudget->heapBudget[i] = 0;
		memoryBudget->heapUsage[i] = 0;
	}
}
1540
1541void radv_GetPhysicalDeviceMemoryProperties2(
1542	VkPhysicalDevice                            physicalDevice,
1543	VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
1544{
1545	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1546					       &pMemoryProperties->memoryProperties);
1547
1548	VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
1549		vk_find_struct(pMemoryProperties->pNext,
1550			       PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
1551	if (memory_budget)
1552		radv_get_memory_budget_properties(physicalDevice, memory_budget);
1553}
1554
1555VkResult radv_GetMemoryHostPointerPropertiesEXT(
1556	VkDevice                                    _device,
1557	VkExternalMemoryHandleTypeFlagBits          handleType,
1558	const void                                 *pHostPointer,
1559	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
1560{
1561	RADV_FROM_HANDLE(radv_device, device, _device);
1562
1563	switch (handleType)
1564	{
1565	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1566		const struct radv_physical_device *physical_device = device->physical_device;
1567		uint32_t memoryTypeBits = 0;
1568		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1569			if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1570				memoryTypeBits = (1 << i);
1571				break;
1572			}
1573		}
1574		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1575		return VK_SUCCESS;
1576	}
1577	default:
1578		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1579	}
1580}
1581
1582static enum radeon_ctx_priority
1583radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1584{
1585	/* Default to MEDIUM when a specific global priority isn't requested */
1586	if (!pObj)
1587		return RADEON_CTX_PRIORITY_MEDIUM;
1588
1589	switch(pObj->globalPriority) {
1590	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1591		return RADEON_CTX_PRIORITY_REALTIME;
1592	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1593		return RADEON_CTX_PRIORITY_HIGH;
1594	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1595		return RADEON_CTX_PRIORITY_MEDIUM;
1596	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1597		return RADEON_CTX_PRIORITY_LOW;
1598	default:
1599		unreachable("Illegal global priority value");
1600		return RADEON_CTX_PRIORITY_INVALID;
1601	}
1602}
1603
1604static int
1605radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1606		uint32_t queue_family_index, int idx,
1607		VkDeviceQueueCreateFlags flags,
1608		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1609{
1610	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1611	queue->device = device;
1612	queue->queue_family_index = queue_family_index;
1613	queue->queue_idx = idx;
1614	queue->priority = radv_get_queue_global_priority(global_priority);
1615	queue->flags = flags;
1616
1617	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1618	if (!queue->hw_ctx)
1619		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1620
1621	return VK_SUCCESS;
1622}
1623
1624static void
1625radv_queue_finish(struct radv_queue *queue)
1626{
1627	if (queue->hw_ctx)
1628		queue->device->ws->ctx_destroy(queue->hw_ctx);
1629
1630	if (queue->initial_full_flush_preamble_cs)
1631		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1632	if (queue->initial_preamble_cs)
1633		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1634	if (queue->continue_preamble_cs)
1635		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1636	if (queue->descriptor_bo)
1637		queue->device->ws->buffer_destroy(queue->descriptor_bo);
1638	if (queue->scratch_bo)
1639		queue->device->ws->buffer_destroy(queue->scratch_bo);
1640	if (queue->esgs_ring_bo)
1641		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1642	if (queue->gsvs_ring_bo)
1643		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1644	if (queue->tess_rings_bo)
1645		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1646	if (queue->compute_scratch_bo)
1647		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1648}
1649
1650static void
1651radv_bo_list_init(struct radv_bo_list *bo_list)
1652{
1653	pthread_mutex_init(&bo_list->mutex, NULL);
1654	bo_list->list.count = bo_list->capacity = 0;
1655	bo_list->list.bos = NULL;
1656}
1657
1658static void
1659radv_bo_list_finish(struct radv_bo_list *bo_list)
1660{
1661	free(bo_list->list.bos);
1662	pthread_mutex_destroy(&bo_list->mutex);
1663}
1664
1665static VkResult radv_bo_list_add(struct radv_device *device,
1666				 struct radeon_winsys_bo *bo)
1667{
1668	struct radv_bo_list *bo_list = &device->bo_list;
1669
1670	if (bo->is_local)
1671		return VK_SUCCESS;
1672
1673	if (unlikely(!device->use_global_bo_list))
1674		return VK_SUCCESS;
1675
1676	pthread_mutex_lock(&bo_list->mutex);
1677	if (bo_list->list.count == bo_list->capacity) {
1678		unsigned capacity = MAX2(4, bo_list->capacity * 2);
1679		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1680
1681		if (!data) {
1682			pthread_mutex_unlock(&bo_list->mutex);
1683			return VK_ERROR_OUT_OF_HOST_MEMORY;
1684		}
1685
1686		bo_list->list.bos = (struct radeon_winsys_bo**)data;
1687		bo_list->capacity = capacity;
1688	}
1689
1690	bo_list->list.bos[bo_list->list.count++] = bo;
1691	pthread_mutex_unlock(&bo_list->mutex);
1692	return VK_SUCCESS;
1693}
1694
1695static void radv_bo_list_remove(struct radv_device *device,
1696				struct radeon_winsys_bo *bo)
1697{
1698	struct radv_bo_list *bo_list = &device->bo_list;
1699
1700	if (bo->is_local)
1701		return;
1702
1703	if (unlikely(!device->use_global_bo_list))
1704		return;
1705
1706	pthread_mutex_lock(&bo_list->mutex);
1707	for(unsigned i = 0; i < bo_list->list.count; ++i) {
1708		if (bo_list->list.bos[i] == bo) {
1709			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1710			--bo_list->list.count;
1711			break;
1712		}
1713	}
1714	pthread_mutex_unlock(&bo_list->mutex);
1715}
1716
1717static void
1718radv_device_init_gs_info(struct radv_device *device)
1719{
1720	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1721						       device->physical_device->rad_info.family);
1722}
1723
1724static int radv_get_device_extension_index(const char *name)
1725{
1726	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1727		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1728			return i;
1729	}
1730	return -1;
1731}
1732
static int
radv_get_int_debug_option(const char *name, int default_value)
{
	/* Parse the environment variable "name" as an integer (any base
	 * accepted by strtol), falling back to default_value when the
	 * variable is unset or contains no leading digits.
	 */
	const char *str = getenv(name);

	if (!str)
		return default_value;

	char *endptr;
	int value = strtol(str, &endptr, 0);

	/* No digits found. */
	if (endptr == str)
		return default_value;

	return value;
}
1754
1755VkResult radv_CreateDevice(
1756	VkPhysicalDevice                            physicalDevice,
1757	const VkDeviceCreateInfo*                   pCreateInfo,
1758	const VkAllocationCallbacks*                pAllocator,
1759	VkDevice*                                   pDevice)
1760{
1761	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1762	VkResult result;
1763	struct radv_device *device;
1764
1765	bool keep_shader_info = false;
1766
1767	/* Check enabled features */
1768	if (pCreateInfo->pEnabledFeatures) {
1769		VkPhysicalDeviceFeatures supported_features;
1770		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1771		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1772		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1773		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1774		for (uint32_t i = 0; i < num_features; i++) {
1775			if (enabled_feature[i] && !supported_feature[i])
1776				return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1777		}
1778	}
1779
1780	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1781			    sizeof(*device), 8,
1782			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1783	if (!device)
1784		return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1785
1786	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1787	device->instance = physical_device->instance;
1788	device->physical_device = physical_device;
1789
1790	device->ws = physical_device->ws;
1791	if (pAllocator)
1792		device->alloc = *pAllocator;
1793	else
1794		device->alloc = physical_device->instance->alloc;
1795
1796	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1797		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1798		int index = radv_get_device_extension_index(ext_name);
1799		if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1800			vk_free(&device->alloc, device);
1801			return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1802		}
1803
1804		device->enabled_extensions.extensions[index] = true;
1805	}
1806
1807	keep_shader_info = device->enabled_extensions.AMD_shader_info;
1808
1809	/* With update after bind we can't attach bo's to the command buffer
1810	 * from the descriptor set anymore, so we have to use a global BO list.
1811	 */
1812	device->use_global_bo_list =
1813		(device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
1814		device->enabled_extensions.EXT_descriptor_indexing ||
1815		device->enabled_extensions.EXT_buffer_device_address;
1816
1817	mtx_init(&device->shader_slab_mutex, mtx_plain);
1818	list_inithead(&device->shader_slabs);
1819
1820	radv_bo_list_init(&device->bo_list);
1821
1822	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1823		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1824		uint32_t qfi = queue_create->queueFamilyIndex;
1825		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1826			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1827
1828		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1829
1830		device->queues[qfi] = vk_alloc(&device->alloc,
1831					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1832		if (!device->queues[qfi]) {
1833			result = VK_ERROR_OUT_OF_HOST_MEMORY;
1834			goto fail;
1835		}
1836
1837		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1838
1839		device->queue_count[qfi] = queue_create->queueCount;
1840
1841		for (unsigned q = 0; q < queue_create->queueCount; q++) {
1842			result = radv_queue_init(device, &device->queues[qfi][q],
1843						 qfi, q, queue_create->flags,
1844						 global_priority);
1845			if (result != VK_SUCCESS)
1846				goto fail;
1847		}
1848	}
1849
1850	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1851			      !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
1852
1853	/* Disabled and not implemented for now. */
1854	device->dfsm_allowed = device->pbb_allowed &&
1855	                       (device->physical_device->rad_info.family == CHIP_RAVEN ||
1856	                        device->physical_device->rad_info.family == CHIP_RAVEN2);
1857
1858#ifdef ANDROID
1859	device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1860#endif
1861
1862	/* The maximum number of scratch waves. Scratch space isn't divided
1863	 * evenly between CUs. The number is only a function of the number of CUs.
1864	 * We can decrease the constant to decrease the scratch buffer size.
1865	 *
1866	 * sctx->scratch_waves must be >= the maximum possible size of
1867	 * 1 threadgroup, so that the hw doesn't hang from being unable
1868	 * to start any.
1869	 *
1870	 * The recommended value is 4 per CU at most. Higher numbers don't
1871	 * bring much benefit, but they still occupy chip resources (think
1872	 * async compute). I've seen ~2% performance difference between 4 and 32.
1873	 */
1874	uint32_t max_threads_per_block = 2048;
1875	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1876				     max_threads_per_block / 64);
1877
1878	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1879
1880	if (device->physical_device->rad_info.chip_class >= CIK) {
1881		/* If the KMD allows it (there is a KMD hw register for it),
1882		 * allow launching waves out-of-order.
1883		 */
1884		device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1885	}
1886
1887	radv_device_init_gs_info(device);
1888
1889	device->tess_offchip_block_dw_size =
1890		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1891	device->has_distributed_tess =
1892		device->physical_device->rad_info.chip_class >= VI &&
1893		device->physical_device->rad_info.max_se >= 2;
1894
1895	if (getenv("RADV_TRACE_FILE")) {
1896		const char *filename = getenv("RADV_TRACE_FILE");
1897
1898		keep_shader_info = true;
1899
1900		if (!radv_init_trace(device))
1901			goto fail;
1902
1903		fprintf(stderr, "*****************************************************************************\n");
1904		fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
1905		fprintf(stderr, "*****************************************************************************\n");
1906
1907		fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1908		radv_dump_enabled_options(device, stderr);
1909	}
1910
1911	device->keep_shader_info = keep_shader_info;
1912
1913	result = radv_device_init_meta(device);
1914	if (result != VK_SUCCESS)
1915		goto fail;
1916
1917	radv_device_init_msaa(device);
1918
1919	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1920		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1921		switch (family) {
1922		case RADV_QUEUE_GENERAL:
1923			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1924			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1925			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1926			break;
1927		case RADV_QUEUE_COMPUTE:
1928			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1929			radeon_emit(device->empty_cs[family], 0);
1930			break;
1931		}
1932		device->ws->cs_finalize(device->empty_cs[family]);
1933	}
1934
1935	if (device->physical_device->rad_info.chip_class >= CIK)
1936		cik_create_gfx_config(device);
1937
1938	VkPipelineCacheCreateInfo ci;
1939	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1940	ci.pNext = NULL;
1941	ci.flags = 0;
1942	ci.pInitialData = NULL;
1943	ci.initialDataSize = 0;
1944	VkPipelineCache pc;
1945	result = radv_CreatePipelineCache(radv_device_to_handle(device),
1946					  &ci, NULL, &pc);
1947	if (result != VK_SUCCESS)
1948		goto fail_meta;
1949
1950	device->mem_cache = radv_pipeline_cache_from_handle(pc);
1951
1952	device->force_aniso =
1953		MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
1954	if (device->force_aniso >= 0) {
1955		fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
1956			1 << util_logbase2(device->force_aniso));
1957	}
1958
1959	*pDevice = radv_device_to_handle(device);
1960	return VK_SUCCESS;
1961
1962fail_meta:
1963	radv_device_finish_meta(device);
1964fail:
1965	radv_bo_list_finish(&device->bo_list);
1966
1967	if (device->trace_bo)
1968		device->ws->buffer_destroy(device->trace_bo);
1969
1970	if (device->gfx_init)
1971		device->ws->buffer_destroy(device->gfx_init);
1972
1973	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1974		for (unsigned q = 0; q < device->queue_count[i]; q++)
1975			radv_queue_finish(&device->queues[i][q]);
1976		if (device->queue_count[i])
1977			vk_free(&device->alloc, device->queues[i]);
1978	}
1979
1980	vk_free(&device->alloc, device);
1981	return result;
1982}
1983
/* Destroys the logical device: queues and their hw contexts, empty
 * command buffers, meta state, the in-memory pipeline cache, shader
 * slabs and the global BO list, then the device allocation itself.
 */
void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	/* Per spec, destroying VK_NULL_HANDLE is a no-op. */
	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	/* Last: the BO list mutex and array, then the device memory. */
	radv_bo_list_finish(&device->bo_list);
	vk_free(&device->alloc, device);
}
2017
2018VkResult radv_EnumerateInstanceLayerProperties(
2019	uint32_t*                                   pPropertyCount,
2020	VkLayerProperties*                          pProperties)
2021{
2022	if (pProperties == NULL) {
2023		*pPropertyCount = 0;
2024		return VK_SUCCESS;
2025	}
2026
2027	/* None supported at this time */
2028	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2029}
2030
2031VkResult radv_EnumerateDeviceLayerProperties(
2032	VkPhysicalDevice                            physicalDevice,
2033	uint32_t*                                   pPropertyCount,
2034	VkLayerProperties*                          pProperties)
2035{
2036	if (pProperties == NULL) {
2037		*pPropertyCount = 0;
2038		return VK_SUCCESS;
2039	}
2040
2041	/* None supported at this time */
2042	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2043}
2044
2045void radv_GetDeviceQueue2(
2046	VkDevice                                    _device,
2047	const VkDeviceQueueInfo2*                   pQueueInfo,
2048	VkQueue*                                    pQueue)
2049{
2050	RADV_FROM_HANDLE(radv_device, device, _device);
2051	struct radv_queue *queue;
2052
2053	queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
2054	if (pQueueInfo->flags != queue->flags) {
2055		/* From the Vulkan 1.1.70 spec:
2056		 *
2057		 * "The queue returned by vkGetDeviceQueue2 must have the same
2058		 * flags value from this structure as that used at device
2059		 * creation time in a VkDeviceQueueCreateInfo instance. If no
2060		 * matching flags were specified at device creation time then
2061		 * pQueue will return VK_NULL_HANDLE."
2062		 */
2063		*pQueue = VK_NULL_HANDLE;
2064		return;
2065	}
2066
2067	*pQueue = radv_queue_to_handle(queue);
2068}
2069
2070void radv_GetDeviceQueue(
2071	VkDevice                                    _device,
2072	uint32_t                                    queueFamilyIndex,
2073	uint32_t                                    queueIndex,
2074	VkQueue*                                    pQueue)
2075{
2076	const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
2077		.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
2078		.queueFamilyIndex = queueFamilyIndex,
2079		.queueIndex = queueIndex
2080	};
2081
2082	radv_GetDeviceQueue2(_device, &info, pQueue);
2083}
2084
/* Writes the buffer descriptors for the geometry/tessellation rings (and
 * optionally the sample position tables) into the mapped descriptor BO.
 *
 * Layout written here, starting at map[4] (the first four dwords are
 * reserved for another descriptor filled in elsewhere — presumably the
 * scratch ring; NOTE(review): confirm against the caller):
 *   desc[0..7]   : ES->GS ring, ES-side and GS-side views
 *   desc[8..15]  : GS->VS ring, VS-side and GS-side views
 *   desc[16..23] : tess factor ring and offchip ring
 *   then 1x/2x/4x/8x sample positions when add_sample_positions is set.
 * Slots for absent rings are simply skipped (left as-is).
 */
static void
fill_geom_tess_rings(struct radv_queue *queue,
		     uint32_t *map,
		     bool add_sample_positions,
		     uint32_t esgs_ring_size,
		     struct radeon_winsys_bo *esgs_ring_bo,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_bo *gsvs_ring_bo,
		     uint32_t tess_factor_ring_size,
		     uint32_t tess_offchip_ring_offset,
		     uint32_t tess_offchip_ring_size,
		     struct radeon_winsys_bo *tess_rings_bo)
{
	uint32_t *desc = &map[4];

	if (esgs_ring_bo) {
		uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);

		/* ES entry for ES->GS ring */
		/* stride 0, num records - size, add tid, swizzle, elsize4,
		   index stride 64 */
		desc[0] = esgs_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(true);
		desc[2] = esgs_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(1) |
			  S_008F0C_INDEX_STRIDE(3) |
			  S_008F0C_ADD_TID_ENABLE(true);

		/* GS entry for ES->GS ring */
		/* stride 0, num records - size, elsize0,
		   index stride 0 */
		desc[4] = esgs_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(false);
		desc[6] = esgs_ring_size;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(0) |
			  S_008F0C_INDEX_STRIDE(0) |
			  S_008F0C_ADD_TID_ENABLE(false);
	}

	desc += 8;

	if (gsvs_ring_bo) {
		uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);

		/* VS entry for GS->VS ring */
		/* stride 0, num records - size, elsize0,
		   index stride 0 */
		desc[0] = gsvs_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(false);
		desc[2] = gsvs_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(0) |
			  S_008F0C_INDEX_STRIDE(0) |
			  S_008F0C_ADD_TID_ENABLE(false);

		/* GS entry for GS->VS ring:
		   stride gsvs_itemsize, num records 64
		   elsize 4, index stride 16 */
		/* shader will patch stride and desc[2] */
		desc[4] = gsvs_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(true);
		desc[6] = 0;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(1) |
			  S_008F0C_INDEX_STRIDE(1) |
			  S_008F0C_ADD_TID_ENABLE(true);
	}

	desc += 8;

	if (tess_rings_bo) {
		uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
		/* The offchip ring lives in the same BO, after the factor ring. */
		uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;

		/* Tess factor ring descriptor. */
		desc[0] = tess_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(false);
		desc[2] = tess_factor_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(0) |
			  S_008F0C_INDEX_STRIDE(0) |
			  S_008F0C_ADD_TID_ENABLE(false);

		/* Offchip tess ring descriptor. */
		desc[4] = tess_offchip_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
			  S_008F04_STRIDE(0) |
			  S_008F04_SWIZZLE_ENABLE(false);
		desc[6] = tess_offchip_ring_size;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(0) |
			  S_008F0C_INDEX_STRIDE(0) |
			  S_008F0C_ADD_TID_ENABLE(false);
	}

	desc += 8;

	if (add_sample_positions) {
		/* add sample positions after all rings */
		memcpy(desc, queue->device->sample_locations_1x, 8);
		desc += 2;
		memcpy(desc, queue->device->sample_locations_2x, 16);
		desc += 4;
		memcpy(desc, queue->device->sample_locations_4x, 32);
		desc += 8;
		memcpy(desc, queue->device->sample_locations_8x, 64);
	}
}
2231
/* Computes the VGT_HS_OFFCHIP_PARAM register value for offchip
 * tessellation, clamping the number of offchip buffers per the chip's
 * hardware limits. The clamped count is returned through
 * max_offchip_buffers_p; the register value is the return value.
 */
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;

	/*
	 * Per RadeonSI:
	 * This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs in SI, CIK, and GFX9 need this.
	 *
	 * Per AMDVLK:
	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
	 * Gfx7 should limit max_offchip_buffers to 508
	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
	 *
	 * Follow AMDVLK here.
	 */
	if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
	    device->physical_device->rad_info.chip_class == CIK ||
	    device->physical_device->rad_info.chip_class == SI)
		--max_offchip_buffers_per_se;

	max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (device->tess_offchip_block_dw_size == 4096) {
		assert(device->physical_device->rad_info.family == CHIP_HAWAII);
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		assert(device->tess_offchip_block_dw_size == 8192);
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	/* Absolute per-chip caps (see the AMDVLK limits quoted above). */
	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		/* On VI+ the OFFCHIP_BUFFERING field is biased by one. */
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		/* SI uses a different register layout (no granularity field). */
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}
2299
2300static void
2301radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2302			struct radeon_winsys_bo *esgs_ring_bo,
2303			uint32_t esgs_ring_size,
2304			struct radeon_winsys_bo *gsvs_ring_bo,
2305			uint32_t gsvs_ring_size)
2306{
2307	if (!esgs_ring_bo && !gsvs_ring_bo)
2308		return;
2309
2310	if (esgs_ring_bo)
2311		radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2312
2313	if (gsvs_ring_bo)
2314		radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2315
2316	if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2317		radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2318		radeon_emit(cs, esgs_ring_size >> 8);
2319		radeon_emit(cs, gsvs_ring_size >> 8);
2320	} else {
2321		radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2322		radeon_emit(cs, esgs_ring_size >> 8);
2323		radeon_emit(cs, gsvs_ring_size >> 8);
2324	}
2325}
2326
/* Programs the tess factor ring registers (base address, size and the
 * offchip parameters) and makes the ring BO resident. No-op when
 * tessellation rings were not allocated.
 */
static void
radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
			   unsigned hs_offchip_param, unsigned tf_ring_size,
			   struct radeon_winsys_bo *tess_rings_bo)
{
	uint64_t tf_va;

	if (!tess_rings_bo)
		return;

	tf_va = radv_buffer_get_va(tess_rings_bo);

	radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);

	if (queue->device->physical_device->rad_info.chip_class >= CIK) {
		/* CIK+: uconfig registers; size is in units of 4 dwords. */
		radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
				       S_030938_SIZE(tf_ring_size / 4));
		radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
				       tf_va >> 8);
		if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
			/* GFX9 supports >40-bit addresses: program the high bits. */
			radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
					       S_030944_BASE_HI(tf_va >> 40));
		}
		radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
				       hs_offchip_param);
	} else {
		/* SI: same programming through the config register space. */
		radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
				      S_008988_SIZE(tf_ring_size / 4));
		radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
				      tf_va >> 8);
		radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
				     hs_offchip_param);
	}
}
2361
2362static void
2363radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2364			  struct radeon_winsys_bo *compute_scratch_bo)
2365{
2366	uint64_t scratch_va;
2367
2368	if (!compute_scratch_bo)
2369		return;
2370
2371	scratch_va = radv_buffer_get_va(compute_scratch_bo);
2372
2373	radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2374
2375	radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2376	radeon_emit(cs, scratch_va);
2377	radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2378			S_008F04_SWIZZLE_ENABLE(1));
2379}
2380
2381static void
2382radv_emit_global_shader_pointers(struct radv_queue *queue,
2383				 struct radeon_cmdbuf *cs,
2384				 struct radeon_winsys_bo *descriptor_bo)
2385{
2386	uint64_t va;
2387
2388	if (!descriptor_bo)
2389		return;
2390
2391	va = radv_buffer_get_va(descriptor_bo);
2392
2393	radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2394
2395	if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2396		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2397				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
2398				   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2399				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2400
2401		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2402			radv_emit_shader_pointer(queue->device, cs, regs[i],
2403						 va, true);
2404		}
2405	} else {
2406		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2407				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
2408				   R_00B230_SPI_SHADER_USER_DATA_GS_0,
2409				   R_00B330_SPI_SHADER_USER_DATA_ES_0,
2410				   R_00B430_SPI_SHADER_USER_DATA_HS_0,
2411				   R_00B530_SPI_SHADER_USER_DATA_LS_0};
2412
2413		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2414			radv_emit_shader_pointer(queue->device, cs, regs[i],
2415						 va, true);
2416		}
2417	}
2418}
2419
2420static void
2421radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2422{
2423	struct radv_device *device = queue->device;
2424
2425	if (device->gfx_init) {
2426		uint64_t va = radv_buffer_get_va(device->gfx_init);
2427
2428		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2429		radeon_emit(cs, va);
2430		radeon_emit(cs, va >> 32);
2431		radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2432
2433		radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2434	} else {
2435		struct radv_physical_device *physical_device = device->physical_device;
2436		si_emit_graphics(physical_device, cs);
2437	}
2438}
2439
2440static void
2441radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2442{
2443	struct radv_physical_device *physical_device = queue->device->physical_device;
2444	si_emit_compute(physical_device, cs);
2445}
2446
/*
 * Build (or reuse) the per-queue preamble command streams.
 *
 * Three preambles are produced:
 *   dest_cs[0] - initial preamble that also performs a full cache flush,
 *   dest_cs[1] - initial preamble with a lighter cache flush,
 *   dest_cs[2] - "continue" preamble used for chained submissions.
 *
 * They bind the graphics/compute scratch buffers, the ESGS/GSVS rings, the
 * tessellation rings and the global descriptor buffer. Requested sizes only
 * ever grow: when everything cached on the queue is already large enough,
 * the cached command streams are returned unchanged.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_DEVICE_MEMORY when any buffer or
 * command-stream allocation fails (all partially-built objects are freed).
 */
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     bool needs_tess_rings,
		     bool needs_sample_positions,
		     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                     struct radeon_cmdbuf **initial_preamble_cs,
                     struct radeon_cmdbuf **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_bo *tess_rings_bo = NULL;
	struct radeon_cmdbuf *dest_cs[3] = {0};
	bool add_tess_rings = false, add_sample_positions = false;
	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
	unsigned max_offchip_buffers;
	unsigned hs_offchip_param = 0;
	unsigned tess_offchip_ring_offset;
	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
	/* Tess rings and sample positions are added at most once per queue. */
	if (!queue->has_tess_rings) {
		if (needs_tess_rings)
			add_tess_rings = true;
	}
	if (!queue->has_sample_positions) {
		if (needs_sample_positions)
			add_sample_positions = true;
	}
	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
						     &max_offchip_buffers);
	/* The offchip ring lives in the same BO, after the factor ring,
	 * aligned to a 64 KiB boundary. */
	tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
	tess_offchip_ring_size = max_offchip_buffers *
		queue->device->tess_offchip_block_dw_size * 4;

	/* Fast path: the cached buffers and preambles already satisfy every
	 * requested size, so hand them back unchanged. */
	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    !add_tess_rings && !add_sample_positions &&
	    queue->initial_preamble_cs) {
		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		/* Nothing to rebind on continuation when no ring/scratch is used. */
		if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	/* Grow (or reuse) each buffer. A freshly created buffer is swapped
	 * into the queue only at the end, after everything has succeeded. */
	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                              scratch_size,
		                                              4096,
		                                              RADEON_DOMAIN_VRAM,
		                                              ring_bo_flags,
		                                              RADV_BO_PRIORITY_SCRATCH);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                      compute_scratch_size,
		                                                      4096,
		                                                      RADEON_DOMAIN_VRAM,
		                                                      ring_bo_flags,
		                                                      RADV_BO_PRIORITY_SCRATCH);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags,
								RADV_BO_PRIORITY_SCRATCH);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags,
								RADV_BO_PRIORITY_SCRATCH);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (add_tess_rings) {
		/* One BO holds both the tess factor ring and the offchip ring. */
		tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
								 tess_offchip_ring_offset + tess_offchip_ring_size,
								 256,
								 RADEON_DOMAIN_VRAM,
								 ring_bo_flags,
								 RADV_BO_PRIORITY_SCRATCH);
		if (!tess_rings_bo)
			goto fail;
	} else {
		tess_rings_bo = queue->tess_rings_bo;
	}

	/* The descriptor BO must be rebuilt whenever any buffer it points at
	 * changed or sample positions are being added. */
	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo ||
	    tess_rings_bo != queue->tess_rings_bo ||
	    add_sample_positions) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo ||
		    tess_rings_bo || add_sample_positions) {
			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
			if (add_sample_positions)
				size += 128; /* 64+32+16+8 = 120 bytes of data, padded to 128 */
		}
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                 size,
		                                                 4096,
		                                                 RADEON_DOMAIN_VRAM,
		                                                 RADEON_FLAG_CPU_ACCESS |
								 RADEON_FLAG_NO_INTERPROCESS_SHARING |
								 RADEON_FLAG_READ_ONLY,
								 RADV_BO_PRIORITY_DESCRIPTOR);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	/* Fill the new descriptor BO with the scratch rsrc words and the
	 * ring/sample-position descriptors. */
	if (descriptor_bo != queue->descriptor_bo) {
		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

		if (scratch_bo) {
			uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
				         S_008F04_SWIZZLE_ENABLE(1);
			map[0] = scratch_va;
			map[1] = rsrc1;
		}

		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
			fill_geom_tess_rings(queue, map, add_sample_positions,
					     esgs_ring_size, esgs_ring_bo,
					     gsvs_ring_size, gsvs_ring_bo,
					     tess_factor_ring_size,
					     tess_offchip_ring_offset,
					     tess_offchip_ring_size,
					     tess_rings_bo);

		queue->device->ws->buffer_unmap(descriptor_bo);
	}

	/* Record the three preamble variants; they only differ in the cache
	 * flush emitted at the end (full flush / light flush / none). */
	for(int i = 0; i < 3; ++i) {
		struct radeon_cmdbuf *cs = NULL;
		cs = queue->device->ws->cs_create(queue->device->ws,
						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
		if (!cs)
			goto fail;

		dest_cs[i] = cs;

		if (scratch_bo)
			radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

		/* Emit initial configuration. */
		switch (queue->queue_family_index) {
		case RADV_QUEUE_GENERAL:
			radv_init_graphics_state(cs, queue);
			break;
		case RADV_QUEUE_COMPUTE:
			radv_init_compute_state(cs, queue);
			break;
		case RADV_QUEUE_TRANSFER:
			break;
		}

		/* Rebinding rings requires the VGT to be idle and flushed. */
		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
		}

		radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
					gsvs_ring_bo, gsvs_ring_size);
		radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
					   tess_factor_ring_size, tess_rings_bo);
		radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
		radv_emit_compute_scratch(queue, cs, compute_scratch_bo);

		/* NOTE(review): queue_family_index is compared against
		 * RING_COMPUTE here but against RADV_QUEUE_COMPUTE one line
		 * below — confirm the two enums coincide for compute. */
		if (i == 0) {
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                         queue->device->physical_device->rad_info.chip_class >= CIK,
			                       (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SMEM_L1 |
			                       RADV_CMD_FLAG_INV_VMEM_L1 |
			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		} else if (i == 1) {
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                         queue->device->physical_device->rad_info.chip_class >= CIK,
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SMEM_L1 |
			                       RADV_CMD_FLAG_INV_VMEM_L1 |
			                       RADV_CMD_FLAG_INV_GLOBAL_L2 |
					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		}

		if (!queue->device->ws->cs_finalize(cs))
			goto fail;
	}

	/* Success from here on: retire the old preambles and publish the new
	 * state on the queue. */
	if (queue->initial_full_flush_preamble_cs)
			queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);

	if (queue->initial_preamble_cs)
			queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
			queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_full_flush_preamble_cs = dest_cs[0];
	queue->initial_preamble_cs = dest_cs[1];
	queue->continue_preamble_cs = dest_cs[2];

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	/* Tess rings are never resized, only created once. */
	if (tess_rings_bo != queue->tess_rings_bo) {
		queue->tess_rings_bo = tess_rings_bo;
		queue->has_tess_rings = true;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	if (add_sample_positions)
		queue->has_sample_positions = true;

	*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	/* Destroy everything created in this call; objects still referenced
	 * by the queue are left untouched. */
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(tess_rings_bo);
	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
2765
/* Collect the kernel syncobj handles and legacy winsys semaphores from a
 * wait or signal semaphore set into *counts, allocating the two arrays.
 * A temporary (imported) syncobj takes precedence over the permanent one.
 * The fence, if given, is appended to the syncobj list. The caller releases
 * the arrays via radv_free_sem_info().
 * NOTE(review): the reset_temp parameter is never read in this function —
 * confirm whether it is still needed by callers.
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure. */
static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
				      struct radv_winsys_sem_counts *counts,
				      int num_sems,
				      const VkSemaphore *sems,
				      VkFence _fence,
				      bool reset_temp)
{
	int syncobj_idx = 0, sem_idx = 0;

	/* Nothing to collect for an empty set. */
	if (num_sems == 0 && _fence == VK_NULL_HANDLE)
		return VK_SUCCESS;

	/* First pass: count how many entries of each kind are needed. */
	for (uint32_t i = 0; i < num_sems; i++) {
		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

		if (sem->temp_syncobj || sem->syncobj)
			counts->syncobj_count++;
		else
			counts->sem_count++;
	}

	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		if (fence->temp_syncobj || fence->syncobj)
			counts->syncobj_count++;
	}

	if (counts->syncobj_count) {
		counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
		if (!counts->syncobj)
			return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	if (counts->sem_count) {
		counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
		if (!counts->sem) {
			/* Undo the syncobj allocation before bailing out. */
			free(counts->syncobj);
			return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	}

	/* Second pass: fill the arrays in the same order as the count pass. */
	for (uint32_t i = 0; i < num_sems; i++) {
		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

		if (sem->temp_syncobj) {
			counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
		}
		else if (sem->syncobj)
			counts->syncobj[syncobj_idx++] = sem->syncobj;
		else {
			assert(sem->sem);
			counts->sem[sem_idx++] = sem->sem;
		}
	}

	/* The fence's syncobj goes last. */
	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		if (fence->temp_syncobj)
			counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
		else if (fence->syncobj)
			counts->syncobj[syncobj_idx++] = fence->syncobj;
	}

	return VK_SUCCESS;
}
2831
2832static void
2833radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2834{
2835	free(sem_info->wait.syncobj);
2836	free(sem_info->wait.sem);
2837	free(sem_info->signal.syncobj);
2838	free(sem_info->signal.sem);
2839}
2840
2841
2842static void radv_free_temp_syncobjs(struct radv_device *device,
2843				    int num_sems,
2844				    const VkSemaphore *sems)
2845{
2846	for (uint32_t i = 0; i < num_sems; i++) {
2847		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2848
2849		if (sem->temp_syncobj) {
2850			device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2851			sem->temp_syncobj = 0;
2852		}
2853	}
2854}
2855
/* Populate sem_info with the wait and signal semaphore sets for one submit.
 * The fence, if any, is folded into the signal set only. On failure of the
 * second allocation, the first set is released before returning the error
 * (the cs_emit_* flags are still written but the caller discards sem_info). */
static VkResult
radv_alloc_sem_info(struct radv_instance *instance,
		    struct radv_winsys_sem_info *sem_info,
		    int num_wait_sems,
		    const VkSemaphore *wait_sems,
		    int num_signal_sems,
		    const VkSemaphore *signal_sems,
		    VkFence fence)
{
	VkResult ret;
	memset(sem_info, 0, sizeof(*sem_info));

	ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
	if (ret)
		return ret;
	ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
	if (ret)
		radv_free_sem_info(sem_info);

	/* caller can override these */
	sem_info->cs_emit_wait = true;
	sem_info->cs_emit_signal = true;
	return ret;
}
2880
/* Signals fence as soon as all the work currently put on queue is done:
 * submits the queue's pre-built empty command stream with the fence
 * attached. Returns VK_ERROR_DEVICE_LOST if the winsys submission fails. */
static VkResult radv_signal_fence(struct radv_queue *queue,
                              struct radv_fence *fence)
{
	int ret;
	VkResult result;
	struct radv_winsys_sem_info sem_info;

	/* No semaphores to wait on or signal — only the fence matters. */
	result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
	                             radv_fence_to_handle(fence));
	if (result != VK_SUCCESS)
		return result;

	ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
	                                   &queue->device->empty_cs[queue->queue_family_index],
	                                   1, NULL, NULL, &sem_info, NULL,
	                                   false, fence->fence);
	radv_free_sem_info(&sem_info);

	if (ret)
		return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);

	return VK_SUCCESS;
}
2905
2906VkResult radv_QueueSubmit(
2907	VkQueue                                     _queue,
2908	uint32_t                                    submitCount,
2909	const VkSubmitInfo*                         pSubmits,
2910	VkFence                                     _fence)
2911{
2912	RADV_FROM_HANDLE(radv_queue, queue, _queue);
2913	RADV_FROM_HANDLE(radv_fence, fence, _fence);
2914	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2915	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2916	int ret;
2917	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
2918	uint32_t scratch_size = 0;
2919	uint32_t compute_scratch_size = 0;
2920	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2921	struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2922	VkResult result;
2923	bool fence_emitted = false;
2924	bool tess_rings_needed = false;
2925	bool sample_positions_needed = false;
2926
2927	/* Do this first so failing to allocate scratch buffers can't result in
2928	 * partially executed submissions. */
2929	for (uint32_t i = 0; i < submitCount; i++) {
2930		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2931			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2932					 pSubmits[i].pCommandBuffers[j]);
2933
2934			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2935			compute_scratch_size = MAX2(compute_scratch_size,
2936			                            cmd_buffer->compute_scratch_size_needed);
2937			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2938			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2939			tess_rings_needed |= cmd_buffer->tess_rings_needed;
2940			sample_positions_needed |= cmd_buffer->sample_positions_needed;
2941		}
2942	}
2943
2944	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2945	                              esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2946				      sample_positions_needed, &initial_flush_preamble_cs,
2947	                              &initial_preamble_cs, &continue_preamble_cs);
2948	if (result != VK_SUCCESS)
2949		return result;
2950
2951	for (uint32_t i = 0; i < submitCount; i++) {
2952		struct radeon_cmdbuf **cs_array;
2953		bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2954		bool can_patch = true;
2955		uint32_t advance;
2956		struct radv_winsys_sem_info sem_info;
2957
2958		result = radv_alloc_sem_info(queue->device->instance,
2959					     &sem_info,
2960					     pSubmits[i].waitSemaphoreCount,
2961					     pSubmits[i].pWaitSemaphores,
2962					     pSubmits[i].signalSemaphoreCount,
2963					     pSubmits[i].pSignalSemaphores,
2964					     _fence);
2965		if (result != VK_SUCCESS)
2966			return result;
2967
2968		if (!pSubmits[i].commandBufferCount) {
2969			if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2970				ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2971								   &queue->device->empty_cs[queue->queue_family_index],
2972								   1, NULL, NULL,
2973								   &sem_info, NULL,
2974								   false, base_fence);
2975				if (ret) {
2976					radv_loge("failed to submit CS %d\n", i);
2977					abort();
2978				}
2979				fence_emitted = true;
2980			}
2981			radv_free_sem_info(&sem_info);
2982			continue;
2983		}
2984
2985		cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
2986					        (pSubmits[i].commandBufferCount));
2987
2988		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2989			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2990					 pSubmits[i].pCommandBuffers[j]);
2991			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2992
2993			cs_array[j] = cmd_buffer->cs;
2994			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2995				can_patch = false;
2996
2997			cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2998		}
2999
3000		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
3001			struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
3002			const struct radv_winsys_bo_list *bo_list = NULL;
3003
3004			advance = MIN2(max_cs_submission,
3005				       pSubmits[i].commandBufferCount - j);
3006
3007			if (queue->device->trace_bo)
3008				*queue->device->trace_id_ptr = 0;
3009
3010			sem_info.cs_emit_wait = j == 0;
3011			sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
3012
3013			if (unlikely(queue->device->use_global_bo_list)) {
3014				pthread_mutex_lock(&queue->device->bo_list.mutex);
3015				bo_list = &queue->device->bo_list.list;
3016			}
3017
3018			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
3019							advance, initial_preamble, continue_preamble_cs,
3020							&sem_info, bo_list,
3021							can_patch, base_fence);
3022
3023			if (unlikely(queue->device->use_global_bo_list))
3024				pthread_mutex_unlock(&queue->device->bo_list.mutex);
3025
3026			if (ret) {
3027				radv_loge("failed to submit CS %d\n", i);
3028				abort();
3029			}
3030			fence_emitted = true;
3031			if (queue->device->trace_bo) {
3032				radv_check_gpu_hangs(queue, cs_array[j]);
3033			}
3034		}
3035
3036		radv_free_temp_syncobjs(queue->device,
3037					pSubmits[i].waitSemaphoreCount,
3038					pSubmits[i].pWaitSemaphores);
3039		radv_free_sem_info(&sem_info);
3040		free(cs_array);
3041	}
3042
3043	if (fence) {
3044		if (!fence_emitted) {
3045			result = radv_signal_fence(queue, fence);
3046			if (result != VK_SUCCESS)
3047				return result;
3048		}
3049		fence->submitted = true;
3050	}
3051
3052	return VK_SUCCESS;
3053}
3054
/* vkQueueWaitIdle: block until all work previously submitted to this queue
 * has completed, by asking the winsys to wait on the queue's hardware ring. */
VkResult radv_QueueWaitIdle(
	VkQueue                                     _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
	                                 radv_queue_family_to_ring(queue->queue_family_index),
	                                 queue->queue_idx);
	return VK_SUCCESS;
}
3065
3066VkResult radv_DeviceWaitIdle(
3067	VkDevice                                    _device)
3068{
3069	RADV_FROM_HANDLE(radv_device, device, _device);
3070
3071	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3072		for (unsigned q = 0; q < device->queue_count[i]; q++) {
3073			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
3074		}
3075	}
3076	return VK_SUCCESS;
3077}
3078
/* vkEnumerateInstanceExtensionProperties: report the instance extensions
 * this driver supports, using the outarray helpers to implement the
 * standard two-call (count / fill) Vulkan enumeration pattern. */
VkResult radv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

	for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
		if (radv_supported_instance_extensions.extensions[i]) {
			vk_outarray_append(&out, prop) {
				*prop = radv_instance_extensions[i];
			}
		}
	}

	/* VK_INCOMPLETE when the caller's array was too small. */
	return vk_outarray_status(&out);
}
3096
/* vkEnumerateDeviceExtensionProperties: report the device extensions
 * supported by this physical device, via the two-call outarray pattern. */
VkResult radv_EnumerateDeviceExtensionProperties(
    VkPhysicalDevice                            physicalDevice,
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

	for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
		if (device->supported_extensions.extensions[i]) {
			vk_outarray_append(&out, prop) {
				*prop = radv_device_extensions[i];
			}
		}
	}

	/* VK_INCOMPLETE when the caller's array was too small. */
	return vk_outarray_status(&out);
}
3116
/* vkGetInstanceProcAddr: resolve an entrypoint by name, gated on the
 * instance's API version and enabled instance extensions. A NULL instance
 * (for pre-instance entrypoints) passes version 0 and no extensions. */
PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance                                  _instance,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	return radv_lookup_entrypoint_checked(pName,
	                                      instance ? instance->apiVersion : 0,
	                                      instance ? &instance->enabled_extensions : NULL,
	                                      NULL);
}
3128
/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName);

/* ICD entrypoint: simply forwards to radv_GetInstanceProcAddr. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}
3144
/* vkGetDeviceProcAddr: resolve a device-level entrypoint by name, gated on
 * the owning instance's API version plus both instance- and device-level
 * enabled extensions. */
PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice                                    _device,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	return radv_lookup_entrypoint_checked(pName,
	                                      device->instance->apiVersion,
	                                      &device->instance->enabled_extensions,
	                                      &device->enabled_extensions);
}
3156
3157bool radv_get_memory_fd(struct radv_device *device,
3158			struct radv_device_memory *memory,
3159			int *pFD)
3160{
3161	struct radeon_bo_metadata metadata;
3162
3163	if (memory->image) {
3164		radv_init_metadata(device, memory->image, &metadata);
3165		device->ws->buffer_set_metadata(memory->bo, &metadata);
3166	}
3167
3168	return device->ws->buffer_get_fd(device->ws, memory->bo,
3169					 pFD);
3170}
3171
/* Core of vkAllocateMemory. Four allocation paths, in priority order:
 *   1. import from an fd (opaque fd / dma-buf),
 *   2. wrap a host pointer (VK_EXT_external_memory_host),
 *   3. (fall-through) create a fresh BO in GTT or VRAM.
 * A zero-size request legally yields VK_NULL_HANDLE. On success the BO is
 * registered in the device's global BO list. */
static VkResult radv_alloc_memory(struct radv_device *device,
				  const VkMemoryAllocateInfo*     pAllocateInfo,
				  const VkAllocationCallbacks*    pAllocator,
				  VkDeviceMemory*                 pMem)
{
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	/* Translate the Vulkan memory type index into the driver's type enum. */
	enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];

	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	/* Pull the optional extension structs off the pNext chain. */
	const VkImportMemoryFdInfoKHR *import_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
	const VkMemoryDedicatedAllocateInfo *dedicate_info =
		vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
	const VkExportMemoryAllocateInfo *export_info =
		vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
	const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

	const struct wsi_memory_allocate_info *wsi_info =
		vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	/* WSI (swapchain) allocations need implicit synchronization with the
	 * window system. */
	if (wsi_info && wsi_info->implicit_sync)
		flags |= RADEON_FLAG_IMPLICIT_SYNC;

	/* Remember the dedicated image/buffer, if any; used e.g. when
	 * exporting metadata in radv_get_memory_fd(). */
	if (dedicate_info) {
		mem->image = radv_image_from_handle(dedicate_info->image);
		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
	} else {
		mem->image = NULL;
		mem->buffer = NULL;
	}

	/* VK_EXT_memory_priority: map the app's [0,1] float priority onto the
	 * winsys priority range; default is the midpoint (0.5). */
	float priority_float = 0.5;
	const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
		vk_find_struct_const(pAllocateInfo->pNext,
				     MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
	if (priority_ext)
		priority_float = priority_ext->priority;

	unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
	                         (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));

	mem->user_ptr = NULL;

	if (import_info) {
		/* Path 1: adopt an existing BO from a file descriptor. */
		assert(import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
		       import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
						     priority, NULL, NULL);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			/* Ownership of the fd transfers on successful import. */
			close(import_info->fd);
		}
	} else if (host_ptr_info) {
		/* Path 2: wrap application-provided host memory. */
		assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
		assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
		mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
		                                      pAllocateInfo->allocationSize,
		                                      priority);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			mem->user_ptr = host_ptr_info->pHostPointer;
		}
	} else {
		/* Path 3: allocate a fresh BO sized up to a 4 KiB multiple. */
		uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
		if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
		    mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
			domain = RADEON_DOMAIN_GTT;
		else
			domain = RADEON_DOMAIN_VRAM;

		/* Pure VRAM is not CPU-visible; all other types are. */
		if (mem_type_index == RADV_MEM_TYPE_VRAM)
			flags |= RADEON_FLAG_NO_CPU_ACCESS;
		else
			flags |= RADEON_FLAG_CPU_ACCESS;

		if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
			flags |= RADEON_FLAG_GTT_WC;

		/* Memory that can never be shared across processes can use
		 * cheaper, process-local kernel bookkeeping. */
		if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
			flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
			if (device->use_global_bo_list) {
				flags |= RADEON_FLAG_PREFER_LOCAL_BO;
			}
		}

		mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
		                                    domain, flags, priority);

		if (!mem->bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
		/* NOTE(review): type_index is only set on this path — confirm
		 * the import/host-pointer paths never read it. */
		mem->type_index = mem_type_index;
	}

	result = radv_bo_list_add(device, mem->bo);
	if (result != VK_SUCCESS)
		goto fail_bo;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail_bo:
	device->ws->buffer_destroy(mem->bo);
fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}
3304
3305VkResult radv_AllocateMemory(
3306	VkDevice                                    _device,
3307	const VkMemoryAllocateInfo*                 pAllocateInfo,
3308	const VkAllocationCallbacks*                pAllocator,
3309	VkDeviceMemory*                             pMem)
3310{
3311	RADV_FROM_HANDLE(radv_device, device, _device);
3312	return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3313}
3314
3315void radv_FreeMemory(
3316	VkDevice                                    _device,
3317	VkDeviceMemory                              _mem,
3318	const VkAllocationCallbacks*                pAllocator)
3319{
3320	RADV_FROM_HANDLE(radv_device, device, _device);
3321	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3322
3323	if (mem == NULL)
3324		return;
3325
3326	radv_bo_list_remove(device, mem->bo);
3327	device->ws->buffer_destroy(mem->bo);
3328	mem->bo = NULL;
3329
3330	vk_free2(&device->alloc, pAllocator, mem);
3331}
3332
3333VkResult radv_MapMemory(
3334	VkDevice                                    _device,
3335	VkDeviceMemory                              _memory,
3336	VkDeviceSize                                offset,
3337	VkDeviceSize                                size,
3338	VkMemoryMapFlags                            flags,
3339	void**                                      ppData)
3340{
3341	RADV_FROM_HANDLE(radv_device, device, _device);
3342	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3343
3344	if (mem == NULL) {
3345		*ppData = NULL;
3346		return VK_SUCCESS;
3347	}
3348
3349	if (mem->user_ptr)
3350		*ppData = mem->user_ptr;
3351	else
3352		*ppData = device->ws->buffer_map(mem->bo);
3353
3354	if (*ppData) {
3355		*ppData += offset;
3356		return VK_SUCCESS;
3357	}
3358
3359	return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3360}
3361
3362void radv_UnmapMemory(
3363	VkDevice                                    _device,
3364	VkDeviceMemory                              _memory)
3365{
3366	RADV_FROM_HANDLE(radv_device, device, _device);
3367	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3368
3369	if (mem == NULL)
3370		return;
3371
3372	if (mem->user_ptr == NULL)
3373		device->ws->buffer_unmap(mem->bo);
3374}
3375
/* Flushing mapped ranges is a no-op here.  NOTE(review): this presumably
 * relies on all host-visible memory types being host-coherent — confirm
 * against the memory-type setup if non-coherent types are ever added. */
VkResult radv_FlushMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	return VK_SUCCESS;
}
3383
/* Invalidating mapped ranges is a no-op here, mirroring
 * radv_FlushMappedMemoryRanges().  NOTE(review): presumably safe because
 * host-visible memory is coherent — verify. */
VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	return VK_SUCCESS;
}
3391
3392void radv_GetBufferMemoryRequirements(
3393	VkDevice                                    _device,
3394	VkBuffer                                    _buffer,
3395	VkMemoryRequirements*                       pMemoryRequirements)
3396{
3397	RADV_FROM_HANDLE(radv_device, device, _device);
3398	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3399
3400	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3401
3402	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3403		pMemoryRequirements->alignment = 4096;
3404	else
3405		pMemoryRequirements->alignment = 16;
3406
3407	pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3408}
3409
3410void radv_GetBufferMemoryRequirements2(
3411	VkDevice                                     device,
3412	const VkBufferMemoryRequirementsInfo2       *pInfo,
3413	VkMemoryRequirements2                       *pMemoryRequirements)
3414{
3415	radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3416                                        &pMemoryRequirements->memoryRequirements);
3417	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3418	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3419		switch (ext->sType) {
3420		case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3421			VkMemoryDedicatedRequirements *req =
3422			               (VkMemoryDedicatedRequirements *) ext;
3423			req->requiresDedicatedAllocation = buffer->shareable;
3424			req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3425			break;
3426		}
3427		default:
3428			break;
3429		}
3430	}
3431}
3432
3433void radv_GetImageMemoryRequirements(
3434	VkDevice                                    _device,
3435	VkImage                                     _image,
3436	VkMemoryRequirements*                       pMemoryRequirements)
3437{
3438	RADV_FROM_HANDLE(radv_device, device, _device);
3439	RADV_FROM_HANDLE(radv_image, image, _image);
3440
3441	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3442
3443	pMemoryRequirements->size = image->size;
3444	pMemoryRequirements->alignment = image->alignment;
3445}
3446
3447void radv_GetImageMemoryRequirements2(
3448	VkDevice                                    device,
3449	const VkImageMemoryRequirementsInfo2       *pInfo,
3450	VkMemoryRequirements2                      *pMemoryRequirements)
3451{
3452	radv_GetImageMemoryRequirements(device, pInfo->image,
3453                                        &pMemoryRequirements->memoryRequirements);
3454
3455	RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3456
3457	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3458		switch (ext->sType) {
3459		case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3460			VkMemoryDedicatedRequirements *req =
3461			               (VkMemoryDedicatedRequirements *) ext;
3462			req->requiresDedicatedAllocation = image->shareable;
3463			req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3464			break;
3465		}
3466		default:
3467			break;
3468		}
3469	}
3470}
3471
/* Sparse image memory requirements are not implemented; stub() flags the
 * missing functionality at runtime. */
void radv_GetImageSparseMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     image,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
	stub();
}
3480
/* Same as radv_GetImageSparseMemoryRequirements: not implemented yet. */
void radv_GetImageSparseMemoryRequirements2(
	VkDevice                                    device,
	const VkImageSparseMemoryRequirementsInfo2 *pInfo,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements2           *pSparseMemoryRequirements)
{
	stub();
}
3489
/* Always reports zero committed bytes.  NOTE(review): presumably because
 * lazily-allocated memory is not supported — confirm. */
void radv_GetDeviceMemoryCommitment(
	VkDevice                                    device,
	VkDeviceMemory                              memory,
	VkDeviceSize*                               pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}
3497
3498VkResult radv_BindBufferMemory2(VkDevice device,
3499                                uint32_t bindInfoCount,
3500                                const VkBindBufferMemoryInfo *pBindInfos)
3501{
3502	for (uint32_t i = 0; i < bindInfoCount; ++i) {
3503		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3504		RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3505
3506		if (mem) {
3507			buffer->bo = mem->bo;
3508			buffer->offset = pBindInfos[i].memoryOffset;
3509		} else {
3510			buffer->bo = NULL;
3511		}
3512	}
3513	return VK_SUCCESS;
3514}
3515
3516VkResult radv_BindBufferMemory(
3517	VkDevice                                    device,
3518	VkBuffer                                    buffer,
3519	VkDeviceMemory                              memory,
3520	VkDeviceSize                                memoryOffset)
3521{
3522	const VkBindBufferMemoryInfo info = {
3523		.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3524		.buffer = buffer,
3525		.memory = memory,
3526		.memoryOffset = memoryOffset
3527	};
3528
3529	return radv_BindBufferMemory2(device, 1, &info);
3530}
3531
3532VkResult radv_BindImageMemory2(VkDevice device,
3533                               uint32_t bindInfoCount,
3534                               const VkBindImageMemoryInfo *pBindInfos)
3535{
3536	for (uint32_t i = 0; i < bindInfoCount; ++i) {
3537		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3538		RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3539
3540		if (mem) {
3541			image->bo = mem->bo;
3542			image->offset = pBindInfos[i].memoryOffset;
3543		} else {
3544			image->bo = NULL;
3545			image->offset = 0;
3546		}
3547	}
3548	return VK_SUCCESS;
3549}
3550
3551
3552VkResult radv_BindImageMemory(
3553	VkDevice                                    device,
3554	VkImage                                     image,
3555	VkDeviceMemory                              memory,
3556	VkDeviceSize                                memoryOffset)
3557{
3558	const VkBindImageMemoryInfo info = {
3559		.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3560		.image = image,
3561		.memory = memory,
3562		.memoryOffset = memoryOffset
3563	};
3564
3565	return radv_BindImageMemory2(device, 1, &info);
3566}
3567
3568
3569static void
3570radv_sparse_buffer_bind_memory(struct radv_device *device,
3571                               const VkSparseBufferMemoryBindInfo *bind)
3572{
3573	RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3574
3575	for (uint32_t i = 0; i < bind->bindCount; ++i) {
3576		struct radv_device_memory *mem = NULL;
3577
3578		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3579			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3580
3581		device->ws->buffer_virtual_bind(buffer->bo,
3582		                                bind->pBinds[i].resourceOffset,
3583		                                bind->pBinds[i].size,
3584		                                mem ? mem->bo : NULL,
3585		                                bind->pBinds[i].memoryOffset);
3586	}
3587}
3588
3589static void
3590radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3591                                     const VkSparseImageOpaqueMemoryBindInfo *bind)
3592{
3593	RADV_FROM_HANDLE(radv_image, image, bind->image);
3594
3595	for (uint32_t i = 0; i < bind->bindCount; ++i) {
3596		struct radv_device_memory *mem = NULL;
3597
3598		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3599			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3600
3601		device->ws->buffer_virtual_bind(image->bo,
3602		                                bind->pBinds[i].resourceOffset,
3603		                                bind->pBinds[i].size,
3604		                                mem ? mem->bo : NULL,
3605		                                bind->pBinds[i].memoryOffset);
3606	}
3607}
3608
3609 VkResult radv_QueueBindSparse(
3610	VkQueue                                     _queue,
3611	uint32_t                                    bindInfoCount,
3612	const VkBindSparseInfo*                     pBindInfo,
3613	VkFence                                     _fence)
3614{
3615	RADV_FROM_HANDLE(radv_fence, fence, _fence);
3616	RADV_FROM_HANDLE(radv_queue, queue, _queue);
3617	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3618	bool fence_emitted = false;
3619	VkResult result;
3620	int ret;
3621
3622	for (uint32_t i = 0; i < bindInfoCount; ++i) {
3623		struct radv_winsys_sem_info sem_info;
3624		for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3625			radv_sparse_buffer_bind_memory(queue->device,
3626			                               pBindInfo[i].pBufferBinds + j);
3627		}
3628
3629		for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3630			radv_sparse_image_opaque_bind_memory(queue->device,
3631			                                     pBindInfo[i].pImageOpaqueBinds + j);
3632		}
3633
3634		VkResult result;
3635		result = radv_alloc_sem_info(queue->device->instance,
3636					     &sem_info,
3637					     pBindInfo[i].waitSemaphoreCount,
3638					     pBindInfo[i].pWaitSemaphores,
3639					     pBindInfo[i].signalSemaphoreCount,
3640					     pBindInfo[i].pSignalSemaphores,
3641					     _fence);
3642		if (result != VK_SUCCESS)
3643			return result;
3644
3645		if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3646			ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3647							  &queue->device->empty_cs[queue->queue_family_index],
3648							  1, NULL, NULL,
3649							  &sem_info, NULL,
3650							  false, base_fence);
3651			if (ret) {
3652				radv_loge("failed to submit CS %d\n", i);
3653				abort();
3654			}
3655
3656			fence_emitted = true;
3657			if (fence)
3658				fence->submitted = true;
3659		}
3660
3661		radv_free_sem_info(&sem_info);
3662
3663	}
3664
3665	if (fence) {
3666		if (!fence_emitted) {
3667			result = radv_signal_fence(queue, fence);
3668			if (result != VK_SUCCESS)
3669				return result;
3670		}
3671		fence->submitted = true;
3672	}
3673
3674	return VK_SUCCESS;
3675}
3676
3677VkResult radv_CreateFence(
3678	VkDevice                                    _device,
3679	const VkFenceCreateInfo*                    pCreateInfo,
3680	const VkAllocationCallbacks*                pAllocator,
3681	VkFence*                                    pFence)
3682{
3683	RADV_FROM_HANDLE(radv_device, device, _device);
3684	const VkExportFenceCreateInfo *export =
3685		vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
3686	VkExternalFenceHandleTypeFlags handleTypes =
3687		export ? export->handleTypes : 0;
3688
3689	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3690					       sizeof(*fence), 8,
3691					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3692
3693	if (!fence)
3694		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3695
3696	fence->fence_wsi = NULL;
3697	fence->submitted = false;
3698	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3699	fence->temp_syncobj = 0;
3700	if (device->always_use_syncobj || handleTypes) {
3701		int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3702		if (ret) {
3703			vk_free2(&device->alloc, pAllocator, fence);
3704			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3705		}
3706		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3707			device->ws->signal_syncobj(device->ws, fence->syncobj);
3708		}
3709		fence->fence = NULL;
3710	} else {
3711		fence->fence = device->ws->create_fence();
3712		if (!fence->fence) {
3713			vk_free2(&device->alloc, pAllocator, fence);
3714			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3715		}
3716		fence->syncobj = 0;
3717	}
3718
3719	*pFence = radv_fence_to_handle(fence);
3720
3721	return VK_SUCCESS;
3722}
3723
3724void radv_DestroyFence(
3725	VkDevice                                    _device,
3726	VkFence                                     _fence,
3727	const VkAllocationCallbacks*                pAllocator)
3728{
3729	RADV_FROM_HANDLE(radv_device, device, _device);
3730	RADV_FROM_HANDLE(radv_fence, fence, _fence);
3731
3732	if (!fence)
3733		return;
3734
3735	if (fence->temp_syncobj)
3736		device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3737	if (fence->syncobj)
3738		device->ws->destroy_syncobj(device->ws, fence->syncobj);
3739	if (fence->fence)
3740		device->ws->destroy_fence(fence->fence);
3741	if (fence->fence_wsi)
3742		fence->fence_wsi->destroy(fence->fence_wsi);
3743	vk_free2(&device->alloc, pAllocator, fence);
3744}
3745
3746
3747uint64_t radv_get_current_time(void)
3748{
3749	struct timespec tv;
3750	clock_gettime(CLOCK_MONOTONIC, &tv);
3751	return tv.tv_nsec + tv.tv_sec*1000000000ull;
3752}
3753
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	/* Convert a relative timeout (ns) into an absolute deadline,
	 * saturating at UINT64_MAX instead of wrapping around. */
	uint64_t now = radv_get_current_time();

	if (timeout > UINT64_MAX - now)
		return UINT64_MAX;

	return now + timeout;
}
3762
3763
3764static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3765{
3766	for (uint32_t i = 0; i < fenceCount; ++i) {
3767		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3768		if (fence->fence == NULL || fence->syncobj ||
3769		    fence->temp_syncobj ||
3770		    (!fence->signalled && !fence->submitted))
3771			return false;
3772	}
3773	return true;
3774}
3775
3776static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3777{
3778	for (uint32_t i = 0; i < fenceCount; ++i) {
3779		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3780		if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3781			return false;
3782	}
3783	return true;
3784}
3785
/* vkWaitForFences: wait until all (or, with waitAll == false, any) of the
 * given fences are signalled, or until the relative `timeout` in
 * nanoseconds expires.  Returns VK_SUCCESS, VK_TIMEOUT, or an error from
 * allocation / WSI waits. */
VkResult radv_WaitForFences(
	VkDevice                                    _device,
	uint32_t                                    fenceCount,
	const VkFence*                              pFences,
	VkBool32                                    waitAll,
	uint64_t                                    timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	/* Convert the relative timeout into an absolute deadline so the
	 * same value can be reused across multiple waits below. */
	timeout = radv_get_absolute_timeout(timeout);

	/* Fast path: every fence is syncobj-backed, so the kernel can wait
	 * on all of them in a single call. */
	if (device->always_use_syncobj &&
	    radv_all_fences_syncobj(fenceCount, pFences))
	{
		uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
		if (!handles)
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

		for (uint32_t i = 0; i < fenceCount; ++i) {
			RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
			/* Temporary (imported) payloads take precedence. */
			handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
		}

		bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);

		free(handles);
		return success ? VK_SUCCESS : VK_TIMEOUT;
	}

	if (!waitAll && fenceCount > 1) {
		/* Not doing this by default for waitAll, due to needing to allocate twice. */
		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
			uint32_t wait_count = 0;
			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
			if (!fences)
				return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

			for (uint32_t i = 0; i < fenceCount; ++i) {
				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

				/* An already-signalled fence satisfies a
				 * "wait any" request immediately. */
				if (fence->signalled) {
					free(fences);
					return VK_SUCCESS;
				}

				fences[wait_count++] = fence->fence;
			}

			/* The winsys takes a relative timeout, hence the
			 * subtraction from the absolute deadline. */
			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
							       waitAll, timeout - radv_get_current_time());

			free(fences);
			return success ? VK_SUCCESS : VK_TIMEOUT;
		}

		/* Fallback: busy-poll every fence until one signals or the
		 * deadline passes. */
		while(radv_get_current_time() <= timeout) {
			for (uint32_t i = 0; i < fenceCount; ++i) {
				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
					return VK_SUCCESS;
			}
		}
		return VK_TIMEOUT;
	}

	/* waitAll path: wait for each fence in turn against the same
	 * absolute deadline. */
	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->temp_syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->signalled)
			continue;

		if (fence->fence) {
			if (!fence->submitted) {
				/* The fence may be submitted from another
				 * thread; spin until submitted or deadline. */
				while(radv_get_current_time() <= timeout &&
				      !fence->submitted)
					/* Do nothing */;

				if (!fence->submitted)
					return VK_TIMEOUT;

				/* Recheck as it may have been set by
				 * submitting operations. */

				if (fence->signalled)
					continue;
			}

			expired = device->ws->fence_wait(device->ws,
							 fence->fence,
							 true, timeout);
			if (!expired)
				return VK_TIMEOUT;
		}

		if (fence->fence_wsi) {
			VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
			if (result != VK_SUCCESS)
				return result;
		}

		/* Cache the result so later waits return immediately. */
		fence->signalled = true;
	}

	return VK_SUCCESS;
}
3902
3903VkResult radv_ResetFences(VkDevice _device,
3904			  uint32_t fenceCount,
3905			  const VkFence *pFences)
3906{
3907	RADV_FROM_HANDLE(radv_device, device, _device);
3908
3909	for (unsigned i = 0; i < fenceCount; ++i) {
3910		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3911		fence->submitted = fence->signalled = false;
3912
3913		/* Per spec, we first restore the permanent payload, and then reset, so
3914		 * having a temp syncobj should not skip resetting the permanent syncobj. */
3915		if (fence->temp_syncobj) {
3916			device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3917			fence->temp_syncobj = 0;
3918		}
3919
3920		if (fence->syncobj) {
3921			device->ws->reset_syncobj(device->ws, fence->syncobj);
3922		}
3923	}
3924
3925	return VK_SUCCESS;
3926}
3927
3928VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3929{
3930	RADV_FROM_HANDLE(radv_device, device, _device);
3931	RADV_FROM_HANDLE(radv_fence, fence, _fence);
3932
3933	if (fence->temp_syncobj) {
3934			bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3935			return success ? VK_SUCCESS : VK_NOT_READY;
3936	}
3937
3938	if (fence->syncobj) {
3939			bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3940			return success ? VK_SUCCESS : VK_NOT_READY;
3941	}
3942
3943	if (fence->signalled)
3944		return VK_SUCCESS;
3945	if (!fence->submitted)
3946		return VK_NOT_READY;
3947	if (fence->fence) {
3948		if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3949			return VK_NOT_READY;
3950	}
3951	if (fence->fence_wsi) {
3952		VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
3953
3954		if (result != VK_SUCCESS) {
3955			if (result == VK_TIMEOUT)
3956				return VK_NOT_READY;
3957			return result;
3958		}
3959	}
3960	return VK_SUCCESS;
3961}
3962
3963
3964// Queue semaphore functions
3965
3966VkResult radv_CreateSemaphore(
3967	VkDevice                                    _device,
3968	const VkSemaphoreCreateInfo*                pCreateInfo,
3969	const VkAllocationCallbacks*                pAllocator,
3970	VkSemaphore*                                pSemaphore)
3971{
3972	RADV_FROM_HANDLE(radv_device, device, _device);
3973	const VkExportSemaphoreCreateInfo *export =
3974		vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
3975	VkExternalSemaphoreHandleTypeFlags handleTypes =
3976		export ? export->handleTypes : 0;
3977
3978	struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3979					       sizeof(*sem), 8,
3980					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3981	if (!sem)
3982		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3983
3984	sem->temp_syncobj = 0;
3985	/* create a syncobject if we are going to export this semaphore */
3986	if (device->always_use_syncobj || handleTypes) {
3987		assert (device->physical_device->rad_info.has_syncobj);
3988		int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3989		if (ret) {
3990			vk_free2(&device->alloc, pAllocator, sem);
3991			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3992		}
3993		sem->sem = NULL;
3994	} else {
3995		sem->sem = device->ws->create_sem(device->ws);
3996		if (!sem->sem) {
3997			vk_free2(&device->alloc, pAllocator, sem);
3998			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3999		}
4000		sem->syncobj = 0;
4001	}
4002
4003	*pSemaphore = radv_semaphore_to_handle(sem);
4004	return VK_SUCCESS;
4005}
4006
4007void radv_DestroySemaphore(
4008	VkDevice                                    _device,
4009	VkSemaphore                                 _semaphore,
4010	const VkAllocationCallbacks*                pAllocator)
4011{
4012	RADV_FROM_HANDLE(radv_device, device, _device);
4013	RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
4014	if (!_semaphore)
4015		return;
4016
4017	if (sem->syncobj)
4018		device->ws->destroy_syncobj(device->ws, sem->syncobj);
4019	else
4020		device->ws->destroy_sem(sem->sem);
4021	vk_free2(&device->alloc, pAllocator, sem);
4022}
4023
4024VkResult radv_CreateEvent(
4025	VkDevice                                    _device,
4026	const VkEventCreateInfo*                    pCreateInfo,
4027	const VkAllocationCallbacks*                pAllocator,
4028	VkEvent*                                    pEvent)
4029{
4030	RADV_FROM_HANDLE(radv_device, device, _device);
4031	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
4032					       sizeof(*event), 8,
4033					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4034
4035	if (!event)
4036		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4037
4038	event->bo = device->ws->buffer_create(device->ws, 8, 8,
4039					      RADEON_DOMAIN_GTT,
4040					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
4041					      RADV_BO_PRIORITY_FENCE);
4042	if (!event->bo) {
4043		vk_free2(&device->alloc, pAllocator, event);
4044		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4045	}
4046
4047	event->map = (uint64_t*)device->ws->buffer_map(event->bo);
4048
4049	*pEvent = radv_event_to_handle(event);
4050
4051	return VK_SUCCESS;
4052}
4053
4054void radv_DestroyEvent(
4055	VkDevice                                    _device,
4056	VkEvent                                     _event,
4057	const VkAllocationCallbacks*                pAllocator)
4058{
4059	RADV_FROM_HANDLE(radv_device, device, _device);
4060	RADV_FROM_HANDLE(radv_event, event, _event);
4061
4062	if (!event)
4063		return;
4064	device->ws->buffer_destroy(event->bo);
4065	vk_free2(&device->alloc, pAllocator, event);
4066}
4067
4068VkResult radv_GetEventStatus(
4069	VkDevice                                    _device,
4070	VkEvent                                     _event)
4071{
4072	RADV_FROM_HANDLE(radv_event, event, _event);
4073
4074	if (*event->map == 1)
4075		return VK_EVENT_SET;
4076	return VK_EVENT_RESET;
4077}
4078
4079VkResult radv_SetEvent(
4080	VkDevice                                    _device,
4081	VkEvent                                     _event)
4082{
4083	RADV_FROM_HANDLE(radv_event, event, _event);
4084	*event->map = 1;
4085
4086	return VK_SUCCESS;
4087}
4088
4089VkResult radv_ResetEvent(
4090    VkDevice                                    _device,
4091    VkEvent                                     _event)
4092{
4093	RADV_FROM_HANDLE(radv_event, event, _event);
4094	*event->map = 0;
4095
4096	return VK_SUCCESS;
4097}
4098
4099VkResult radv_CreateBuffer(
4100	VkDevice                                    _device,
4101	const VkBufferCreateInfo*                   pCreateInfo,
4102	const VkAllocationCallbacks*                pAllocator,
4103	VkBuffer*                                   pBuffer)
4104{
4105	RADV_FROM_HANDLE(radv_device, device, _device);
4106	struct radv_buffer *buffer;
4107
4108	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4109
4110	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
4111			     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4112	if (buffer == NULL)
4113		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4114
4115	buffer->size = pCreateInfo->size;
4116	buffer->usage = pCreateInfo->usage;
4117	buffer->bo = NULL;
4118	buffer->offset = 0;
4119	buffer->flags = pCreateInfo->flags;
4120
4121	buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
4122						 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
4123
4124	if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
4125		buffer->bo = device->ws->buffer_create(device->ws,
4126		                                       align64(buffer->size, 4096),
4127		                                       4096, 0, RADEON_FLAG_VIRTUAL,
4128		                                       RADV_BO_PRIORITY_VIRTUAL);
4129		if (!buffer->bo) {
4130			vk_free2(&device->alloc, pAllocator, buffer);
4131			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4132		}
4133	}
4134
4135	*pBuffer = radv_buffer_to_handle(buffer);
4136
4137	return VK_SUCCESS;
4138}
4139
4140void radv_DestroyBuffer(
4141	VkDevice                                    _device,
4142	VkBuffer                                    _buffer,
4143	const VkAllocationCallbacks*                pAllocator)
4144{
4145	RADV_FROM_HANDLE(radv_device, device, _device);
4146	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
4147
4148	if (!buffer)
4149		return;
4150
4151	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
4152		device->ws->buffer_destroy(buffer->bo);
4153
4154	vk_free2(&device->alloc, pAllocator, buffer);
4155}
4156
4157VkDeviceAddress radv_GetBufferDeviceAddressEXT(
4158	VkDevice                                    device,
4159	const VkBufferDeviceAddressInfoEXT*         pInfo)
4160{
4161	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
4162	return radv_buffer_get_va(buffer->bo) + buffer->offset;
4163}
4164
4165
4166static inline unsigned
4167si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
4168{
4169	if (stencil)
4170		return plane->surface.u.legacy.stencil_tiling_index[level];
4171	else
4172		return plane->surface.u.legacy.tiling_index[level];
4173}
4174
4175static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
4176{
4177	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
4178}
4179
4180static uint32_t
4181radv_init_dcc_control_reg(struct radv_device *device,
4182			  struct radv_image_view *iview)
4183{
4184	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
4185	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
4186	unsigned max_compressed_block_size;
4187	unsigned independent_64b_blocks;
4188
4189	if (!radv_image_has_dcc(iview->image))
4190		return 0;
4191
4192	if (iview->image->info.samples > 1) {
4193		if (iview->image->planes[0].surface.bpe == 1)
4194			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4195		else if (iview->image->planes[0].surface.bpe == 2)
4196			max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4197	}
4198
4199	if (!device->physical_device->rad_info.has_dedicated_vram) {
4200		/* amdvlk: [min-compressed-block-size] should be set to 32 for
4201		 * dGPU and 64 for APU because all of our APUs to date use
4202		 * DIMMs which have a request granularity size of 64B while all
4203		 * other chips have a 32B request size.
4204		 */
4205		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
4206	}
4207
4208	if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
4209				   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
4210				   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
4211		/* If this DCC image is potentially going to be used in texture
4212		 * fetches, we need some special settings.
4213		 */
4214		independent_64b_blocks = 1;
4215		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4216	} else {
4217		/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
4218		 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
4219		 * big as possible for better compression state.
4220		 */
4221		independent_64b_blocks = 0;
4222		max_compressed_block_size = max_uncompressed_block_size;
4223	}
4224
4225	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
4226	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
4227	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
4228	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
4229}
4230
/* Fill *cb with the CB_COLOR* register values needed to bind @iview as a
 * color attachment. Handles both the GFX9+ layout and the pre-GFX9 legacy
 * layout, plus CMASK/FMASK/DCC metadata addressing.
 */
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	uint64_t va;
	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
	const struct radeon_surf *surf = &plane->surface;

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

	/* GPU VA of this plane's pixel data. */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;

	cb->cb_color_base = va >> 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+: swizzle modes replace the legacy tile-mode indices.
		 * The alignment flags come from DCC metadata if present,
		 * otherwise from CMASK.
		 */
		struct gfx9_surf_meta_flags meta;
		if (iview->image->dcc_offset)
			meta = surf->u.gfx9.dcc;
		else
			meta = surf->u.gfx9.cmask;

		cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
			S_028C74_RB_ALIGNED(meta.rb_aligned) |
			S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
		cb->cb_color_base |= surf->tile_swizzle;

		cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
	} else {
		/* Pre-GFX9 legacy path: addressing is per-mip-level. */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

		cb->cb_color_base += level_info->offset >> 8;
		/* Tile swizzle only applies to 2D-tiled surfaces. */
		if (level_info->mode == RADEON_SURF_MODE_2D)
			cb->cb_color_base |= surf->tile_swizzle;

		/* Pitch/slice in units of 8x8 tiles, minus one as the
		 * hardware expects.
		 */
		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (radv_image_has_fmask(iview->image)) {
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}
	}

	/* CMASK variables */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;

	/* DCC metadata base address. */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;
	cb->cb_dcc_base |= surf->tile_swizzle;

	/* Slice range covered by this view. */
	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(max_slice);

	if (iview->image->info.samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->info.samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (radv_image_has_fmask(iview->image)) {
		va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
	} else {
		/* Without FMASK, point the FMASK base at the color base. */
		cb->cb_color_fmask = cb->cb_color_base;
	}

	/* Translate the Vulkan format into the hardware number type,
	 * color format, component swap and endian swap.
	 */
	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (radv_image_has_fmask(iview->image)) {
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
		if (device->physical_device->rad_info.chip_class == SI) {
			unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
		}
	}

	/* Fast clears require CMASK and can be disabled for debugging. */
	if (radv_image_has_cmask(iview->image) &&
	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (radv_dcc_enabled(iview->image, iview->base_mip))
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

	/* This must be set for fast clear to work without FMASK. */
	if (!radv_image_has_fmask(iview->image) &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+ needs the mip-0 dimensions (in plane units) and the
		 * view's mip level programmed explicitly.
		 */
		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);

		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);

		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
			S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
			S_028C68_MIP0_HEIGHT(height - 1) |
			S_028C68_MAX_MIP(iview->image->info.levels - 1);
	}
}
4413
4414static unsigned
4415radv_calc_decompress_on_z_planes(struct radv_device *device,
4416				 struct radv_image_view *iview)
4417{
4418	unsigned max_zplanes = 0;
4419
4420	assert(radv_image_is_tc_compat_htile(iview->image));
4421
4422	if (device->physical_device->rad_info.chip_class >= GFX9) {
4423		/* Default value for 32-bit depth surfaces. */
4424		max_zplanes = 4;
4425
4426		if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4427		    iview->image->info.samples > 1)
4428			max_zplanes = 2;
4429
4430		max_zplanes = max_zplanes + 1;
4431	} else {
4432		if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4433			/* Do not enable Z plane compression for 16-bit depth
4434			 * surfaces because isn't supported on GFX8. Only
4435			 * 32-bit depth surfaces are supported by the hardware.
4436			 * This allows to maintain shader compatibility and to
4437			 * reduce the number of depth decompressions.
4438			 */
4439			max_zplanes = 1;
4440		} else {
4441			if (iview->image->info.samples <= 1)
4442				max_zplanes = 5;
4443			else if (iview->image->info.samples <= 4)
4444				max_zplanes = 3;
4445			else
4446				max_zplanes = 2;
4447		}
4448	}
4449
4450	return max_zplanes;
4451}
4452
/* Fill *ds with the DB_* register values needed to bind @iview as a
 * depth/stencil attachment, covering both the GFX9+ and legacy layouts
 * and optional HTILE metadata.
 */
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	const struct radv_image_plane *plane = &iview->image->planes[0];
	const struct radeon_surf *surf = &plane->surface;

	/* Depth/stencil images are always single-plane. */
	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

	memset(ds, 0, sizeof(*ds));
	/* Polygon-offset scaling depends on the depth format's bit depth. */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = surf->has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	/* Slice range covered by this view. */
	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	/* Base GPU VA; depth and stencil offsets diverge below. */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		assert(surf->u.gfx9.surf_offset == 0);
		s_offs += surf->u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1) |
			S_028038_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

		ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
		ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
		ds->db_depth_view |= S_028008_MIPID(level);

		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
						 S_028038_ITERATE_FLUSH(1);
				ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (!surf->has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned) |
				S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
		}
	} else {
		/* Pre-GFX9 legacy path: per-mip level info, with a separate
		 * level table for stencil-only surfaces.
		 */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

		if (stencil_only)
			level_info = &surf->u.legacy.stencil_level[level];

		z_offs += surf->u.legacy.level[level].offset;
		s_offs += surf->u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= CIK) {
			/* CIK+: decode the tiling parameters from the tile
			 * mode arrays instead of a tile mode index.
			 */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = surf->u.legacy.tiling_index[level];
			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
			unsigned macro_index = surf->u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* SI: program raw tile mode indices. */
			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!surf->has_stencil &&
			    !radv_image_is_tc_compat_htile(iview->image))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
			}
		}
	}

	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}
4623
4624VkResult radv_CreateFramebuffer(
4625	VkDevice                                    _device,
4626	const VkFramebufferCreateInfo*              pCreateInfo,
4627	const VkAllocationCallbacks*                pAllocator,
4628	VkFramebuffer*                              pFramebuffer)
4629{
4630	RADV_FROM_HANDLE(radv_device, device, _device);
4631	struct radv_framebuffer *framebuffer;
4632
4633	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4634
4635	size_t size = sizeof(*framebuffer) +
4636		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4637	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4638				  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4639	if (framebuffer == NULL)
4640		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4641
4642	framebuffer->attachment_count = pCreateInfo->attachmentCount;
4643	framebuffer->width = pCreateInfo->width;
4644	framebuffer->height = pCreateInfo->height;
4645	framebuffer->layers = pCreateInfo->layers;
4646	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4647		VkImageView _iview = pCreateInfo->pAttachments[i];
4648		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4649		framebuffer->attachments[i].attachment = iview;
4650		if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4651			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4652		} else {
4653			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4654		}
4655		framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4656		framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4657		framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4658	}
4659
4660	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4661	return VK_SUCCESS;
4662}
4663
4664void radv_DestroyFramebuffer(
4665	VkDevice                                    _device,
4666	VkFramebuffer                               _fb,
4667	const VkAllocationCallbacks*                pAllocator)
4668{
4669	RADV_FROM_HANDLE(radv_device, device, _device);
4670	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4671
4672	if (!fb)
4673		return;
4674	vk_free2(&device->alloc, pAllocator, fb);
4675}
4676
4677static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4678{
4679	switch (address_mode) {
4680	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4681		return V_008F30_SQ_TEX_WRAP;
4682	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4683		return V_008F30_SQ_TEX_MIRROR;
4684	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4685		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4686	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4687		return V_008F30_SQ_TEX_CLAMP_BORDER;
4688	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4689		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4690	default:
4691		unreachable("illegal tex wrap mode");
4692		break;
4693	}
4694}
4695
4696static unsigned
4697radv_tex_compare(VkCompareOp op)
4698{
4699	switch (op) {
4700	case VK_COMPARE_OP_NEVER:
4701		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4702	case VK_COMPARE_OP_LESS:
4703		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4704	case VK_COMPARE_OP_EQUAL:
4705		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4706	case VK_COMPARE_OP_LESS_OR_EQUAL:
4707		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4708	case VK_COMPARE_OP_GREATER:
4709		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4710	case VK_COMPARE_OP_NOT_EQUAL:
4711		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4712	case VK_COMPARE_OP_GREATER_OR_EQUAL:
4713		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4714	case VK_COMPARE_OP_ALWAYS:
4715		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4716	default:
4717		unreachable("illegal compare mode");
4718		break;
4719	}
4720}
4721
4722static unsigned
4723radv_tex_filter(VkFilter filter, unsigned max_ansio)
4724{
4725	switch (filter) {
4726	case VK_FILTER_NEAREST:
4727		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4728			V_008F38_SQ_TEX_XY_FILTER_POINT);
4729	case VK_FILTER_LINEAR:
4730		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4731			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4732	case VK_FILTER_CUBIC_IMG:
4733	default:
4734		fprintf(stderr, "illegal texture filter");
4735		return 0;
4736	}
4737}
4738
4739static unsigned
4740radv_tex_mipfilter(VkSamplerMipmapMode mode)
4741{
4742	switch (mode) {
4743	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4744		return V_008F38_SQ_TEX_Z_FILTER_POINT;
4745	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4746		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4747	default:
4748		return V_008F38_SQ_TEX_Z_FILTER_NONE;
4749	}
4750}
4751
4752static unsigned
4753radv_tex_bordercolor(VkBorderColor bcolor)
4754{
4755	switch (bcolor) {
4756	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4757	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4758		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4759	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4760	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4761		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4762	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4763	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4764		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4765	default:
4766		break;
4767	}
4768	return 0;
4769}
4770
/* Convert a max-anisotropy sample count into the 0..4 log2 ratio the
 * hardware expects: <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, else 4.
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned ratio = 0;

	while (ratio < 4 && filter >= (2u << ratio))
		ratio++;

	return ratio;
}
4784
4785static unsigned
4786radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4787{
4788	switch (mode) {
4789	case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4790		return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
4791	case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4792		return V_008F30_SQ_IMG_FILTER_MODE_MIN;
4793	case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4794		return V_008F30_SQ_IMG_FILTER_MODE_MAX;
4795	default:
4796		break;
4797	}
4798	return 0;
4799}
4800
4801static uint32_t
4802radv_get_max_anisotropy(struct radv_device *device,
4803			const VkSamplerCreateInfo *pCreateInfo)
4804{
4805	if (device->force_aniso >= 0)
4806		return device->force_aniso;
4807
4808	if (pCreateInfo->anisotropyEnable &&
4809	    pCreateInfo->maxAnisotropy > 1.0f)
4810		return (uint32_t)pCreateInfo->maxAnisotropy;
4811
4812	return 0;
4813}
4814
/* Pack the four dwords of the hardware sampler descriptor (state[0..3])
 * from the Vulkan sampler create info.
 */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;

	/* VK_EXT_sampler_filter_minmax: optional min/max reduction mode
	 * chained onto pNext.
	 */
	const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
	if (sampler_reduction)
		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

	/* Dword 0: wrap modes, anisotropy, depth compare, filter mode. */
	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi) |
			     S_008F30_FILTER_MODE(filter_mode));
	/* Dword 1: LOD clamp range (4.8 fixed point, clamped to [0,15]). */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* Dword 2: LOD bias (signed 4.8 fixed point) and XY/mip filters. */
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	/* Dword 3: border color selection. */
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}
4856
4857VkResult radv_CreateSampler(
4858	VkDevice                                    _device,
4859	const VkSamplerCreateInfo*                  pCreateInfo,
4860	const VkAllocationCallbacks*                pAllocator,
4861	VkSampler*                                  pSampler)
4862{
4863	RADV_FROM_HANDLE(radv_device, device, _device);
4864	struct radv_sampler *sampler;
4865
4866	const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
4867		vk_find_struct_const(pCreateInfo->pNext,
4868				     SAMPLER_YCBCR_CONVERSION_INFO);
4869
4870	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4871
4872	sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4873			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4874	if (!sampler)
4875		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4876
4877	radv_init_sampler(device, sampler, pCreateInfo);
4878
4879	sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
4880	*pSampler = radv_sampler_to_handle(sampler);
4881
4882	return VK_SUCCESS;
4883}
4884
4885void radv_DestroySampler(
4886	VkDevice                                    _device,
4887	VkSampler                                   _sampler,
4888	const VkAllocationCallbacks*                pAllocator)
4889{
4890	RADV_FROM_HANDLE(radv_device, device, _device);
4891	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4892
4893	if (!sampler)
4894		return;
4895	vk_free2(&device->alloc, pAllocator, sampler);
4896}
4897
4898/* vk_icd.h does not declare this function, so we declare it here to
4899 * suppress Wmissing-prototypes.
4900 */
4901PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4902vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4903
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	* What follows is a condensed summary, to help you navigate the large and
	* confusing official doc.
	*
	*   - Loader interface v0 is incompatible with later versions. We don't
	*     support it.
	*
	*   - In loader interface v1:
	*       - The first ICD entrypoint called by the loader is
	*         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	*         entrypoint.
	*       - The ICD must statically expose no other Vulkan symbol unless it is
	*         linked with -Bsymbolic.
	*       - Each dispatchable Vulkan handle created by the ICD must be
	*         a pointer to a struct whose first member is VK_LOADER_DATA. The
	*         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	*       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	*         vkDestroySurfaceKHR(). The ICD must be capable of working with
	*         such loader-managed surfaces.
	*
	*    - Loader interface v2 differs from v1 in:
	*       - The first ICD entrypoint called by the loader is
	*         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	*         statically expose this entrypoint.
	*
	*    - Loader interface v3 differs from v2 in:
	*        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	*          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
	*          because the loader no longer does so.
	*/
	/* We support up to v3; agree on min(loader's version, ours). */
	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
	return VK_SUCCESS;
}
4941
4942VkResult radv_GetMemoryFdKHR(VkDevice _device,
4943			     const VkMemoryGetFdInfoKHR *pGetFdInfo,
4944			     int *pFD)
4945{
4946	RADV_FROM_HANDLE(radv_device, device, _device);
4947	RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4948
4949	assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4950
4951	/* At the moment, we support only the below handle types. */
4952	assert(pGetFdInfo->handleType ==
4953	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4954	       pGetFdInfo->handleType ==
4955	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4956
4957	bool ret = radv_get_memory_fd(device, memory, pFD);
4958	if (ret == false)
4959		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4960	return VK_SUCCESS;
4961}
4962
4963VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4964				       VkExternalMemoryHandleTypeFlagBits handleType,
4965				       int fd,
4966				       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4967{
4968   RADV_FROM_HANDLE(radv_device, device, _device);
4969
4970   switch (handleType) {
4971   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4972      pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4973      return VK_SUCCESS;
4974
4975   default:
4976      /* The valid usage section for this function says:
4977       *
4978       *    "handleType must not be one of the handle types defined as
4979       *    opaque."
4980       *
4981       * So opaque handle types fall into the default "unsupported" case.
4982       */
4983      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4984   }
4985}
4986
4987static VkResult radv_import_opaque_fd(struct radv_device *device,
4988                                      int fd,
4989                                      uint32_t *syncobj)
4990{
4991	uint32_t syncobj_handle = 0;
4992	int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4993	if (ret != 0)
4994		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4995
4996	if (*syncobj)
4997		device->ws->destroy_syncobj(device->ws, *syncobj);
4998
4999	*syncobj = syncobj_handle;
5000	close(fd);
5001
5002	return VK_SUCCESS;
5003}
5004
5005static VkResult radv_import_sync_fd(struct radv_device *device,
5006                                    int fd,
5007                                    uint32_t *syncobj)
5008{
5009	/* If we create a syncobj we do it locally so that if we have an error, we don't
5010	 * leave a syncobj in an undetermined state in the fence. */
5011	uint32_t syncobj_handle =  *syncobj;
5012	if (!syncobj_handle) {
5013		int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
5014		if (ret) {
5015			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5016		}
5017	}
5018
5019	if (fd == -1) {
5020		device->ws->signal_syncobj(device->ws, syncobj_handle);
5021	} else {
5022		int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
5023	if (ret != 0)
5024		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5025	}
5026
5027	*syncobj = syncobj_handle;
5028	if (fd != -1)
5029		close(fd);
5030
5031	return VK_SUCCESS;
5032}
5033
5034VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
5035				   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
5036{
5037	RADV_FROM_HANDLE(radv_device, device, _device);
5038	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
5039	uint32_t *syncobj_dst = NULL;
5040
5041	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
5042		syncobj_dst = &sem->temp_syncobj;
5043	} else {
5044		syncobj_dst = &sem->syncobj;
5045	}
5046
5047	switch(pImportSemaphoreFdInfo->handleType) {
5048		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5049			return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5050		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5051			return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5052		default:
5053			unreachable("Unhandled semaphore handle type");
5054	}
5055}
5056
5057VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
5058				const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
5059				int *pFd)
5060{
5061	RADV_FROM_HANDLE(radv_device, device, _device);
5062	RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
5063	int ret;
5064	uint32_t syncobj_handle;
5065
5066	if (sem->temp_syncobj)
5067		syncobj_handle = sem->temp_syncobj;
5068	else
5069		syncobj_handle = sem->syncobj;
5070
5071	switch(pGetFdInfo->handleType) {
5072	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5073		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5074		break;
5075	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5076		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
5077		if (!ret) {
5078			if (sem->temp_syncobj) {
5079				close (sem->temp_syncobj);
5080				sem->temp_syncobj = 0;
5081			} else {
5082				device->ws->reset_syncobj(device->ws, syncobj_handle);
5083			}
5084		}
5085		break;
5086	default:
5087		unreachable("Unhandled semaphore handle type");
5088	}
5089
5090	if (ret)
5091		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5092	return VK_SUCCESS;
5093}
5094
5095void radv_GetPhysicalDeviceExternalSemaphoreProperties(
5096	VkPhysicalDevice                            physicalDevice,
5097	const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
5098	VkExternalSemaphoreProperties               *pExternalSemaphoreProperties)
5099{
5100	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5101
5102	/* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
5103	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5104	    (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5105	     pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
5106		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5107		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5108		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5109			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5110	} else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
5111		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5112		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5113		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5114			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5115	} else {
5116		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
5117		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
5118		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
5119	}
5120}
5121
5122VkResult radv_ImportFenceFdKHR(VkDevice _device,
5123				   const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
5124{
5125	RADV_FROM_HANDLE(radv_device, device, _device);
5126	RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
5127	uint32_t *syncobj_dst = NULL;
5128
5129
5130	if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
5131		syncobj_dst = &fence->temp_syncobj;
5132	} else {
5133		syncobj_dst = &fence->syncobj;
5134	}
5135
5136	switch(pImportFenceFdInfo->handleType) {
5137		case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5138			return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5139		case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5140			return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5141		default:
5142			unreachable("Unhandled fence handle type");
5143	}
5144}
5145
5146VkResult radv_GetFenceFdKHR(VkDevice _device,
5147				const VkFenceGetFdInfoKHR *pGetFdInfo,
5148				int *pFd)
5149{
5150	RADV_FROM_HANDLE(radv_device, device, _device);
5151	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
5152	int ret;
5153	uint32_t syncobj_handle;
5154
5155	if (fence->temp_syncobj)
5156		syncobj_handle = fence->temp_syncobj;
5157	else
5158		syncobj_handle = fence->syncobj;
5159
5160	switch(pGetFdInfo->handleType) {
5161	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5162		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5163		break;
5164	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5165		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
5166		if (!ret) {
5167			if (fence->temp_syncobj) {
5168				close (fence->temp_syncobj);
5169				fence->temp_syncobj = 0;
5170			} else {
5171				device->ws->reset_syncobj(device->ws, syncobj_handle);
5172			}
5173		}
5174		break;
5175	default:
5176		unreachable("Unhandled fence handle type");
5177	}
5178
5179	if (ret)
5180		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5181	return VK_SUCCESS;
5182}
5183
5184void radv_GetPhysicalDeviceExternalFenceProperties(
5185	VkPhysicalDevice                            physicalDevice,
5186	const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
5187	VkExternalFenceProperties               *pExternalFenceProperties)
5188{
5189	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5190
5191	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5192	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5193	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
5194		pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5195		pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5196		pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
5197			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5198	} else {
5199		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
5200		pExternalFenceProperties->compatibleHandleTypes = 0;
5201		pExternalFenceProperties->externalFenceFeatures = 0;
5202	}
5203}
5204
5205VkResult
5206radv_CreateDebugReportCallbackEXT(VkInstance _instance,
5207                                 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
5208                                 const VkAllocationCallbacks* pAllocator,
5209                                 VkDebugReportCallbackEXT* pCallback)
5210{
5211	RADV_FROM_HANDLE(radv_instance, instance, _instance);
5212	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
5213	                                       pCreateInfo, pAllocator, &instance->alloc,
5214	                                       pCallback);
5215}
5216
5217void
5218radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
5219                                  VkDebugReportCallbackEXT _callback,
5220                                  const VkAllocationCallbacks* pAllocator)
5221{
5222	RADV_FROM_HANDLE(radv_instance, instance, _instance);
5223	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
5224	                                 _callback, pAllocator, &instance->alloc);
5225}
5226
5227void
5228radv_DebugReportMessageEXT(VkInstance _instance,
5229                          VkDebugReportFlagsEXT flags,
5230                          VkDebugReportObjectTypeEXT objectType,
5231                          uint64_t object,
5232                          size_t location,
5233                          int32_t messageCode,
5234                          const char* pLayerPrefix,
5235                          const char* pMessage)
5236{
5237	RADV_FROM_HANDLE(radv_instance, instance, _instance);
5238	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
5239	                object, location, messageCode, pLayerPrefix, pMessage);
5240}
5241
5242void
5243radv_GetDeviceGroupPeerMemoryFeatures(
5244    VkDevice                                    device,
5245    uint32_t                                    heapIndex,
5246    uint32_t                                    localDeviceIndex,
5247    uint32_t                                    remoteDeviceIndex,
5248    VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
5249{
5250	assert(localDeviceIndex == remoteDeviceIndex);
5251
5252	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
5253	                       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
5254	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
5255	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
5256}
5257
/* Time domains we can calibrate against; each entry here is handled by
 * radv_GetCalibratedTimestampsEXT below. */
static const VkTimeDomainEXT radv_time_domains[] = {
	VK_TIME_DOMAIN_DEVICE_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
};
5263
5264VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
5265	VkPhysicalDevice                             physicalDevice,
5266	uint32_t                                     *pTimeDomainCount,
5267	VkTimeDomainEXT                              *pTimeDomains)
5268{
5269	int d;
5270	VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
5271
5272	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
5273		vk_outarray_append(&out, i) {
5274			*i = radv_time_domains[d];
5275		}
5276	}
5277
5278	return vk_outarray_status(&out);
5279}
5280
5281static uint64_t
5282radv_clock_gettime(clockid_t clock_id)
5283{
5284	struct timespec current;
5285	int ret;
5286
5287	ret = clock_gettime(clock_id, &current);
5288	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
5289		ret = clock_gettime(CLOCK_MONOTONIC, &current);
5290	if (ret < 0)
5291		return 0;
5292
5293	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
5294}
5295
5296VkResult radv_GetCalibratedTimestampsEXT(
5297	VkDevice                                     _device,
5298	uint32_t                                     timestampCount,
5299	const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
5300	uint64_t                                     *pTimestamps,
5301	uint64_t                                     *pMaxDeviation)
5302{
5303	RADV_FROM_HANDLE(radv_device, device, _device);
5304	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
5305	int d;
5306	uint64_t begin, end;
5307        uint64_t max_clock_period = 0;
5308
5309	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5310
5311	for (d = 0; d < timestampCount; d++) {
5312		switch (pTimestampInfos[d].timeDomain) {
5313		case VK_TIME_DOMAIN_DEVICE_EXT:
5314			pTimestamps[d] = device->ws->query_value(device->ws,
5315								 RADEON_TIMESTAMP);
5316                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
5317                        max_clock_period = MAX2(max_clock_period, device_period);
5318			break;
5319		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
5320			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
5321                        max_clock_period = MAX2(max_clock_period, 1);
5322			break;
5323
5324		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
5325			pTimestamps[d] = begin;
5326			break;
5327		default:
5328			pTimestamps[d] = 0;
5329			break;
5330		}
5331	}
5332
5333	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5334
5335        /*
5336         * The maximum deviation is the sum of the interval over which we
5337         * perform the sampling and the maximum period of any sampled
5338         * clock. That's because the maximum skew between any two sampled
5339         * clock edges is when the sampled clock with the largest period is
5340         * sampled at the end of that period but right at the beginning of the
5341         * sampling interval and some other clock is sampled right at the
5342         * begining of its sampling period and right at the end of the
5343         * sampling interval. Let's assume the GPU has the longest clock
5344         * period and that the application is sampling GPU and monotonic:
5345         *
5346         *                               s                 e
5347         *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
5348         *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5349         *
5350         *                               g
5351         *		  0         1         2         3
5352         *	GPU       -----_____-----_____-----_____-----_____
5353         *
5354         *                                                m
5355         *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
5356         *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5357         *
5358         *	Interval                     <----------------->
5359         *	Deviation           <-------------------------->
5360         *
5361         *		s  = read(raw)       2
5362         *		g  = read(GPU)       1
5363         *		m  = read(monotonic) 2
5364         *		e  = read(raw)       b
5365         *
5366         * We round the sample interval up by one tick to cover sampling error
5367         * in the interval clock
5368         */
5369
5370        uint64_t sample_interval = end - begin + 1;
5371
5372        *pMaxDeviation = sample_interval + max_clock_period;
5373
5374	return VK_SUCCESS;
5375}
5376