1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#ifndef RADV_PRIVATE_H
29#define RADV_PRIVATE_H
30
31#include <stdlib.h>
32#include <stdio.h>
33#include <stdbool.h>
34#include <pthread.h>
35#include <assert.h>
36#include <stdint.h>
37#include <string.h>
38#ifdef HAVE_VALGRIND
39#include <valgrind.h>
40#include <memcheck.h>
41#define VG(x) x
42#else
43#define VG(x)
44#endif
45
46#include "c11/threads.h"
47#include <amdgpu.h>
48#include "compiler/shader_enums.h"
49#include "util/macros.h"
50#include "util/list.h"
51#include "util/xmlconfig.h"
52#include "main/macros.h"
53#include "vk_alloc.h"
54#include "vk_debug_report.h"
55
56#include "radv_radeon_winsys.h"
57#include "ac_binary.h"
58#include "ac_nir_to_llvm.h"
59#include "ac_gpu_info.h"
60#include "ac_surface.h"
61#include "ac_llvm_build.h"
62#include "ac_llvm_util.h"
63#include "radv_descriptor_set.h"
64#include "radv_extensions.h"
65#include "radv_cs.h"
66
67#include <llvm-c/TargetMachine.h>
68
69/* Pre-declarations needed for WSI entrypoints */
70struct wl_surface;
71struct wl_display;
72typedef struct xcb_connection_t xcb_connection_t;
73typedef uint32_t xcb_visualid_t;
74typedef uint32_t xcb_window_t;
75
76#include <vulkan/vulkan.h>
77#include <vulkan/vulkan_intel.h>
78#include <vulkan/vk_icd.h>
79#include <vulkan/vk_android_native_buffer.h>
80
81#include "radv_entrypoints.h"
82
83#include "wsi_common.h"
84#include "wsi_common_display.h"
85
/* PCI vendor ID shared by ATI/AMD GPUs. */
#define ATI_VENDOR_ID 0x1002

/* Driver limits; most of these back the device limits/properties that radv
 * reports to applications, so changing them requires auditing their users. */
#define MAX_VBS         32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS          8
#define MAX_VIEWPORTS   16
#define MAX_SCISSORS    16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
#define MAX_SAMPLES_LOG2 4
#define NUM_META_FS_KEYS 12
#define RADV_MAX_DRM_DEVICES 8
#define MAX_VIEWS        8
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_SO_OUTPUTS 64
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64

#define NUM_DEPTH_CLEAR_PIPELINES 3

/*
 * This is the point we switch from using CP to compute shader
 * for certain buffer operations.
 */
#define RADV_BUFFER_OPS_CS_THRESHOLD 4096

/* Companion size threshold (bytes) for buffer updates — see users. */
#define RADV_BUFFER_UPDATE_THRESHOLD 1024
118
/* Memory heaps exposed by radv: device-local VRAM, the CPU-visible slice of
 * VRAM, and system memory (GTT). */
enum radv_mem_heap {
	RADV_MEM_HEAP_VRAM,
	RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	RADV_MEM_HEAP_GTT,
	RADV_MEM_HEAP_COUNT
};
125
/* Memory types exposed by radv; each maps onto one of the heaps above with
 * particular caching/visibility properties (see mem_type_indices). */
enum radv_mem_type {
	RADV_MEM_TYPE_VRAM,
	RADV_MEM_TYPE_GTT_WRITE_COMBINE,
	RADV_MEM_TYPE_VRAM_CPU_ACCESS,
	RADV_MEM_TYPE_GTT_CACHED,
	RADV_MEM_TYPE_COUNT
};
133
/* Mark a function as printf-like so the compiler checks the format string:
 * `a` is the 1-based index of the format argument, `b` of the first vararg. */
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
135
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
	/* Round v up to the next multiple of a; a must be a non-zero power of two. */
	assert(a != 0 && a == (a & -a));
	const uint32_t mask = a - 1;
	return (v + mask) & ~mask;
}
142
/* Round v up to the next multiple of a; unlike align_u32(), a does not need
 * to be a power of two. */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
	/* Guard against division by zero, consistent with the other align
	 * helpers that assert on their alignment argument. */
	assert(a != 0);
	return (v + a - 1) / a * a;
}
148
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
	/* Round v up to the next multiple of a; a must be a non-zero power of two. */
	assert(a != 0 && a == (a & -a));
	const uint64_t mask = a - 1;
	return (v + mask) & ~mask;
}
155
static inline int32_t
align_i32(int32_t v, int32_t a)
{
	/* Round v up (towards +infinity) to the next multiple of a; a must be
	 * a positive power of two. */
	assert(a != 0 && a == (a & -a));
	const int32_t mask = a - 1;
	return (v + mask) & ~mask;
}
162
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
	assert(a == (a & -a));
	/* For power-of-two a, n is a multiple of a iff its low bits are clear. */
	const uintmax_t low_bits = a - 1;
	return (n & low_bits) == 0;
}
170
/* Ceiling division: the number of a-sized chunks needed to hold v.
 * Written as v/a plus a remainder check so that the intermediate sum
 * cannot wrap around for values of v near UINT32_MAX (the previous
 * (v + a - 1) form overflowed and returned 0 in that case). */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
	assert(a != 0);
	return v / a + (v % a != 0 ? 1 : 0);
}
176
/* 64-bit ceiling division; see round_up_u32() for the overflow rationale. */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
	assert(a != 0);
	return v / a + (v % a != 0 ? 1 : 0);
}
182
/* Size of mip level `levels` for a base dimension n, clamped to 1;
 * a zero base dimension stays 0. */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
	if (n == 0)
		return 0;

	const uint32_t mip = n >> levels;
	return mip > 0 ? mip : 1;
}
/* Clamp f into [min, max]; requires min < max. NaN is passed through
 * unchanged (both comparisons are false). */
static inline float
radv_clamp_f(float f, float min, float max)
{
	assert(min < max);

	if (f < min)
		return min;
	if (f > max)
		return max;
	return f;
}
203
/* Clear the `clear_mask` bits from *inout_mask and report whether any of
 * those bits were previously set. */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
	const bool was_set = (*inout_mask & clear_mask) != 0;

	/* Clearing bits that were not set is a no-op, so this is safe
	 * unconditionally. */
	*inout_mask &= ~clear_mask;
	return was_set;
}
214
/* Iterate `b` over the index of each set bit in `dword`, lowest first.
 * The clear step must use an unsigned 1: left-shifting a signed 1 by 31
 * (when bit 31 is set) is undefined behavior. */
#define for_each_bit(b, dword)                          \
	for (uint32_t __dword = (dword);		\
	     (b) = __builtin_ffs(__dword) - 1, __dword;	\
	     __dword &= ~(1u << (b)))
219
/* memcpy `count` elements from src to dest, statically checking that both
 * pointers have the same element size. Macro arguments are fully
 * parenthesized so expression arguments expand safely. */
#define typed_memcpy(dest, src, count) ({				\
			STATIC_ASSERT(sizeof(*(src)) == sizeof(*(dest))); \
			memcpy((dest), (src), (count) * sizeof(*(src))); \
		})
224
225/* Whenever we generate an error, pass it through this function. Useful for
226 * debugging, where we can break on it. Only call at error site, not when
227 * propagating errors. Might be useful to plug in a stack trace here.
228 */
229
230struct radv_instance;
231
232VkResult __vk_errorf(struct radv_instance *instance, VkResult error, const char *file, int line, const char *format, ...);
233
234#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
235#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__);
236
/* Backend for radv_finishme(): logs a "not implemented" message with its
 * source location. */
void __radv_finishme(const char *file, int line, const char *format, ...)
	radv_printflike(3, 4);
/* Error (loge) and info (logi) logging, printf-style plus va_list variants. */
void radv_loge(const char *format, ...) radv_printflike(1, 2);
void radv_loge_v(const char *format, va_list va);
void radv_logi(const char *format, ...) radv_printflike(1, 2);
void radv_logi_v(const char *format, va_list va);
243
244/**
245 * Print a FINISHME message, including its source location.
246 */
247#define radv_finishme(format, ...)					\
248	do { \
249		static bool reported = false; \
250		if (!reported) { \
251			__radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
252			reported = true; \
253		} \
254	} while (0)
255
/* A non-fatal assert.  Useful for debugging. Prints the failing condition
 * with its location instead of aborting; compiled out in release builds. */
#ifdef DEBUG
#define radv_assert(x) ({						\
			if (unlikely(!(x)))				\
				fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
		})
#else
#define radv_assert(x)
#endif
265
/* Report an unimplemented entrypoint once and return the given value. */
#define stub_return(v)					\
	do {						\
		radv_finishme("stub %s", __func__);	\
		return (v);				\
	} while (0)

/* Report an unimplemented void entrypoint once and return. */
#define stub()						\
	do {						\
		radv_finishme("stub %s", __func__);	\
		return;					\
	} while (0)
277
/* Resolve a Vulkan entrypoint by name. The checked variant also validates
 * the entrypoint against the given core version and enabled instance/device
 * extension tables. */
void *radv_lookup_entrypoint_unchecked(const char *name);
void *radv_lookup_entrypoint_checked(const char *name,
                                    uint32_t core_version,
                                    const struct radv_instance_extension_table *instance,
                                    const struct radv_device_extension_table *device);
283
/* Per-GPU state (VkPhysicalDevice), shared by all logical devices created
 * from it. */
struct radv_physical_device {
	VK_LOADER_DATA                              _loader_data;

	struct radv_instance *                       instance;

	struct radeon_winsys *ws;
	struct radeon_info rad_info;
	char                                        name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
	uint8_t                                     driver_uuid[VK_UUID_SIZE];
	uint8_t                                     device_uuid[VK_UUID_SIZE];
	uint8_t                                     cache_uuid[VK_UUID_SIZE];

	/* DRM file descriptors; master_fd is presumably only valid when
	 * display support is in use — verify at the users. */
	int local_fd;
	int master_fd;
	struct wsi_device                       wsi_device;

	bool has_rbplus; /* if RB+ register exist */
	bool rbplus_allowed; /* if RB+ is allowed */
	bool has_clear_state;
	bool cpdma_prefetch_writes_memory;
	bool has_scissor_bug;

	bool has_out_of_order_rast;
	bool out_of_order_rast_allowed;

	/* Whether DCC should be enabled for MSAA textures. */
	bool dcc_msaa_allowed;

	/* Whether LOAD_CONTEXT_REG packets are supported. */
	bool has_load_ctx_reg_pkt;

	/* This is the drivers on-disk cache used as a fallback as opposed to
	 * the pipeline cache defined by apps.
	 */
	struct disk_cache *                          disk_cache;

	VkPhysicalDeviceMemoryProperties memory_properties;
	enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];

	drmPciBusInfo bus_info;

	struct radv_device_extension_table supported_extensions;
};
327
/* Driver-wide state created by vkCreateInstance; enumerates and owns the
 * physical devices. */
struct radv_instance {
	VK_LOADER_DATA                              _loader_data;

	VkAllocationCallbacks                       alloc;

	uint32_t                                    apiVersion;
	int                                         physicalDeviceCount;
	struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];

	/* Debug/perftest feature bitmasks; names come from
	 * radv_get_debug_option_name()/radv_get_perftest_option_name(). */
	uint64_t debug_flags;
	uint64_t perftest_flags;

	struct vk_debug_report_instance             debug_report_callbacks;

	struct radv_instance_extension_table enabled_extensions;

	struct driOptionCache dri_options;
	struct driOptionCache available_dri_options;
};
347
/* Set up / tear down presentation (WSI) support for a physical device. */
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
void radv_finish_wsi(struct radv_physical_device *physical_device);

/* Extension/version capability queries. */
bool radv_instance_extension_supported(const char *name);
uint32_t radv_physical_device_api_version(struct radv_physical_device *dev);
bool radv_physical_device_extension_supported(struct radv_physical_device *dev,
					      const char *name);
355
struct cache_entry;

/* In-memory pipeline cache: a hash table of cache_entry pointers, guarded
 * by `mutex`. Backs both app-provided VkPipelineCache objects and the
 * device's fallback cache. */
struct radv_pipeline_cache {
	struct radv_device *                          device;
	pthread_mutex_t                              mutex;

	uint32_t                                     total_size;
	uint32_t                                     table_size;
	uint32_t                                     kernel_count;
	struct cache_entry **                        hash_table;
	bool                                         modified;

	VkAllocationCallbacks                        alloc;
};
370
/* Non-shader pipeline state that influences shader compilation; used as
 * part of the cache lookup key. */
struct radv_pipeline_key {
	uint32_t instance_rate_inputs;
	uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
	uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
	uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
	uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
	uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
	/* 2 bits per attribute: alpha-adjust mode for vertex fetch. */
	uint64_t vertex_alpha_adjust;
	uint32_t vertex_post_shuffle;
	unsigned tess_input_vertices;
	/* Color export format/type state for the fragment stage. */
	uint32_t col_format;
	uint32_t is_int8;
	uint32_t is_int10;
	uint8_t log2_ps_iter_samples;
	uint8_t num_samples;
	uint32_t has_multiview_view_index : 1;
	uint32_t optimisations_disabled : 1;
};
389
/* Initialize an empty cache owned by `device`. */
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device);
/* Release all entries and the hash table. */
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
/* Populate the cache from a serialized blob (vkCreatePipelineCache data). */
bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size);

struct radv_shader_variant;

/* Look up the shader variants for `sha1`; on success fills `variants` and
 * reports whether they came from the app-provided cache. */
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
					        struct radv_pipeline_cache *cache,
					        const unsigned char *sha1,
					        struct radv_shader_variant **variants,
						bool *found_in_application_cache);

/* Insert freshly-compiled variants (with their binary code) under `sha1`. */
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
				   struct radv_pipeline_cache *cache,
				   const unsigned char *sha1,
				   struct radv_shader_variant **variants,
				   const void *const *codes,
				   const unsigned *code_sizes);
415
/* Variants of the depth/stencil blit pipelines, selected by whether the
 * image layout is GENERAL (tile disable) or not — see
 * radv_meta_blit_ds_to_type(). */
enum radv_blit_ds_layout {
	RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
	RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
	RADV_BLIT_DS_LAYOUT_COUNT,
};
421
422static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout)
423{
424	return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
425}
426
427static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
428{
429	return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
430}
431
/* Destination-layout variants for meta render passes: GENERAL vs. any
 * optimal layout — see radv_meta_dst_layout_from_layout(). */
enum radv_meta_dst_layout {
	RADV_META_DST_LAYOUT_GENERAL,
	RADV_META_DST_LAYOUT_OPTIMAL,
	RADV_META_DST_LAYOUT_COUNT,
};
437
438static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout)
439{
440	return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
441}
442
443static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
444{
445	return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
446}
447
/* All internal ("meta") pipelines, layouts and render passes used to
 * implement clears, blits, copies, resolves, decompression and queries.
 * Pipelines are created on demand (see `mtx`) and cached in `cache`. */
struct radv_meta_state {
	VkAllocationCallbacks alloc;

	struct radv_pipeline_cache cache;

	/*
	 * For on-demand pipeline creation, makes sure that
	 * only one thread tries to build a pipeline at the same time.
	 */
	mtx_t mtx;

	/**
	 * Use array element `i` for images with `2^i` samples.
	 */
	struct {
		VkRenderPass render_pass[NUM_META_FS_KEYS];
		VkPipeline color_pipelines[NUM_META_FS_KEYS];

		VkRenderPass depthstencil_rp;
		VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
		VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
		VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
	} clear[1 + MAX_SAMPLES_LOG2];

	VkPipelineLayout                          clear_color_p_layout;
	VkPipelineLayout                          clear_depth_p_layout;

	/* Optimized compute fast HTILE clear for stencil or depth only. */
	VkPipeline clear_htile_mask_pipeline;
	VkPipelineLayout clear_htile_mask_p_layout;
	VkDescriptorSetLayout clear_htile_mask_ds_layout;

	/* Graphics blit pipelines, per source image dimensionality. */
	struct {
		VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];

		/** Pipeline that blits from a 1D image. */
		VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

		/** Pipeline that blits from a 2D image. */
		VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

		/** Pipeline that blits from a 3D image. */
		VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

		VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
		VkPipeline depth_only_1d_pipeline;
		VkPipeline depth_only_2d_pipeline;
		VkPipeline depth_only_3d_pipeline;

		VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
		VkPipeline stencil_only_1d_pipeline;
		VkPipeline stencil_only_2d_pipeline;
		VkPipeline stencil_only_3d_pipeline;
		VkPipelineLayout                          pipeline_layout;
		VkDescriptorSetLayout                     ds_layout;
	} blit;

	struct {
		VkPipelineLayout p_layouts[5];
		VkDescriptorSetLayout ds_layouts[5];
		VkPipeline pipelines[5][NUM_META_FS_KEYS];

		VkPipeline depth_only_pipeline[5];

		VkPipeline stencil_only_pipeline[5];
	} blit2d[1 + MAX_SAMPLES_LOG2];

	VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
	VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
	VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];

	/* itob: image-to-buffer copy (compute). */
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
		VkPipeline pipeline_3d;
	} itob;
	/* btoi: buffer-to-image copy (compute). */
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
		VkPipeline pipeline_3d;
	} btoi;
	/* Special-cased R32G32B32 variant (formats with no tiled support). */
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
	} btoi_r32g32b32;
	/* itoi: image-to-image copy (compute). */
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
		VkPipeline pipeline_3d;
	} itoi;
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
	} itoi_r32g32b32;
	/* cleari: image clears via compute. */
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
		VkPipeline pipeline_3d;
	} cleari;
	struct {
		VkPipelineLayout                          img_p_layout;
		VkDescriptorSetLayout                     img_ds_layout;
		VkPipeline pipeline;
	} cleari_r32g32b32;

	/* MSAA resolve using the graphics pipeline. */
	struct {
		VkPipelineLayout                          p_layout;
		VkPipeline                                pipeline[NUM_META_FS_KEYS];
		VkRenderPass                              pass[NUM_META_FS_KEYS];
	} resolve;

	/* MSAA resolve using compute, per sample count. */
	struct {
		VkDescriptorSetLayout                     ds_layout;
		VkPipelineLayout                          p_layout;
		struct {
			VkPipeline                                pipeline;
			VkPipeline                                i_pipeline;
			VkPipeline                                srgb_pipeline;
		} rc[MAX_SAMPLES_LOG2];
	} resolve_compute;

	/* MSAA resolve using a fragment shader, per sample count. */
	struct {
		VkDescriptorSetLayout                     ds_layout;
		VkPipelineLayout                          p_layout;

		struct {
			VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
			VkPipeline   pipeline[NUM_META_FS_KEYS];
		} rc[MAX_SAMPLES_LOG2];
	} resolve_fragment;

	/* HTILE (depth metadata) decompress/resummarize, per sample count. */
	struct {
		VkPipelineLayout                          p_layout;
		VkPipeline                                decompress_pipeline;
		VkPipeline                                resummarize_pipeline;
		VkRenderPass                              pass;
	} depth_decomp[1 + MAX_SAMPLES_LOG2];

	/* CMASK/FMASK/DCC (color metadata) elimination and decompression. */
	struct {
		VkPipelineLayout                          p_layout;
		VkPipeline                                cmask_eliminate_pipeline;
		VkPipeline                                fmask_decompress_pipeline;
		VkPipeline                                dcc_decompress_pipeline;
		VkRenderPass                              pass;

		VkDescriptorSetLayout                     dcc_decompress_compute_ds_layout;
		VkPipelineLayout                          dcc_decompress_compute_p_layout;
		VkPipeline                                dcc_decompress_compute_pipeline;
	} fast_clear_flush;

	/* Buffer fill/copy via compute. */
	struct {
		VkPipelineLayout fill_p_layout;
		VkPipelineLayout copy_p_layout;
		VkDescriptorSetLayout fill_ds_layout;
		VkDescriptorSetLayout copy_ds_layout;
		VkPipeline fill_pipeline;
		VkPipeline copy_pipeline;
	} buffer;

	/* Query result copy/conversion shaders. */
	struct {
		VkDescriptorSetLayout ds_layout;
		VkPipelineLayout p_layout;
		VkPipeline occlusion_query_pipeline;
		VkPipeline pipeline_statistics_query_pipeline;
		VkPipeline tfb_query_pipeline;
	} query;

	/* FMASK expansion, per sample count. */
	struct {
		VkDescriptorSetLayout ds_layout;
		VkPipelineLayout p_layout;
		VkPipeline pipeline[MAX_SAMPLES_LOG2];
	} fmask_expand;
};
627
/* queue types (radv queue family indices) */
#define RADV_QUEUE_GENERAL 0
#define RADV_QUEUE_COMPUTE 1
#define RADV_QUEUE_TRANSFER 2

#define RADV_MAX_QUEUE_FAMILIES 3

/* Map a radv queue family index to the winsys ring type. */
enum ring_type radv_queue_family_to_ring(int f);
636
/* A VkQueue: one hardware context plus the scratch/ring buffer objects and
 * preamble command streams shared by submissions on this queue. */
struct radv_queue {
	VK_LOADER_DATA                              _loader_data;
	struct radv_device *                         device;
	struct radeon_winsys_ctx                    *hw_ctx;
	enum radeon_ctx_priority                     priority;
	uint32_t queue_family_index;
	int queue_idx;
	VkDeviceQueueCreateFlags flags;

	/* Current sizes/presence of the ring BOs below; used to decide when
	 * they need to be (re)allocated. */
	uint32_t scratch_size;
	uint32_t compute_scratch_size;
	uint32_t esgs_ring_size;
	uint32_t gsvs_ring_size;
	bool has_tess_rings;
	bool has_sample_positions;

	struct radeon_winsys_bo *scratch_bo;
	struct radeon_winsys_bo *descriptor_bo;
	struct radeon_winsys_bo *compute_scratch_bo;
	struct radeon_winsys_bo *esgs_ring_bo;
	struct radeon_winsys_bo *gsvs_ring_bo;
	struct radeon_winsys_bo *tess_rings_bo;
	/* Preamble command streams prepended to submissions. */
	struct radeon_cmdbuf *initial_preamble_cs;
	struct radeon_cmdbuf *initial_full_flush_preamble_cs;
	struct radeon_cmdbuf *continue_preamble_cs;
};
663
/* Growable device-wide buffer-object list, guarded by `mutex`
 * (see radv_device::use_global_bo_list). */
struct radv_bo_list {
	struct radv_winsys_bo_list list;
	unsigned capacity;
	pthread_mutex_t mutex;
};
669
/* Logical device state (VkDevice). */
struct radv_device {
	VK_LOADER_DATA                              _loader_data;

	VkAllocationCallbacks                       alloc;

	struct radv_instance *                       instance;
	struct radeon_winsys *ws;

	struct radv_meta_state                       meta_state;

	/* Queues grouped by family; empty_cs is a no-op command stream per
	 * family. */
	struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
	int queue_count[RADV_MAX_QUEUE_FAMILIES];
	struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];

	bool always_use_syncobj;
	bool has_distributed_tess;
	bool pbb_allowed;
	bool dfsm_allowed;
	uint32_t tess_offchip_block_dw_size;
	uint32_t scratch_waves;
	uint32_t dispatch_initiator;

	uint32_t gs_table_depth;

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float sample_locations_1x[1][2];
	float sample_locations_2x[2][2];
	float sample_locations_4x[4][2];
	float sample_locations_8x[8][2];
	float sample_locations_16x[16][2];

	/* CIK and later */
	uint32_t gfx_init_size_dw;
	struct radeon_winsys_bo                      *gfx_init;

	/* Debug trace buffer; trace_id_ptr points into its mapping. */
	struct radeon_winsys_bo                      *trace_bo;
	uint32_t                                     *trace_id_ptr;

	/* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
	bool                                         keep_shader_info;

	struct radv_physical_device                  *physical_device;

	/* Backup in-memory cache to be used if the app doesn't provide one */
	struct radv_pipeline_cache *                mem_cache;

	/*
	 * use different counters so MSAA MRTs get consecutive surface indices,
	 * even if MASK is allocated in between.
	 */
	uint32_t image_mrt_offset_counter;
	uint32_t fmask_mrt_offset_counter;
	/* Slab allocator for shader code, guarded by shader_slab_mutex. */
	struct list_head shader_slabs;
	mtx_t shader_slab_mutex;

	/* For detecting VM faults reported by dmesg. */
	uint64_t dmesg_timestamp;

	struct radv_device_extension_table enabled_extensions;

	/* Whether the driver uses a global BO list. */
	bool use_global_bo_list;

	struct radv_bo_list bo_list;

	/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
	int force_aniso;
};
740
/* Backing store for a VkDeviceMemory allocation. */
struct radv_device_memory {
	struct radeon_winsys_bo                      *bo;
	/* for dedicated allocations */
	struct radv_image                            *image;
	struct radv_buffer                           *buffer;
	/* Index into the physical device's memory types. */
	uint32_t                                     type_index;
	/* CPU mapping of the BO, when mapped. */
	VkDeviceSize                                 map_size;
	void *                                       map;
	/* Imported host pointer, if any — NOTE(review): verify at users. */
	void *                                       user_ptr;
};
751
752
/* GPU address + size of one dynamic buffer descriptor. */
struct radv_descriptor_range {
	uint64_t va;
	uint32_t size;
};
757
/* A VkDescriptorSet: CPU-mapped descriptor memory inside a pool BO, plus
 * the referenced BOs (trailing GNU zero-length array). */
struct radv_descriptor_set {
	const struct radv_descriptor_set_layout *layout;
	uint32_t size;

	struct radeon_winsys_bo *bo;
	uint64_t va;
	uint32_t *mapped_ptr;
	struct radv_descriptor_range *dynamic_descriptors;

	struct radeon_winsys_bo *descriptors[0];
};
769
/* Descriptor set used for push descriptors; `capacity` is the size of the
 * backing storage currently reserved for `set`. */
struct radv_push_descriptor_set
{
	struct radv_descriptor_set set;
	uint32_t capacity;
};
775
/* One allocation carved out of a descriptor pool's BO. */
struct radv_descriptor_pool_entry {
	uint32_t offset;
	uint32_t size;
	struct radv_descriptor_set *set;
};
781
/* A VkDescriptorPool: bump-allocates descriptor memory from a single BO
 * and (optionally) host memory for the set structs themselves. */
struct radv_descriptor_pool {
	struct radeon_winsys_bo *bo;
	uint8_t *mapped_ptr;
	uint64_t current_offset;
	uint64_t size;

	/* Host-side bump allocator for radv_descriptor_set structs. */
	uint8_t *host_memory_base;
	uint8_t *host_memory_ptr;
	uint8_t *host_memory_end;

	uint32_t entry_count;
	uint32_t max_entry_count;
	struct radv_descriptor_pool_entry entries[0];
};
796
/* One entry of a descriptor update template: describes how to copy an
 * array of descriptors from app memory into a set. */
struct radv_descriptor_update_template_entry {
	VkDescriptorType descriptor_type;

	/* The number of descriptors to update */
	uint32_t descriptor_count;

	/* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
	uint32_t dst_offset;

	/* In dwords. Not valid/used for dynamic descriptors */
	uint32_t dst_stride;

	uint32_t buffer_offset;

	/* Only valid for combined image samplers and samplers */
	uint8_t has_sampler;
	uint8_t sampler_offset;

	/* In bytes */
	size_t src_offset;
	size_t src_stride;

	/* For push descriptors */
	const uint32_t *immutable_samplers;
};
822
/* A VkDescriptorUpdateTemplate: entry_count entries in a trailing
 * variable-length array. */
struct radv_descriptor_update_template {
	uint32_t entry_count;
	VkPipelineBindPoint bind_point;
	struct radv_descriptor_update_template_entry entry[0];
};
828
/* A VkBuffer; `bo`/`offset` are only valid once memory has been bound. */
struct radv_buffer {
	VkDeviceSize                                 size;

	VkBufferUsageFlags                           usage;
	VkBufferCreateFlags                          flags;

	/* Set when bound */
	struct radeon_winsys_bo *                      bo;
	VkDeviceSize                                 offset;

	bool shareable;
};
841
/* Bitmask of Vulkan dynamic states; mirrors the low bits of
 * enum radv_cmd_dirty_bits below — keep the two in sync. */
enum radv_dynamic_state_bits {
	RADV_DYNAMIC_VIEWPORT             = 1 << 0,
	RADV_DYNAMIC_SCISSOR              = 1 << 1,
	RADV_DYNAMIC_LINE_WIDTH           = 1 << 2,
	RADV_DYNAMIC_DEPTH_BIAS           = 1 << 3,
	RADV_DYNAMIC_BLEND_CONSTANTS      = 1 << 4,
	RADV_DYNAMIC_DEPTH_BOUNDS         = 1 << 5,
	RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
	RADV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7,
	RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
	RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
	RADV_DYNAMIC_ALL                  = (1 << 10) - 1,
};
855
/* Dirty-state tracking for command buffer emission. */
enum radv_cmd_dirty_bits {
	/* Keep the dynamic state dirty bits in sync with
	 * enum radv_dynamic_state_bits */
	RADV_CMD_DIRTY_DYNAMIC_VIEWPORT                  = 1 << 0,
	RADV_CMD_DIRTY_DYNAMIC_SCISSOR                   = 1 << 1,
	RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                = 1 << 2,
	RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                = 1 << 3,
	RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS           = 1 << 4,
	RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS              = 1 << 5,
	RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 6,
	RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 7,
	RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
	RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 10) - 1,
	RADV_CMD_DIRTY_PIPELINE                          = 1 << 10,
	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 11,
	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 12,
	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 13,
	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 14,
};
876
/* Pending cache-flush / synchronization work for a command buffer. */
enum radv_cmd_flush_bits {
	RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
	/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
	RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
	/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
	RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
	/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
	RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
	/* Same as above, but only writes back and doesn't invalidate */
	RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
	/* Framebuffer caches */
	RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
	RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
	RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
	RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
	/* Engine synchronization. */
	RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
	RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
	RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
	RADV_CMD_FLAG_VGT_FLUSH        = 1 << 12,
	/* Pipeline query controls. */
	RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
	RADV_CMD_FLAG_STOP_PIPELINE_STATS  = 1 << 14,
	RADV_CMD_FLAG_VGT_STREAMOUT_SYNC   = 1 << 15,

	/* Convenience mask: flush and invalidate all framebuffer caches. */
	RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
					      RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
					      RADV_CMD_FLAG_FLUSH_AND_INV_DB |
					      RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
};
907
/* One bound vertex buffer (vkCmdBindVertexBuffers). */
struct radv_vertex_binding {
	struct radv_buffer *                          buffer;
	VkDeviceSize                                 offset;
};
912
/* One bound transform feedback buffer. */
struct radv_streamout_binding {
	struct radv_buffer *buffer;
	VkDeviceSize offset;
	VkDeviceSize size;
};
918
/* Per-command-buffer transform feedback state. */
struct radv_streamout_state {
	/* Mask of bound streamout buffers. */
	uint8_t enabled_mask;

	/* External state that comes from the last vertex stage, it must be
	 * set explicitly when binding a new graphics pipeline.
	 */
	uint16_t stride_in_dw[MAX_SO_BUFFERS];
	uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
	uint32_t hw_enabled_mask;

	/* State of VGT_STRMOUT_(CONFIG|EN) */
	bool streamout_enabled;
};
935
/* Current viewports (dynamic state). */
struct radv_viewport_state {
	uint32_t                                          count;
	VkViewport                                        viewports[MAX_VIEWPORTS];
};
940
/* Current scissor rectangles (dynamic state). */
struct radv_scissor_state {
	uint32_t                                          count;
	VkRect2D                                          scissors[MAX_SCISSORS];
};
945
/* Current discard rectangles (VK_EXT_discard_rectangles dynamic state). */
struct radv_discard_rectangle_state {
	uint32_t                                          count;
	VkRect2D                                          rectangles[MAX_DISCARD_RECTANGLES];
};
950
/* All Vulkan dynamic pipeline state tracked per command buffer. */
struct radv_dynamic_state {
	/**
	 * Bitmask of (1 << VK_DYNAMIC_STATE_*).
	 * Defines the set of saved dynamic state.
	 */
	uint32_t mask;

	struct radv_viewport_state                        viewport;

	struct radv_scissor_state                         scissor;

	float                                        line_width;

	struct {
		float                                     bias;
		float                                     clamp;
		float                                     slope;
	} depth_bias;

	float                                        blend_constants[4];

	struct {
		float                                     min;
		float                                     max;
	} depth_bounds;

	/* Front/back stencil state pairs. */
	struct {
		uint32_t                                  front;
		uint32_t                                  back;
	} stencil_compare_mask;

	struct {
		uint32_t                                  front;
		uint32_t                                  back;
	} stencil_write_mask;

	struct {
		uint32_t                                  front;
		uint32_t                                  back;
	} stencil_reference;

	struct radv_discard_rectangle_state               discard_rectangle;
};
994
995extern const struct radv_dynamic_state default_dynamic_state;
996
997const char *
998radv_get_debug_option_name(int id);
999
1000const char *
1001radv_get_perftest_option_name(int id);
1002
1003/**
1004 * Attachment state when recording a renderpass instance.
1005 *
1006 * The clear value is valid only if there exists a pending clear.
1007 */
1008struct radv_attachment_state {
1009	VkImageAspectFlags                           pending_clear_aspects;
1010	uint32_t                                     cleared_views;
1011	VkClearValue                                 clear_value;
1012	VkImageLayout                                current_layout;
1013};
1014
1015struct radv_descriptor_state {
1016	struct radv_descriptor_set *sets[MAX_SETS];
1017	uint32_t dirty;
1018	uint32_t valid;
1019	struct radv_push_descriptor_set push_set;
1020	bool push_dirty;
1021	uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1022};
1023
/* All mutable state tracked while recording a command buffer. */
struct radv_cmd_state {
	/* Vertex descriptors */
	uint64_t                                      vb_va;
	unsigned                                      vb_size;

	bool predicating;
	uint32_t                                      dirty;

	uint32_t                                      prefetch_L2_mask;

	struct radv_pipeline *                        pipeline;
	struct radv_pipeline *                        emitted_pipeline;
	struct radv_pipeline *                        compute_pipeline;
	struct radv_pipeline *                        emitted_compute_pipeline;
	struct radv_framebuffer *                     framebuffer;
	struct radv_render_pass *                     pass;
	const struct radv_subpass *                         subpass;
	struct radv_dynamic_state                     dynamic;
	struct radv_attachment_state *                attachments;
	struct radv_streamout_state                  streamout;
	VkRect2D                                     render_area;

	/* Index buffer */
	struct radv_buffer                           *index_buffer;
	uint64_t                                     index_offset;
	uint32_t                                     index_type;
	uint32_t                                     max_index_count;
	uint64_t                                     index_va;
	int32_t                                      last_index_type;

	/* last_* fields cache the previously-emitted values to skip
	 * redundant register writes. */
	int32_t                                      last_primitive_reset_en;
	uint32_t                                     last_primitive_reset_index;
	enum radv_cmd_flush_bits                     flush_bits;
	unsigned                                     active_occlusion_queries;
	bool                                         perfect_occlusion_queries_enabled;
	unsigned                                     active_pipeline_queries;
	float					     offset_scale;
	uint32_t                                      trace_id;
	uint32_t                                      last_ia_multi_vgt_param;

	uint32_t last_num_instances;
	uint32_t last_first_instance;
	uint32_t last_vertex_offset;

	/* Whether CP DMA is busy/idle. */
	bool dma_is_busy;

	/* Conditional rendering info. */
	int predication_type; /* -1: disabled, 0: normal, 1: inverted */
	uint64_t predication_va;

	bool context_roll_without_scissor_emitted;
};

/* Pool from which command buffers are allocated; keeps free lists. */
struct radv_cmd_pool {
	VkAllocationCallbacks                        alloc;
	struct list_head                             cmd_buffers;
	struct list_head                             free_cmd_buffers;
	uint32_t queue_family_index;
};

/* CPU-mapped upload area holding data the GPU reads during execution. */
struct radv_cmd_buffer_upload {
	uint8_t *map;
	unsigned offset;
	uint64_t size;
	struct radeon_winsys_bo *upload_bo;
	struct list_head list;
};

/* Command buffer lifecycle states (mirrors the Vulkan spec's states). */
enum radv_cmd_buffer_status {
	RADV_CMD_BUFFER_STATUS_INVALID,
	RADV_CMD_BUFFER_STATUS_INITIAL,
	RADV_CMD_BUFFER_STATUS_RECORDING,
	RADV_CMD_BUFFER_STATUS_EXECUTABLE,
	RADV_CMD_BUFFER_STATUS_PENDING,
};
1100
/* A Vulkan command buffer plus all RADV per-cmdbuffer bookkeeping. */
struct radv_cmd_buffer {
	VK_LOADER_DATA                               _loader_data;

	struct radv_device *                          device;

	struct radv_cmd_pool *                        pool;
	struct list_head                             pool_link;

	VkCommandBufferUsageFlags                    usage_flags;
	VkCommandBufferLevel                         level;
	enum radv_cmd_buffer_status status;
	struct radeon_cmdbuf *cs;
	struct radv_cmd_state state;
	struct radv_vertex_binding                   vertex_bindings[MAX_VBS];
	struct radv_streamout_binding                streamout_bindings[MAX_SO_BUFFERS];
	uint32_t queue_family_index;

	uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
	VkShaderStageFlags push_constant_stages;
	struct radv_descriptor_set meta_push_descriptors;

	struct radv_descriptor_state descriptors[VK_PIPELINE_BIND_POINT_RANGE_SIZE];

	struct radv_cmd_buffer_upload upload;

	/* Scratch/ring sizes required by everything recorded so far. */
	uint32_t scratch_size_needed;
	uint32_t compute_scratch_size_needed;
	uint32_t esgs_ring_size_needed;
	uint32_t gsvs_ring_size_needed;
	bool tess_rings_needed;
	bool sample_positions_needed;

	/* Result of recording so far (VK_SUCCESS unless an error occurred). */
	VkResult record_result;

	uint64_t gfx9_fence_va;
	uint32_t gfx9_fence_idx;
	uint64_t gfx9_eop_bug_va;

	/**
	 * Whether a query pool has been reset and we have to flush caches.
	 */
	bool pending_reset_query;
};
1144
struct radv_image;

/* Return whether the cmdbuffer executes on the compute engine (MEC). */
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);

/* Low-level SI/CIK packet emission helpers. */
void si_emit_graphics(struct radv_physical_device *physical_device,
		      struct radeon_cmdbuf *cs);
void si_emit_compute(struct radv_physical_device *physical_device,
		      struct radeon_cmdbuf *cs);

void cik_create_gfx_config(struct radv_device *device);

void si_write_viewport(struct radeon_cmdbuf *cs, int first_vp,
		       int count, const VkViewport *viewports);
void si_write_scissors(struct radeon_cmdbuf *cs, int first,
		       int count, const VkRect2D *scissors,
		       const VkViewport *viewports, bool can_use_guardband);
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
				   bool instanced_draw, bool indirect_draw,
				   bool count_from_stream_output,
				   uint32_t draw_vertex_count);
/* Emit an end-of-pipe event that writes new_fence to va when done. */
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
				enum chip_class chip_class,
				bool is_mec,
				unsigned event, unsigned event_flags,
				unsigned data_sel,
				uint64_t va,
				uint32_t new_fence,
				uint64_t gfx9_eop_bug_va);

/* Wait until the value at va, masked, satisfies op against ref. */
void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va,
		      uint32_t ref, uint32_t mask);
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
			    enum chip_class chip_class,
			    uint32_t *fence_ptr, uint64_t va,
			    bool is_mec,
			    enum radv_cmd_flush_bits flush_bits,
			    uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
				   bool inverted, uint64_t va);
/* CP DMA helpers: GPU-side copy/prefetch/clear of buffer memory. */
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
			   uint64_t src_va, uint64_t dest_va,
			   uint64_t size);
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
                        unsigned size);
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
			    uint64_t size, unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1193
1194void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
1195bool
1196radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
1197			     unsigned size,
1198			     unsigned alignment,
1199			     unsigned *out_offset,
1200			     void **ptr);
1201void
1202radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
1203			    const struct radv_subpass *subpass);
1204bool
1205radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
1206			    unsigned size, unsigned alignmnet,
1207			    const void *data, unsigned *out_offset);
1208
/* Render-pass meta operations: clears and resolves for the current subpass. */
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_cayman_get_maxdist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);

/* Update fast-clear metadata stored alongside the image. */
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
				   struct radv_image *image,
				   VkClearDepthStencilValue ds_clear_value,
				   VkImageAspectFlags aspects);

void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
				      struct radv_image *image,
				      int cb_idx,
				      uint32_t color_values[2]);

void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
			      struct radv_image *image, bool value);

void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
			      struct radv_image *image, bool value);

/* Fill size bytes at bo+offset with a 32-bit value.
 * NOTE(review): the return value appears to be flush bits needed
 * afterwards — confirm against the implementation. */
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radeon_winsys_bo *bo,
			  uint64_t offset, uint64_t size, uint32_t value);
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
bool radv_get_memory_fd(struct radv_device *device,
			struct radv_device_memory *memory,
			int *pFD);
1240
/* Emit the SET_SH_REG packet header for pointer_count shader pointers
 * starting at register sh_offset. Each pointer takes one dword when
 * 32-bit, two dwords otherwise. */
static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs,
			      unsigned sh_offset, unsigned pointer_count,
			      bool use_32bit_pointers)
{
	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
	radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}

/* Emit the pointer value that follows radv_emit_shader_pointer_head().
 * For 32-bit pointers only the low dword is emitted; the high 32 bits
 * must then match the GPU's 32-bit address window (asserted below). */
static inline void
radv_emit_shader_pointer_body(struct radv_device *device,
			      struct radeon_cmdbuf *cs,
			      uint64_t va, bool use_32bit_pointers)
{
	radeon_emit(cs, va);

	if (use_32bit_pointers) {
		assert(va == 0 ||
		       (va >> 32) == device->physical_device->rad_info.address32_hi);
	} else {
		radeon_emit(cs, va >> 32);
	}
}
1264
1265static inline void
1266radv_emit_shader_pointer(struct radv_device *device,
1267			 struct radeon_cmdbuf *cs,
1268			 uint32_t sh_offset, uint64_t va, bool global)
1269{
1270	bool use_32bit_pointers = !global;
1271
1272	radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
1273	radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
1274}
1275
/* Return the descriptor state for a bind point. Relies on
 * VK_PIPELINE_BIND_POINT_GRAPHICS/COMPUTE being 0/1 so the enum can
 * index descriptors[] directly. */
static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer,
			   VkPipelineBindPoint bind_point)
{
	assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ||
	       bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
	return &cmd_buffer->descriptors[bind_point];
}
1284
1285/*
1286 * Takes x,y,z as exact numbers of invocations, instead of blocks.
1287 *
1288 * Limitations: Can't call normal dispatch functions without binding or rebinding
1289 *              the compute pipeline.
1290 */
1291void radv_unaligned_dispatch(
1292	struct radv_cmd_buffer                      *cmd_buffer,
1293	uint32_t                                    x,
1294	uint32_t                                    y,
1295	uint32_t                                    z);
1296
1297struct radv_event {
1298	struct radeon_winsys_bo *bo;
1299	uint64_t *map;
1300};
1301
1302struct radv_shader_module;
1303
1304#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
1305#define RADV_HASH_SHADER_SISCHED             (1 << 1)
1306#define RADV_HASH_SHADER_UNSAFE_MATH         (1 << 2)
1307void
1308radv_hash_shaders(unsigned char *hash,
1309		  const VkPipelineShaderStageCreateInfo **stages,
1310		  const struct radv_pipeline_layout *layout,
1311		  const struct radv_pipeline_key *key,
1312		  uint32_t flags);
1313
1314static inline gl_shader_stage
1315vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
1316{
1317	assert(__builtin_popcount(vk_stage) == 1);
1318	return ffs(vk_stage) - 1;
1319}
1320
1321static inline VkShaderStageFlagBits
1322mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
1323{
1324	return (1 << mesa_stage);
1325}
1326
#define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)

/* Iterate `stage` over every shader stage bit set in `stage_bits`.
 * `__tmp` holds the not-yet-visited bits; the condition assigns the
 * lowest remaining stage to `stage` and continues while bits remain. */
#define radv_foreach_stage(stage, stage_bits)				\
	for (gl_shader_stage stage,					\
		     __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK);	\
	     stage = __builtin_ffs(__tmp) - 1, __tmp;			\
	     __tmp &= ~(1 << (stage)))

extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
unsigned radv_format_meta_fs_key(VkFormat format);

/* Multisample-related register values baked at pipeline creation. */
struct radv_multisample_state {
	uint32_t db_eqaa;
	uint32_t pa_sc_line_cntl;
	uint32_t pa_sc_mode_cntl_0;
	uint32_t pa_sc_mode_cntl_1;
	uint32_t pa_sc_aa_config;
	uint32_t pa_sc_aa_mask[2];
	unsigned num_samples;
};

/* Minimum vertex count and increment for the primitive topology. */
struct radv_prim_vertex_count {
	uint8_t min;
	uint8_t incr;
};

struct radv_vertex_elements_info {
	uint32_t format_size[MAX_VERTEX_ATTRIBS];
};

/* Precomputed inputs for the IA_MULTI_VGT_PARAM register. */
struct radv_ia_multi_vgt_param_helpers {
	uint32_t base;
	bool partial_es_wave;
	uint8_t primgroup_size;
	bool wd_switch_on_eop;
	bool ia_switch_on_eoi;
	bool partial_vs_wave;
};

#define SI_GS_PER_ES 128
1367
1368struct radv_pipeline {
1369	struct radv_device *                          device;
1370	struct radv_dynamic_state                     dynamic_state;
1371
1372	struct radv_pipeline_layout *                 layout;
1373
1374	bool					     need_indirect_descriptor_sets;
1375	struct radv_shader_variant *                 shaders[MESA_SHADER_STAGES];
1376	struct radv_shader_variant *gs_copy_shader;
1377	VkShaderStageFlags                           active_stages;
1378
1379	struct radeon_cmdbuf                      cs;
1380	uint32_t                                  ctx_cs_hash;
1381	struct radeon_cmdbuf                      ctx_cs;
1382
1383	struct radv_vertex_elements_info             vertex_elements;
1384
1385	uint32_t                                     binding_stride[MAX_VBS];
1386	uint8_t                                      num_vertex_bindings;
1387
1388	uint32_t user_data_0[MESA_SHADER_STAGES];
1389	union {
1390		struct {
1391			struct radv_multisample_state ms;
1392			uint32_t spi_baryc_cntl;
1393			bool prim_restart_enable;
1394			unsigned esgs_ring_size;
1395			unsigned gsvs_ring_size;
1396			uint32_t vtx_base_sgpr;
1397			struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
1398			uint8_t vtx_emit_num;
1399			struct radv_prim_vertex_count prim_vertex_count;
1400 			bool can_use_guardband;
1401			uint32_t needed_dynamic_state;
1402			bool disable_out_of_order_rast_for_occlusion;
1403
1404			/* Used for rbplus */
1405			uint32_t col_format;
1406			uint32_t cb_target_mask;
1407		} graphics;
1408	};
1409
1410	unsigned max_waves;
1411	unsigned scratch_bytes_per_wave;
1412
1413	/* Not NULL if graphics pipeline uses streamout. */
1414	struct radv_shader_variant *streamout_shader;
1415};
1416
1417static inline bool radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
1418{
1419	return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
1420}
1421
1422static inline bool radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
1423{
1424	return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
1425}
1426
struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
						 gl_shader_stage stage,
						 int idx);

struct radv_shader_variant *radv_get_shader(struct radv_pipeline *pipeline,
					    gl_shader_stage stage);

/* Extra non-Vulkan parameters used when creating internal (meta) pipelines. */
struct radv_graphics_pipeline_create_info {
	bool use_rectlist;
	bool db_depth_clear;
	bool db_stencil_clear;
	bool db_depth_disable_expclear;
	bool db_stencil_disable_expclear;
	bool db_flush_depth_inplace;
	bool db_flush_stencil_inplace;
	bool db_resummarize;
	uint32_t custom_blend_mode;
};

/* Create a graphics pipeline, optionally with meta-specific overrides. */
VkResult
radv_graphics_pipeline_create(VkDevice device,
			      VkPipelineCache cache,
			      const VkGraphicsPipelineCreateInfo *pCreateInfo,
			      const struct radv_graphics_pipeline_create_info *extra,
			      const VkAllocationCallbacks *alloc,
			      VkPipeline *pPipeline);
1453
struct vk_format_description;
/* VkFormat -> hardware format translation helpers. */
uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
					  int first_non_void);
uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc,
					 int first_non_void);
bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
uint32_t radv_translate_colorformat(VkFormat format);
uint32_t radv_translate_color_numformat(VkFormat format,
					const struct vk_format_description *desc,
					int first_non_void);
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
uint32_t radv_translate_dbformat(VkFormat format);
uint32_t radv_translate_tex_dataformat(VkFormat format,
				       const struct vk_format_description *desc,
				       int first_non_void);
uint32_t radv_translate_tex_numformat(VkFormat format,
				      const struct vk_format_description *desc,
				      int first_non_void);
bool radv_format_pack_clear_color(VkFormat format,
				  uint32_t clear_vals[2],
				  VkClearColorValue *value);
bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
bool radv_dcc_formats_compatible(VkFormat format1,
                                 VkFormat format2);
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
1480
/* Layout of the FMASK (MSAA color compression) surface within the image. */
struct radv_fmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};

/* Layout of the CMASK (color metadata) surface within the image. */
struct radv_cmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
};


/* One plane of a (possibly multi-planar) image. */
struct radv_image_plane {
	VkFormat format;
	struct radeon_surf surface;
	uint64_t offset;
};
1505
struct radv_image {
	VkImageType type;
	/* The original VkFormat provided by the client.  This may not match any
	 * of the actual surface formats.
	 */
	VkFormat vk_format;
	VkImageAspectFlags aspects;
	VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
	struct ac_surf_info info;
	VkImageTiling tiling; /** VkImageCreateInfo::tiling */
	VkImageCreateFlags flags; /** VkImageCreateInfo::flags */

	VkDeviceSize size;
	uint32_t alignment;

	unsigned queue_family_mask;
	bool exclusive;
	bool shareable;

	/* Set when bound */
	struct radeon_winsys_bo *bo;
	VkDeviceSize offset;
	uint64_t dcc_offset;
	uint64_t htile_offset;
	bool tc_compatible_htile;

	/* Metadata surfaces and offsets of per-image metadata words. */
	struct radv_fmask_info fmask;
	struct radv_cmask_info cmask;
	uint64_t clear_value_offset;
	uint64_t fce_pred_offset;
	uint64_t dcc_pred_offset;

	/*
	 * Metadata for the TC-compat zrange workaround. If the 32-bit value
	 * stored at this offset is UINT_MAX, the driver will emit
	 * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
	 * SET_CONTEXT_REG packet.
	 */
	uint64_t tc_compat_zrange_offset;

	/* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
	VkDeviceMemory owned_memory;

	/* Trailing flexible array of per-plane data. */
	unsigned plane_count;
	struct radv_image_plane planes[0];
};
1552
/* Whether the image has a htile that is known consistent with the contents of
 * the image. */
bool radv_layout_has_htile(const struct radv_image *image,
                           VkImageLayout layout,
                           unsigned queue_mask);

/* Whether the image has a htile that is known consistent with the contents of
 * the image and is allowed to be in compressed form.
 *
 * If this is false reads that don't use the htile should be able to return
 * correct results.
 */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     unsigned queue_mask);

/* Whether fast clears are allowed in the given layout/queue combination. */
bool radv_layout_can_fast_clear(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask);

/* Whether DCC compression may remain enabled in the given layout. */
bool radv_layout_dcc_compressed(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask);
1576
1577/**
1578 * Return whether the image has CMASK metadata for color surfaces.
1579 */
1580static inline bool
1581radv_image_has_cmask(const struct radv_image *image)
1582{
1583	return image->cmask.size;
1584}
1585
1586/**
1587 * Return whether the image has FMASK metadata for color surfaces.
1588 */
1589static inline bool
1590radv_image_has_fmask(const struct radv_image *image)
1591{
1592	return image->fmask.size;
1593}
1594
1595/**
1596 * Return whether the image has DCC metadata for color surfaces.
1597 */
1598static inline bool
1599radv_image_has_dcc(const struct radv_image *image)
1600{
1601	return image->planes[0].surface.dcc_size;
1602}
1603
1604/**
1605 * Return whether DCC metadata is enabled for a level.
1606 */
1607static inline bool
1608radv_dcc_enabled(const struct radv_image *image, unsigned level)
1609{
1610	return radv_image_has_dcc(image) &&
1611	       level < image->planes[0].surface.num_dcc_levels;
1612}
1613
1614/**
1615 * Return whether the image has CB metadata.
1616 */
1617static inline bool
1618radv_image_has_CB_metadata(const struct radv_image *image)
1619{
1620	return radv_image_has_cmask(image) ||
1621	       radv_image_has_fmask(image) ||
1622	       radv_image_has_dcc(image);
1623}
1624
1625/**
1626 * Return whether the image has HTILE metadata for depth surfaces.
1627 */
1628static inline bool
1629radv_image_has_htile(const struct radv_image *image)
1630{
1631	return image->planes[0].surface.htile_size;
1632}
1633
1634/**
1635 * Return whether HTILE metadata is enabled for a level.
1636 */
1637static inline bool
1638radv_htile_enabled(const struct radv_image *image, unsigned level)
1639{
1640	return radv_image_has_htile(image) && level == 0;
1641}
1642
1643/**
1644 * Return whether the image is TC-compatible HTILE.
1645 */
1646static inline bool
1647radv_image_is_tc_compat_htile(const struct radv_image *image)
1648{
1649	return radv_image_has_htile(image) && image->tc_compatible_htile;
1650}
1651
1652unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
1653
1654static inline uint32_t
1655radv_get_layerCount(const struct radv_image *image,
1656		    const VkImageSubresourceRange *range)
1657{
1658	return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
1659		image->info.array_size - range->baseArrayLayer : range->layerCount;
1660}
1661
1662static inline uint32_t
1663radv_get_levelCount(const struct radv_image *image,
1664		    const VkImageSubresourceRange *range)
1665{
1666	return range->levelCount == VK_REMAINING_MIP_LEVELS ?
1667		image->info.levels - range->baseMipLevel : range->levelCount;
1668}
1669
struct radeon_bo_metadata;
/* Fill *metadata with layout info for sharing the image externally. */
void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata);

/* Image descriptor words: either plane0 + FMASK descriptors, or up to
 * three per-plane descriptors for multi-planar formats. */
union radv_descriptor {
	struct {
		uint32_t plane0_descriptor[8];
		uint32_t fmask_descriptor[8];
	};
	struct {
		uint32_t plane_descriptors[3][8];
	};
};

struct radv_image_view {
	struct radv_image *image; /**< VkImageViewCreateInfo::image */
	struct radeon_winsys_bo *bo;

	VkImageViewType type;
	VkImageAspectFlags aspect_mask;
	VkFormat vk_format;
	unsigned plane_id;
	bool multiple_planes;
	uint32_t base_layer;
	uint32_t layer_count;
	uint32_t base_mip;
	uint32_t level_count;
	VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

	union radv_descriptor descriptor;

	/* Descriptor for use as a storage image as opposed to a sampled image.
	 * This has a few differences for cube maps (e.g. type).
	 */
	union radv_descriptor storage_descriptor;
};

/* VkImageCreateInfo plus RADV-internal creation flags. */
struct radv_image_create_info {
	const VkImageCreateInfo *vk_info;
	bool scanout;
	bool no_metadata_planes;
};
1714
VkResult radv_image_create(VkDevice _device,
			   const struct radv_image_create_info *info,
			   const VkAllocationCallbacks* alloc,
			   VkImage *pImage);

/* Create an image backed by an Android gralloc buffer
 * (VK_ANDROID_native_buffer). */
VkResult
radv_image_from_gralloc(VkDevice device_h,
                       const VkImageCreateInfo *base_info,
                       const VkNativeBufferANDROID *gralloc_info,
                       const VkAllocationCallbacks *alloc,
                       VkImage *out_image_h);

void radv_image_view_init(struct radv_image_view *view,
			  struct radv_device *device,
			  const VkImageViewCreateInfo* pCreateInfo);

VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);

/* State of a VkSamplerYcbcrConversion object. */
struct radv_sampler_ycbcr_conversion {
	VkFormat format;
	VkSamplerYcbcrModelConversion ycbcr_model;
	VkSamplerYcbcrRange ycbcr_range;
	VkComponentMapping components;
	VkChromaLocation chroma_offsets[2];
	VkFilter chroma_filter;
};

/* A buffer view and its packed hardware descriptor (state[4]). */
struct radv_buffer_view {
	struct radeon_winsys_bo *bo;
	VkFormat vk_format;
	uint64_t range; /**< VkBufferViewCreateInfo::range */
	uint32_t state[4];
};
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo);
1751
1752static inline struct VkExtent3D
1753radv_sanitize_image_extent(const VkImageType imageType,
1754			   const struct VkExtent3D imageExtent)
1755{
1756	switch (imageType) {
1757	case VK_IMAGE_TYPE_1D:
1758		return (VkExtent3D) { imageExtent.width, 1, 1 };
1759	case VK_IMAGE_TYPE_2D:
1760		return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
1761	case VK_IMAGE_TYPE_3D:
1762		return imageExtent;
1763	default:
1764		unreachable("invalid image type");
1765	}
1766}
1767
1768static inline struct VkOffset3D
1769radv_sanitize_image_offset(const VkImageType imageType,
1770			   const struct VkOffset3D imageOffset)
1771{
1772	switch (imageType) {
1773	case VK_IMAGE_TYPE_1D:
1774		return (VkOffset3D) { imageOffset.x, 0, 0 };
1775	case VK_IMAGE_TYPE_2D:
1776		return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
1777	case VK_IMAGE_TYPE_3D:
1778		return imageOffset;
1779	default:
1780		unreachable("invalid image type");
1781	}
1782}
1783
1784static inline bool
1785radv_image_extent_compare(const struct radv_image *image,
1786			  const VkExtent3D *extent)
1787{
1788	if (extent->width != image->info.width ||
1789	    extent->height != image->info.height ||
1790	    extent->depth != image->info.depth)
1791		return false;
1792	return true;
1793}
1794
1795struct radv_sampler {
1796	uint32_t state[4];
1797	struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
1798};
1799
/* Precomputed CB_* register values for one color attachment. */
struct radv_color_buffer_info {
	uint64_t cb_color_base;
	uint64_t cb_color_cmask;
	uint64_t cb_color_fmask;
	uint64_t cb_dcc_base;
	uint32_t cb_color_slice;
	uint32_t cb_color_view;
	uint32_t cb_color_info;
	uint32_t cb_color_attrib;
	uint32_t cb_color_attrib2;
	uint32_t cb_dcc_control;
	uint32_t cb_color_cmask_slice;
	uint32_t cb_color_fmask_slice;
	union {
		uint32_t cb_color_pitch; // GFX6-GFX8
		uint32_t cb_mrt_epitch; // GFX9+
	};
};

/* Precomputed DB_* register values for the depth/stencil attachment. */
struct radv_ds_buffer_info {
	uint64_t db_z_read_base;
	uint64_t db_stencil_read_base;
	uint64_t db_z_write_base;
	uint64_t db_stencil_write_base;
	uint64_t db_htile_data_base;
	uint32_t db_depth_info;
	uint32_t db_z_info;
	uint32_t db_stencil_info;
	uint32_t db_depth_view;
	uint32_t db_depth_size;
	uint32_t db_depth_slice;
	uint32_t db_htile_surface;
	uint32_t pa_su_poly_offset_db_fmt_cntl;
	uint32_t db_z_info2;
	uint32_t db_stencil_info2;
	float offset_scale;
};

/* A framebuffer attachment: its view plus the baked register state. */
struct radv_attachment_info {
	union {
		struct radv_color_buffer_info cb;
		struct radv_ds_buffer_info ds;
	};
	struct radv_image_view *attachment;
};
1845
/* A framebuffer with a trailing flexible array of attachments. */
struct radv_framebuffer {
	uint32_t                                     width;
	uint32_t                                     height;
	uint32_t                                     layers;

	uint32_t                                     attachment_count;
	struct radv_attachment_info                  attachments[0];
};

/* Stage/access masks of a subpass dependency barrier. */
struct radv_subpass_barrier {
	VkPipelineStageFlags src_stage_mask;
	VkAccessFlags        src_access_mask;
	VkAccessFlags        dst_access_mask;
};

void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
			  const struct radv_subpass_barrier *barrier);

struct radv_subpass_attachment {
	uint32_t         attachment;
	VkImageLayout    layout;
};

/* One subpass of a render pass; attachment arrays point into the
 * pass-level subpass_attachments array. */
struct radv_subpass {
	uint32_t                                     attachment_count;
	struct radv_subpass_attachment *             attachments;

	uint32_t                                     input_count;
	uint32_t                                     color_count;
	struct radv_subpass_attachment *             input_attachments;
	struct radv_subpass_attachment *             color_attachments;
	struct radv_subpass_attachment *             resolve_attachments;
	struct radv_subpass_attachment *             depth_stencil_attachment;

	/** Subpass has at least one resolve attachment */
	bool                                         has_resolve;

	/** Subpass has at least one color attachment */
	bool                                         has_color_att;

	struct radv_subpass_barrier                  start_barrier;

	uint32_t                                     view_mask;
	VkSampleCountFlagBits                        max_sample_count;
};

struct radv_render_pass_attachment {
	VkFormat                                     format;
	uint32_t                                     samples;
	VkAttachmentLoadOp                           load_op;
	VkAttachmentLoadOp                           stencil_load_op;
	VkImageLayout                                initial_layout;
	VkImageLayout                                final_layout;

	/* The subpass id in which the attachment will be used last. */
	uint32_t                                     last_subpass_idx;
};

/* A render pass with a trailing flexible array of subpasses. */
struct radv_render_pass {
	uint32_t                                     attachment_count;
	uint32_t                                     subpass_count;
	struct radv_subpass_attachment *             subpass_attachments;
	struct radv_render_pass_attachment *         attachments;
	struct radv_subpass_barrier                  end_barrier;
	struct radv_subpass                          subpasses[0];
};
1912
/* Create / destroy the device's meta (internal blit/clear/etc.) state. */
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
1915
/* GPU-visible query pool: results live in a single BO, one slot of
 * `stride` bytes per query, with an availability bitfield at
 * `availability_offset`.
 */
struct radv_query_pool {
	struct radeon_winsys_bo *bo;
	uint32_t stride;
	uint32_t availability_offset;
	uint64_t size;
	/* CPU mapping of `bo`. */
	char *ptr;
	VkQueryType type;
	/* Only meaningful for VK_QUERY_TYPE_PIPELINE_STATISTICS pools. */
	uint32_t pipeline_stats_mask;
};
1925
struct radv_semaphore {
	/* use a winsys sem for non-exportable */
	struct radeon_winsys_sem *sem;
	/* Kernel syncobj handle for exportable semaphores; temp_syncobj
	 * holds a temporarily imported payload that overrides `syncobj`. */
	uint32_t syncobj;
	uint32_t temp_syncobj;
};
1932
/* Bind `set` at slot `idx` for the given pipeline bind point on the
 * command buffer. */
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
			     VkPipelineBindPoint bind_point,
			     struct radv_descriptor_set *set,
			     unsigned idx);
1937
/* Apply write/copy descriptor updates.  When `overrideSet` is not
 * VK_NULL_HANDLE it is used in place of the dstSet in each write. */
void
radv_update_descriptor_sets(struct radv_device *device,
                            struct radv_cmd_buffer *cmd_buffer,
                            VkDescriptorSet overrideSet,
                            uint32_t descriptorWriteCount,
                            const VkWriteDescriptorSet *pDescriptorWrites,
                            uint32_t descriptorCopyCount,
                            const VkCopyDescriptorSet *pDescriptorCopies);
1946
/* Apply an update template (VK_KHR_descriptor_update_template) to `set`;
 * `pData` is laid out as described by the template. */
void
radv_update_descriptor_set_with_template(struct radv_device *device,
                                         struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_descriptor_set *set,
                                         VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                         const void *pData);
1953
/* Push-descriptor variant used internally by meta operations. */
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
                                   VkPipelineBindPoint pipelineBindPoint,
                                   VkPipelineLayout _layout,
                                   uint32_t set,
                                   uint32_t descriptorWriteCount,
                                   const VkWriteDescriptorSet *pDescriptorWrites);
1960
/* Initialize the image's DCC metadata surface to `value`. */
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
			 struct radv_image *image, uint32_t value);

/* Initialize the image's FMASK metadata surface. */
void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer,
			   struct radv_image *image);
1966
struct radv_fence {
	struct radeon_winsys_fence *fence;
	/* WSI (presentation) fence, when the fence was created for WSI. */
	struct wsi_fence *fence_wsi;
	bool submitted;
	bool signalled;

	/* Kernel syncobj handle for exportable fences; temp_syncobj holds
	 * a temporarily imported payload that overrides `syncobj`. */
	uint32_t syncobj;
	uint32_t temp_syncobj;
};
1976
/* radv_nir_to_llvm.c */
struct radv_shader_variant_info;
struct radv_nir_compiler_options;

/* Compile the GS copy shader (VS-like shader that copies GS ring output
 * to the attribute ring) for `geom_shader`. */
void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
				 struct nir_shader *geom_shader,
				 struct ac_shader_binary *binary,
				 struct ac_shader_config *config,
				 struct radv_shader_variant_info *shader_info,
				 const struct radv_nir_compiler_options *option);

/* Compile `nir_count` NIR shaders (merged stages on GFX9+) into a single
 * binary via LLVM, filling `binary`, `config` and `shader_info`. */
void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
			     struct ac_shader_binary *binary,
			     struct ac_shader_config *config,
			     struct radv_shader_variant_info *shader_info,
			     struct nir_shader *const *nir,
			     int nir_count,
			     const struct radv_nir_compiler_options *options);

/* Maximum workgroup size usable by `stage` of `nir` on `chip_class`. */
unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
					 gl_shader_stage stage,
					 const struct nir_shader *nir);
1999
/* radv_shader_info.h */
struct radv_shader_info;

/* Gather per-shader information from `nir` into `info`. */
void radv_nir_shader_info_pass(const struct nir_shader *nir,
			       const struct radv_nir_compiler_options *options,
			       struct radv_shader_info *info);

/* Reset `info` to its default (empty) state before a gather pass. */
void radv_nir_shader_info_init(struct radv_shader_info *info);

struct radeon_winsys_sem;

/* Current monotonic time in nanoseconds (used for fence timeouts). */
uint64_t radv_get_current_time(void);
2012
/* Generate inline casts between a dispatchable Vulkan handle (always
 * pointer-sized) and the driver's backing struct: a plain pointer cast
 * in both directions. */
#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
								\
	static inline struct __radv_type *			\
	__radv_type ## _from_handle(__VkType _handle)		\
	{							\
		return (struct __radv_type *) _handle;		\
	}							\
								\
	static inline __VkType					\
	__radv_type ## _to_handle(struct __radv_type *_obj)	\
	{							\
		return (__VkType) _obj;				\
	}
2026
/* Generate inline casts for non-dispatchable handles, which are 64-bit
 * integers on 32-bit builds; round-trip through uintptr_t to cast
 * between the handle value and a pointer without warnings. */
#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType)		\
									\
	static inline struct __radv_type *				\
	__radv_type ## _from_handle(__VkType _handle)			\
	{								\
		return (struct __radv_type *)(uintptr_t) _handle;	\
	}								\
									\
	static inline __VkType						\
	__radv_type ## _to_handle(struct __radv_type *_obj)		\
	{								\
		return (__VkType)(uintptr_t) _obj;			\
	}
2040
/* Declare a local `struct __radv_type *__name` initialized from a Vulkan
 * handle, e.g. RADV_FROM_HANDLE(radv_device, device, _device); */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle)			\
	struct __radv_type *__name = __radv_type ## _from_handle(__handle)
2043
/* Dispatchable handle <-> struct casts. */
RADV_DEFINE_HANDLE_CASTS(radv_cmd_buffer, VkCommandBuffer)
RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice)
RADV_DEFINE_HANDLE_CASTS(radv_instance, VkInstance)
RADV_DEFINE_HANDLE_CASTS(radv_physical_device, VkPhysicalDevice)
RADV_DEFINE_HANDLE_CASTS(radv_queue, VkQueue)
2049
2050RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, VkCommandPool)
2051RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, VkBuffer)
2052RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView)
2053RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool)
2054RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet)
2055RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout)
2056RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplate)
2057RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory)
2058RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence)
2059RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent)
2060RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_framebuffer, VkFramebuffer)
2061RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image, VkImage)
2062RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, VkImageView);
2063RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, VkPipelineCache)
2064RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, VkPipeline)
2065RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, VkPipelineLayout)
2066RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool)
2067RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
2068RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
2069RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)
2070RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)
2071RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore)
2072
2073#endif /* RADV_PRIVATE_H */
2074