1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 * Copyright 2018 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25#ifndef SI_PIPE_H
26#define SI_PIPE_H
27
28#include "si_shader.h"
29#include "si_state.h"
30
31#include "util/u_dynarray.h"
32#include "util/u_idalloc.h"
33#include "util/u_threaded_context.h"
34
#ifdef PIPE_ARCH_BIG_ENDIAN
#define SI_BIG_ENDIAN 1
#else
#define SI_BIG_ENDIAN 0
#endif

/* PCI vendor ID shared by ATI/AMD GPUs. */
#define ATI_VENDOR_ID			0x1002

/* Sentinel value meaning "no query" (all bits set). */
#define SI_NOT_QUERY			0xffffffff

/* The base vertex and primitive restart can be any number, but we must pick
 * one which will mean "unknown" for the purpose of state tracking and
 * the number shouldn't be a commonly-used one. */
#define SI_BASE_VERTEX_UNKNOWN		INT_MIN
#define SI_RESTART_INDEX_UNKNOWN	INT_MIN
#define SI_INSTANCE_COUNT_UNKNOWN	INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES	8
#define SI_MAX_POINT_SIZE		2048
#define SI_GS_PER_ES			128
/* Alignment for optimal CP DMA performance. */
#define SI_CPDMA_ALIGNMENT		32

/* Tunables for compute-based clear_buffer and copy_buffer: */
#define SI_COMPUTE_CLEAR_DW_PER_THREAD	4
#define SI_COMPUTE_COPY_DW_PER_THREAD	4
#define SI_COMPUTE_DST_CACHE_POLICY	L2_STREAM

/* Flags for si_context::flags (cache invalidation and synchronization).
 * Pipeline & streamout query controls. */
#define SI_CONTEXT_START_PIPELINE_STATS	(1 << 0)
#define SI_CONTEXT_STOP_PIPELINE_STATS	(1 << 1)
#define SI_CONTEXT_FLUSH_FOR_RENDER_COND (1 << 2)
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE		(1 << 3)
/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
#define SI_CONTEXT_INV_SMEM_L1		(1 << 4)
/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
#define SI_CONTEXT_INV_VMEM_L1		(1 << 5)
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
#define SI_CONTEXT_INV_GLOBAL_L2	(1 << 6)
/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
 * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2	(1 << 7)
/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
 * a CB or DB flush. */
#define SI_CONTEXT_INV_L2_METADATA	(1 << 8)
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_DB	(1 << 9)
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (1 << 10)
#define SI_CONTEXT_FLUSH_AND_INV_CB	(1 << 11)
/* Engine synchronization. */
#define SI_CONTEXT_VS_PARTIAL_FLUSH	(1 << 12)
#define SI_CONTEXT_PS_PARTIAL_FLUSH	(1 << 13)
#define SI_CONTEXT_CS_PARTIAL_FLUSH	(1 << 14)
#define SI_CONTEXT_VGT_FLUSH		(1 << 15)
#define SI_CONTEXT_VGT_STREAMOUT_SYNC	(1 << 16)

/* Bits for si_context::prefetch_L2_mask: which shader stages / descriptors
 * still need an L2 prefetch. */
#define SI_PREFETCH_VBO_DESCRIPTORS	(1 << 0)
#define SI_PREFETCH_LS			(1 << 1)
#define SI_PREFETCH_HS			(1 << 2)
#define SI_PREFETCH_ES			(1 << 3)
#define SI_PREFETCH_GS			(1 << 4)
#define SI_PREFETCH_VS			(1 << 5)
#define SI_PREFETCH_PS			(1 << 6)

/* Driver limits. */
#define SI_MAX_BORDER_COLORS		4096
#define SI_MAX_VIEWPORTS		16
#define SIX_BITS			0x3F	/* mask of the low 6 bits */
#define SI_MAP_BUFFER_ALIGNMENT		64
#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024

/* Driver-private pipe_resource flags (stored alongside PIPE_RESOURCE_FLAG_*). */
#define SI_RESOURCE_FLAG_TRANSFER	(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define SI_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define SI_RESOURCE_FLAG_DISABLE_DCC	(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define SI_RESOURCE_FLAG_UNMAPPABLE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
#define SI_RESOURCE_FLAG_READ_ONLY	(PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
#define SI_RESOURCE_FLAG_32BIT		(PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
#define SI_RESOURCE_FLAG_CLEAR		(PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
/* For const_uploader, upload data via GTT and copy to VRAM on context flush via SDMA. */
#define SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA  (PIPE_RESOURCE_FLAG_DRV_PRIV << 8)
115
/* Hardware DCC clear codes. The 0000/0001/1110/1111 variants encode the
 * clear color directly in the DCC metadata; DCC_CLEAR_COLOR_REG means the
 * color comes from the clear-color registers, and DCC_UNCOMPRESSED marks
 * the surface as not compressed. */
enum si_clear_code
{
	DCC_CLEAR_COLOR_0000   = 0x00000000,
	DCC_CLEAR_COLOR_0001   = 0x40404040,
	DCC_CLEAR_COLOR_1110   = 0x80808080,
	DCC_CLEAR_COLOR_1111   = 0xC0C0C0C0,
	DCC_CLEAR_COLOR_REG    = 0x20202020,
	DCC_UNCOMPRESSED       = 0xFFFFFFFF,
};
125
/* Driver-private image access bit — NOTE(review): presumably ORed into
 * pipe_image_view::access next to PIPE_IMAGE_ACCESS_*; confirm at users. */
#define SI_IMAGE_ACCESS_AS_BUFFER	(1 << 7)

/* Debug flags. Each enumerator is a bit index into si_screen::debug_flags;
 * use the DBG() macro below to get the bit mask. */
enum {
	/* Shader logging options: */
	DBG_VS = PIPE_SHADER_VERTEX,
	DBG_PS = PIPE_SHADER_FRAGMENT,
	DBG_GS = PIPE_SHADER_GEOMETRY,
	DBG_TCS = PIPE_SHADER_TESS_CTRL,
	DBG_TES = PIPE_SHADER_TESS_EVAL,
	DBG_CS = PIPE_SHADER_COMPUTE,
	DBG_NO_IR,
	DBG_NO_TGSI,
	DBG_NO_ASM,
	DBG_PREOPT_IR,

	/* Shader compiler options the shader cache should be aware of: */
	DBG_FS_CORRECT_DERIVS_AFTER_KILL,
	DBG_UNSAFE_MATH,
	DBG_SI_SCHED,
	DBG_GISEL,

	/* Shader compiler options (with no effect on the shader cache): */
	DBG_CHECK_IR,
	DBG_MONOLITHIC_SHADERS,
	DBG_NO_OPT_VARIANT,

	/* Information logging options: */
	DBG_INFO,
	DBG_TEX,
	DBG_COMPUTE,
	DBG_VM,

	/* Driver options: */
	DBG_FORCE_DMA,
	DBG_NO_ASYNC_DMA,
	DBG_NO_WC,
	DBG_CHECK_VM,
	DBG_RESERVE_VMID,
	DBG_ZERO_VRAM,

	/* 3D engine options: */
	DBG_SWITCH_ON_EOP,
	DBG_NO_OUT_OF_ORDER,
	DBG_NO_DPBB,
	DBG_NO_DFSM,
	DBG_DPBB,
	DBG_DFSM,
	DBG_NO_HYPERZ,
	DBG_NO_RB_PLUS,
	DBG_NO_2D_TILING,
	DBG_NO_TILING,
	DBG_NO_DCC,
	DBG_NO_DCC_CLEAR,
	DBG_NO_DCC_FB,
	DBG_NO_DCC_MSAA,
	DBG_NO_FMASK,

	/* Tests: */
	DBG_TEST_DMA,
	DBG_TEST_VMFAULT_CP,
	DBG_TEST_VMFAULT_SDMA,
	DBG_TEST_VMFAULT_SHADER,
	DBG_TEST_DMA_PERF,
	DBG_TEST_GDS,
	DBG_TEST_GDS_MM,
	DBG_TEST_GDS_OA_MM,
};

/* Mask covering bits 0..DBG_CS, i.e. all per-stage shader-logging flags. */
#define DBG_ALL_SHADERS		(((1 << (DBG_CS + 1)) - 1))
/* Convert a DBG_* enumerator into its 64-bit debug_flags mask. */
#define DBG(name)		(1ull << DBG_##name)
197
/* TC L2 cache policy for memory operations (maps to the SLC bit). */
enum si_cache_policy {
	L2_BYPASS,
	L2_STREAM, /* same as SLC=1 */
	L2_LRU,    /* same as SLC=0 */
};
203
/* Which consumer the data must be made coherent with; determines which
 * cache flushes are required after a write. */
enum si_coherency {
	SI_COHERENCY_NONE, /* no cache flushes needed */
	SI_COHERENCY_SHADER,
	SI_COHERENCY_CB_META,
	SI_COHERENCY_CP,
};
210
struct si_compute;
struct hash_table;
struct u_suballocator;

/* The base buffer/texture resource type, wrapping a winsys buffer.
 *
 * Only 32-bit buffer allocations are supported, gallium doesn't support more
 * at the moment.
 */
struct si_resource {
	struct threaded_resource	b;

	/* Winsys objects. */
	struct pb_buffer		*buf;
	uint64_t			gpu_address;
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains;
	enum radeon_bo_flag		flags;
	unsigned			bind_history; /* NOTE(review): presumably a PIPE_BIND_* mask — confirm */
	int				max_forced_staging_uploads;

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range		valid_buffer_range;

	/* For buffers only. This indicates that a write operation has been
	 * performed by TC L2, but the cache hasn't been flushed.
	 * Any hw block which doesn't use or bypasses TC L2 should check this
	 * flag and flush the cache before using the buffer.
	 *
	 * For example, TC L2 must be flushed if a buffer which has been
	 * modified by a shader store instruction is about to be used as
	 * an index buffer. The reason is that VGT DMA index fetching doesn't
	 * use TC L2.
	 */
	bool				TC_L2_dirty;

	/* Whether this resource is referenced by bindless handles. */
	bool				texture_handle_allocated;
	bool				image_handle_allocated;

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */
};
265
/* Driver-private transfer object; "staging" is an optional bounce buffer
 * used when the resource cannot be mapped directly. */
struct si_transfer {
	struct threaded_transfer	b;
	struct si_resource		*staging;
	unsigned			offset; /* NOTE(review): presumably the data offset within staging — confirm at map/unmap */
};
271
/* Driver-private texture, layered on top of si_resource. Holds surface
 * layout plus offsets of the auxiliary (compression/clear) buffers that
 * may live inside the same allocation. */
struct si_texture {
	struct si_resource		buffer;

	struct radeon_surf		surface;
	uint64_t			size;
	struct si_texture		*flushed_depth_texture;

	/* One texture allocation can contain these buffers:
	 * - image (pixel data)
	 * - FMASK buffer (MSAA compression)
	 * - CMASK buffer (MSAA compression and/or legacy fast color clear)
	 * - HTILE buffer (Z/S compression and fast Z/S clear)
	 * - DCC buffer (color compression and new fast color clear)
	 * - displayable DCC buffer (if the DCC buffer is not displayable)
	 * - DCC retile mapping buffer (if the DCC buffer is not displayable)
	 */
	uint64_t			fmask_offset;
	uint64_t			cmask_offset;
	uint64_t			cmask_base_address_reg;
	struct si_resource		*cmask_buffer;
	uint64_t			dcc_offset; /* 0 = disabled */
	uint64_t			display_dcc_offset;
	uint64_t			dcc_retile_map_offset;
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;
	unsigned			num_level0_transfers;

	/* Depth buffer compression and fast clear. */
	uint64_t			htile_offset;
	float				depth_clear_value;
	uint16_t			dirty_level_mask; /* each bit says if that mipmap is compressed */
	uint16_t			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	enum pipe_format		db_render_format:16;
	uint8_t				stencil_clear_value;
	bool				tc_compatible_htile:1;
	bool				depth_cleared:1; /* if it was cleared at least once */
	bool				stencil_cleared:1; /* if it was cleared at least once */
	bool				upgraded_depth:1; /* upgraded from unorm to Z32_FLOAT */
	bool				is_depth:1;
	bool				db_compatible:1;
	bool				can_sample_z:1;
	bool				can_sample_s:1;

	/* We need to track DCC dirtiness, because st/dri usually calls
	 * flush_resource twice per frame (not a bug) and we don't want to
	 * decompress DCC twice. Also, the dirty tracking must be done even
	 * if DCC isn't used, because it's required by the DCC usage analysis
	 * for a possible future enablement.
	 */
	bool				separate_dcc_dirty:1;
	/* Statistics gathering for the DCC enablement heuristic. */
	bool				dcc_gather_statistics:1;
	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer.
	 */
	unsigned                        framebuffers_bound;
	/* Whether the texture is a displayable back buffer and needs DCC
	 * decompression, which is expensive. Therefore, it's enabled only
	 * if statistics suggest that it will pay off and it's allocated
	 * separately. It can't be bound as a sampler by apps. Limited to
	 * target == 2D and last_level == 0. If enabled, dcc_offset contains
	 * the absolute GPUVM address, not the relative one.
	 */
	struct si_resource		*dcc_separate_buffer;
	/* When DCC is temporarily disabled, the separate buffer is here. */
	struct si_resource		*last_dcc_separate_buffer;
	/* Estimate of how much this color buffer is written to in units of
	 * full-screen draws: ps_invocations / (width * height)
	 * Shader kills, late Z, and blending with trivial discards make it
	 * inaccurate (we need to count CB updates, not PS invocations).
	 */
	unsigned			ps_draw_ratio;
	/* The number of clears since the last DCC usage analysis. */
	unsigned			num_slow_clears;
};
348
/* Driver-private pipe_surface holding precomputed CB (color buffer) and
 * DB (depth buffer) register values for this render-target view. */
struct si_surface {
	struct pipe_surface		base;

	/* These can vary with block-compressed textures. */
	uint16_t width0;
	uint16_t height0;

	/* Lazily-computed register state; set once the color/depth part below
	 * has been filled in. */
	bool color_initialized:1;
	bool depth_initialized:1;

	/* Misc. color flags. */
	bool color_is_int8:1;
	bool color_is_int10:1;
	bool dcc_incompatible:1;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_view;
	unsigned cb_color_attrib;
	unsigned cb_color_attrib2;	/* GFX9 and later */
	unsigned cb_dcc_control;	/* VI and later */
	unsigned spi_shader_col_format:8;	/* no blending, no alpha-to-coverage. */
	unsigned spi_shader_col_format_alpha:8;	/* alpha-to-coverage */
	unsigned spi_shader_col_format_blend:8;	/* blending without alpha. */
	unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */

	/* DB registers. */
	uint64_t db_depth_base;		/* DB_Z_READ/WRITE_BASE */
	uint64_t db_stencil_base;
	uint64_t db_htile_data_base;
	unsigned db_depth_info;
	unsigned db_z_info;
	unsigned db_z_info2;		/* GFX9+ */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;
	unsigned db_stencil_info;
	unsigned db_stencil_info2;	/* GFX9+ */
	unsigned db_htile_surface;
};
389
/* One busy/idle sample pair for a single GPU block, accumulated by the
 * GPU-load polling thread (see si_screen::gpu_load_thread). */
struct si_mmio_counter {
	unsigned busy;
	unsigned idle;
};
394
/* All per-block load counters. The zero-length "array" member aliases the
 * same storage as "named", allowing the counters to be accessed by index
 * as well as by name. */
union si_mmio_counters {
	struct {
		/* For global GPU load including SDMA. */
		struct si_mmio_counter gpu;

		/* GRBM_STATUS */
		struct si_mmio_counter spi;
		struct si_mmio_counter gui;
		struct si_mmio_counter ta;
		struct si_mmio_counter gds;
		struct si_mmio_counter vgt;
		struct si_mmio_counter ia;
		struct si_mmio_counter sx;
		struct si_mmio_counter wd;
		struct si_mmio_counter bci;
		struct si_mmio_counter sc;
		struct si_mmio_counter pa;
		struct si_mmio_counter db;
		struct si_mmio_counter cp;
		struct si_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct si_mmio_counter sdma;

		/* CP_STAT */
		struct si_mmio_counter pfp;
		struct si_mmio_counter meq;
		struct si_mmio_counter me;
		struct si_mmio_counter surf_sync;
		struct si_mmio_counter cp_dma;
		struct si_mmio_counter scratch_ram;
	} named;
	unsigned array[0]; /* flat view of the counters above (GNU zero-length array) */
};
429
/* Driver-private pipe_memory_object wrapping an imported winsys buffer. */
struct si_memory_object {
	struct pipe_memory_object	b;
	struct pb_buffer		*buf;
	uint32_t			stride;
};
435
/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib;		/* snapshot of the IB contents */
	unsigned			num_dw;		/* IB size in dwords */

	struct radeon_bo_list_item	*bo_list;	/* buffers referenced by the CS */
	unsigned			bo_count;
};
444
/* Per-process screen state shared by all contexts: device info, tunables,
 * the in-memory shader cache, and compiler thread queues. */
struct si_screen {
	struct pipe_screen		b;
	struct radeon_winsys		*ws;
	struct disk_cache		*disk_shader_cache;

	struct radeon_info		info;
	uint64_t			debug_flags; /* mask of DBG() bits */
	char				renderer_string[183];

	/* Precomputed hardware parameters and capability flags. */
	unsigned			pa_sc_raster_config;
	unsigned			pa_sc_raster_config_1;
	unsigned			se_tile_repeat;
	unsigned			gs_table_depth;
	unsigned			tess_offchip_block_dw_size;
	unsigned			tess_offchip_ring_size;
	unsigned			tess_factor_ring_size;
	unsigned			vgt_hs_offchip_param;
	unsigned			eqaa_force_coverage_samples;
	unsigned			eqaa_force_z_samples;
	unsigned			eqaa_force_color_samples;
	bool				has_clear_state;
	bool				has_distributed_tess;
	bool				has_draw_indirect_multi;
	bool				has_out_of_order_rast;
	bool				assume_no_z_fights;
	bool				commutative_blend_add;
	bool				has_gfx9_scissor_bug;
	bool				has_msaa_sample_loc_bug;
	bool				has_ls_vgpr_init_bug;
	bool				has_dcc_constant_encode;
	bool				dpbb_allowed;
	bool				dfsm_allowed;
	bool				llvm_has_working_vgpr_indexing;

	/* Boolean debug options generated from si_debug_options.h. */
	struct {
#define OPT_BOOL(name, dflt, description) bool name:1;
#include "si_debug_options.h"
	} options;

	/* Whether shaders are monolithic (1-part) or separate (3-part). */
	bool				use_monolithic_shaders;
	bool				record_llvm_ir;
	bool				has_rbplus;     /* if RB+ registers exist */
	bool				rbplus_allowed; /* if RB+ is allowed */
	bool				dcc_msaa_allowed;
	bool				cpdma_prefetch_writes_memory;

	struct slab_parent_pool		pool_transfers;

	/* Texture filter settings. */
	int				force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context		*aux_context;
	mtx_t				aux_context_lock;

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned			num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned			num_shaders_created;
	unsigned			num_shader_cache_hits;

	/* GPU load thread. */
	mtx_t				gpu_load_mutex;
	thrd_t				gpu_load_thread;
	union si_mmio_counters	mmio_counters;
	volatile unsigned		gpu_load_stop_thread; /* bool */

	/* Performance counters. */
	struct si_perfcounters	*perfcounters;

	/* If pipe_screen wants to recompute and re-emit the framebuffer,
	 * sampler, and image states of all contexts, it should atomically
	 * increment this.
	 *
	 * Each context will compare this with its own last known value of
	 * the counter before drawing and re-emit the states accordingly.
	 */
	unsigned			dirty_tex_counter;
	unsigned			dirty_buf_counter;

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned			compressed_colortex_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier jobs
		 * that end in L2 are seen by CP.
		 */
		unsigned L2_to_cp;
	} barrier_flags;

	/* Shared shader parts (prologs/epilogs), guarded by shader_parts_mutex. */
	mtx_t			shader_parts_mutex;
	struct si_shader_part		*vs_prologs;
	struct si_shader_part		*tcs_epilogs;
	struct si_shader_part		*gs_prologs;
	struct si_shader_part		*ps_prologs;
	struct si_shader_part		*ps_epilogs;

	/* Shader cache in memory.
	 *
	 * Design & limitations:
	 * - The shader cache is per screen (= per process), never saved to
	 *   disk, and skips redundant shader compilations from TGSI to bytecode.
	 * - It can only be used with one-variant-per-shader support, in which
	 *   case only the main (typically middle) part of shaders is cached.
	 * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
	 *   variants of VS and TES are cached, so LS and ES aren't.
	 * - GS and CS aren't cached, but it's certainly possible to cache
	 *   those as well.
	 */
	mtx_t			shader_cache_mutex;
	struct hash_table		*shader_cache;

	/* Shader compiler queue for multithreaded compilation. */
	struct util_queue		shader_compiler_queue;
	/* Use at most 3 normal compiler threads on quadcore and better.
	 * Hyperthreaded CPUs report the number of threads, but we want
	 * the number of cores. We only need this many threads for shader-db. */
	struct ac_llvm_compiler		compiler[24]; /* used by the queue only */

	struct util_queue		shader_compiler_queue_low_priority;
	/* Use at most 2 low priority threads on quadcore and better.
	 * We want to minimize the impact on multithreaded Mesa. */
	struct ac_llvm_compiler		compiler_lowp[10];
};
583
/* Blend color state plus a precomputed "is anything non-zero" flag. */
struct si_blend_color {
	struct pipe_blend_color		state;
	bool				any_nonzeros; /* true if any component of state is non-zero */
};
588
/* Driver-private sampler view holding the hardware image/buffer and FMASK
 * descriptors for the bound resource. */
struct si_sampler_view {
	struct pipe_sampler_view	base;
	/* [0..7] = image descriptor
	 * [4..7] = buffer descriptor */
	uint32_t			state[8];
	uint32_t			fmask_state[8];
	const struct legacy_surf_level	*base_level_info;
	ubyte				base_level;
	ubyte				block_width;
	bool is_stencil_sampler;
	bool is_integer;
	bool dcc_incompatible;
};
602
/* Magic value stored in debug builds to validate si_sampler_state pointers. */
#define SI_SAMPLER_STATE_MAGIC 0x34f1c35a

/* Hardware sampler state words; separate variants for integer formats and
 * for depth textures upgraded to Z32_FLOAT. */
struct si_sampler_state {
#ifdef DEBUG
	unsigned			magic; /* SI_SAMPLER_STATE_MAGIC in valid objects */
#endif
	uint32_t			val[4];
	uint32_t			integer_val[4];
	uint32_t			upgraded_depth_val[4];
};
613
/* Currently bound compute shader and the last one emitted to the hardware. */
struct si_cs_shader_state {
	struct si_compute		*program;		/* bound program */
	struct si_compute		*emitted_program;	/* last program emitted */
	unsigned			offset;
	bool				initialized;
	bool				uses_scratch;
};
621
/* Per-shader-stage sampler views and sampler states. */
struct si_samplers {
	struct pipe_sampler_view	*views[SI_NUM_SAMPLERS];
	struct si_sampler_state		*sampler_states[SI_NUM_SAMPLERS];

	/* The i-th bit is set if that element is enabled (non-NULL resource). */
	unsigned			enabled_mask;
	uint32_t			needs_depth_decompress_mask;
	uint32_t			needs_color_decompress_mask;
};
631
/* Per-shader-stage shader image bindings. */
struct si_images {
	struct pipe_image_view		views[SI_NUM_IMAGES];
	uint32_t			needs_color_decompress_mask;
	unsigned			enabled_mask; /* i-th bit set if views[i] is bound */
};
637
/* Framebuffer state plus values precomputed from the bound surfaces. */
struct si_framebuffer {
	struct pipe_framebuffer_state	state;
	unsigned			colorbuf_enabled_4bit;
	unsigned			spi_shader_col_format;
	unsigned			spi_shader_col_format_alpha;
	unsigned			spi_shader_col_format_blend;
	unsigned			spi_shader_col_format_blend_alpha;
	ubyte				nr_samples:5; /* at most 16xAA */
	ubyte				log_samples:3; /* at most 4 = 16xAA */
	ubyte				nr_color_samples; /* at most 8xAA */
	ubyte				compressed_cb_mask;
	ubyte				uncompressed_cb_mask;
	ubyte				color_is_int8;
	ubyte				color_is_int10;
	ubyte				dirty_cbufs;
	ubyte				dcc_overwrite_combiner_watermark;
	bool				dirty_zsbuf;
	bool				any_dst_linear;
	bool				CB_has_shader_readable_metadata;
	bool				DB_has_shader_readable_metadata;
	bool				all_DCC_pipe_aligned;
};
660
/* Viewport coordinate quantization modes (integer.fraction bit split and
 * the resulting sub-pixel precision). */
enum si_quant_mode {
	/* This is the list we want to support. */
	SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
	SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
	SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,
};
667
/* Scissor rectangle with signed coordinates, plus the quantization mode
 * of the viewport it was derived from. */
struct si_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
	enum si_quant_mode quant_mode;
};
675
/* All viewport states and their corresponding scissor rectangles. */
struct si_viewports {
	struct pipe_viewport_state	states[SI_MAX_VIEWPORTS];
	struct si_signed_scissor	as_scissor[SI_MAX_VIEWPORTS];
};
680
/* User clip plane state plus a precomputed "is anything non-zero" flag. */
struct si_clip_state {
	struct pipe_clip_state		state;
	bool				any_nonzeros; /* true if any clip-plane component is non-zero */
};
685
/* One stream-output (transform feedback) target buffer. */
struct si_streamout_target {
	struct pipe_stream_output_target b;

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct si_resource	*buf_filled_size;
	unsigned		buf_filled_size_offset;
	bool			buf_filled_size_valid;

	unsigned		stride_in_dw; /* vertex stride in dwords */
};
696
/* Stream-output (transform feedback) context state. */
struct si_streamout {
	bool				begin_emitted;

	unsigned			enabled_mask;
	unsigned			num_targets;
	struct si_streamout_target	*targets[PIPE_MAX_SO_BUFFERS];

	unsigned			append_bitmask;
	bool				suspended;

	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	uint16_t			*stride_in_dw;
	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned			hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	bool				streamout_enabled;
	bool				prims_gen_query_enabled;
	int				num_prims_gen_queries;
};
720
/* A shader state consists of the shader selector, which is a constant state
 * object shared by multiple contexts and shouldn't be modified, and
 * the current shader variant selected for this context.
 */
struct si_shader_ctx_state {
	struct si_shader_selector	*cso;		/* shared, immutable CSO */
	struct si_shader		*current;	/* variant selected for this context */
};
729
#define SI_NUM_VGT_PARAM_KEY_BITS 12
#define SI_NUM_VGT_PARAM_STATES (1 << SI_NUM_VGT_PARAM_KEY_BITS)

/* The IA_MULTI_VGT_PARAM key used to index the table of precomputed values.
 * Some fields are set by state-change calls, most are set by draw_vbo.
 * "u" and "index" alias the same 32 bits; the bitfield order is mirrored
 * for big-endian so that "index" is identical on both layouts.
 */
union si_vgt_param_key {
	struct {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
		unsigned prim:4;
		unsigned uses_instancing:1;
		unsigned multi_instances_smaller_than_primgroup:1;
		unsigned primitive_restart:1;
		unsigned count_from_stream_output:1;
		unsigned line_stipple_enabled:1;
		unsigned uses_tess:1;
		unsigned tess_uses_prim_id:1;
		unsigned uses_gs:1;
		unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS;
#else /* PIPE_ARCH_BIG_ENDIAN */
		unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS;
		unsigned uses_gs:1;
		unsigned tess_uses_prim_id:1;
		unsigned uses_tess:1;
		unsigned line_stipple_enabled:1;
		unsigned count_from_stream_output:1;
		unsigned primitive_restart:1;
		unsigned multi_instances_smaller_than_primgroup:1;
		unsigned uses_instancing:1;
		unsigned prim:4;
#endif
	} u;
	uint32_t index; /* the whole key as a table index */
};
764
/* A bindless texture handle: its descriptor slot plus the view and sampler
 * state it was created from. */
struct si_texture_handle
{
	unsigned			desc_slot;
	bool				desc_dirty; /* descriptor needs re-upload */
	struct pipe_sampler_view	*view;
	struct si_sampler_state		sstate;
};
772
/* A bindless image handle: its descriptor slot plus the image view it was
 * created from. */
struct si_image_handle
{
	unsigned			desc_slot;
	bool				desc_dirty; /* descriptor needs re-upload */
	struct pipe_image_view		view;
};
779
/* Reference-counted snapshot of a submitted gfx CS, kept for debugging
 * (trace buffer, hang diagnosis). */
struct si_saved_cs {
	struct pipe_reference	reference;
	struct si_context	*ctx;
	struct radeon_saved_cs	gfx;		/* saved IB and BO list */
	struct si_resource	*trace_buf;
	unsigned		trace_id;

	unsigned		gfx_last_dw;
	bool			flushed;
	int64_t			time_flush;	/* flush timestamp */
};
791
/* A queued SDMA copy of "size" bytes from src to dst (see
 * SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA). */
struct si_sdma_upload {
	struct si_resource	*dst;
	struct si_resource	*src;
	unsigned		src_offset;
	unsigned		dst_offset;
	unsigned		size;
};
799
800struct si_context {
801	struct pipe_context		b; /* base class */
802
803	enum radeon_family		family;
804	enum chip_class			chip_class;
805
806	struct radeon_winsys		*ws;
807	struct radeon_winsys_ctx	*ctx;
808	struct radeon_cmdbuf		*gfx_cs; /* compute IB if graphics is disabled */
809	struct radeon_cmdbuf		*dma_cs;
810	struct pipe_fence_handle	*last_gfx_fence;
811	struct pipe_fence_handle	*last_sdma_fence;
812	struct si_resource		*eop_bug_scratch;
813	struct u_upload_mgr		*cached_gtt_allocator;
814	struct threaded_context		*tc;
815	struct u_suballocator		*allocator_zeroed_memory;
816	struct slab_child_pool		pool_transfers;
817	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
818	struct pipe_device_reset_callback device_reset_callback;
819	struct u_log_context		*log;
820	void				*query_result_shader;
821	struct blitter_context		*blitter;
822	void				*custom_dsa_flush;
823	void				*custom_blend_resolve;
824	void				*custom_blend_fmask_decompress;
825	void				*custom_blend_eliminate_fastclear;
826	void				*custom_blend_dcc_decompress;
827	void				*vs_blit_pos;
828	void				*vs_blit_pos_layered;
829	void				*vs_blit_color;
830	void				*vs_blit_color_layered;
831	void				*vs_blit_texcoord;
832	void				*cs_clear_buffer;
833	void				*cs_copy_buffer;
834	void				*cs_copy_image;
835	void				*cs_copy_image_1d_array;
836	void				*cs_clear_render_target;
837	void				*cs_clear_render_target_1d_array;
838	void				*cs_dcc_retile;
839	struct si_screen		*screen;
840	struct pipe_debug_callback	debug;
841	struct ac_llvm_compiler		compiler; /* only non-threaded compilation */
842	struct si_shader_ctx_state	fixed_func_tcs_shader;
843	struct si_resource		*wait_mem_scratch;
844	unsigned			wait_mem_number;
845	uint16_t			prefetch_L2_mask;
846
847	bool				has_graphics;
848	bool				gfx_flush_in_progress:1;
849	bool				gfx_last_ib_is_busy:1;
850	bool				compute_is_busy:1;
851
852	unsigned			num_gfx_cs_flushes;
853	unsigned			initial_gfx_cs_size;
854	unsigned			gpu_reset_counter;
855	unsigned			last_dirty_tex_counter;
856	unsigned			last_dirty_buf_counter;
857	unsigned			last_compressed_colortex_counter;
858	unsigned			last_num_draw_calls;
859	unsigned			flags; /* flush flags */
860	/* Current unaccounted memory usage. */
861	uint64_t			vram;
862	uint64_t			gtt;
863
864	/* Atoms (direct states). */
865	union si_state_atoms		atoms;
866	unsigned			dirty_atoms; /* mask */
867	/* PM4 states (precomputed immutable states) */
868	unsigned			dirty_states;
869	union si_state			queued;
870	union si_state			emitted;
871
872	/* Atom declarations. */
873	struct si_framebuffer		framebuffer;
874	unsigned			sample_locs_num_samples;
875	uint16_t			sample_mask;
876	unsigned			last_cb_target_mask;
877	struct si_blend_color		blend_color;
878	struct si_clip_state		clip_state;
879	struct si_shader_data		shader_pointers;
880	struct si_stencil_ref		stencil_ref;
881	struct pipe_scissor_state	scissors[SI_MAX_VIEWPORTS];
882	struct si_streamout		streamout;
883	struct si_viewports		viewports;
884	unsigned			num_window_rectangles;
885	bool				window_rectangles_include;
886	struct pipe_scissor_state	window_rectangles[4];
887
888	/* Precomputed states. */
889	struct si_pm4_state		*init_config;
890	struct si_pm4_state		*init_config_gs_rings;
891	bool				init_config_has_vgt_flush;
892	struct si_pm4_state		*vgt_shader_config[4];
893
894	/* shaders */
895	struct si_shader_ctx_state	ps_shader;
896	struct si_shader_ctx_state	gs_shader;
897	struct si_shader_ctx_state	vs_shader;
898	struct si_shader_ctx_state	tcs_shader;
899	struct si_shader_ctx_state	tes_shader;
900	struct si_cs_shader_state	cs_shader_state;
901
902	/* shader information */
903	struct si_vertex_elements	*vertex_elements;
904	unsigned			sprite_coord_enable;
905	unsigned			cs_max_waves_per_sh;
906	bool				flatshade;
907	bool				do_update_shaders;
908
909	/* vertex buffer descriptors */
910	uint32_t *vb_descriptors_gpu_list;
911	struct si_resource *vb_descriptors_buffer;
912	unsigned vb_descriptors_offset;
913
914	/* shader descriptors */
915	struct si_descriptors		descriptors[SI_NUM_DESCS];
916	unsigned			descriptors_dirty;
917	unsigned			shader_pointers_dirty;
918	unsigned			shader_needs_decompress_mask;
919	struct si_buffer_resources	rw_buffers;
920	struct si_buffer_resources	const_and_shader_buffers[SI_NUM_SHADERS];
921	struct si_samplers		samplers[SI_NUM_SHADERS];
922	struct si_images		images[SI_NUM_SHADERS];
923	bool				bo_list_add_all_resident_resources;
924	bool				bo_list_add_all_gfx_resources;
925	bool				bo_list_add_all_compute_resources;
926
927	/* other shader resources */
928	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
929	struct pipe_resource		*esgs_ring;
930	struct pipe_resource		*gsvs_ring;
931	struct pipe_resource		*tess_rings;
932	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
933	struct si_resource		*border_color_buffer;
934	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
935	unsigned			border_color_count;
936	unsigned			num_vs_blit_sgprs;
937	uint32_t			vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
938	uint32_t			cs_user_data[4];
939
940	/* Vertex and index buffers. */
941	bool				vertex_buffers_dirty;
942	bool				vertex_buffer_pointer_dirty;
943	struct pipe_vertex_buffer	vertex_buffer[SI_NUM_VERTEX_BUFFERS];
944
945	/* MSAA config state. */
946	int				ps_iter_samples;
947	bool				ps_uses_fbfetch;
948	bool				smoothing_enabled;
949
950	/* DB render state. */
951	unsigned		ps_db_shader_control;
952	unsigned		dbcb_copy_sample;
953	bool			dbcb_depth_copy_enabled:1;
954	bool			dbcb_stencil_copy_enabled:1;
955	bool			db_flush_depth_inplace:1;
956	bool			db_flush_stencil_inplace:1;
957	bool			db_depth_clear:1;
958	bool			db_depth_disable_expclear:1;
959	bool			db_stencil_clear:1;
960	bool			db_stencil_disable_expclear:1;
961	bool			occlusion_queries_disabled:1;
962	bool			generate_mipmap_for_depth:1;
963
964	/* Emitted draw state. */
965	bool			gs_tri_strip_adj_fix:1;
966	bool			ls_vgpr_fix:1;
967	int			last_index_size;
968	int			last_base_vertex;
969	int			last_start_instance;
970	int			last_instance_count;
971	int			last_drawid;
972	int			last_sh_base_reg;
973	int			last_primitive_restart_en;
974	int			last_restart_index;
975	int			last_prim;
976	int			last_multi_vgt_param;
977	int			last_rast_prim;
978	unsigned		last_sc_line_stipple;
979	unsigned		current_vs_state;
980	unsigned		last_vs_state;
981	enum pipe_prim_type	current_rast_prim; /* primitive type after TES, GS */
982
983	/* Scratch buffer */
984	struct si_resource	*scratch_buffer;
985	unsigned		scratch_waves;
986	unsigned		spi_tmpring_size;
987	unsigned		max_seen_scratch_bytes_per_wave;
988	unsigned		max_seen_compute_scratch_bytes_per_wave;
989
990	struct si_resource	*compute_scratch_buffer;
991
992	/* Emitted derived tessellation state. */
993	/* Local shader (VS), or HS if LS-HS are merged. */
994	struct si_shader	*last_ls;
995	struct si_shader_selector *last_tcs;
996	int			last_num_tcs_input_cp;
997	int			last_tes_sh_base;
998	bool			last_tess_uses_primid;
999	unsigned		last_num_patches;
1000	int			last_ls_hs_config;
1001
1002	/* Debug state. */
1003	bool			is_debug;
1004	struct si_saved_cs	*current_saved_cs;
1005	uint64_t		dmesg_timestamp;
1006	unsigned		apitrace_call_number;
1007
1008	/* Other state */
1009	bool need_check_render_feedback;
1010	bool			decompression_enabled;
1011	bool			dpbb_force_off;
1012	bool			vs_writes_viewport_index;
1013	bool			vs_disables_clipping_viewport;
1014
1015	/* Precomputed IA_MULTI_VGT_PARAM */
1016	union si_vgt_param_key  ia_multi_vgt_param_key;
1017	unsigned		ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
1018
1019	/* Bindless descriptors. */
1020	struct si_descriptors	bindless_descriptors;
1021	struct util_idalloc	bindless_used_slots;
1022	unsigned		num_bindless_descriptors;
1023	bool			bindless_descriptors_dirty;
1024	bool			graphics_bindless_pointer_dirty;
1025	bool			compute_bindless_pointer_dirty;
1026
1027	/* Allocated bindless handles */
1028	struct hash_table	*tex_handles;
1029	struct hash_table	*img_handles;
1030
1031	/* Resident bindless handles */
1032	struct util_dynarray	resident_tex_handles;
1033	struct util_dynarray	resident_img_handles;
1034
1035	/* Resident bindless handles which need decompression */
1036	struct util_dynarray	resident_tex_needs_color_decompress;
1037	struct util_dynarray	resident_img_needs_color_decompress;
1038	struct util_dynarray	resident_tex_needs_depth_decompress;
1039
1040	/* Bindless state */
1041	bool			uses_bindless_samplers;
1042	bool			uses_bindless_images;
1043
1044	/* MSAA sample locations.
1045	 * The first index is the sample index.
1046	 * The second index is the coordinate: X, Y. */
1047	struct {
1048		float			x1[1][2];
1049		float			x2[2][2];
1050		float			x4[4][2];
1051		float			x8[8][2];
1052		float			x16[16][2];
1053	} sample_positions;
1054	struct pipe_resource *sample_pos_buffer;
1055
1056	/* Misc stats. */
1057	unsigned			num_draw_calls;
1058	unsigned			num_decompress_calls;
1059	unsigned			num_mrt_draw_calls;
1060	unsigned			num_prim_restart_calls;
1061	unsigned			num_spill_draw_calls;
1062	unsigned			num_compute_calls;
1063	unsigned			num_spill_compute_calls;
1064	unsigned			num_dma_calls;
1065	unsigned			num_cp_dma_calls;
1066	unsigned			num_vs_flushes;
1067	unsigned			num_ps_flushes;
1068	unsigned			num_cs_flushes;
1069	unsigned			num_cb_cache_flushes;
1070	unsigned			num_db_cache_flushes;
1071	unsigned			num_L2_invalidates;
1072	unsigned			num_L2_writebacks;
1073	unsigned			num_resident_handles;
1074	uint64_t			num_alloc_tex_transfer_bytes;
1075	unsigned			last_tex_ps_draw_ratio; /* for query */
1076	unsigned			context_roll;
1077
1078	/* Queries. */
1079	/* Maintain the list of active queries for pausing between IBs. */
1080	int				num_occlusion_queries;
1081	int				num_perfect_occlusion_queries;
1082	struct list_head		active_queries;
1083	unsigned			num_cs_dw_queries_suspend;
1084
1085	/* Render condition. */
1086	struct pipe_query		*render_cond;
1087	unsigned			render_cond_mode;
1088	bool				render_cond_invert;
1089	bool				render_cond_force_off; /* for u_blitter */
1090
1091	/* For uploading data via GTT and copy to VRAM on context flush via SDMA. */
1092	bool				sdma_uploads_in_progress;
1093	struct si_sdma_upload		*sdma_uploads;
1094	unsigned			num_sdma_uploads;
1095	unsigned			max_sdma_uploads;
1096
1097	/* Statistics gathering for the DCC enablement heuristic. It can't be
1098	 * in si_texture because si_texture can be shared by multiple
1099	 * contexts. This is for back buffers only. We shouldn't get too many
1100	 * of those.
1101	 *
1102	 * X11 DRI3 rotates among a finite set of back buffers. They should
1103	 * all fit in this array. If they don't, separate DCC might never be
1104	 * enabled by DCC stat gathering.
1105	 */
1106	struct {
1107		struct si_texture		*tex;
1108		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
1109		struct pipe_query		*ps_stats[3];
1110		/* If all slots are used and another slot is needed,
1111		 * the least recently used slot is evicted based on this. */
1112		int64_t				last_use_timestamp;
1113		bool				query_active;
1114	} dcc_stats[5];
1115
1116	/* Copy one resource to another using async DMA. */
1117	void (*dma_copy)(struct pipe_context *ctx,
1118			 struct pipe_resource *dst,
1119			 unsigned dst_level,
1120			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
1121			 struct pipe_resource *src,
1122			 unsigned src_level,
1123			 const struct pipe_box *src_box);
1124
1125	struct si_tracked_regs			tracked_regs;
1126};
1127
1128/* cik_sdma.c */
1129void cik_init_sdma_functions(struct si_context *sctx);
1130
1131/* si_blit.c */
/* Bitmask of state to save before a u_blitter operation and restore after. */
enum si_blitter_op /* bitmask */
{
	SI_SAVE_TEXTURES      = 1,
	SI_SAVE_FRAMEBUFFER   = 2,
	SI_SAVE_FRAGMENT_STATE = 4,
	SI_DISABLE_RENDER_COND = 8,
};
1139
1140void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op);
1141void si_blitter_end(struct si_context *sctx);
1142void si_init_blit_functions(struct si_context *sctx);
1143void si_decompress_textures(struct si_context *sctx, unsigned shader_mask);
1144void si_resource_copy_region(struct pipe_context *ctx,
1145			     struct pipe_resource *dst,
1146			     unsigned dst_level,
1147			     unsigned dstx, unsigned dsty, unsigned dstz,
1148			     struct pipe_resource *src,
1149			     unsigned src_level,
1150			     const struct pipe_box *src_box);
1151void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex);
1152void si_blit_decompress_depth(struct pipe_context *ctx,
1153			      struct si_texture *texture,
1154			      struct si_texture *staging,
1155			      unsigned first_level, unsigned last_level,
1156			      unsigned first_layer, unsigned last_layer,
1157			      unsigned first_sample, unsigned last_sample);
1158
1159/* si_buffer.c */
1160bool si_rings_is_buffer_referenced(struct si_context *sctx,
1161				   struct pb_buffer *buf,
1162				   enum radeon_bo_usage usage);
1163void *si_buffer_map_sync_with_rings(struct si_context *sctx,
1164				    struct si_resource *resource,
1165				    unsigned usage);
1166void si_init_resource_fields(struct si_screen *sscreen,
1167			     struct si_resource *res,
1168			     uint64_t size, unsigned alignment);
1169bool si_alloc_resource(struct si_screen *sscreen,
1170		       struct si_resource *res);
1171struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen,
1172						 unsigned flags, unsigned usage,
1173						 unsigned size, unsigned alignment);
1174struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen,
1175					       unsigned flags, unsigned usage,
1176					       unsigned size, unsigned alignment);
1177void si_replace_buffer_storage(struct pipe_context *ctx,
1178			       struct pipe_resource *dst,
1179			       struct pipe_resource *src);
1180void si_init_screen_buffer_functions(struct si_screen *sscreen);
1181void si_init_buffer_functions(struct si_context *sctx);
1182
1183/* si_clear.c */
1184enum pipe_format si_simplify_cb_format(enum pipe_format format);
1185bool vi_alpha_is_on_msb(enum pipe_format format);
1186void vi_dcc_clear_level(struct si_context *sctx,
1187			struct si_texture *tex,
1188			unsigned level, unsigned clear_value);
1189void si_init_clear_functions(struct si_context *sctx);
1190
1191/* si_compute_blit.c */
1192unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
1193			    enum si_cache_policy cache_policy);
1194void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
1195		     uint64_t offset, uint64_t size, uint32_t *clear_value,
1196		     uint32_t clear_value_size, enum si_coherency coher,
1197		     bool force_cpdma);
1198void si_copy_buffer(struct si_context *sctx,
1199		    struct pipe_resource *dst, struct pipe_resource *src,
1200		    uint64_t dst_offset, uint64_t src_offset, unsigned size);
1201void si_compute_copy_image(struct si_context *sctx,
1202			   struct pipe_resource *dst,
1203			   unsigned dst_level,
1204			   struct pipe_resource *src,
1205			   unsigned src_level,
1206			   unsigned dstx, unsigned dsty, unsigned dstz,
1207			   const struct pipe_box *src_box);
1208void si_compute_clear_render_target(struct pipe_context *ctx,
1209                                    struct pipe_surface *dstsurf,
1210                                    const union pipe_color_union *color,
1211                                    unsigned dstx, unsigned dsty,
1212                                    unsigned width, unsigned height,
1213				    bool render_condition_enabled);
1214void si_retile_dcc(struct si_context *sctx, struct si_texture *tex);
1215void si_init_compute_blit_functions(struct si_context *sctx);
1216
1217/* si_cp_dma.c */
#define SI_CPDMA_SKIP_CHECK_CS_SPACE	(1 << 0) /* don't call need_cs_space */
#define SI_CPDMA_SKIP_SYNC_AFTER	(1 << 1) /* don't wait for DMA after the copy */
#define SI_CPDMA_SKIP_SYNC_BEFORE	(1 << 2) /* don't wait for DMA before the copy (RAW hazards) */
#define SI_CPDMA_SKIP_GFX_SYNC		(1 << 3) /* don't flush caches and don't wait for PS/CS */
#define SI_CPDMA_SKIP_BO_LIST_UPDATE	(1 << 4) /* don't update the BO list */
/* Combination of all of the SKIP flags above. */
#define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \
			   SI_CPDMA_SKIP_SYNC_AFTER | \
			   SI_CPDMA_SKIP_SYNC_BEFORE | \
			   SI_CPDMA_SKIP_GFX_SYNC | \
			   SI_CPDMA_SKIP_BO_LIST_UPDATE)
1228
1229void si_cp_dma_wait_for_idle(struct si_context *sctx);
1230void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
1231			    struct pipe_resource *dst, uint64_t offset,
1232			    uint64_t size, unsigned value, unsigned user_flags,
1233			    enum si_coherency coher, enum si_cache_policy cache_policy);
1234void si_cp_dma_copy_buffer(struct si_context *sctx,
1235			   struct pipe_resource *dst, struct pipe_resource *src,
1236			   uint64_t dst_offset, uint64_t src_offset, unsigned size,
1237			   unsigned user_flags, enum si_coherency coher,
1238			   enum si_cache_policy cache_policy);
1239void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
1240			      uint64_t offset, unsigned size);
1241void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
1242void si_test_gds(struct si_context *sctx);
1243void si_cp_write_data(struct si_context *sctx, struct si_resource *buf,
1244		      unsigned offset, unsigned size, unsigned dst_sel,
1245		      unsigned engine, const void *data);
1246void si_cp_copy_data(struct si_context *sctx,
1247		     unsigned dst_sel, struct si_resource *dst, unsigned dst_offset,
1248		     unsigned src_sel, struct si_resource *src, unsigned src_offset);
1249
1250/* si_debug.c */
1251void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
1252		struct radeon_saved_cs *saved, bool get_buffer_list);
1253void si_clear_saved_cs(struct radeon_saved_cs *saved);
1254void si_destroy_saved_cs(struct si_saved_cs *scs);
1255void si_auto_log_cs(void *data, struct u_log_context *log);
1256void si_log_hw_flush(struct si_context *sctx);
1257void si_log_draw_state(struct si_context *sctx, struct u_log_context *log);
1258void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
1259void si_init_debug_functions(struct si_context *sctx);
1260void si_check_vm_faults(struct si_context *sctx,
1261			struct radeon_saved_cs *saved, enum ring_type ring);
1262bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
1263
1264/* si_dma.c */
1265void si_init_dma_functions(struct si_context *sctx);
1266
1267/* si_dma_cs.c */
1268void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
1269			   uint64_t offset);
1270void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
1271			  uint64_t offset, uint64_t size, unsigned clear_value);
1272void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
1273		       struct si_resource *dst, struct si_resource *src);
1274void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
1275		     struct pipe_fence_handle **fence);
1276void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst,
1277			    uint64_t offset, uint64_t size, unsigned value);
1278
1279/* si_fence.c */
1280void si_cp_release_mem(struct si_context *ctx,
1281		       unsigned event, unsigned event_flags,
1282		       unsigned dst_sel, unsigned int_sel, unsigned data_sel,
1283		       struct si_resource *buf, uint64_t va,
1284		       uint32_t new_fence, unsigned query_type);
1285unsigned si_cp_write_fence_dwords(struct si_screen *screen);
1286void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs,
1287		      uint64_t va, uint32_t ref, uint32_t mask, unsigned flags);
1288void si_init_fence_functions(struct si_context *ctx);
1289void si_init_screen_fence_functions(struct si_screen *screen);
1290struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
1291					  struct tc_unflushed_batch_token *tc_token);
1292
1293/* si_get.c */
1294void si_init_screen_get_functions(struct si_screen *sscreen);
1295
1296/* si_gfx_cs.c */
1297void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
1298		     struct pipe_fence_handle **fence);
1299void si_begin_new_gfx_cs(struct si_context *ctx);
1300void si_need_gfx_cs_space(struct si_context *ctx);
1301void si_unref_sdma_uploads(struct si_context *sctx);
1302
1303/* si_gpu_load.c */
1304void si_gpu_load_kill_thread(struct si_screen *sscreen);
1305uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);
1306unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
1307			uint64_t begin);
1308
1309/* si_compute.c */
1310void si_init_compute_functions(struct si_context *sctx);
1311
1312/* si_perfcounters.c */
1313void si_init_perfcounters(struct si_screen *screen);
1314void si_destroy_perfcounters(struct si_screen *screen);
1315
1316/* si_pipe.c */
1317bool si_check_device_reset(struct si_context *sctx);
1318
1319/* si_query.c */
1320void si_init_screen_query_functions(struct si_screen *sscreen);
1321void si_init_query_functions(struct si_context *sctx);
1322void si_suspend_queries(struct si_context *sctx);
1323void si_resume_queries(struct si_context *sctx);
1324
1325/* si_shaderlib_tgsi.c */
1326void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
1327			unsigned num_layers);
1328void *si_create_fixed_func_tcs(struct si_context *sctx);
1329void *si_create_dma_compute_shader(struct pipe_context *ctx,
1330				   unsigned num_dwords_per_thread,
1331				   bool dst_stream_cache_policy, bool is_copy);
1332void *si_create_copy_image_compute_shader(struct pipe_context *ctx);
1333void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);
1334void *si_clear_render_target_shader(struct pipe_context *ctx);
1335void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
1336void *si_create_dcc_retile_cs(struct pipe_context *ctx);
1337void *si_create_query_result_cs(struct si_context *sctx);
1338
1339/* si_test_dma.c */
1340void si_test_dma(struct si_screen *sscreen);
1341
1342/* si_test_clearbuffer.c */
1343void si_test_dma_perf(struct si_screen *sscreen);
1344
1345/* si_uvd.c */
1346struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
1347					       const struct pipe_video_codec *templ);
1348
1349struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
1350						 const struct pipe_video_buffer *tmpl);
1351
1352/* si_viewport.c */
1353void si_update_vs_viewport_state(struct si_context *ctx);
1354void si_init_viewport_functions(struct si_context *ctx);
1355
1356/* si_texture.c */
1357bool si_prepare_for_dma_blit(struct si_context *sctx,
1358			     struct si_texture *dst,
1359			     unsigned dst_level, unsigned dstx,
1360			     unsigned dsty, unsigned dstz,
1361			     struct si_texture *src,
1362			     unsigned src_level,
1363			     const struct pipe_box *src_box);
1364void si_eliminate_fast_color_clear(struct si_context *sctx,
1365				   struct si_texture *tex);
1366void si_texture_discard_cmask(struct si_screen *sscreen,
1367			      struct si_texture *tex);
1368bool si_init_flushed_depth_texture(struct pipe_context *ctx,
1369				   struct pipe_resource *texture,
1370				   struct si_texture **staging);
1371void si_print_texture_info(struct si_screen *sscreen,
1372			   struct si_texture *tex, struct u_log_context *log);
1373struct pipe_resource *si_texture_create(struct pipe_screen *screen,
1374					const struct pipe_resource *templ);
1375bool vi_dcc_formats_compatible(enum pipe_format format1,
1376			       enum pipe_format format2);
1377bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
1378				     unsigned level,
1379				     enum pipe_format view_format);
1380void vi_disable_dcc_if_incompatible_format(struct si_context *sctx,
1381					   struct pipe_resource *tex,
1382					   unsigned level,
1383					   enum pipe_format view_format);
1384struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
1385					      struct pipe_resource *texture,
1386					      const struct pipe_surface *templ,
1387					      unsigned width0, unsigned height0,
1388					      unsigned width, unsigned height);
1389unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
1390void vi_separate_dcc_try_enable(struct si_context *sctx,
1391				struct si_texture *tex);
1392void vi_separate_dcc_start_query(struct si_context *sctx,
1393				 struct si_texture *tex);
1394void vi_separate_dcc_stop_query(struct si_context *sctx,
1395				struct si_texture *tex);
1396void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
1397					     struct si_texture *tex);
1398bool si_texture_disable_dcc(struct si_context *sctx,
1399			    struct si_texture *tex);
1400void si_init_screen_texture_functions(struct si_screen *sscreen);
1401void si_init_context_texture_functions(struct si_context *sctx);
1402
1403
1404/*
1405 * common helpers
1406 */
1407
static inline struct si_resource *si_resource(struct pipe_resource *r)
{
	/* Downcast: callers guarantee that r really is a si_resource. */
	struct si_resource *res = (struct si_resource *)r;

	return res;
}
1412
static inline void
si_resource_reference(struct si_resource **ptr, struct si_resource *res)
{
	/* Update *ptr to reference res, adjusting both refcounts. */
	struct pipe_resource **dst = (struct pipe_resource **)ptr;
	struct pipe_resource *src = (struct pipe_resource *)res;

	pipe_resource_reference(dst, src);
}
1419
1420static inline void
1421si_texture_reference(struct si_texture **ptr, struct si_texture *res)
1422{
1423	pipe_resource_reference((struct pipe_resource **)ptr, &res->buffer.b.b);
1424}
1425
1426static inline bool
1427vi_dcc_enabled(struct si_texture *tex, unsigned level)
1428{
1429	return tex->dcc_offset && level < tex->surface.num_dcc_levels;
1430}
1431
1432static inline unsigned
1433si_tile_mode_index(struct si_texture *tex, unsigned level, bool stencil)
1434{
1435	if (stencil)
1436		return tex->surface.u.legacy.stencil_tiling_index[level];
1437	else
1438		return tex->surface.u.legacy.tiling_index[level];
1439}
1440
1441static inline unsigned
1442si_get_minimum_num_gfx_cs_dwords(struct si_context *sctx)
1443{
1444	/* Don't count the needed CS space exactly and just use an upper bound.
1445	 *
1446	 * Also reserve space for stopping queries at the end of IB, because
1447	 * the number of active queries is unlimited in theory.
1448	 */
1449	return 2048 + sctx->num_cs_dw_queries_suspend;
1450}
1451
1452static inline void
1453si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
1454{
1455	if (r) {
1456		/* Add memory usage for need_gfx_cs_space */
1457		sctx->vram += si_resource(r)->vram_usage;
1458		sctx->gtt += si_resource(r)->gart_usage;
1459	}
1460}
1461
1462static inline void
1463si_invalidate_draw_sh_constants(struct si_context *sctx)
1464{
1465	sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
1466	sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
1467}
1468
1469static inline unsigned
1470si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
1471{
1472	return 1 << (atom - sctx->atoms.array);
1473}
1474
1475static inline void
1476si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
1477{
1478	unsigned bit = si_get_atom_bit(sctx, atom);
1479
1480	if (dirty)
1481		sctx->dirty_atoms |= bit;
1482	else
1483		sctx->dirty_atoms &= ~bit;
1484}
1485
1486static inline bool
1487si_is_atom_dirty(struct si_context *sctx, struct si_atom *atom)
1488{
1489	return (sctx->dirty_atoms & si_get_atom_bit(sctx, atom)) != 0;
1490}
1491
/* Convenience wrapper: flag the atom for re-emission. */
static inline void
si_mark_atom_dirty(struct si_context *sctx, struct si_atom *atom)
{
	si_set_atom_dirty(sctx, atom, true);
}
1497
1498static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)
1499{
1500	if (sctx->gs_shader.cso)
1501		return &sctx->gs_shader;
1502	if (sctx->tes_shader.cso)
1503		return &sctx->tes_shader;
1504
1505	return &sctx->vs_shader;
1506}
1507
1508static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
1509{
1510	struct si_shader_ctx_state *vs = si_get_vs(sctx);
1511
1512	return vs->cso ? &vs->cso->info : NULL;
1513}
1514
1515static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
1516{
1517	if (sctx->gs_shader.cso)
1518		return sctx->gs_shader.cso->gs_copy_shader;
1519
1520	struct si_shader_ctx_state *vs = si_get_vs(sctx);
1521	return vs->current ? vs->current : NULL;
1522}
1523
1524static inline bool si_can_dump_shader(struct si_screen *sscreen,
1525				      unsigned processor)
1526{
1527	return sscreen->debug_flags & (1 << processor);
1528}
1529
1530static inline bool si_get_strmout_en(struct si_context *sctx)
1531{
1532	return sctx->streamout.streamout_enabled ||
1533	       sctx->streamout.prims_gen_query_enabled;
1534}
1535
1536static inline unsigned
1537si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
1538{
1539	unsigned alignment, tcc_cache_line_size;
1540
1541	/* If the upload size is less than the cache line size (e.g. 16, 32),
1542	 * the whole thing will fit into a cache line if we align it to its size.
1543	 * The idea is that multiple small uploads can share a cache line.
1544	 * If the upload size is greater, align it to the cache line size.
1545	 */
1546	alignment = util_next_power_of_two(upload_size);
1547	tcc_cache_line_size = sctx->screen->info.tcc_cache_line_size;
1548	return MIN2(alignment, tcc_cache_line_size);
1549}
1550
1551static inline void
1552si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
1553{
1554	if (pipe_reference(&(*dst)->reference, &src->reference))
1555		si_destroy_saved_cs(*dst);
1556
1557	*dst = src;
1558}
1559
/* Flush and invalidate caches so that shaders can see data just written
 * by the color buffer (CB). The exact flags depend on the GPU generation
 * and whether shaders read compression metadata. */
static inline void
si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
			   bool shaders_read_metadata, bool dcc_pipe_aligned)
{
	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
		       SI_CONTEXT_INV_VMEM_L1;

	if (sctx->chip_class >= GFX9) {
		/* Single-sample color is coherent with shaders on GFX9, but
		 * L2 metadata must be flushed if shaders read metadata.
		 * (DCC, CMASK).
		 */
		if (num_samples >= 2 ||
		    (shaders_read_metadata && !dcc_pipe_aligned))
			sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
		else if (shaders_read_metadata)
			sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
	} else {
		/* SI-CI-VI */
		sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
	}
}
1582
/* Flush and invalidate caches so that shaders can see data just written
 * by the depth buffer (DB). The exact flags depend on the GPU generation
 * and whether stencil or compression metadata is read. */
static inline void
si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
			   bool include_stencil, bool shaders_read_metadata)
{
	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
		       SI_CONTEXT_INV_VMEM_L1;

	if (sctx->chip_class >= GFX9) {
		/* Single-sample depth (not stencil) is coherent with shaders
		 * on GFX9, but L2 metadata must be flushed if shaders read
		 * metadata.
		 */
		if (num_samples >= 2 || include_stencil)
			sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
		else if (shaders_read_metadata)
			sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
	} else {
		/* SI-CI-VI */
		sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
	}
}
1604
1605static inline bool
1606si_can_sample_zs(struct si_texture *tex, bool stencil_sampler)
1607{
1608	return (stencil_sampler && tex->can_sample_s) ||
1609	       (!stencil_sampler && tex->can_sample_z);
1610}
1611
1612static inline bool
1613si_htile_enabled(struct si_texture *tex, unsigned level)
1614{
1615	return tex->htile_offset && level == 0;
1616}
1617
1618static inline bool
1619vi_tc_compat_htile_enabled(struct si_texture *tex, unsigned level)
1620{
1621	assert(!tex->tc_compatible_htile || tex->htile_offset);
1622	return tex->tc_compatible_htile && level == 0;
1623}
1624
1625static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)
1626{
1627	if (sctx->ps_uses_fbfetch)
1628		return sctx->framebuffer.nr_color_samples;
1629
1630	return MIN2(sctx->ps_iter_samples, sctx->framebuffer.nr_color_samples);
1631}
1632
/* Return the effective per-RT 4-bit color writemask: the intersection of
 * the enabled color buffers, the blend state's target mask, and the
 * outputs the pixel shader actually writes. Returns 0 if nothing can be
 * written (rasterizer discard or no PS bound). */
static inline unsigned si_get_total_colormask(struct si_context *sctx)
{
	if (sctx->queued.named.rasterizer->rasterizer_discard)
		return 0;

	struct si_shader_selector *ps = sctx->ps_shader.cso;
	if (!ps)
		return 0;

	unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit &
			     sctx->queued.named.blend->cb_target_mask;

	if (!ps->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
		colormask &= ps->colors_written_4bit;
	else if (!ps->colors_written_4bit)
		colormask = 0; /* color0 writes all cbufs, but it's not written */

	return colormask;
}
1652
/* Bitmask of all line-based primitive types. */
#define UTIL_ALL_PRIM_LINE_MODES ((1 << PIPE_PRIM_LINES) | \
				  (1 << PIPE_PRIM_LINE_LOOP) | \
				  (1 << PIPE_PRIM_LINE_STRIP) | \
				  (1 << PIPE_PRIM_LINES_ADJACENCY) | \
				  (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY))
1658
1659static inline bool util_prim_is_lines(unsigned prim)
1660{
1661	return ((1 << prim) & UTIL_ALL_PRIM_LINE_MODES) != 0;
1662}
1663
1664static inline bool util_prim_is_points_or_lines(unsigned prim)
1665{
1666	return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES |
1667			       (1 << PIPE_PRIM_POINTS))) != 0;
1668}
1669
1670/**
1671 * Return true if there is enough memory in VRAM and GTT for the buffers
1672 * added so far.
1673 *
1674 * \param vram      VRAM memory size not added to the buffer list yet
1675 * \param gtt       GTT memory size not added to the buffer list yet
1676 */
1677static inline bool
1678radeon_cs_memory_below_limit(struct si_screen *screen,
1679			     struct radeon_cmdbuf *cs,
1680			     uint64_t vram, uint64_t gtt)
1681{
1682	vram += cs->used_vram;
1683	gtt += cs->used_gart;
1684
1685	/* Anything that goes above the VRAM size should go to GTT. */
1686	if (vram > screen->info.vram_size)
1687		gtt += vram - screen->info.vram_size;
1688
1689	/* Now we just need to check if we have enough GTT. */
1690	return gtt < screen->info.gart_size * 0.7;
1691}
1692
1693/**
1694 * Add a buffer to the buffer list for the given command stream (CS).
1695 *
1696 * All buffers used by a CS must be added to the list. This tells the kernel
1697 * driver which buffers are used by GPU commands. Other buffers can
1698 * be swapped out (not accessible) during execution.
1699 *
1700 * The buffer list becomes empty after every context flush and must be
1701 * rebuilt.
1702 */
1703static inline void radeon_add_to_buffer_list(struct si_context *sctx,
1704					     struct radeon_cmdbuf *cs,
1705					     struct si_resource *bo,
1706					     enum radeon_bo_usage usage,
1707					     enum radeon_bo_priority priority)
1708{
1709	assert(usage);
1710	sctx->ws->cs_add_buffer(
1711		cs, bo->buf,
1712		(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
1713		bo->domains, priority);
1714}
1715
1716/**
1717 * Same as above, but also checks memory usage and flushes the context
1718 * accordingly.
1719 *
1720 * When this SHOULD NOT be used:
1721 *
1722 * - if si_context_add_resource_size has been called for the buffer
1723 *   followed by *_need_cs_space for checking the memory usage
1724 *
1725 * - if si_need_dma_space has been called for the buffer
1726 *
1727 * - when emitting state packets and draw packets (because preceding packets
1728 *   can't be re-emitted at that point)
1729 *
1730 * - if shader resource "enabled_mask" is not up-to-date or there is
1731 *   a different constraint disallowing a context flush
1732 */
static inline void
radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx,
					struct si_resource *bo,
					enum radeon_bo_usage usage,
					enum radeon_bo_priority priority,
					bool check_mem)
{
	/* Flush first if adding this buffer would push memory usage past
	 * the limit, so the new buffer lands in a fresh buffer list. */
	if (check_mem &&
	    !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs,
					  sctx->vram + bo->vram_usage,
					  sctx->gtt + bo->gart_usage))
		si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);

	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority);
}
1748
/* Print an error message to stderr, prefixed with file, line, and function. */
#define PRINT_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
1751
1752#endif
1753