1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Marek Olšák <maraeo@gmail.com>
24 *
25 */
26
27/**
28 * This file contains common screen and context structures and functions
29 * for r600g and radeonsi.
30 */
31
32#ifndef R600_PIPE_COMMON_H
33#define R600_PIPE_COMMON_H
34
35#include <stdio.h>
36
37#include "amd/common/ac_binary.h"
38
39#include "radeon/radeon_winsys.h"
40
41#include "util/disk_cache.h"
42#include "util/u_blitter.h"
43#include "util/list.h"
44#include "util/u_range.h"
45#include "util/slab.h"
46#include "util/u_suballoc.h"
47#include "util/u_transfer.h"
48#include "util/u_threaded_context.h"
49
50struct u_log_context;
51
52#define ATI_VENDOR_ID 0x1002
53
54#define R600_RESOURCE_FLAG_TRANSFER		(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
55#define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
56#define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
57#define R600_RESOURCE_FLAG_UNMAPPABLE		(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
58
59#define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
60/* Pipeline & streamout query controls. */
61#define R600_CONTEXT_START_PIPELINE_STATS	(1u << 1)
62#define R600_CONTEXT_STOP_PIPELINE_STATS	(1u << 2)
63#define R600_CONTEXT_FLUSH_FOR_RENDER_COND	(1u << 3)
64#define R600_CONTEXT_PRIVATE_FLAG		(1u << 4)
65
66/* special primitive types */
67#define R600_PRIM_RECTANGLE_LIST	PIPE_PRIM_MAX
68
69#define R600_NOT_QUERY		0xffffffff
70
71/* Debug flags. */
72#define DBG_VS			(1 << PIPE_SHADER_VERTEX)
73#define DBG_PS			(1 << PIPE_SHADER_FRAGMENT)
74#define DBG_GS			(1 << PIPE_SHADER_GEOMETRY)
75#define DBG_TCS			(1 << PIPE_SHADER_TESS_CTRL)
76#define DBG_TES			(1 << PIPE_SHADER_TESS_EVAL)
77#define DBG_CS			(1 << PIPE_SHADER_COMPUTE)
/* DBG_FS is defined below this use; that is fine because macro expansion
 * happens at the point of use, not at the point of definition. */
#define DBG_ALL_SHADERS		(DBG_FS - 1)
#define DBG_FS			(1 << 6) /* fetch shader */
80#define DBG_TEX			(1 << 7)
81#define DBG_NIR			(1 << 8)
82#define DBG_COMPUTE		(1 << 9)
83/* gap */
84#define DBG_VM			(1 << 11)
85#define DBG_NO_IR		(1 << 12)
86#define DBG_NO_TGSI		(1 << 13)
87#define DBG_NO_ASM		(1 << 14)
88#define DBG_PREOPT_IR		(1 << 15)
89#define DBG_CHECK_IR		(1 << 16)
90#define DBG_NO_OPT_VARIANT	(1 << 17)
91#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
92/* gaps */
93#define DBG_TEST_DMA		(1 << 20)
94/* Bits 21-31 are reserved for the r600g driver. */
95/* features */
96#define DBG_NO_ASYNC_DMA	(1ull << 32)
97#define DBG_NO_HYPERZ		(1ull << 33)
98#define DBG_NO_DISCARD_RANGE	(1ull << 34)
99#define DBG_NO_2D_TILING	(1ull << 35)
100#define DBG_NO_TILING		(1ull << 36)
101#define DBG_SWITCH_ON_EOP	(1ull << 37)
102#define DBG_FORCE_DMA		(1ull << 38)
103#define DBG_PRECOMPILE		(1ull << 39)
104#define DBG_INFO		(1ull << 40)
105#define DBG_NO_WC		(1ull << 41)
106#define DBG_CHECK_VM		(1ull << 42)
107/* gap */
108#define DBG_UNSAFE_MATH		(1ull << 49)
109#define DBG_TEST_VMFAULT_CP	(1ull << 51)
110#define DBG_TEST_VMFAULT_SDMA	(1ull << 52)
111#define DBG_TEST_VMFAULT_SHADER	(1ull << 53)
112
113#define R600_MAP_BUFFER_ALIGNMENT 64
114#define R600_MAX_VIEWPORTS        16
115
116#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
117
118enum r600_coherency {
119	R600_COHERENCY_NONE, /* no cache flushes needed */
120	R600_COHERENCY_SHADER,
121	R600_COHERENCY_CB_META,
122};
123
124#ifdef PIPE_ARCH_BIG_ENDIAN
125#define R600_BIG_ENDIAN 1
126#else
127#define R600_BIG_ENDIAN 0
128#endif
129
130struct r600_common_context;
131struct r600_perfcounters;
132struct tgsi_shader_info;
133struct r600_qbo_state;
134
135void radeon_shader_binary_init(struct ac_shader_binary *b);
136void radeon_shader_binary_clean(struct ac_shader_binary *b);
137
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
 * at the moment.
 *
 * Base class for all GPU buffer/texture resources; wraps a winsys buffer.
 */
struct r600_resource {
	struct threaded_resource	b;

	/* Winsys objects. */
	struct pb_buffer		*buf;
	uint64_t			gpu_address;
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains;
	enum radeon_bo_flag		flags;
	unsigned			bind_history;

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range		valid_buffer_range;

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */

	/* Whether this resource is referenced by bindless handles. */
	bool				texture_handle_allocated;
	bool				image_handle_allocated;

	/*
	 * EG/Cayman only - for RAT operations hw need an immediate buffer
	 * to store results in.
	 */
	struct r600_resource            *immed_buffer;
};
181
/* A mapping (transfer) of an r600_resource. "staging" holds the
 * intermediate copy when the resource cannot be mapped directly. */
struct r600_transfer {
	struct threaded_transfer	b;
	struct r600_resource		*staging;
	unsigned			offset;	/* byte offset into the staging buffer */
};
187
/* Layout parameters of the FMASK surface (MSAA colorbuffer compression). */
struct r600_fmask_info {
	uint64_t offset;	/* byte offset from the start of the resource */
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};
198
/* Layout parameters of the CMASK surface (colorbuffer fast-clear metadata). */
struct r600_cmask_info {
	uint64_t offset;	/* byte offset from the start of the resource */
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
	uint64_t base_address_reg;
};
206
/* A texture resource plus the metadata used for colorbuffer and depth
 * compression and fast clears (FMASK, CMASK, HTILE). */
struct r600_texture {
	struct r600_resource		resource;

	uint64_t			size;
	unsigned			num_level0_transfers;
	enum pipe_format		db_render_format;
	bool				is_depth;
	bool				db_compatible;
	bool				can_sample_z;
	bool				can_sample_s;
	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	/* Decompressed copy used for sampling when the depth texture itself
	 * cannot be sampled; see r600_init_flushed_depth_texture(). */
	struct r600_texture		*flushed_depth_texture;
	struct radeon_surf		surface;

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info		fmask;
	struct r600_cmask_info		cmask;
	struct r600_resource		*cmask_buffer;
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;

	/* Depth buffer compression and fast clear. */
	uint64_t			htile_offset;	/* 0 = no HTILE */
	bool				depth_cleared; /* if it was cleared at least once */
	float				depth_clear_value;
	bool				stencil_cleared; /* if it was cleared at least once */
	uint8_t				stencil_clear_value;

	bool				non_disp_tiling; /* R600-Cayman only */

	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer. Implemented in radeonsi only.
	 */
	uint32_t			framebuffers_bound;
};
244
/* A pipe_surface plus the precomputed CB/DB register values needed to bind
 * it as a color or depth/stencil attachment. */
struct r600_surface {
	struct pipe_surface		base;

	/* These can vary with block-compressed textures. */
	unsigned width0;
	unsigned height0;

	bool color_initialized;
	bool depth_initialized;

	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	bool color_is_int8;
	bool color_is_int10;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size;		/* R600 only */
	unsigned cb_color_dim;		/* EG only */
	unsigned cb_color_pitch;	/* EG and later */
	unsigned cb_color_slice;	/* EG and later */
	unsigned cb_color_attrib;	/* EG and later */
	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice;	/* EG and later */
	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask;		/* R600 only */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

	/* DB registers. */
	uint64_t db_depth_base;		/* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	uint64_t db_stencil_base;	/* EG and later */
	uint64_t db_htile_data_base;
	unsigned db_depth_info;		/* R600 only, then SI and later */
	unsigned db_z_info;		/* EG and later */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;	/* EG and later */
	unsigned db_stencil_info;	/* EG and later */
	unsigned db_prefetch_limit;	/* R600 only */
	unsigned db_htile_surface;
	unsigned db_preload_control;	/* EG and later */
};
291
/* One busy/idle sample pair accumulated by the GPU load thread. */
struct r600_mmio_counter {
	unsigned busy;
	unsigned idle;
};
296
/* Per-hardware-block GPU load counters. "array" aliases the named members
 * so they can be indexed generically (zero-length array, GNU C extension). */
union r600_mmio_counters {
	struct {
		/* For global GPU load including SDMA. */
		struct r600_mmio_counter gpu;

		/* GRBM_STATUS */
		struct r600_mmio_counter spi;
		struct r600_mmio_counter gui;
		struct r600_mmio_counter ta;
		struct r600_mmio_counter gds;
		struct r600_mmio_counter vgt;
		struct r600_mmio_counter ia;
		struct r600_mmio_counter sx;
		struct r600_mmio_counter wd;
		struct r600_mmio_counter bci;
		struct r600_mmio_counter sc;
		struct r600_mmio_counter pa;
		struct r600_mmio_counter db;
		struct r600_mmio_counter cp;
		struct r600_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct r600_mmio_counter sdma;

		/* CP_STAT */
		struct r600_mmio_counter pfp;
		struct r600_mmio_counter meq;
		struct r600_mmio_counter me;
		struct r600_mmio_counter surf_sync;
		struct r600_mmio_counter cp_dma;
		struct r600_mmio_counter scratch_ram;
	} named;
	unsigned array[0];
};
331
/* A pipe_memory_object backed by a winsys buffer (imported memory). */
struct r600_memory_object {
	struct pipe_memory_object	b;
	struct pb_buffer		*buf;
	uint32_t			stride;
	uint32_t			offset;
};
338
/* Per-device (screen) state shared by all contexts.
 * Common to r600g and radeonsi. */
struct r600_common_screen {
	struct pipe_screen		b;	/* base class */
	struct radeon_winsys		*ws;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct radeon_info		info;
	uint64_t			debug_flags;	/* DBG_* bitmask */
	bool				has_cp_dma;
	bool				has_streamout;

	struct disk_cache		*disk_shader_cache;

	struct slab_parent_pool		pool_transfers;

	/* Texture filter settings. */
	int				force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context		*aux_context;
	mtx_t				aux_context_lock;

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned			num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned			num_shaders_created;
	unsigned			num_shader_cache_hits;

	/* GPU load thread. */
	mtx_t				gpu_load_mutex;
	thrd_t				gpu_load_thread;
	union r600_mmio_counters	mmio_counters;
	volatile unsigned		gpu_load_stop_thread; /* bool */

	char				renderer_string[100];

	/* Performance counters. */
	struct r600_perfcounters	*perfcounters;

	/* If pipe_screen wants to recompute and re-emit the framebuffer,
	 * sampler, and image states of all contexts, it should atomically
	 * increment this.
	 *
	 * Each context will compare this with its own last known value of
	 * the counter before drawing and re-emit the states accordingly.
	 */
	unsigned			dirty_tex_counter;

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned			compressed_colortex_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier jobs
		 * that end in L2 are seen by CP.
		 */
		unsigned L2_to_cp;

		/* Context flags to set so that all writes from earlier
		 * compute jobs are seen by L2 clients.
		 */
		unsigned compute_to_L2;
	} barrier_flags;

	/* Driver hook: query driver-private (opaque) texture metadata. */
	void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct radeon_bo_metadata *md);

	/* Driver hook: apply driver-private (opaque) texture metadata. */
	void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
				    struct r600_texture *rtex,
				    struct radeon_bo_metadata *md);
};
422
/* This encapsulates a state or an operation which can be emitted into the GPU
 * command stream. */
struct r600_atom {
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
	unsigned		num_dw;		/* size of the emitted commands in dwords */
	unsigned short		id;
};
430
/* A bound stream output (transform feedback) target. */
struct r600_so_target {
	struct pipe_stream_output_target b;

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource	*buf_filled_size;
	unsigned		buf_filled_size_offset;
	bool			buf_filled_size_valid;

	unsigned		stride_in_dw;
};
441
/* Streamout (transform feedback) state of a context. */
struct r600_streamout {
	struct r600_atom		begin_atom;
	bool				begin_emitted;
	unsigned			num_dw_for_end;

	unsigned			enabled_mask;	/* bitmask of bound targets */
	unsigned			num_targets;
	struct r600_so_target		*targets[PIPE_MAX_SO_BUFFERS];

	unsigned			append_bitmask;
	bool				suspended;

	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	uint16_t			*stride_in_dw;
	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned			hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom		enable_atom;
	bool				streamout_enabled;
	bool				prims_gen_query_enabled;
	int				num_prims_gen_queries;
};
468
/* A scissor-like rectangle with signed coordinates, unlike
 * pipe_scissor_state; used for viewport-derived scissors. */
struct r600_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
};
475
/* Scissor state for all viewports plus its emit atom. */
struct r600_scissors {
	struct r600_atom		atom;
	unsigned			dirty_mask;	/* one bit per viewport */
	struct pipe_scissor_state	states[R600_MAX_VIEWPORTS];
};
481
/* Viewport state for all viewports plus its emit atom. */
struct r600_viewports {
	struct r600_atom		atom;
	unsigned			dirty_mask;	/* one bit per viewport */
	unsigned			depth_range_dirty_mask;
	struct pipe_viewport_state	states[R600_MAX_VIEWPORTS];
	struct r600_signed_scissor	as_scissor[R600_MAX_VIEWPORTS];
};
489
/* A command stream (GFX or DMA) together with its flush callback. */
struct r600_ring {
	struct radeon_cmdbuf		*cs;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};
495
/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib;		/* IB dwords */
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list;	/* buffers referenced by the CS */
	unsigned			bo_count;
};
504
/* Per-context state common to r600g and radeonsi; base class of the
 * drivers' context structures. The function pointers at the end are
 * driver hooks filled in by the specific driver. */
struct r600_common_context {
	struct pipe_context b; /* base class */

	struct r600_common_screen	*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_ctx	*ctx;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct r600_ring		gfx;
	struct r600_ring		dma;
	struct pipe_fence_handle	*last_gfx_fence;
	struct pipe_fence_handle	*last_sdma_fence;
	struct r600_resource		*eop_bug_scratch;
	unsigned			num_gfx_cs_flushes;
	unsigned			initial_gfx_cs_size;
	unsigned			gpu_reset_counter;
	/* Last seen values of the screen-global counters; compared before
	 * drawing to detect state that must be re-emitted. */
	unsigned			last_dirty_tex_counter;
	unsigned			last_compressed_colortex_counter;
	unsigned			last_num_draw_calls;

	struct threaded_context		*tc;
	struct u_suballocator		*allocator_zeroed_memory;
	struct slab_child_pool		pool_transfers;
	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */

	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;

	/* States. */
	struct r600_streamout		streamout;
	struct r600_scissors		scissors;
	struct r600_viewports		viewports;
	bool				scissor_enabled;
	bool				clip_halfz;
	bool				vs_writes_viewport_index;
	bool				vs_disables_clipping_viewport;

	/* Additional context states. */
	unsigned flags; /* flush flags */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int				num_occlusion_queries;
	int				num_perfect_occlusion_queries;
	struct list_head		active_queries;
	unsigned			num_cs_dw_queries_suspend;
	/* Misc stats. */
	unsigned			num_draw_calls;
	unsigned			num_decompress_calls;
	unsigned			num_mrt_draw_calls;
	unsigned			num_prim_restart_calls;
	unsigned			num_spill_draw_calls;
	unsigned			num_compute_calls;
	unsigned			num_spill_compute_calls;
	unsigned			num_dma_calls;
	unsigned			num_cp_dma_calls;
	unsigned			num_vs_flushes;
	unsigned			num_ps_flushes;
	unsigned			num_cs_flushes;
	unsigned			num_cb_cache_flushes;
	unsigned			num_db_cache_flushes;
	unsigned			num_resident_handles;
	uint64_t			num_alloc_tex_transfer_bytes;

	/* Render condition. */
	struct r600_atom		render_cond_atom;
	struct pipe_query		*render_cond;
	unsigned			render_cond_mode;
	bool				render_cond_invert;
	bool				render_cond_force_off; /* for u_blitter */

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float				sample_locations_1x[1][2];
	float				sample_locations_2x[2][2];
	float				sample_locations_4x[4][2];
	float				sample_locations_8x[8][2];
	float				sample_locations_16x[16][2];

	struct pipe_debug_callback	debug;
	struct pipe_device_reset_callback device_reset_callback;
	struct u_log_context		*log;

	void				*query_result_shader;

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	/* Clear a buffer range using the async DMA engine. */
	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);

	/* Clear a buffer range with the given cache-coherency requirement. */
	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     uint64_t offset, uint64_t size, unsigned value,
			     enum r600_coherency coher);

	/* Decompress a depth texture via a blit (optionally into "staging"). */
	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);

	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

	/* Update all resource bindings where the buffer is bound, including
	 * all resource descriptors. This is invalidate_buffer without
	 * the invalidation. */
	void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
			      uint64_t old_gpu_address);

	void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);

	void (*set_atom_dirty)(struct r600_common_context *ctx,
			       struct r600_atom *atom, bool dirty);

	void (*check_vm_faults)(struct r600_common_context *ctx,
				struct radeon_saved_cs *saved,
				enum ring_type ring);
};
638
639/* r600_buffer_common.c */
640bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
641				     struct pb_buffer *buf,
642				     enum radeon_bo_usage usage);
643void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
644                                      struct r600_resource *resource,
645                                      unsigned usage);
646void r600_buffer_subdata(struct pipe_context *ctx,
647			 struct pipe_resource *buffer,
648			 unsigned usage, unsigned offset,
649			 unsigned size, const void *data);
650void r600_init_resource_fields(struct r600_common_screen *rscreen,
651			       struct r600_resource *res,
652			       uint64_t size, unsigned alignment);
653bool r600_alloc_resource(struct r600_common_screen *rscreen,
654			 struct r600_resource *res);
655struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
656					 const struct pipe_resource *templ,
657					 unsigned alignment);
658struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
659						  unsigned flags,
660						  unsigned usage,
661						  unsigned size,
662						  unsigned alignment);
663struct pipe_resource *
664r600_buffer_from_user_memory(struct pipe_screen *screen,
665			     const struct pipe_resource *templ,
666			     void *user_memory);
667void
668r600_invalidate_resource(struct pipe_context *ctx,
669			 struct pipe_resource *resource);
670void r600_replace_buffer_storage(struct pipe_context *ctx,
671				 struct pipe_resource *dst,
672				 struct pipe_resource *src);
673
674/* r600_common_pipe.c */
675void r600_gfx_write_event_eop(struct r600_common_context *ctx,
676			      unsigned event, unsigned event_flags,
677			      unsigned data_sel,
678			      struct r600_resource *buf, uint64_t va,
679			      uint32_t new_fence, unsigned query_type);
680unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
681void r600_gfx_wait_fence(struct r600_common_context *ctx,
682			 struct r600_resource *buf,
683			 uint64_t va, uint32_t ref, uint32_t mask);
684void r600_draw_rectangle(struct blitter_context *blitter,
685			 void *vertex_elements_cso,
686			 blitter_get_vs_func get_vs,
687			 int x1, int y1, int x2, int y2,
688			 float depth, unsigned num_instances,
689			 enum blitter_attrib_type type,
690			 const union blitter_attrib *attrib);
691bool r600_common_screen_init(struct r600_common_screen *rscreen,
692			     struct radeon_winsys *ws);
693void r600_destroy_common_screen(struct r600_common_screen *rscreen);
694void r600_preflush_suspend_features(struct r600_common_context *ctx);
695void r600_postflush_resume_features(struct r600_common_context *ctx);
696bool r600_common_context_init(struct r600_common_context *rctx,
697			      struct r600_common_screen *rscreen,
698			      unsigned context_flags);
699void r600_common_context_cleanup(struct r600_common_context *rctx);
700bool r600_can_dump_shader(struct r600_common_screen *rscreen,
701			  unsigned processor);
702bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
703			      unsigned processor);
704void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
705			      uint64_t offset, uint64_t size, unsigned value);
706struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
707						  const struct pipe_resource *templ);
708const char *r600_get_llvm_processor_name(enum radeon_family family);
709void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
710			 struct r600_resource *dst, struct r600_resource *src);
711void radeon_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
712		    struct radeon_saved_cs *saved, bool get_buffer_list);
713void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
714bool r600_check_device_reset(struct r600_common_context *rctx);
715
716/* r600_gpu_load.c */
717void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
718uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
719unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
720			  uint64_t begin);
721
722/* r600_perfcounters.c */
723void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
724
725/* r600_query.c */
726void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
727void r600_query_init(struct r600_common_context *rctx);
728void r600_suspend_queries(struct r600_common_context *ctx);
729void r600_resume_queries(struct r600_common_context *ctx);
730void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
731
732/* r600_streamout.c */
733void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
734void r600_set_streamout_targets(struct pipe_context *ctx,
735				unsigned num_targets,
736				struct pipe_stream_output_target **targets,
737				const unsigned *offset);
738void r600_emit_streamout_end(struct r600_common_context *rctx);
739void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
740					     unsigned type, int diff);
741void r600_streamout_init(struct r600_common_context *rctx);
742
743/* r600_test_dma.c */
744void r600_test_dma(struct r600_common_screen *rscreen);
745
746/* r600_texture.c */
747bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
748				struct r600_texture *rdst,
749				unsigned dst_level, unsigned dstx,
750				unsigned dsty, unsigned dstz,
751				struct r600_texture *rsrc,
752				unsigned src_level,
753				const struct pipe_box *src_box);
754void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
755				 struct r600_texture *rtex,
756				 unsigned nr_samples,
757				 struct r600_fmask_info *out);
758void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
759				 struct r600_texture *rtex,
760				 struct r600_cmask_info *out);
761bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
762				     struct pipe_resource *texture,
763				     struct r600_texture **staging);
764void r600_print_texture_info(struct r600_common_screen *rscreen,
765			     struct r600_texture *rtex, struct u_log_context *log);
766struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
767					const struct pipe_resource *templ);
768struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
769						struct pipe_resource *texture,
770						const struct pipe_surface *templ,
771						unsigned width0, unsigned height0,
772						unsigned width, unsigned height);
773unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
774void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
775				   struct pipe_framebuffer_state *fb,
776				   struct r600_atom *fb_state,
777				   unsigned *buffers, ubyte *dirty_cbufs,
778				   const union pipe_color_union *color);
779void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
780void r600_init_context_texture_functions(struct r600_common_context *rctx);
781void eg_resource_alloc_immed(struct r600_common_screen *rscreen,
782			     struct r600_resource *res,
783			     unsigned immed_size);
784
785/* r600_viewport.c */
786void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
787					    struct pipe_scissor_state *scissor);
788void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
789				 bool scissor_enable, bool clip_halfz);
790void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
791					  struct tgsi_shader_info *info);
792void r600_init_viewport_functions(struct r600_common_context *rctx);
793
794/* cayman_msaa.c */
795extern const uint32_t eg_sample_locs_2x[4];
796extern const unsigned eg_max_dist_2x;
797extern const uint32_t eg_sample_locs_4x[4];
798extern const unsigned eg_max_dist_4x;
799void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
800				unsigned sample_index, float *out_value);
801void cayman_init_msaa(struct pipe_context *ctx);
802void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
803			    int ps_iter_samples, int overrast_samples);
804
805
806/* Inline helpers. */
807
/* Downcast a gallium resource to the driver's resource wrapper. */
static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	struct r600_resource *res = (struct r600_resource *)r;
	return res;
}
812
/* Update *ptr to point at res, adjusting reference counts on both the old
 * and the new resource. res may be NULL to just drop the old reference. */
static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	struct pipe_resource **dst = (struct pipe_resource **)ptr;
	struct pipe_resource *src = (struct pipe_resource *)res;

	pipe_resource_reference(dst, src);
}
819
820static inline void
821r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
822{
823	pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
824}
825
826static inline void
827r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
828{
829	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
830	struct r600_resource *res = (struct r600_resource *)r;
831
832	if (res) {
833		/* Add memory usage for need_gfx_cs_space */
834		rctx->vram += res->vram_usage;
835		rctx->gtt += res->gart_usage;
836	}
837}
838
839static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
840{
841	return rctx->streamout.streamout_enabled ||
842	       rctx->streamout.prims_gen_query_enabled;
843}
844
845#define     SQ_TEX_XY_FILTER_POINT                         0x00
846#define     SQ_TEX_XY_FILTER_BILINEAR                      0x01
847#define     SQ_TEX_XY_FILTER_ANISO_POINT                   0x02
848#define     SQ_TEX_XY_FILTER_ANISO_BILINEAR                0x03
849
850static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
851{
852	if (filter == PIPE_TEX_FILTER_LINEAR)
853		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
854				     : SQ_TEX_XY_FILTER_BILINEAR;
855	else
856		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
857				     : SQ_TEX_XY_FILTER_POINT;
858}
859
/* Map a max-anisotropy value to the hardware log2-style aniso level:
 * <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, otherwise 4. */
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	unsigned level = 0;

	while (level < 4 && filter >= (2u << level))
		level++;

	return level;
}
872
873static inline unsigned r600_wavefront_size(enum radeon_family family)
874{
875	switch (family) {
876	case CHIP_RV610:
877	case CHIP_RS780:
878	case CHIP_RV620:
879	case CHIP_RS880:
880		return 16;
881	case CHIP_RV630:
882	case CHIP_RV635:
883	case CHIP_RV730:
884	case CHIP_RV710:
885	case CHIP_PALM:
886	case CHIP_CEDAR:
887		return 32;
888	default:
889		return 64;
890	}
891}
892
893static inline enum radeon_bo_priority
894r600_get_sampler_view_priority(struct r600_resource *res)
895{
896	if (res->b.b.target == PIPE_BUFFER)
897		return RADEON_PRIO_SAMPLER_BUFFER;
898
899	if (res->b.b.nr_samples > 1)
900		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
901
902	return RADEON_PRIO_SAMPLER_TEXTURE;
903}
904
905static inline bool
906r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
907{
908	return (stencil_sampler && tex->can_sample_s) ||
909	       (!stencil_sampler && tex->can_sample_z);
910}
911
912static inline bool
913r600_htile_enabled(struct r600_texture *tex, unsigned level)
914{
915	return tex->htile_offset && level == 0;
916}
917
/* Print a debug message when the COMPUTE debug flag is set.
 *
 * Fix: the old definition ended in "while (0);" — the trailing semicolon
 * defeats the do/while(0) idiom and makes the macro a syntax error in
 * "if (x) COMPUTE_DBG(...); else ...". Also parenthesize the rscreen
 * argument for macro hygiene. */
#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if (((rscreen)->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
922
/* Print an error message prefixed with file, line and function. */
#define R600_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)

/* For MSAA sample positions. Packs four (x,y) sample coordinates into one
 * 32-bit register value, 4 bits per component. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
	(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |		   \
	(((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |	   \
	(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |	   \
	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
932
/* Convert a float to signed fixed point with the given number of
 * fractional bits (truncating toward zero). */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	const float scale = (float)(1 << frac_bits);

	return (int)(value * scale);
}
937
938#endif
939