/* r600_asm.h revision 01e04c3f */
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
23#ifndef R600_ASM_H
24#define R600_ASM_H
25
26#include "r600_pipe.h"
27#include "r600_isa.h"
28#include "tgsi/tgsi_exec.h"
29
/*
 * Description of one source operand of an ALU instruction.
 * struct r600_bytecode_alu carries three of these in its src[] array.
 *
 * NOTE(review): the field notes are inferred from the names only; confirm
 * against the assembler before relying on them.
 */
struct r600_bytecode_alu_src {
	unsigned			sel;
	unsigned			chan;
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;
	unsigned			kc_bank;	/* presumably a kcache bank index - TODO confirm */
	unsigned			kc_rel;
	uint32_t			value;		/* 32-bit payload; presumably a literal - TODO confirm */
};
40
/*
 * Description of the destination operand of an ALU instruction.
 * struct r600_bytecode_alu embeds exactly one of these as its dst member.
 */
struct r600_bytecode_alu_dst {
	unsigned			sel;
	unsigned			chan;
	unsigned			clamp;
	unsigned			write;
	unsigned			rel;
};
48
49struct r600_bytecode_alu {
50	struct list_head		list;
51	struct r600_bytecode_alu_src		src[3];
52	struct r600_bytecode_alu_dst		dst;
53	unsigned			op;
54	unsigned			last;
55	unsigned			is_op3;
56	unsigned			is_lds_idx_op;
57	unsigned			execute_mask;
58	unsigned			update_pred;
59	unsigned			pred_sel;
60	unsigned			bank_swizzle;
61	unsigned			bank_swizzle_force;
62	unsigned			omod;
63	unsigned                        index_mode;
64	unsigned                        lds_idx;
65};
66
67struct r600_bytecode_tex {
68	struct list_head		list;
69	unsigned			op;
70	unsigned			inst_mod;
71	unsigned			resource_id;
72	unsigned			src_gpr;
73	unsigned			src_rel;
74	unsigned			dst_gpr;
75	unsigned			dst_rel;
76	unsigned			dst_sel_x;
77	unsigned			dst_sel_y;
78	unsigned			dst_sel_z;
79	unsigned			dst_sel_w;
80	unsigned			lod_bias;
81	unsigned			coord_type_x;
82	unsigned			coord_type_y;
83	unsigned			coord_type_z;
84	unsigned			coord_type_w;
85	int				offset_x;
86	int				offset_y;
87	int				offset_z;
88	unsigned			sampler_id;
89	unsigned			src_sel_x;
90	unsigned			src_sel_y;
91	unsigned			src_sel_z;
92	unsigned			src_sel_w;
93	/* indexed samplers/resources only on evergreen/cayman */
94	unsigned			sampler_index_mode;
95	unsigned			resource_index_mode;
96};
97
98struct r600_bytecode_vtx {
99	struct list_head		list;
100	unsigned			op;
101	unsigned			fetch_type;
102	unsigned			buffer_id;
103	unsigned			src_gpr;
104	unsigned			src_sel_x;
105	unsigned			mega_fetch_count;
106	unsigned			dst_gpr;
107	unsigned			dst_sel_x;
108	unsigned			dst_sel_y;
109	unsigned			dst_sel_z;
110	unsigned			dst_sel_w;
111	unsigned			use_const_fields;
112	unsigned			data_format;
113	unsigned			num_format_all;
114	unsigned			format_comp_all;
115	unsigned			srf_mode_all;
116	unsigned			offset;
117	unsigned			endian;
118	unsigned			buffer_index_mode;
119
120	// READ_SCRATCH fields
121	unsigned			uncached;
122	unsigned			indexed;
123	unsigned			src_sel_y;
124	unsigned			src_rel;
125	unsigned			elem_size;
126	unsigned			array_size;
127	unsigned			array_base;
128	unsigned			burst_count;
129	unsigned			dst_rel;
130};
131
132struct r600_bytecode_gds {
133	struct list_head		list;
134	unsigned			op;
135	unsigned			src_gpr;
136	unsigned			src_rel;
137	unsigned			src_sel_x;
138	unsigned			src_sel_y;
139	unsigned			src_sel_z;
140	unsigned			src_gpr2;
141	unsigned			dst_gpr;
142	unsigned			dst_rel;
143	unsigned			dst_sel_x;
144	unsigned			dst_sel_y;
145	unsigned			dst_sel_z;
146	unsigned			dst_sel_w;
147	unsigned			uav_index_mode;
148	unsigned                        uav_id;
149	unsigned                        alloc_consume;
150	unsigned                        bcast_first_req;
151};
152
/*
 * Description of an output (export / memory write) operation.
 * Embedded by value in struct r600_bytecode_cf ('output') and queued in
 * struct r600_bytecode ('pending_outputs').
 */
struct r600_bytecode_output {
	unsigned			array_base;
	unsigned			array_size;
	unsigned			comp_mask;
	unsigned			type;

	unsigned			op;

	unsigned			elem_size;
	unsigned			gpr;
	unsigned			swizzle_x;
	unsigned			swizzle_y;
	unsigned			swizzle_z;
	unsigned			swizzle_w;
	unsigned			burst_count;
	unsigned			index_gpr;
	unsigned			mark; /* used by MEM_SCRATCH */
};
171
/*
 * RAT parameters attached to a control-flow instruction
 * (the 'rat' member of struct r600_bytecode_cf).
 */
struct r600_bytecode_rat {
	unsigned			id;
	unsigned			inst;
	unsigned			index_mode;
};
177
/*
 * One kcache entry; struct r600_bytecode_cf holds an array of four of these
 * ('kcache[4]').
 */
struct r600_bytecode_kcache {
	unsigned			bank;
	unsigned			mode;
	unsigned			addr;
	unsigned			index_mode;
};
184
185struct r600_bytecode_cf {
186	struct list_head		list;
187
188	unsigned			op;
189	unsigned			addr;
190	unsigned			ndw;
191	unsigned			id;
192	unsigned			cond;
193	unsigned			pop_count;
194	unsigned			count;
195	unsigned			cf_addr; /* control flow addr */
196	struct r600_bytecode_kcache		kcache[4];
197	unsigned			r6xx_uses_waterfall;
198	unsigned			eg_alu_extended;
199	unsigned			barrier;
200	unsigned			end_of_program;
201	unsigned                        mark;
202	unsigned                        vpm;
203	struct list_head		alu;
204	struct list_head		tex;
205	struct list_head		vtx;
206	struct list_head		gds;
207	struct r600_bytecode_output		output;
208	struct r600_bytecode_rat		rat;
209	struct r600_bytecode_alu		*curr_bs_head;
210	struct r600_bytecode_alu		*prev_bs_head;
211	struct r600_bytecode_alu		*prev2_bs_head;
212	unsigned isa[2];
213};
214
/*
 * Flow-control construct kinds.
 * NOTE(review): presumably the values stored in r600_cf_stack_entry.type -
 * confirm against the users of fc_stack.
 */
#define FC_NONE				0
#define FC_IF				1
#define FC_LOOP				2
#define FC_REP				3
#define FC_PUSH_VPM			4
#define FC_PUSH_WQM			5
221
/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * ('fc_stack').  'mid' is an array of CF instruction pointers with
 * 'num_mid' giving the number of entries.
 */
struct r600_cf_stack_entry {
	int				type;
	struct r600_bytecode_cf		*start;
	struct r600_bytecode_cf		**mid; /* used to store the else point */
	int				num_mid;
};
228
/* Maximum call depth (0x20 == 32). */
#define SQ_MAX_CALL_DEPTH 0x00000020

/*
 * AR handling modes.
 * NOTE(review): presumably the values of r600_bytecode.ar_handling - confirm.
 */
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
233
/*
 * Bookkeeping for the hardware stack depth: current nesting levels per
 * kind of operation plus the resulting size requirement.
 */
struct r600_stack_info {
	/* current level of non-WQM PUSH operations
	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE) */
	int push;
	/* current level of WQM PUSH operations
	 * (PUSH, PUSH_ELSE, PUSH_WQM) */
	int push_wqm;
	/* current loop level */
	int loop;

	/* required depth */
	int max_entries;
	/* subentries per entry */
	int entry_size;
};
249
250struct r600_bytecode {
251	enum chip_class			chip_class;
252	enum radeon_family		family;
253	bool				has_compressed_msaa_texturing;
254	int				type;
255	struct list_head		cf;
256	struct r600_bytecode_cf		*cf_last;
257	unsigned			ndw;
258	unsigned			ncf;
259	unsigned			ngpr;
260	unsigned			nstack;
261	unsigned			nlds_dw;
262	unsigned			nresource;
263	unsigned			force_add_cf;
264	uint32_t			*bytecode;
265	uint32_t			fc_sp;
266	struct r600_cf_stack_entry	fc_stack[TGSI_EXEC_MAX_NESTING];
267	struct r600_stack_info		stack;
268	unsigned	ar_loaded;
269	unsigned	ar_reg;
270	unsigned	ar_chan;
271	unsigned        ar_handling;
272	unsigned        r6xx_nop_after_rel_dst;
273	bool            index_loaded[2];
274	unsigned        index_reg[2]; /* indexing register CF_INDEX_[01] */
275	unsigned        debug_id;
276	struct r600_isa* isa;
277	struct r600_bytecode_output pending_outputs[5];
278	int n_pending_outputs;
279	boolean			need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
280	boolean			precise;
281};
282
/* eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_clause);
int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
			  struct r600_bytecode_alu *alu, unsigned id);
289/* r600_asm.c */
290void r600_bytecode_init(struct r600_bytecode *bc,
291			enum chip_class chip_class,
292			enum radeon_family family,
293			bool has_compressed_msaa_texturing);
294void r600_bytecode_clear(struct r600_bytecode *bc);
295int r600_bytecode_add_alu(struct r600_bytecode *bc,
296		const struct r600_bytecode_alu *alu);
297int r600_bytecode_add_vtx(struct r600_bytecode *bc,
298		const struct r600_bytecode_vtx *vtx);
299int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
300			     const struct r600_bytecode_vtx *vtx);
301int r600_bytecode_add_tex(struct r600_bytecode *bc,
302		const struct r600_bytecode_tex *tex);
303int r600_bytecode_add_gds(struct r600_bytecode *bc,
304		const struct r600_bytecode_gds *gds);
305int r600_bytecode_add_output(struct r600_bytecode *bc,
306		const struct r600_bytecode_output *output);
307int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
308		const struct r600_bytecode_output *output);
309void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean needed);
310boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc);
311int r600_bytecode_build(struct r600_bytecode *bc);
312int r600_bytecode_add_cf(struct r600_bytecode *bc);
313int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
314		unsigned op);
315int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
316		const struct r600_bytecode_alu *alu, unsigned type);
317void r600_bytecode_special_constants(uint32_t value,
318		unsigned *sel, unsigned *neg, unsigned abs);
319void r600_bytecode_disasm(struct r600_bytecode *bc);
320void r600_bytecode_alu_read(struct r600_bytecode *bc,
321		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
322
323int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
324
325void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
326				      unsigned count,
327				      const struct pipe_vertex_element *elements);
328
/* r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
		const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
		struct r600_bytecode_vtx *mem, unsigned id);

void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);

void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
346
/*
 * Swap an index with its neighbour inside each pair of the range 0..3:
 * 0 <-> 1 and 2 <-> 3.
 *
 * @param i  index to remap
 * @return   the partner index for i in 0..3; 0 for any other value
 */
static inline int fp64_switch(int i)
{
	switch (i) {
	case 0:
		return 1;
	case 1:
		return 0;
	case 2:
		return 3;
	case 3:
		return 2;
	default:
		/* out-of-range input maps to 0 */
		return 0;
	}
}
361#endif
362