1848b8605Smrg/*
2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5848b8605Smrg * copy of this software and associated documentation files (the "Software"),
6848b8605Smrg * to deal in the Software without restriction, including without limitation
7848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
8848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom
9848b8605Smrg * the Software is furnished to do so, subject to the following conditions:
10848b8605Smrg *
11848b8605Smrg * The above copyright notice and this permission notice (including the next
12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
13848b8605Smrg * Software.
14848b8605Smrg *
15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
22848b8605Smrg */
23848b8605Smrg#include "r600_sq.h"
24848b8605Smrg#include "r600_opcodes.h"
25848b8605Smrg#include "r600_formats.h"
26848b8605Smrg#include "r600_shader.h"
27848b8605Smrg#include "r600d.h"
28848b8605Smrg
29848b8605Smrg#include <errno.h>
30b8e80941Smrg#include "util/u_bitcast.h"
31848b8605Smrg#include "util/u_dump.h"
32848b8605Smrg#include "util/u_memory.h"
33848b8605Smrg#include "util/u_math.h"
34848b8605Smrg#include "pipe/p_shader_tokens.h"
35848b8605Smrg
36848b8605Smrg#include "sb/sb_public.h"
37848b8605Smrg
/* Dimensions of the GPR read-port reservation table kept in
 * struct alu_bank_swizzle (hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]). */
#define NUM_OF_CYCLES     3	/* first index: read cycle */
#define NUM_OF_COMPONENTS 4	/* second index: channel */
40848b8605Smrg
41b8e80941Smrgstatic inline bool alu_writes(struct r600_bytecode_alu *alu)
42848b8605Smrg{
43b8e80941Smrg	return alu->dst.write || alu->is_op3;
44848b8605Smrg}
45848b8605Smrg
46b8e80941Smrgstatic inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
47b8e80941Smrg{
48b8e80941Smrg	return r600_isa_alu(alu->op)->src_count;
49b8e80941Smrg}
50848b8605Smrg
51848b8605Smrgstatic struct r600_bytecode_cf *r600_bytecode_cf(void)
52848b8605Smrg{
53848b8605Smrg	struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
54848b8605Smrg
55b8e80941Smrg	if (!cf)
56848b8605Smrg		return NULL;
57848b8605Smrg	LIST_INITHEAD(&cf->list);
58848b8605Smrg	LIST_INITHEAD(&cf->alu);
59848b8605Smrg	LIST_INITHEAD(&cf->vtx);
60848b8605Smrg	LIST_INITHEAD(&cf->tex);
61b8e80941Smrg	LIST_INITHEAD(&cf->gds);
62848b8605Smrg	return cf;
63848b8605Smrg}
64848b8605Smrg
65848b8605Smrgstatic struct r600_bytecode_alu *r600_bytecode_alu(void)
66848b8605Smrg{
67848b8605Smrg	struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
68848b8605Smrg
69b8e80941Smrg	if (!alu)
70848b8605Smrg		return NULL;
71848b8605Smrg	LIST_INITHEAD(&alu->list);
72848b8605Smrg	return alu;
73848b8605Smrg}
74848b8605Smrg
75848b8605Smrgstatic struct r600_bytecode_vtx *r600_bytecode_vtx(void)
76848b8605Smrg{
77848b8605Smrg	struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
78848b8605Smrg
79b8e80941Smrg	if (!vtx)
80848b8605Smrg		return NULL;
81848b8605Smrg	LIST_INITHEAD(&vtx->list);
82848b8605Smrg	return vtx;
83848b8605Smrg}
84848b8605Smrg
85848b8605Smrgstatic struct r600_bytecode_tex *r600_bytecode_tex(void)
86848b8605Smrg{
87848b8605Smrg	struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
88848b8605Smrg
89b8e80941Smrg	if (!tex)
90848b8605Smrg		return NULL;
91848b8605Smrg	LIST_INITHEAD(&tex->list);
92848b8605Smrg	return tex;
93848b8605Smrg}
94848b8605Smrg
95b8e80941Smrgstatic struct r600_bytecode_gds *r600_bytecode_gds(void)
96b8e80941Smrg{
97b8e80941Smrg	struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
98b8e80941Smrg
99b8e80941Smrg	if (gds == NULL)
100b8e80941Smrg		return NULL;
101b8e80941Smrg	LIST_INITHEAD(&gds->list);
102b8e80941Smrg	return gds;
103b8e80941Smrg}
104b8e80941Smrg
105848b8605Smrgstatic unsigned stack_entry_size(enum radeon_family chip) {
106848b8605Smrg	/* Wavefront size:
107848b8605Smrg	 *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
108848b8605Smrg	 *       Aruba/Sumo/Sumo2/redwood/juniper
109848b8605Smrg	 *   32: R630/R730/R710/Palm/Cedar
110848b8605Smrg	 *   16: R610/Rs780
111848b8605Smrg	 *
112848b8605Smrg	 * Stack row size:
113848b8605Smrg	 * 	Wavefront Size                        16  32  48  64
114848b8605Smrg	 * 	Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
115848b8605Smrg	 * 	Columns per Row (R9xx+)                8   4   4   4 */
116848b8605Smrg
117848b8605Smrg	switch (chip) {
118848b8605Smrg	/* FIXME: are some chips missing here? */
119848b8605Smrg	/* wavefront size 16 */
120848b8605Smrg	case CHIP_RV610:
121848b8605Smrg	case CHIP_RS780:
122848b8605Smrg	case CHIP_RV620:
123848b8605Smrg	case CHIP_RS880:
124848b8605Smrg	/* wavefront size 32 */
125848b8605Smrg	case CHIP_RV630:
126848b8605Smrg	case CHIP_RV635:
127848b8605Smrg	case CHIP_RV730:
128848b8605Smrg	case CHIP_RV710:
129848b8605Smrg	case CHIP_PALM:
130848b8605Smrg	case CHIP_CEDAR:
131848b8605Smrg		return 8;
132848b8605Smrg
133848b8605Smrg	/* wavefront size 64 */
134848b8605Smrg	default:
135848b8605Smrg		return 4;
136848b8605Smrg	}
137848b8605Smrg}
138848b8605Smrg
139848b8605Smrgvoid r600_bytecode_init(struct r600_bytecode *bc,
140848b8605Smrg			enum chip_class chip_class,
141848b8605Smrg			enum radeon_family family,
142848b8605Smrg			bool has_compressed_msaa_texturing)
143848b8605Smrg{
144848b8605Smrg	static unsigned next_shader_id = 0;
145848b8605Smrg
146848b8605Smrg	bc->debug_id = ++next_shader_id;
147848b8605Smrg
148848b8605Smrg	if ((chip_class == R600) &&
149848b8605Smrg	    (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
150848b8605Smrg		bc->ar_handling = AR_HANDLE_RV6XX;
151848b8605Smrg		bc->r6xx_nop_after_rel_dst = 1;
152848b8605Smrg	} else {
153848b8605Smrg		bc->ar_handling = AR_HANDLE_NORMAL;
154848b8605Smrg		bc->r6xx_nop_after_rel_dst = 0;
155848b8605Smrg	}
156848b8605Smrg
157848b8605Smrg	LIST_INITHEAD(&bc->cf);
158848b8605Smrg	bc->chip_class = chip_class;
159848b8605Smrg	bc->family = family;
160848b8605Smrg	bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
161848b8605Smrg	bc->stack.entry_size = stack_entry_size(family);
162848b8605Smrg}
163848b8605Smrg
164848b8605Smrgint r600_bytecode_add_cf(struct r600_bytecode *bc)
165848b8605Smrg{
166848b8605Smrg	struct r600_bytecode_cf *cf = r600_bytecode_cf();
167848b8605Smrg
168b8e80941Smrg	if (!cf)
169848b8605Smrg		return -ENOMEM;
170848b8605Smrg	LIST_ADDTAIL(&cf->list, &bc->cf);
171848b8605Smrg	if (bc->cf_last) {
172848b8605Smrg		cf->id = bc->cf_last->id + 2;
173848b8605Smrg		if (bc->cf_last->eg_alu_extended) {
174848b8605Smrg			/* take into account extended alu size */
175848b8605Smrg			cf->id += 2;
176848b8605Smrg			bc->ndw += 2;
177848b8605Smrg		}
178848b8605Smrg	}
179848b8605Smrg	bc->cf_last = cf;
180848b8605Smrg	bc->ncf++;
181848b8605Smrg	bc->ndw += 2;
182848b8605Smrg	bc->force_add_cf = 0;
183848b8605Smrg	bc->ar_loaded = 0;
184848b8605Smrg	return 0;
185848b8605Smrg}
186848b8605Smrg
187848b8605Smrgint r600_bytecode_add_output(struct r600_bytecode *bc,
188848b8605Smrg		const struct r600_bytecode_output *output)
189848b8605Smrg{
190848b8605Smrg	int r;
191848b8605Smrg
192848b8605Smrg	if (output->gpr >= bc->ngpr)
193848b8605Smrg		bc->ngpr = output->gpr + 1;
194848b8605Smrg
195848b8605Smrg	if (bc->cf_last && (bc->cf_last->op == output->op ||
196848b8605Smrg		(bc->cf_last->op == CF_OP_EXPORT &&
197848b8605Smrg		output->op == CF_OP_EXPORT_DONE)) &&
198848b8605Smrg		output->type == bc->cf_last->output.type &&
199848b8605Smrg		output->elem_size == bc->cf_last->output.elem_size &&
200848b8605Smrg		output->swizzle_x == bc->cf_last->output.swizzle_x &&
201848b8605Smrg		output->swizzle_y == bc->cf_last->output.swizzle_y &&
202848b8605Smrg		output->swizzle_z == bc->cf_last->output.swizzle_z &&
203848b8605Smrg		output->swizzle_w == bc->cf_last->output.swizzle_w &&
204848b8605Smrg		output->comp_mask == bc->cf_last->output.comp_mask &&
205848b8605Smrg		(output->burst_count + bc->cf_last->output.burst_count) <= 16) {
206848b8605Smrg
207848b8605Smrg		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
208848b8605Smrg			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
209848b8605Smrg
210848b8605Smrg			bc->cf_last->op = bc->cf_last->output.op = output->op;
211848b8605Smrg			bc->cf_last->output.gpr = output->gpr;
212848b8605Smrg			bc->cf_last->output.array_base = output->array_base;
213848b8605Smrg			bc->cf_last->output.burst_count += output->burst_count;
214848b8605Smrg			return 0;
215848b8605Smrg
216848b8605Smrg		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
217848b8605Smrg			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
218848b8605Smrg
219848b8605Smrg			bc->cf_last->op = bc->cf_last->output.op = output->op;
220848b8605Smrg			bc->cf_last->output.burst_count += output->burst_count;
221848b8605Smrg			return 0;
222848b8605Smrg		}
223848b8605Smrg	}
224848b8605Smrg
225848b8605Smrg	r = r600_bytecode_add_cf(bc);
226848b8605Smrg	if (r)
227848b8605Smrg		return r;
228848b8605Smrg	bc->cf_last->op = output->op;
229848b8605Smrg	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
230848b8605Smrg	bc->cf_last->barrier = 1;
231848b8605Smrg	return 0;
232848b8605Smrg}
233848b8605Smrg
234b8e80941Smrgint r600_bytecode_add_pending_output(struct r600_bytecode *bc,
235b8e80941Smrg		const struct r600_bytecode_output *output)
236b8e80941Smrg{
237b8e80941Smrg	assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
238b8e80941Smrg	bc->pending_outputs[bc->n_pending_outputs++] = *output;
239b8e80941Smrg
240b8e80941Smrg	return 0;
241b8e80941Smrg}
242b8e80941Smrg
243b8e80941Smrgvoid r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
244b8e80941Smrg{
245b8e80941Smrg	bc->need_wait_ack = need_wait_ack;
246b8e80941Smrg}
247b8e80941Smrg
248b8e80941Smrgboolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
249b8e80941Smrg{
250b8e80941Smrg	return bc->need_wait_ack;
251b8e80941Smrg}
252b8e80941Smrg
253848b8605Smrg/* alu instructions that can ony exits once per group */
254b8e80941Smrgstatic int is_alu_once_inst(struct r600_bytecode_alu *alu)
255848b8605Smrg{
256b8e80941Smrg	return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
257848b8605Smrg}
258848b8605Smrg
259848b8605Smrgstatic int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
260848b8605Smrg{
261848b8605Smrg	return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
262848b8605Smrg			(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
263848b8605Smrg}
264848b8605Smrg
265b8e80941Smrgstatic int is_alu_mova_inst(struct r600_bytecode_alu *alu)
266848b8605Smrg{
267848b8605Smrg	return r600_isa_alu(alu->op)->flags & AF_MOVA;
268848b8605Smrg}
269848b8605Smrg
270b8e80941Smrgstatic int alu_uses_rel(struct r600_bytecode_alu *alu)
271848b8605Smrg{
272b8e80941Smrg	unsigned num_src = r600_bytecode_get_num_operands(alu);
273848b8605Smrg	unsigned src;
274848b8605Smrg
275848b8605Smrg	if (alu->dst.rel) {
276848b8605Smrg		return 1;
277848b8605Smrg	}
278848b8605Smrg
279848b8605Smrg	for (src = 0; src < num_src; ++src) {
280848b8605Smrg		if (alu->src[src].rel) {
281848b8605Smrg			return 1;
282848b8605Smrg		}
283848b8605Smrg	}
284848b8605Smrg	return 0;
285848b8605Smrg}
286848b8605Smrg
287b8e80941Smrgstatic int is_lds_read(int sel)
288b8e80941Smrg{
289b8e80941Smrg  return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
290b8e80941Smrg}
291b8e80941Smrg
292b8e80941Smrgstatic int alu_uses_lds(struct r600_bytecode_alu *alu)
293b8e80941Smrg{
294b8e80941Smrg	unsigned num_src = r600_bytecode_get_num_operands(alu);
295b8e80941Smrg	unsigned src;
296b8e80941Smrg
297b8e80941Smrg	for (src = 0; src < num_src; ++src) {
298b8e80941Smrg		if (is_lds_read(alu->src[src].sel)) {
299b8e80941Smrg			return 1;
300b8e80941Smrg		}
301b8e80941Smrg	}
302b8e80941Smrg	return 0;
303b8e80941Smrg}
304b8e80941Smrg
305b8e80941Smrgstatic int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
306b8e80941Smrg{
307b8e80941Smrg	const struct alu_op_info *op = r600_isa_alu(alu->op);
308b8e80941Smrg	return (op->flags & AF_64);
309b8e80941Smrg}
310b8e80941Smrg
311848b8605Smrgstatic int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
312848b8605Smrg{
313848b8605Smrg	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
314848b8605Smrg	return !(slots & AF_S);
315848b8605Smrg}
316848b8605Smrg
317848b8605Smrgstatic int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
318848b8605Smrg{
319848b8605Smrg	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
320848b8605Smrg	return !(slots & AF_V);
321848b8605Smrg}
322848b8605Smrg
323848b8605Smrg/* alu instructions that can execute on any unit */
324848b8605Smrgstatic int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
325848b8605Smrg{
326848b8605Smrg	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
327848b8605Smrg	return slots == AF_VS;
328848b8605Smrg}
329848b8605Smrg
330b8e80941Smrgstatic int is_nop_inst(struct r600_bytecode_alu *alu)
331848b8605Smrg{
332848b8605Smrg	return alu->op == ALU_OP0_NOP;
333b8e80941Smrg}
334848b8605Smrg
335848b8605Smrgstatic int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
336848b8605Smrg			    struct r600_bytecode_alu *assignment[5])
337848b8605Smrg{
338848b8605Smrg	struct r600_bytecode_alu *alu;
339848b8605Smrg	unsigned i, chan, trans;
340848b8605Smrg	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
341848b8605Smrg
342848b8605Smrg	for (i = 0; i < max_slots; i++)
343848b8605Smrg		assignment[i] = NULL;
344848b8605Smrg
345848b8605Smrg	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) {
346848b8605Smrg		chan = alu->dst.chan;
347848b8605Smrg		if (max_slots == 4)
348848b8605Smrg			trans = 0;
349848b8605Smrg		else if (is_alu_trans_unit_inst(bc, alu))
350848b8605Smrg			trans = 1;
351848b8605Smrg		else if (is_alu_vec_unit_inst(bc, alu))
352848b8605Smrg			trans = 0;
353848b8605Smrg		else if (assignment[chan])
354848b8605Smrg			trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */
355848b8605Smrg		else
356848b8605Smrg			trans = 0;
357848b8605Smrg
358848b8605Smrg		if (trans) {
359848b8605Smrg			if (assignment[4]) {
360848b8605Smrg				assert(0); /* ALU.Trans has already been allocated. */
361848b8605Smrg				return -1;
362848b8605Smrg			}
363848b8605Smrg			assignment[4] = alu;
364848b8605Smrg		} else {
365848b8605Smrg			if (assignment[chan]) {
366848b8605Smrg				assert(0); /* ALU.chan has already been allocated. */
367848b8605Smrg				return -1;
368848b8605Smrg			}
369848b8605Smrg			assignment[chan] = alu;
370848b8605Smrg		}
371848b8605Smrg
372848b8605Smrg		if (alu->last)
373848b8605Smrg			break;
374848b8605Smrg	}
375848b8605Smrg	return 0;
376848b8605Smrg}
377848b8605Smrg
378848b8605Smrgstruct alu_bank_swizzle {
379848b8605Smrg	int	hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
380848b8605Smrg	int	hw_cfile_addr[4];
381848b8605Smrg	int	hw_cfile_elem[4];
382848b8605Smrg};
383848b8605Smrg
384848b8605Smrgstatic const unsigned cycle_for_bank_swizzle_vec[][3] = {
385848b8605Smrg	[SQ_ALU_VEC_012] = { 0, 1, 2 },
386848b8605Smrg	[SQ_ALU_VEC_021] = { 0, 2, 1 },
387848b8605Smrg	[SQ_ALU_VEC_120] = { 1, 2, 0 },
388848b8605Smrg	[SQ_ALU_VEC_102] = { 1, 0, 2 },
389848b8605Smrg	[SQ_ALU_VEC_201] = { 2, 0, 1 },
390848b8605Smrg	[SQ_ALU_VEC_210] = { 2, 1, 0 }
391848b8605Smrg};
392848b8605Smrg
393848b8605Smrgstatic const unsigned cycle_for_bank_swizzle_scl[][3] = {
394848b8605Smrg	[SQ_ALU_SCL_210] = { 2, 1, 0 },
395848b8605Smrg	[SQ_ALU_SCL_122] = { 1, 2, 2 },
396848b8605Smrg	[SQ_ALU_SCL_212] = { 2, 1, 2 },
397848b8605Smrg	[SQ_ALU_SCL_221] = { 2, 2, 1 }
398848b8605Smrg};
399848b8605Smrg
400848b8605Smrgstatic void init_bank_swizzle(struct alu_bank_swizzle *bs)
401848b8605Smrg{
402848b8605Smrg	int i, cycle, component;
403848b8605Smrg	/* set up gpr use */
404848b8605Smrg	for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
405848b8605Smrg		for (component = 0; component < NUM_OF_COMPONENTS; component++)
406848b8605Smrg			 bs->hw_gpr[cycle][component] = -1;
407848b8605Smrg	for (i = 0; i < 4; i++)
408848b8605Smrg		bs->hw_cfile_addr[i] = -1;
409848b8605Smrg	for (i = 0; i < 4; i++)
410848b8605Smrg		bs->hw_cfile_elem[i] = -1;
411848b8605Smrg}
412848b8605Smrg
413848b8605Smrgstatic int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
414848b8605Smrg{
415848b8605Smrg	if (bs->hw_gpr[cycle][chan] == -1)
416848b8605Smrg		bs->hw_gpr[cycle][chan] = sel;
417848b8605Smrg	else if (bs->hw_gpr[cycle][chan] != (int)sel) {
418848b8605Smrg		/* Another scalar operation has already used the GPR read port for the channel. */
419848b8605Smrg		return -1;
420848b8605Smrg	}
421848b8605Smrg	return 0;
422848b8605Smrg}
423848b8605Smrg
424b8e80941Smrgstatic int reserve_cfile(const struct r600_bytecode *bc,
425b8e80941Smrg			 struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
426848b8605Smrg{
427848b8605Smrg	int res, num_res = 4;
428848b8605Smrg	if (bc->chip_class >= R700) {
429848b8605Smrg		num_res = 2;
430848b8605Smrg		chan /= 2;
431848b8605Smrg	}
432848b8605Smrg	for (res = 0; res < num_res; ++res) {
433848b8605Smrg		if (bs->hw_cfile_addr[res] == -1) {
434848b8605Smrg			bs->hw_cfile_addr[res] = sel;
435848b8605Smrg			bs->hw_cfile_elem[res] = chan;
436848b8605Smrg			return 0;
437848b8605Smrg		} else if (bs->hw_cfile_addr[res] == sel &&
438848b8605Smrg			bs->hw_cfile_elem[res] == chan)
439848b8605Smrg			return 0; /* Read for this scalar element already reserved, nothing to do here. */
440848b8605Smrg	}
441848b8605Smrg	/* All cfile read ports are used, cannot reference vector element. */
442848b8605Smrg	return -1;
443848b8605Smrg}
444848b8605Smrg
/* Return 1 when the selector is in the GPR range 0..127, 0 otherwise. */
static int is_gpr(unsigned sel)
{
	return sel < 128;
}
449848b8605Smrg
/* Return 1 when the selector refers to a constant-file / kcache constant.
 *
 * CB constants start at 512 and are translated to a kcache index when ALU
 * clauses are constructed. Kcache constants are handled the same way as
 * the (now gone) cfile constants; whether that is really required is an
 * open question carried over from the original notes. */
static int is_cfile(unsigned sel)
{
	if (sel >= 128 && sel <= 191)	/* Kcache after translation. */
		return 1;
	if (sel >= 256 && sel <= 511)
		return 1;
	if (sel >= 512 && sel <= 4606)	/* Kcache before translation. */
		return 1;
	return 0;
}
459848b8605Smrg
460848b8605Smrgstatic int is_const(int sel)
461848b8605Smrg{
462848b8605Smrg	return is_cfile(sel) ||
463848b8605Smrg		(sel >= V_SQ_ALU_SRC_0 &&
464848b8605Smrg		sel <= V_SQ_ALU_SRC_LITERAL);
465848b8605Smrg}
466848b8605Smrg
467b8e80941Smrgstatic int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
468848b8605Smrg			struct alu_bank_swizzle *bs, int bank_swizzle)
469848b8605Smrg{
470848b8605Smrg	int r, src, num_src, sel, elem, cycle;
471848b8605Smrg
472b8e80941Smrg	num_src = r600_bytecode_get_num_operands(alu);
473848b8605Smrg	for (src = 0; src < num_src; src++) {
474848b8605Smrg		sel = alu->src[src].sel;
475848b8605Smrg		elem = alu->src[src].chan;
476848b8605Smrg		if (is_gpr(sel)) {
477848b8605Smrg			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
478848b8605Smrg			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
479848b8605Smrg				/* Nothing to do; special-case optimization,
480848b8605Smrg				 * second source uses first source’s reservation. */
481848b8605Smrg				continue;
482848b8605Smrg			else {
483848b8605Smrg				r = reserve_gpr(bs, sel, elem, cycle);
484848b8605Smrg				if (r)
485848b8605Smrg					return r;
486848b8605Smrg			}
487848b8605Smrg		} else if (is_cfile(sel)) {
488848b8605Smrg			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
489848b8605Smrg			if (r)
490848b8605Smrg				return r;
491848b8605Smrg		}
492848b8605Smrg		/* No restrictions on PV, PS, literal or special constants. */
493848b8605Smrg	}
494848b8605Smrg	return 0;
495848b8605Smrg}
496848b8605Smrg
497b8e80941Smrgstatic int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
498848b8605Smrg			struct alu_bank_swizzle *bs, int bank_swizzle)
499848b8605Smrg{
500848b8605Smrg	int r, src, num_src, const_count, sel, elem, cycle;
501848b8605Smrg
502b8e80941Smrg	num_src = r600_bytecode_get_num_operands(alu);
503848b8605Smrg	for (const_count = 0, src = 0; src < num_src; ++src) {
504848b8605Smrg		sel = alu->src[src].sel;
505848b8605Smrg		elem = alu->src[src].chan;
506848b8605Smrg		if (is_const(sel)) { /* Any constant, including literal and inline constants. */
507848b8605Smrg			if (const_count >= 2)
508848b8605Smrg				/* More than two references to a constant in
509848b8605Smrg				 * transcendental operation. */
510848b8605Smrg				return -1;
511848b8605Smrg			else
512848b8605Smrg				const_count++;
513848b8605Smrg		}
514848b8605Smrg		if (is_cfile(sel)) {
515848b8605Smrg			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
516848b8605Smrg			if (r)
517848b8605Smrg				return r;
518848b8605Smrg		}
519848b8605Smrg	}
520848b8605Smrg	for (src = 0; src < num_src; ++src) {
521848b8605Smrg		sel = alu->src[src].sel;
522848b8605Smrg		elem = alu->src[src].chan;
523848b8605Smrg		if (is_gpr(sel)) {
524848b8605Smrg			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
525848b8605Smrg			if (cycle < const_count)
526848b8605Smrg				/* Cycle for GPR load conflicts with
527848b8605Smrg				 * constant load in transcendental operation. */
528848b8605Smrg				return -1;
529848b8605Smrg			r = reserve_gpr(bs, sel, elem, cycle);
530848b8605Smrg			if (r)
531848b8605Smrg				return r;
532848b8605Smrg		}
533848b8605Smrg		/* PV PS restrictions */
534848b8605Smrg		if (const_count && (sel == 254 || sel == 255)) {
535848b8605Smrg			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
536848b8605Smrg			if (cycle < const_count)
537848b8605Smrg				return -1;
538848b8605Smrg		}
539848b8605Smrg	}
540848b8605Smrg	return 0;
541848b8605Smrg}
542848b8605Smrg
543b8e80941Smrgstatic int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
544848b8605Smrg				      struct r600_bytecode_alu *slots[5])
545848b8605Smrg{
546848b8605Smrg	struct alu_bank_swizzle bs;
547848b8605Smrg	int bank_swizzle[5];
548848b8605Smrg	int i, r = 0, forced = 1;
549848b8605Smrg	boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
550848b8605Smrg	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
551848b8605Smrg
552848b8605Smrg	for (i = 0; i < max_slots; i++) {
553848b8605Smrg		if (slots[i]) {
554848b8605Smrg			if (slots[i]->bank_swizzle_force) {
555848b8605Smrg				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
556848b8605Smrg			} else {
557848b8605Smrg				forced = 0;
558848b8605Smrg			}
559848b8605Smrg		}
560848b8605Smrg
561848b8605Smrg		if (i < 4 && slots[i])
562848b8605Smrg			scalar_only = false;
563848b8605Smrg	}
564848b8605Smrg	if (forced)
565848b8605Smrg		return 0;
566848b8605Smrg
567848b8605Smrg	/* Just check every possible combination of bank swizzle.
568848b8605Smrg	 * Not very efficent, but works on the first try in most of the cases. */
569848b8605Smrg	for (i = 0; i < 4; i++)
570848b8605Smrg		if (!slots[i] || !slots[i]->bank_swizzle_force)
571848b8605Smrg			bank_swizzle[i] = SQ_ALU_VEC_012;
572848b8605Smrg		else
573848b8605Smrg			bank_swizzle[i] = slots[i]->bank_swizzle;
574848b8605Smrg
575848b8605Smrg	bank_swizzle[4] = SQ_ALU_SCL_210;
576848b8605Smrg	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
577848b8605Smrg
578848b8605Smrg		init_bank_swizzle(&bs);
579848b8605Smrg		if (scalar_only == false) {
580848b8605Smrg			for (i = 0; i < 4; i++) {
581848b8605Smrg				if (slots[i]) {
582848b8605Smrg					r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
583848b8605Smrg					if (r)
584848b8605Smrg						break;
585848b8605Smrg				}
586848b8605Smrg			}
587848b8605Smrg		} else
588848b8605Smrg			r = 0;
589848b8605Smrg
590848b8605Smrg		if (!r && max_slots == 5 && slots[4]) {
591848b8605Smrg			r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
592848b8605Smrg		}
593848b8605Smrg		if (!r) {
594848b8605Smrg			for (i = 0; i < max_slots; i++) {
595848b8605Smrg				if (slots[i])
596848b8605Smrg					slots[i]->bank_swizzle = bank_swizzle[i];
597848b8605Smrg			}
598848b8605Smrg			return 0;
599848b8605Smrg		}
600848b8605Smrg
601848b8605Smrg		if (scalar_only) {
602848b8605Smrg			bank_swizzle[4]++;
603848b8605Smrg		} else {
604848b8605Smrg			for (i = 0; i < max_slots; i++) {
605848b8605Smrg				if (!slots[i] || !slots[i]->bank_swizzle_force) {
606848b8605Smrg					bank_swizzle[i]++;
607848b8605Smrg					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
608848b8605Smrg						break;
609848b8605Smrg					else if (i < max_slots - 1)
610848b8605Smrg						bank_swizzle[i] = SQ_ALU_VEC_012;
611848b8605Smrg					else
612848b8605Smrg						return -1;
613848b8605Smrg				}
614848b8605Smrg			}
615848b8605Smrg		}
616848b8605Smrg	}
617848b8605Smrg
618848b8605Smrg	/* Couldn't find a working swizzle. */
619848b8605Smrg	return -1;
620848b8605Smrg}
621848b8605Smrg
622848b8605Smrgstatic int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
623848b8605Smrg				  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
624848b8605Smrg{
625848b8605Smrg	struct r600_bytecode_alu *prev[5];
626848b8605Smrg	int gpr[5], chan[5];
627848b8605Smrg	int i, j, r, src, num_src;
628848b8605Smrg	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
629848b8605Smrg
630848b8605Smrg	r = assign_alu_units(bc, alu_prev, prev);
631848b8605Smrg	if (r)
632848b8605Smrg		return r;
633848b8605Smrg
634848b8605Smrg	for (i = 0; i < max_slots; ++i) {
635b8e80941Smrg		if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
636b8e80941Smrg
637b8e80941Smrg			if (is_alu_64bit_inst(prev[i])) {
638b8e80941Smrg				gpr[i] = -1;
639b8e80941Smrg				continue;
640b8e80941Smrg			}
641b8e80941Smrg
642848b8605Smrg			gpr[i] = prev[i]->dst.sel;
643848b8605Smrg			/* cube writes more than PV.X */
644848b8605Smrg			if (is_alu_reduction_inst(bc, prev[i]))
645848b8605Smrg				chan[i] = 0;
646848b8605Smrg			else
647848b8605Smrg				chan[i] = prev[i]->dst.chan;
648848b8605Smrg		} else
649848b8605Smrg			gpr[i] = -1;
650848b8605Smrg	}
651848b8605Smrg
652848b8605Smrg	for (i = 0; i < max_slots; ++i) {
653848b8605Smrg		struct r600_bytecode_alu *alu = slots[i];
654b8e80941Smrg		if (!alu)
655848b8605Smrg			continue;
656848b8605Smrg
657b8e80941Smrg		if (is_alu_64bit_inst(alu))
658b8e80941Smrg			continue;
659b8e80941Smrg		num_src = r600_bytecode_get_num_operands(alu);
660848b8605Smrg		for (src = 0; src < num_src; ++src) {
661848b8605Smrg			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
662848b8605Smrg				continue;
663848b8605Smrg
664848b8605Smrg			if (bc->chip_class < CAYMAN) {
665848b8605Smrg				if (alu->src[src].sel == gpr[4] &&
666848b8605Smrg				    alu->src[src].chan == chan[4] &&
667848b8605Smrg				    alu_prev->pred_sel == alu->pred_sel) {
668848b8605Smrg					alu->src[src].sel = V_SQ_ALU_SRC_PS;
669848b8605Smrg					alu->src[src].chan = 0;
670848b8605Smrg					continue;
671848b8605Smrg				}
672848b8605Smrg			}
673848b8605Smrg
674848b8605Smrg			for (j = 0; j < 4; ++j) {
675848b8605Smrg				if (alu->src[src].sel == gpr[j] &&
676848b8605Smrg					alu->src[src].chan == j &&
677848b8605Smrg				      alu_prev->pred_sel == alu->pred_sel) {
678848b8605Smrg					alu->src[src].sel = V_SQ_ALU_SRC_PV;
679848b8605Smrg					alu->src[src].chan = chan[j];
680848b8605Smrg					break;
681848b8605Smrg				}
682848b8605Smrg			}
683848b8605Smrg		}
684848b8605Smrg	}
685848b8605Smrg
686848b8605Smrg	return 0;
687848b8605Smrg}
688848b8605Smrg
689b8e80941Smrgvoid r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
690848b8605Smrg{
691848b8605Smrg	switch(value) {
692848b8605Smrg	case 0:
693848b8605Smrg		*sel = V_SQ_ALU_SRC_0;
694848b8605Smrg		break;
695848b8605Smrg	case 1:
696848b8605Smrg		*sel = V_SQ_ALU_SRC_1_INT;
697848b8605Smrg		break;
698848b8605Smrg	case -1:
699848b8605Smrg		*sel = V_SQ_ALU_SRC_M_1_INT;
700848b8605Smrg		break;
701848b8605Smrg	case 0x3F800000: /* 1.0f */
702848b8605Smrg		*sel = V_SQ_ALU_SRC_1;
703848b8605Smrg		break;
704848b8605Smrg	case 0x3F000000: /* 0.5f */
705848b8605Smrg		*sel = V_SQ_ALU_SRC_0_5;
706848b8605Smrg		break;
707848b8605Smrg	case 0xBF800000: /* -1.0f */
708848b8605Smrg		*sel = V_SQ_ALU_SRC_1;
709b8e80941Smrg		*neg ^= !abs;
710848b8605Smrg		break;
711848b8605Smrg	case 0xBF000000: /* -0.5f */
712848b8605Smrg		*sel = V_SQ_ALU_SRC_0_5;
713b8e80941Smrg		*neg ^= !abs;
714848b8605Smrg		break;
715848b8605Smrg	default:
716848b8605Smrg		*sel = V_SQ_ALU_SRC_LITERAL;
717848b8605Smrg		break;
718848b8605Smrg	}
719848b8605Smrg}
720848b8605Smrg
721848b8605Smrg/* compute how many literal are needed */
722b8e80941Smrgstatic int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
723848b8605Smrg				 uint32_t literal[4], unsigned *nliteral)
724848b8605Smrg{
725b8e80941Smrg	unsigned num_src = r600_bytecode_get_num_operands(alu);
726848b8605Smrg	unsigned i, j;
727848b8605Smrg
728848b8605Smrg	for (i = 0; i < num_src; ++i) {
729848b8605Smrg		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
730848b8605Smrg			uint32_t value = alu->src[i].value;
731848b8605Smrg			unsigned found = 0;
732848b8605Smrg			for (j = 0; j < *nliteral; ++j) {
733848b8605Smrg				if (literal[j] == value) {
734848b8605Smrg					found = 1;
735848b8605Smrg					break;
736848b8605Smrg				}
737848b8605Smrg			}
738848b8605Smrg			if (!found) {
739848b8605Smrg				if (*nliteral >= 4)
740848b8605Smrg					return -EINVAL;
741848b8605Smrg				literal[(*nliteral)++] = value;
742848b8605Smrg			}
743848b8605Smrg		}
744848b8605Smrg	}
745848b8605Smrg	return 0;
746848b8605Smrg}
747848b8605Smrg
748b8e80941Smrgstatic void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
749b8e80941Smrg					      uint32_t literal[4], unsigned nliteral)
750848b8605Smrg{
751b8e80941Smrg	unsigned num_src = r600_bytecode_get_num_operands(alu);
752848b8605Smrg	unsigned i, j;
753848b8605Smrg
754848b8605Smrg	for (i = 0; i < num_src; ++i) {
755848b8605Smrg		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
756848b8605Smrg			uint32_t value = alu->src[i].value;
757848b8605Smrg			for (j = 0; j < nliteral; ++j) {
758848b8605Smrg				if (literal[j] == value) {
759848b8605Smrg					alu->src[i].chan = j;
760848b8605Smrg					break;
761848b8605Smrg				}
762848b8605Smrg			}
763848b8605Smrg		}
764848b8605Smrg	}
765848b8605Smrg}
766848b8605Smrg
/*
 * Try to merge the instruction group currently described by slots[] into the
 * previous instruction group, whose first instruction is alu_prev.
 *
 * The previous group is expanded into per-unit slots with assign_alu_units();
 * only a failure of that call is reported as a non-zero return value.  Every
 * "these groups cannot be merged" condition returns 0 before slots[] or the
 * instruction list have been rewritten.
 * NOTE(review): check_and_set_bank_swizzle() is run on the candidate result
 * before the decision is final; whether it modifies the instructions when it
 * fails is not visible from here.
 *
 * On a successful merge:
 *  - slots[] is overwritten with the merged slot assignment,
 *  - the merged instructions are re-linked, in slot order, at the tail of
 *    bc->cf_last->alu and only the final one keeps ->last set,
 *  - the dwords accounted for the previous group's literals are subtracted
 *    from bc->cf_last->ndw,
 *  - curr_bs_head points at the first merged instruction and the
 *    prev/prev2 group heads are shifted back by one group.
 */
767848b8605Smrgstatic int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
768848b8605Smrg			     struct r600_bytecode_alu *alu_prev)
769848b8605Smrg{
	/* prev[]: slot assignment of the previous group; result[]: merged candidate */
770848b8605Smrg	struct r600_bytecode_alu *prev[5];
771848b8605Smrg	struct r600_bytecode_alu *result[5] = { NULL };
772848b8605Smrg
	/* literal[] collects the literals of both groups together, prev_literal[]
	 * those of the previous group alone */
773848b8605Smrg	uint32_t literal[4], prev_literal[4];
774848b8605Smrg	unsigned nliteral = 0, prev_nliteral = 0;
775848b8605Smrg
776848b8605Smrg	int i, j, r, src, num_src;
777848b8605Smrg	int num_once_inst = 0;
778848b8605Smrg	int have_mova = 0, have_rel = 0;
	/* CAYMAN is handled with 4 slots, every other chip class with 5 */
779848b8605Smrg	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
780848b8605Smrg
781848b8605Smrg	r = assign_alu_units(bc, alu_prev, prev);
782848b8605Smrg	if (r)
783848b8605Smrg		return r;
784848b8605Smrg
	/* never merge when either group holds a predicated (pred_sel) or a
	 * "once" instruction */
785848b8605Smrg	for (i = 0; i < max_slots; ++i) {
786848b8605Smrg		if (prev[i]) {
787848b8605Smrg		      if (prev[i]->pred_sel)
788848b8605Smrg			      return 0;
789b8e80941Smrg		      if (is_alu_once_inst(prev[i]))
790848b8605Smrg			      return 0;
791848b8605Smrg		}
792848b8605Smrg		if (slots[i]) {
793848b8605Smrg			if (slots[i]->pred_sel)
794848b8605Smrg				return 0;
795b8e80941Smrg			if (is_alu_once_inst(slots[i]))
796848b8605Smrg				return 0;
797848b8605Smrg		}
798848b8605Smrg	}
799848b8605Smrg
800848b8605Smrg	for (i = 0; i < max_slots; ++i) {
801848b8605Smrg		struct r600_bytecode_alu *alu;
802848b8605Smrg
803848b8605Smrg		if (num_once_inst > 0)
804848b8605Smrg		   return 0;
805848b8605Smrg
806848b8605Smrg		/* check number of literals */
807848b8605Smrg		if (prev[i]) {
			/* a non-zero return means the combined literal set does not fit */
808b8e80941Smrg			if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
809848b8605Smrg				return 0;
			/* count the previous group's own literals so their dwords
			 * can be subtracted again once the merge is applied */
810b8e80941Smrg			if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
811848b8605Smrg				return 0;
			/* a MOVA and a relative access must not share one group */
812b8e80941Smrg			if (is_alu_mova_inst(prev[i])) {
813848b8605Smrg				if (have_rel)
814848b8605Smrg					return 0;
815848b8605Smrg				have_mova = 1;
816848b8605Smrg			}
817848b8605Smrg
818b8e80941Smrg			if (alu_uses_rel(prev[i])) {
819848b8605Smrg				if (have_mova) {
820848b8605Smrg					return 0;
821848b8605Smrg				}
822848b8605Smrg				have_rel = 1;
823848b8605Smrg			}
			/* previous groups that use LDS are never merged */
824b8e80941Smrg			if (alu_uses_lds(prev[i]))
825b8e80941Smrg				return 0;
826848b8605Smrg
827b8e80941Smrg			num_once_inst += is_alu_once_inst(prev[i]);
828848b8605Smrg		}
829b8e80941Smrg		if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
830848b8605Smrg			return 0;
831848b8605Smrg
832848b8605Smrg		/* Let's check used slots. */
833848b8605Smrg		if (prev[i] && !slots[i]) {
			/* only the previous group uses this slot: keep it as is */
834848b8605Smrg			result[i] = prev[i];
835848b8605Smrg			continue;
836848b8605Smrg		} else if (prev[i] && slots[i]) {
			/* both groups want this slot: one of the two has to move
			 * to slot 4, which must be unused by everybody */
837848b8605Smrg			if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
838848b8605Smrg				/* Trans unit is still free try to use it. */
839b8e80941Smrg				if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
840848b8605Smrg					result[i] = prev[i];
841848b8605Smrg					result[4] = slots[i];
842848b8605Smrg				} else if (is_alu_any_unit_inst(bc, prev[i])) {
					/* do not swap when both instructions write
					 * the same destination register */
843848b8605Smrg					if (slots[i]->dst.sel == prev[i]->dst.sel &&
844b8e80941Smrg					    alu_writes(slots[i]) &&
845b8e80941Smrg					    alu_writes(prev[i]))
846848b8605Smrg						return 0;
847848b8605Smrg
848848b8605Smrg					result[i] = slots[i];
849848b8605Smrg					result[4] = prev[i];
850848b8605Smrg				} else
851848b8605Smrg					return 0;
852848b8605Smrg			} else
853848b8605Smrg				return 0;
854848b8605Smrg		} else if(!slots[i]) {
			/* slot unused by both groups */
855848b8605Smrg			continue;
856848b8605Smrg		} else {
			/* only the new group uses this slot; refuse when it writes
			 * the same register and channel as the previous group's
			 * slot-4 instruction */
857848b8605Smrg			if (max_slots == 5 && slots[i] && prev[4] &&
858848b8605Smrg					slots[i]->dst.sel == prev[4]->dst.sel &&
859848b8605Smrg					slots[i]->dst.chan == prev[4]->dst.chan &&
860b8e80941Smrg					alu_writes(slots[i]) &&
861b8e80941Smrg					alu_writes(prev[4]))
862848b8605Smrg				return 0;
863848b8605Smrg
864848b8605Smrg			result[i] = slots[i];
865848b8605Smrg		}
866848b8605Smrg
		/* the remaining checks apply to the new group's instruction only */
867848b8605Smrg		alu = slots[i];
868b8e80941Smrg		num_once_inst += is_alu_once_inst(alu);
869848b8605Smrg
870848b8605Smrg		/* don't reschedule NOPs */
871b8e80941Smrg		if (is_nop_inst(alu))
872848b8605Smrg			return 0;
873848b8605Smrg
874b8e80941Smrg		if (is_alu_mova_inst(alu)) {
875848b8605Smrg			if (have_rel) {
876848b8605Smrg				return 0;
877848b8605Smrg			}
878848b8605Smrg			have_mova = 1;
879848b8605Smrg		}
880848b8605Smrg
881b8e80941Smrg		if (alu_uses_rel(alu)) {
882848b8605Smrg			if (have_mova) {
883848b8605Smrg				return 0;
884848b8605Smrg			}
885848b8605Smrg			have_rel = 1;
886848b8605Smrg		}
887848b8605Smrg
888b8e80941Smrg		if (alu->op == ALU_OP0_SET_CF_IDX0 ||
889b8e80941Smrg			alu->op == ALU_OP0_SET_CF_IDX1)
890b8e80941Smrg			return 0; /* data hazard with MOVA */
891b8e80941Smrg
892848b8605Smrg		/* Let's check source gprs */
893b8e80941Smrg		num_src = r600_bytecode_get_num_operands(alu);
894848b8605Smrg		for (src = 0; src < num_src; ++src) {
895848b8605Smrg
896848b8605Smrg			/* Constants don't matter. */
897848b8605Smrg			if (!is_gpr(alu->src[src].sel))
898848b8605Smrg				continue;
899848b8605Smrg
			/* the new instruction must not read a register that the
			 * previous group writes */
900848b8605Smrg			for (j = 0; j < max_slots; ++j) {
901b8e80941Smrg				if (!prev[j] || !alu_writes(prev[j]))
902848b8605Smrg					continue;
903848b8605Smrg
904848b8605Smrg				/* If it's relative then we can't determine which gpr is really used. */
905848b8605Smrg				if (prev[j]->dst.chan == alu->src[src].chan &&
906848b8605Smrg					(prev[j]->dst.sel == alu->src[src].sel ||
907848b8605Smrg					prev[j]->dst.rel || alu->src[src].rel))
908848b8605Smrg					return 0;
909848b8605Smrg			}
910848b8605Smrg		}
911848b8605Smrg	}
912848b8605Smrg
913848b8605Smrg	/* more than one PRED_ or KILL_ ? */
914848b8605Smrg	if (num_once_inst > 1)
915848b8605Smrg		return 0;
916848b8605Smrg
917848b8605Smrg	/* check if the result can still be swizzled */
918848b8605Smrg	r = check_and_set_bank_swizzle(bc, result);
919848b8605Smrg	if (r)
920848b8605Smrg		return 0;
921848b8605Smrg
922848b8605Smrg	/* looks like everything worked out right, apply the changes */
923848b8605Smrg
924848b8605Smrg	/* undo adding previous literals */
925848b8605Smrg	bc->cf_last->ndw -= align(prev_nliteral, 2);
926848b8605Smrg
927848b8605Smrg	/* sort instructions */
928848b8605Smrg	for (i = 0; i < max_slots; ++i) {
929848b8605Smrg		slots[i] = result[i];
930848b8605Smrg		if (result[i]) {
			/* move to the tail so the list ends up in slot order */
931848b8605Smrg			LIST_DEL(&result[i]->list);
932848b8605Smrg			result[i]->last = 0;
933848b8605Smrg			LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
934848b8605Smrg		}
935848b8605Smrg	}
936848b8605Smrg
937848b8605Smrg	/* determine new last instruction */
938848b8605Smrg	LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1;
939848b8605Smrg
940848b8605Smrg	/* determine new first instruction */
941848b8605Smrg	for (i = 0; i < max_slots; ++i) {
942848b8605Smrg		if (result[i]) {
943848b8605Smrg			bc->cf_last->curr_bs_head = result[i];
944848b8605Smrg			break;
945848b8605Smrg		}
946848b8605Smrg	}
947848b8605Smrg
	/* two groups became one: shift the recorded group heads back by one */
948848b8605Smrg	bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
949848b8605Smrg	bc->cf_last->prev2_bs_head = NULL;
950848b8605Smrg
951848b8605Smrg	return 0;
952848b8605Smrg}
953848b8605Smrg
954848b8605Smrg/* we'll keep kcache sets sorted by bank & addr */
955848b8605Smrgstatic int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
956848b8605Smrg		struct r600_bytecode_kcache *kcache,
957b8e80941Smrg		unsigned bank, unsigned line, unsigned index_mode)
958848b8605Smrg{
959848b8605Smrg	int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2;
960848b8605Smrg
961848b8605Smrg	for (i = 0; i < kcache_banks; i++) {
962848b8605Smrg		if (kcache[i].mode) {
963848b8605Smrg			int d;
964848b8605Smrg
965848b8605Smrg			if (kcache[i].bank < bank)
966848b8605Smrg				continue;
967848b8605Smrg
968848b8605Smrg			if ((kcache[i].bank == bank && kcache[i].addr > line+1) ||
969848b8605Smrg					kcache[i].bank > bank) {
970848b8605Smrg				/* try to insert new line */
971848b8605Smrg				if (kcache[kcache_banks-1].mode) {
972848b8605Smrg					/* all sets are in use */
973848b8605Smrg					return -ENOMEM;
974848b8605Smrg				}
975848b8605Smrg
976848b8605Smrg				memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache));
977848b8605Smrg				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
978848b8605Smrg				kcache[i].bank = bank;
979848b8605Smrg				kcache[i].addr = line;
980b8e80941Smrg				kcache[i].index_mode = index_mode;
981848b8605Smrg				return 0;
982848b8605Smrg			}
983848b8605Smrg
984848b8605Smrg			d = line - kcache[i].addr;
985848b8605Smrg
986848b8605Smrg			if (d == -1) {
987848b8605Smrg				kcache[i].addr--;
988848b8605Smrg				if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) {
989848b8605Smrg					/* we are prepending the line to the current set,
990848b8605Smrg					 * discarding the existing second line,
991848b8605Smrg					 * so we'll have to insert line+2 after it */
992848b8605Smrg					line += 2;
993848b8605Smrg					continue;
994848b8605Smrg				} else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) {
995848b8605Smrg					kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
996848b8605Smrg					return 0;
997848b8605Smrg				} else {
998848b8605Smrg					/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
999848b8605Smrg					return -ENOMEM;
1000848b8605Smrg				}
1001848b8605Smrg			} else if (d == 1) {
1002848b8605Smrg				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
1003848b8605Smrg				return 0;
1004848b8605Smrg			} else if (d == 0)
1005848b8605Smrg				return 0;
1006848b8605Smrg		} else { /* free kcache set - use it */
1007848b8605Smrg			kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
1008848b8605Smrg			kcache[i].bank = bank;
1009848b8605Smrg			kcache[i].addr = line;
1010b8e80941Smrg			kcache[i].index_mode = index_mode;
1011848b8605Smrg			return 0;
1012848b8605Smrg		}
1013848b8605Smrg	}
1014848b8605Smrg	return -ENOMEM;
1015848b8605Smrg}
1016848b8605Smrg
1017848b8605Smrgstatic int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
1018848b8605Smrg		struct r600_bytecode_kcache *kcache,
1019848b8605Smrg		struct r600_bytecode_alu *alu)
1020848b8605Smrg{
1021848b8605Smrg	int i, r;
1022848b8605Smrg
1023848b8605Smrg	for (i = 0; i < 3; i++) {
1024b8e80941Smrg		unsigned bank, line, sel = alu->src[i].sel, index_mode;
1025848b8605Smrg
1026848b8605Smrg		if (sel < 512)
1027848b8605Smrg			continue;
1028848b8605Smrg
1029848b8605Smrg		bank = alu->src[i].kc_bank;
1030b8e80941Smrg		assert(bank < R600_MAX_HW_CONST_BUFFERS);
1031848b8605Smrg		line = (sel-512)>>4;
1032b8e80941Smrg		index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
1033848b8605Smrg
1034b8e80941Smrg		if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode)))
1035848b8605Smrg			return r;
1036848b8605Smrg	}
1037848b8605Smrg	return 0;
1038848b8605Smrg}
1039848b8605Smrg
1040b8e80941Smrgstatic int r600_bytecode_assign_kcache_banks(
1041848b8605Smrg		struct r600_bytecode_alu *alu,
1042848b8605Smrg		struct r600_bytecode_kcache * kcache)
1043848b8605Smrg{
1044848b8605Smrg	int i, j;
1045848b8605Smrg
1046848b8605Smrg	/* Alter the src operands to refer to the kcache. */
1047848b8605Smrg	for (i = 0; i < 3; ++i) {
1048848b8605Smrg		static const unsigned int base[] = {128, 160, 256, 288};
1049848b8605Smrg		unsigned int line, sel = alu->src[i].sel, found = 0;
1050848b8605Smrg
1051848b8605Smrg		if (sel < 512)
1052848b8605Smrg			continue;
1053848b8605Smrg
1054848b8605Smrg		sel -= 512;
1055848b8605Smrg		line = sel>>4;
1056848b8605Smrg
1057848b8605Smrg		for (j = 0; j < 4 && !found; ++j) {
1058848b8605Smrg			switch (kcache[j].mode) {
1059848b8605Smrg			case V_SQ_CF_KCACHE_NOP:
1060848b8605Smrg			case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX:
1061848b8605Smrg				R600_ERR("unexpected kcache line mode\n");
1062848b8605Smrg				return -ENOMEM;
1063848b8605Smrg			default:
1064848b8605Smrg				if (kcache[j].bank == alu->src[i].kc_bank &&
1065848b8605Smrg						kcache[j].addr <= line &&
1066848b8605Smrg						line < kcache[j].addr + kcache[j].mode) {
1067848b8605Smrg					alu->src[i].sel = sel - (kcache[j].addr<<4);
1068848b8605Smrg					alu->src[i].sel += base[j];
1069848b8605Smrg					found=1;
1070848b8605Smrg			    }
1071848b8605Smrg			}
1072848b8605Smrg		}
1073848b8605Smrg	}
1074848b8605Smrg	return 0;
1075848b8605Smrg}
1076848b8605Smrg
1077848b8605Smrgstatic int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc,
1078848b8605Smrg		struct r600_bytecode_alu *alu,
1079848b8605Smrg		unsigned type)
1080848b8605Smrg{
1081848b8605Smrg	struct r600_bytecode_kcache kcache_sets[4];
1082848b8605Smrg	struct r600_bytecode_kcache *kcache = kcache_sets;
1083848b8605Smrg	int r;
1084848b8605Smrg
1085848b8605Smrg	memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache));
1086848b8605Smrg
1087848b8605Smrg	if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1088848b8605Smrg		/* can't alloc, need to start new clause */
1089848b8605Smrg		if ((r = r600_bytecode_add_cf(bc))) {
1090848b8605Smrg			return r;
1091848b8605Smrg		}
1092848b8605Smrg		bc->cf_last->op = type;
1093848b8605Smrg
1094848b8605Smrg		/* retry with the new clause */
1095848b8605Smrg		kcache = bc->cf_last->kcache;
1096848b8605Smrg		if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1097848b8605Smrg			/* can't alloc again- should never happen */
1098848b8605Smrg			return r;
1099848b8605Smrg		}
1100848b8605Smrg	} else {
1101848b8605Smrg		/* update kcache sets */
1102848b8605Smrg		memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache));
1103848b8605Smrg	}
1104848b8605Smrg
1105b8e80941Smrg	/* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */
1106b8e80941Smrg	if (kcache[2].mode != V_SQ_CF_KCACHE_NOP ||
1107b8e80941Smrg		kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) {
1108848b8605Smrg		if (bc->chip_class < EVERGREEN)
1109848b8605Smrg			return -ENOMEM;
1110848b8605Smrg		bc->cf_last->eg_alu_extended = 1;
1111848b8605Smrg	}
1112848b8605Smrg
1113848b8605Smrg	return 0;
1114848b8605Smrg}
1115848b8605Smrg
1116848b8605Smrgstatic int insert_nop_r6xx(struct r600_bytecode *bc)
1117848b8605Smrg{
1118848b8605Smrg	struct r600_bytecode_alu alu;
1119848b8605Smrg	int r, i;
1120848b8605Smrg
1121848b8605Smrg	for (i = 0; i < 4; i++) {
1122848b8605Smrg		memset(&alu, 0, sizeof(alu));
1123848b8605Smrg		alu.op = ALU_OP0_NOP;
1124848b8605Smrg		alu.src[0].chan = i;
1125848b8605Smrg		alu.dst.chan = i;
1126848b8605Smrg		alu.last = (i == 3);
1127848b8605Smrg		r = r600_bytecode_add_alu(bc, &alu);
1128848b8605Smrg		if (r)
1129848b8605Smrg			return r;
1130848b8605Smrg	}
1131848b8605Smrg	return 0;
1132848b8605Smrg}
1133848b8605Smrg
1134848b8605Smrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1135848b8605Smrgstatic int load_ar_r6xx(struct r600_bytecode *bc)
1136848b8605Smrg{
1137848b8605Smrg	struct r600_bytecode_alu alu;
1138848b8605Smrg	int r;
1139848b8605Smrg
1140848b8605Smrg	if (bc->ar_loaded)
1141848b8605Smrg		return 0;
1142848b8605Smrg
1143848b8605Smrg	/* hack to avoid making MOVA the last instruction in the clause */
1144848b8605Smrg	if ((bc->cf_last->ndw>>1) >= 110)
1145848b8605Smrg		bc->force_add_cf = 1;
1146848b8605Smrg
1147848b8605Smrg	memset(&alu, 0, sizeof(alu));
1148848b8605Smrg	alu.op = ALU_OP1_MOVA_GPR_INT;
1149848b8605Smrg	alu.src[0].sel = bc->ar_reg;
1150848b8605Smrg	alu.src[0].chan = bc->ar_chan;
1151848b8605Smrg	alu.last = 1;
1152848b8605Smrg	alu.index_mode = INDEX_MODE_LOOP;
1153848b8605Smrg	r = r600_bytecode_add_alu(bc, &alu);
1154848b8605Smrg	if (r)
1155848b8605Smrg		return r;
1156848b8605Smrg
1157848b8605Smrg	/* no requirement to set uses waterfall on MOVA_GPR_INT */
1158848b8605Smrg	bc->ar_loaded = 1;
1159848b8605Smrg	return 0;
1160848b8605Smrg}
1161848b8605Smrg
1162848b8605Smrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1163848b8605Smrgstatic int load_ar(struct r600_bytecode *bc)
1164848b8605Smrg{
1165848b8605Smrg	struct r600_bytecode_alu alu;
1166848b8605Smrg	int r;
1167848b8605Smrg
1168848b8605Smrg	if (bc->ar_handling)
1169848b8605Smrg		return load_ar_r6xx(bc);
1170848b8605Smrg
1171848b8605Smrg	if (bc->ar_loaded)
1172848b8605Smrg		return 0;
1173848b8605Smrg
1174848b8605Smrg	/* hack to avoid making MOVA the last instruction in the clause */
1175848b8605Smrg	if ((bc->cf_last->ndw>>1) >= 110)
1176848b8605Smrg		bc->force_add_cf = 1;
1177848b8605Smrg
1178848b8605Smrg	memset(&alu, 0, sizeof(alu));
1179848b8605Smrg	alu.op = ALU_OP1_MOVA_INT;
1180848b8605Smrg	alu.src[0].sel = bc->ar_reg;
1181848b8605Smrg	alu.src[0].chan = bc->ar_chan;
1182848b8605Smrg	alu.last = 1;
1183848b8605Smrg	r = r600_bytecode_add_alu(bc, &alu);
1184848b8605Smrg	if (r)
1185848b8605Smrg		return r;
1186848b8605Smrg
1187848b8605Smrg	bc->cf_last->r6xx_uses_waterfall = 1;
1188848b8605Smrg	bc->ar_loaded = 1;
1189848b8605Smrg	return 0;
1190848b8605Smrg}
1191848b8605Smrg
1192848b8605Smrgint r600_bytecode_add_alu_type(struct r600_bytecode *bc,
1193848b8605Smrg		const struct r600_bytecode_alu *alu, unsigned type)
1194848b8605Smrg{
1195848b8605Smrg	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
1196848b8605Smrg	struct r600_bytecode_alu *lalu;
1197848b8605Smrg	int i, r;
1198848b8605Smrg
1199b8e80941Smrg	if (!nalu)
1200848b8605Smrg		return -ENOMEM;
1201848b8605Smrg	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
1202848b8605Smrg
1203b8e80941Smrg	if (alu->is_op3) {
1204b8e80941Smrg		/* will fail later since alu does not support it. */
1205b8e80941Smrg		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1206b8e80941Smrg	}
1207b8e80941Smrg
1208848b8605Smrg	if (bc->cf_last != NULL && bc->cf_last->op != type) {
1209848b8605Smrg		/* check if we could add it anyway */
1210848b8605Smrg		if (bc->cf_last->op == CF_OP_ALU &&
1211848b8605Smrg			type == CF_OP_ALU_PUSH_BEFORE) {
1212848b8605Smrg			LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
1213848b8605Smrg				if (lalu->execute_mask) {
1214848b8605Smrg					bc->force_add_cf = 1;
1215848b8605Smrg					break;
1216848b8605Smrg				}
1217848b8605Smrg			}
1218848b8605Smrg		} else
1219848b8605Smrg			bc->force_add_cf = 1;
1220848b8605Smrg	}
1221848b8605Smrg
1222848b8605Smrg	/* cf can contains only alu or only vtx or only tex */
1223848b8605Smrg	if (bc->cf_last == NULL || bc->force_add_cf) {
1224848b8605Smrg		r = r600_bytecode_add_cf(bc);
1225848b8605Smrg		if (r) {
1226848b8605Smrg			free(nalu);
1227848b8605Smrg			return r;
1228848b8605Smrg		}
1229848b8605Smrg	}
1230848b8605Smrg	bc->cf_last->op = type;
1231848b8605Smrg
1232b8e80941Smrg	/* Load index register if required */
1233b8e80941Smrg	if (bc->chip_class >= EVERGREEN) {
1234b8e80941Smrg		for (i = 0; i < 3; i++)
1235b8e80941Smrg			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
1236b8e80941Smrg				egcm_load_index_reg(bc, 0, true);
1237b8e80941Smrg	}
1238b8e80941Smrg
1239848b8605Smrg	/* Check AR usage and load it if required */
1240848b8605Smrg	for (i = 0; i < 3; i++)
1241848b8605Smrg		if (nalu->src[i].rel && !bc->ar_loaded)
1242848b8605Smrg			load_ar(bc);
1243848b8605Smrg
1244848b8605Smrg	if (nalu->dst.rel && !bc->ar_loaded)
1245848b8605Smrg		load_ar(bc);
1246848b8605Smrg
1247848b8605Smrg	/* Setup the kcache for this ALU instruction. This will start a new
1248848b8605Smrg	 * ALU clause if needed. */
1249848b8605Smrg	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
1250848b8605Smrg		free(nalu);
1251848b8605Smrg		return r;
1252848b8605Smrg	}
1253848b8605Smrg
1254848b8605Smrg	if (!bc->cf_last->curr_bs_head) {
1255848b8605Smrg		bc->cf_last->curr_bs_head = nalu;
1256848b8605Smrg	}
1257848b8605Smrg	/* number of gpr == the last gpr used in any alu */
1258848b8605Smrg	for (i = 0; i < 3; i++) {
1259848b8605Smrg		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
1260848b8605Smrg			bc->ngpr = nalu->src[i].sel + 1;
1261848b8605Smrg		}
1262848b8605Smrg		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
1263848b8605Smrg			r600_bytecode_special_constants(nalu->src[i].value,
1264b8e80941Smrg				&nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
1265848b8605Smrg	}
1266848b8605Smrg	if (nalu->dst.sel >= bc->ngpr) {
1267848b8605Smrg		bc->ngpr = nalu->dst.sel + 1;
1268848b8605Smrg	}
1269848b8605Smrg	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
1270848b8605Smrg	/* each alu use 2 dwords */
1271848b8605Smrg	bc->cf_last->ndw += 2;
1272848b8605Smrg	bc->ndw += 2;
1273848b8605Smrg
1274848b8605Smrg	/* process cur ALU instructions for bank swizzle */
1275848b8605Smrg	if (nalu->last) {
1276848b8605Smrg		uint32_t literal[4];
1277848b8605Smrg		unsigned nliteral;
1278848b8605Smrg		struct r600_bytecode_alu *slots[5];
1279848b8605Smrg		int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
1280848b8605Smrg		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
1281848b8605Smrg		if (r)
1282848b8605Smrg			return r;
1283848b8605Smrg
1284848b8605Smrg		if (bc->cf_last->prev_bs_head) {
1285848b8605Smrg			r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
1286848b8605Smrg			if (r)
1287848b8605Smrg				return r;
1288848b8605Smrg		}
1289848b8605Smrg
1290848b8605Smrg		if (bc->cf_last->prev_bs_head) {
1291848b8605Smrg			r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
1292848b8605Smrg			if (r)
1293848b8605Smrg				return r;
1294848b8605Smrg		}
1295848b8605Smrg
1296848b8605Smrg		r = check_and_set_bank_swizzle(bc, slots);
1297848b8605Smrg		if (r)
1298848b8605Smrg			return r;
1299848b8605Smrg
1300848b8605Smrg		for (i = 0, nliteral = 0; i < max_slots; i++) {
1301848b8605Smrg			if (slots[i]) {
1302b8e80941Smrg				r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
1303848b8605Smrg				if (r)
1304848b8605Smrg					return r;
1305848b8605Smrg			}
1306848b8605Smrg		}
1307848b8605Smrg		bc->cf_last->ndw += align(nliteral, 2);
1308848b8605Smrg
1309848b8605Smrg		/* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
1310848b8605Smrg		 * worst case */
1311848b8605Smrg		if ((bc->cf_last->ndw >> 1) >= 120) {
1312848b8605Smrg			bc->force_add_cf = 1;
1313848b8605Smrg		}
1314848b8605Smrg
1315848b8605Smrg		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
1316848b8605Smrg		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
1317848b8605Smrg		bc->cf_last->curr_bs_head = NULL;
1318848b8605Smrg	}
1319848b8605Smrg
1320848b8605Smrg	if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
1321848b8605Smrg		insert_nop_r6xx(bc);
1322848b8605Smrg
1323b8e80941Smrg	/* Might need to insert spill write ops after current clause */
1324b8e80941Smrg	if (nalu->last && bc->n_pending_outputs) {
1325b8e80941Smrg		while (bc->n_pending_outputs) {
1326b8e80941Smrg			r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
1327b8e80941Smrg			if (r)
1328b8e80941Smrg				return r;
1329b8e80941Smrg		}
1330b8e80941Smrg	}
1331b8e80941Smrg
1332848b8605Smrg	return 0;
1333848b8605Smrg}
1334848b8605Smrg
1335848b8605Smrgint r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
1336848b8605Smrg{
1337848b8605Smrg	return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
1338848b8605Smrg}
1339848b8605Smrg
1340848b8605Smrgstatic unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
1341848b8605Smrg{
1342848b8605Smrg	switch (bc->chip_class) {
1343848b8605Smrg	case R600:
1344848b8605Smrg		return 8;
1345848b8605Smrg
1346848b8605Smrg	case R700:
1347848b8605Smrg	case EVERGREEN:
1348848b8605Smrg	case CAYMAN:
1349848b8605Smrg		return 16;
1350848b8605Smrg
1351848b8605Smrg	default:
1352848b8605Smrg		R600_ERR("Unknown chip class %d.\n", bc->chip_class);
1353848b8605Smrg		return 8;
1354848b8605Smrg	}
1355848b8605Smrg}
1356848b8605Smrg
1357848b8605Smrgstatic inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
1358848b8605Smrg{
1359848b8605Smrg	return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
1360b8e80941Smrg		 bc->cf_last->op != CF_OP_GDS &&
1361b8e80941Smrg		 (bc->chip_class == CAYMAN ||
1362b8e80941Smrg		  bc->cf_last->op != CF_OP_TEX));
1363848b8605Smrg}
1364848b8605Smrg
1365b8e80941Smrgstatic int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
1366b8e80941Smrg					  bool use_tc)
1367848b8605Smrg{
1368848b8605Smrg	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
1369848b8605Smrg	int r;
1370848b8605Smrg
1371b8e80941Smrg	if (!nvtx)
1372848b8605Smrg		return -ENOMEM;
1373848b8605Smrg	memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
1374848b8605Smrg
1375b8e80941Smrg	/* Load index register if required */
1376b8e80941Smrg	if (bc->chip_class >= EVERGREEN) {
1377b8e80941Smrg		if (vtx->buffer_index_mode)
1378b8e80941Smrg			egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false);
1379b8e80941Smrg	}
1380b8e80941Smrg
1381848b8605Smrg	/* cf can contains only alu or only vtx or only tex */
1382848b8605Smrg	if (bc->cf_last == NULL ||
1383848b8605Smrg	    last_inst_was_not_vtx_fetch(bc) ||
1384848b8605Smrg	    bc->force_add_cf) {
1385848b8605Smrg		r = r600_bytecode_add_cf(bc);
1386848b8605Smrg		if (r) {
1387848b8605Smrg			free(nvtx);
1388848b8605Smrg			return r;
1389848b8605Smrg		}
1390848b8605Smrg		switch (bc->chip_class) {
1391848b8605Smrg		case R600:
1392848b8605Smrg		case R700:
1393848b8605Smrg			bc->cf_last->op = CF_OP_VTX;
1394848b8605Smrg			break;
1395b8e80941Smrg		case EVERGREEN:
1396b8e80941Smrg			if (use_tc)
1397b8e80941Smrg				bc->cf_last->op = CF_OP_TEX;
1398b8e80941Smrg			else
1399b8e80941Smrg				bc->cf_last->op = CF_OP_VTX;
1400b8e80941Smrg			break;
1401848b8605Smrg		case CAYMAN:
1402848b8605Smrg			bc->cf_last->op = CF_OP_TEX;
1403848b8605Smrg			break;
1404848b8605Smrg		default:
1405848b8605Smrg			R600_ERR("Unknown chip class %d.\n", bc->chip_class);
1406848b8605Smrg			free(nvtx);
1407848b8605Smrg			return -EINVAL;
1408848b8605Smrg		}
1409848b8605Smrg	}
1410848b8605Smrg	LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
1411848b8605Smrg	/* each fetch use 4 dwords */
1412848b8605Smrg	bc->cf_last->ndw += 4;
1413848b8605Smrg	bc->ndw += 4;
1414848b8605Smrg	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1415848b8605Smrg		bc->force_add_cf = 1;
1416848b8605Smrg
1417848b8605Smrg	bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1);
1418848b8605Smrg	bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1);
1419848b8605Smrg
1420848b8605Smrg	return 0;
1421848b8605Smrg}
1422848b8605Smrg
/*
 * Append vertex fetch @vtx to @bc with use_tc = false.
 * Returns the result of r600_bytecode_add_vtx_internal().
 */
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	const bool use_tc = false;

	return r600_bytecode_add_vtx_internal(bc, vtx, use_tc);
}
1427b8e80941Smrg
/*
 * Append vertex fetch @vtx to @bc with use_tc = true.
 * Returns the result of r600_bytecode_add_vtx_internal().
 */
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	const bool use_tc = true;

	return r600_bytecode_add_vtx_internal(bc, vtx, use_tc);
}
1432b8e80941Smrg
1433848b8605Smrgint r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
1434848b8605Smrg{
1435848b8605Smrg	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
1436848b8605Smrg	int r;
1437848b8605Smrg
1438b8e80941Smrg	if (!ntex)
1439848b8605Smrg		return -ENOMEM;
1440848b8605Smrg	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));
1441848b8605Smrg
1442b8e80941Smrg	/* Load index register if required */
1443b8e80941Smrg	if (bc->chip_class >= EVERGREEN) {
1444b8e80941Smrg		if (tex->sampler_index_mode || tex->resource_index_mode)
1445b8e80941Smrg			egcm_load_index_reg(bc, 1, false);
1446b8e80941Smrg	}
1447b8e80941Smrg
1448848b8605Smrg	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
1449848b8605Smrg	if (bc->cf_last != NULL &&
1450848b8605Smrg		bc->cf_last->op == CF_OP_TEX) {
1451848b8605Smrg		struct r600_bytecode_tex *ttex;
1452848b8605Smrg		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
1453848b8605Smrg			if (ttex->dst_gpr == ntex->src_gpr) {
1454848b8605Smrg				bc->force_add_cf = 1;
1455848b8605Smrg				break;
1456848b8605Smrg			}
1457848b8605Smrg		}
1458848b8605Smrg		/* slight hack to make gradients always go into same cf */
1459848b8605Smrg		if (ntex->op == FETCH_OP_SET_GRADIENTS_H)
1460848b8605Smrg			bc->force_add_cf = 1;
1461848b8605Smrg	}
1462848b8605Smrg
1463848b8605Smrg	/* cf can contains only alu or only vtx or only tex */
1464848b8605Smrg	if (bc->cf_last == NULL ||
1465848b8605Smrg		bc->cf_last->op != CF_OP_TEX ||
1466848b8605Smrg	        bc->force_add_cf) {
1467848b8605Smrg		r = r600_bytecode_add_cf(bc);
1468848b8605Smrg		if (r) {
1469848b8605Smrg			free(ntex);
1470848b8605Smrg			return r;
1471848b8605Smrg		}
1472848b8605Smrg		bc->cf_last->op = CF_OP_TEX;
1473848b8605Smrg	}
1474848b8605Smrg	if (ntex->src_gpr >= bc->ngpr) {
1475848b8605Smrg		bc->ngpr = ntex->src_gpr + 1;
1476848b8605Smrg	}
1477848b8605Smrg	if (ntex->dst_gpr >= bc->ngpr) {
1478848b8605Smrg		bc->ngpr = ntex->dst_gpr + 1;
1479848b8605Smrg	}
1480848b8605Smrg	LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
1481848b8605Smrg	/* each texture fetch use 4 dwords */
1482848b8605Smrg	bc->cf_last->ndw += 4;
1483848b8605Smrg	bc->ndw += 4;
1484848b8605Smrg	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1485848b8605Smrg		bc->force_add_cf = 1;
1486848b8605Smrg	return 0;
1487848b8605Smrg}
1488848b8605Smrg
1489b8e80941Smrgint r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds)
1490b8e80941Smrg{
1491b8e80941Smrg	struct r600_bytecode_gds *ngds = r600_bytecode_gds();
1492b8e80941Smrg	int r;
1493b8e80941Smrg
1494b8e80941Smrg	if (ngds == NULL)
1495b8e80941Smrg		return -ENOMEM;
1496b8e80941Smrg	memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
1497b8e80941Smrg
1498b8e80941Smrg	if (bc->chip_class >= EVERGREEN) {
1499b8e80941Smrg		if (gds->uav_index_mode)
1500b8e80941Smrg			egcm_load_index_reg(bc, gds->uav_index_mode - 1, false);
1501b8e80941Smrg	}
1502b8e80941Smrg
1503b8e80941Smrg	if (bc->cf_last == NULL ||
1504b8e80941Smrg	    bc->cf_last->op != CF_OP_GDS ||
1505b8e80941Smrg	    bc->force_add_cf) {
1506b8e80941Smrg		r = r600_bytecode_add_cf(bc);
1507b8e80941Smrg		if (r) {
1508b8e80941Smrg			free(ngds);
1509b8e80941Smrg			return r;
1510b8e80941Smrg		}
1511b8e80941Smrg		bc->cf_last->op = CF_OP_GDS;
1512b8e80941Smrg	}
1513b8e80941Smrg
1514b8e80941Smrg	LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds);
1515b8e80941Smrg	bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
1516b8e80941Smrg	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1517b8e80941Smrg		bc->force_add_cf = 1;
1518b8e80941Smrg	return 0;
1519b8e80941Smrg}
1520b8e80941Smrg
1521848b8605Smrgint r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
1522848b8605Smrg{
1523848b8605Smrg	int r;
1524b8e80941Smrg
1525b8e80941Smrg	/* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
1526b8e80941Smrg	if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
1527b8e80941Smrg		bc->need_wait_ack = false;
1528b8e80941Smrg		r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
1529b8e80941Smrg	}
1530b8e80941Smrg
1531848b8605Smrg	r = r600_bytecode_add_cf(bc);
1532848b8605Smrg	if (r)
1533848b8605Smrg		return r;
1534848b8605Smrg
1535848b8605Smrg	bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1536848b8605Smrg	bc->cf_last->op = op;
1537848b8605Smrg	return 0;
1538848b8605Smrg}
1539848b8605Smrg
1540848b8605Smrgint cm_bytecode_add_cf_end(struct r600_bytecode *bc)
1541848b8605Smrg{
1542848b8605Smrg	return r600_bytecode_add_cfinst(bc, CF_OP_CF_END);
1543848b8605Smrg}
1544848b8605Smrg
1545848b8605Smrg/* common to all 3 families */
1546848b8605Smrgstatic int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
1547848b8605Smrg{
1548b8e80941Smrg	if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
1549b8e80941Smrg		return r700_bytecode_fetch_mem_build(bc, vtx, id);
1550b8e80941Smrg	bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
1551b8e80941Smrg			S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
1552848b8605Smrg			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
1553848b8605Smrg			S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
1554848b8605Smrg			S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
1555848b8605Smrg	if (bc->chip_class < CAYMAN)
1556848b8605Smrg		bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
1557848b8605Smrg	id++;
1558848b8605Smrg	bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
1559848b8605Smrg				S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
1560848b8605Smrg				S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
1561848b8605Smrg				S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
1562848b8605Smrg				S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
1563848b8605Smrg				S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
1564848b8605Smrg				S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
1565848b8605Smrg				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
1566848b8605Smrg				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
1567848b8605Smrg				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
1568848b8605Smrg	bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
1569848b8605Smrg				S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
1570b8e80941Smrg	if (bc->chip_class >= EVERGREEN)
1571b8e80941Smrg		bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode);
1572848b8605Smrg	if (bc->chip_class < CAYMAN)
1573848b8605Smrg		bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
1574848b8605Smrg	id++;
1575848b8605Smrg	bc->bytecode[id++] = 0;
1576848b8605Smrg	return 0;
1577848b8605Smrg}
1578848b8605Smrg
1579848b8605Smrg/* common to all 3 families */
1580848b8605Smrgstatic int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
1581848b8605Smrg{
1582b8e80941Smrg	bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST(
1583848b8605Smrg					r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) |
1584848b8605Smrg			    EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
1585848b8605Smrg				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
1586848b8605Smrg				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
1587848b8605Smrg				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
1588b8e80941Smrg	if (bc->chip_class >= EVERGREEN)
1589b8e80941Smrg		bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode);
1590b8e80941Smrg				((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode)
1591b8e80941Smrg	id++;
1592848b8605Smrg	bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
1593848b8605Smrg				S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
1594848b8605Smrg				S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
1595848b8605Smrg				S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
1596848b8605Smrg				S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
1597848b8605Smrg				S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
1598848b8605Smrg				S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
1599848b8605Smrg				S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
1600848b8605Smrg				S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
1601848b8605Smrg				S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
1602848b8605Smrg				S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
1603848b8605Smrg	bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
1604848b8605Smrg				S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
1605848b8605Smrg				S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
1606848b8605Smrg				S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
1607848b8605Smrg				S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
1608848b8605Smrg				S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
1609848b8605Smrg				S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
1610848b8605Smrg				S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
1611848b8605Smrg	bc->bytecode[id++] = 0;
1612848b8605Smrg	return 0;
1613848b8605Smrg}
1614848b8605Smrg
1615848b8605Smrg/* r600 only, r700/eg bits in r700_asm.c */
1616848b8605Smrgstatic int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
1617848b8605Smrg{
1618848b8605Smrg	unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);
1619848b8605Smrg
1620848b8605Smrg	/* don't replace gpr by pv or ps for destination register */
1621848b8605Smrg	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
1622848b8605Smrg				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
1623848b8605Smrg				S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
1624848b8605Smrg				S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
1625848b8605Smrg				S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
1626848b8605Smrg				S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
1627848b8605Smrg				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
1628848b8605Smrg				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
1629848b8605Smrg				S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
1630848b8605Smrg				S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
1631848b8605Smrg				S_SQ_ALU_WORD0_LAST(alu->last);
1632848b8605Smrg
1633848b8605Smrg	if (alu->is_op3) {
1634b8e80941Smrg		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1635848b8605Smrg		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1636848b8605Smrg					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1637848b8605Smrg					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1638848b8605Smrg					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1639848b8605Smrg					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
1640848b8605Smrg					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
1641848b8605Smrg					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
1642848b8605Smrg					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
1643848b8605Smrg					S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) |
1644848b8605Smrg					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
1645848b8605Smrg	} else {
1646848b8605Smrg		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1647848b8605Smrg					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1648848b8605Smrg					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1649848b8605Smrg					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1650848b8605Smrg					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
1651848b8605Smrg					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
1652848b8605Smrg					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
1653848b8605Smrg					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
1654848b8605Smrg					S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) |
1655848b8605Smrg					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
1656848b8605Smrg					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
1657848b8605Smrg					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
1658848b8605Smrg	}
1659848b8605Smrg	return 0;
1660848b8605Smrg}
1661848b8605Smrg
1662848b8605Smrgstatic void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
1663848b8605Smrg{
1664848b8605Smrg	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
1665848b8605Smrg	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
1666848b8605Smrg			S_SQ_CF_WORD1_BARRIER(1) |
1667b8e80941Smrg			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
1668b8e80941Smrg			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1669848b8605Smrg}
1670848b8605Smrg
1671848b8605Smrg/* common for r600/r700 - eg in eg_asm.c */
1672848b8605Smrgstatic int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
1673848b8605Smrg{
1674848b8605Smrg	unsigned id = cf->id;
1675848b8605Smrg	const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1676848b8605Smrg	unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op);
1677848b8605Smrg
1678848b8605Smrg
1679848b8605Smrg	if (cf->op == CF_NATIVE) {
1680848b8605Smrg		bc->bytecode[id++] = cf->isa[0];
1681848b8605Smrg		bc->bytecode[id++] = cf->isa[1];
1682848b8605Smrg	} else if (cfop->flags & CF_ALU) {
1683848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
1684848b8605Smrg			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
1685848b8605Smrg			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
1686848b8605Smrg			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
1687848b8605Smrg
1688848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) |
1689848b8605Smrg			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
1690848b8605Smrg			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
1691848b8605Smrg			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
1692848b8605Smrg					S_SQ_CF_ALU_WORD1_BARRIER(1) |
1693848b8605Smrg					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) |
1694848b8605Smrg					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
1695848b8605Smrg	} else if (cfop->flags & CF_FETCH) {
1696848b8605Smrg		if (bc->chip_class == R700)
1697848b8605Smrg			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1698848b8605Smrg		else
1699848b8605Smrg			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1700848b8605Smrg	} else if (cfop->flags & CF_EXP) {
1701848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1702848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1703848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1704848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1705848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1706848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1707848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
1708848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
1709848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
1710848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
1711848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1712848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1713848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
1714848b8605Smrg	} else if (cfop->flags & CF_MEM) {
1715848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1716848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1717848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1718848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1719848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1720848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1721848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1722848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1723848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
1724848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
1725848b8605Smrg			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
1726848b8605Smrg	} else {
1727848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
1728848b8605Smrg		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
1729848b8605Smrg					S_SQ_CF_WORD1_BARRIER(1) |
1730848b8605Smrg			                S_SQ_CF_WORD1_COND(cf->cond) |
1731848b8605Smrg			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
1732848b8605Smrg					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1733848b8605Smrg	}
1734848b8605Smrg	return 0;
1735848b8605Smrg}
1736848b8605Smrg
1737848b8605Smrgint r600_bytecode_build(struct r600_bytecode *bc)
1738848b8605Smrg{
1739848b8605Smrg	struct r600_bytecode_cf *cf;
1740848b8605Smrg	struct r600_bytecode_alu *alu;
1741848b8605Smrg	struct r600_bytecode_vtx *vtx;
1742848b8605Smrg	struct r600_bytecode_tex *tex;
1743b8e80941Smrg	struct r600_bytecode_gds *gds;
1744848b8605Smrg	uint32_t literal[4];
1745848b8605Smrg	unsigned nliteral;
1746848b8605Smrg	unsigned addr;
1747848b8605Smrg	int i, r;
1748848b8605Smrg
1749b8e80941Smrg	if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
1750b8e80941Smrg		if (bc->stack.max_entries)
1751b8e80941Smrg			bc->nstack = bc->stack.max_entries;
1752b8e80941Smrg		else if (bc->type == PIPE_SHADER_VERTEX ||
1753b8e80941Smrg			 bc->type == PIPE_SHADER_TESS_EVAL ||
1754b8e80941Smrg			 bc->type == PIPE_SHADER_TESS_CTRL)
1755b8e80941Smrg			bc->nstack = 1;
1756848b8605Smrg	}
1757848b8605Smrg
1758848b8605Smrg	/* first path compute addr of each CF block */
1759848b8605Smrg	/* addr start after all the CF instructions */
1760848b8605Smrg	addr = bc->cf_last->id + 2;
1761848b8605Smrg	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1762848b8605Smrg		if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
1763848b8605Smrg			addr += 3;
1764848b8605Smrg			addr &= 0xFFFFFFFCUL;
1765848b8605Smrg		}
1766848b8605Smrg		cf->addr = addr;
1767848b8605Smrg		addr += cf->ndw;
1768848b8605Smrg		bc->ndw = cf->addr + cf->ndw;
1769848b8605Smrg	}
1770848b8605Smrg	free(bc->bytecode);
1771b8e80941Smrg	bc->bytecode = calloc(4, bc->ndw);
1772848b8605Smrg	if (bc->bytecode == NULL)
1773848b8605Smrg		return -ENOMEM;
1774848b8605Smrg	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1775848b8605Smrg		const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1776848b8605Smrg		addr = cf->addr;
1777848b8605Smrg		if (bc->chip_class >= EVERGREEN)
1778848b8605Smrg			r = eg_bytecode_cf_build(bc, cf);
1779848b8605Smrg		else
1780848b8605Smrg			r = r600_bytecode_cf_build(bc, cf);
1781848b8605Smrg		if (r)
1782848b8605Smrg			return r;
1783848b8605Smrg		if (cfop->flags & CF_ALU) {
1784848b8605Smrg			nliteral = 0;
1785848b8605Smrg			memset(literal, 0, sizeof(literal));
1786848b8605Smrg			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1787b8e80941Smrg				r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
1788848b8605Smrg				if (r)
1789848b8605Smrg					return r;
1790b8e80941Smrg				r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
1791b8e80941Smrg				r600_bytecode_assign_kcache_banks(alu, cf->kcache);
1792848b8605Smrg
1793848b8605Smrg				switch(bc->chip_class) {
1794848b8605Smrg				case R600:
1795848b8605Smrg					r = r600_bytecode_alu_build(bc, alu, addr);
1796848b8605Smrg					break;
1797848b8605Smrg				case R700:
1798848b8605Smrg					r = r700_bytecode_alu_build(bc, alu, addr);
1799848b8605Smrg					break;
1800b8e80941Smrg				case EVERGREEN:
1801b8e80941Smrg				case CAYMAN:
1802b8e80941Smrg					r = eg_bytecode_alu_build(bc, alu, addr);
1803b8e80941Smrg					break;
1804848b8605Smrg				default:
1805848b8605Smrg					R600_ERR("unknown chip class %d.\n", bc->chip_class);
1806848b8605Smrg					return -EINVAL;
1807848b8605Smrg				}
1808848b8605Smrg				if (r)
1809848b8605Smrg					return r;
1810848b8605Smrg				addr += 2;
1811848b8605Smrg				if (alu->last) {
1812848b8605Smrg					for (i = 0; i < align(nliteral, 2); ++i) {
1813848b8605Smrg						bc->bytecode[addr++] = literal[i];
1814848b8605Smrg					}
1815848b8605Smrg					nliteral = 0;
1816848b8605Smrg					memset(literal, 0, sizeof(literal));
1817848b8605Smrg				}
1818848b8605Smrg			}
1819848b8605Smrg		} else if (cf->op == CF_OP_VTX) {
1820848b8605Smrg			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1821848b8605Smrg				r = r600_bytecode_vtx_build(bc, vtx, addr);
1822848b8605Smrg				if (r)
1823848b8605Smrg					return r;
1824848b8605Smrg				addr += 4;
1825848b8605Smrg			}
1826b8e80941Smrg		} else if (cf->op == CF_OP_GDS) {
1827b8e80941Smrg			assert(bc->chip_class >= EVERGREEN);
1828b8e80941Smrg			LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
1829b8e80941Smrg				r = eg_bytecode_gds_build(bc, gds, addr);
1830b8e80941Smrg				if (r)
1831b8e80941Smrg					return r;
1832b8e80941Smrg				addr += 4;
1833b8e80941Smrg			}
1834848b8605Smrg		} else if (cf->op == CF_OP_TEX) {
1835848b8605Smrg			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1836848b8605Smrg				assert(bc->chip_class >= EVERGREEN);
1837848b8605Smrg				r = r600_bytecode_vtx_build(bc, vtx, addr);
1838848b8605Smrg				if (r)
1839848b8605Smrg					return r;
1840848b8605Smrg				addr += 4;
1841848b8605Smrg			}
1842848b8605Smrg			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
1843848b8605Smrg				r = r600_bytecode_tex_build(bc, tex, addr);
1844848b8605Smrg				if (r)
1845848b8605Smrg					return r;
1846848b8605Smrg				addr += 4;
1847848b8605Smrg			}
1848848b8605Smrg		}
1849848b8605Smrg	}
1850848b8605Smrg	return 0;
1851848b8605Smrg}
1852848b8605Smrg
1853848b8605Smrgvoid r600_bytecode_clear(struct r600_bytecode *bc)
1854848b8605Smrg{
1855848b8605Smrg	struct r600_bytecode_cf *cf = NULL, *next_cf;
1856848b8605Smrg
1857848b8605Smrg	free(bc->bytecode);
1858848b8605Smrg	bc->bytecode = NULL;
1859848b8605Smrg
1860848b8605Smrg	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
1861848b8605Smrg		struct r600_bytecode_alu *alu = NULL, *next_alu;
1862848b8605Smrg		struct r600_bytecode_tex *tex = NULL, *next_tex;
1863848b8605Smrg		struct r600_bytecode_tex *vtx = NULL, *next_vtx;
1864b8e80941Smrg		struct r600_bytecode_gds *gds = NULL, *next_gds;
1865848b8605Smrg
1866848b8605Smrg		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1867848b8605Smrg			free(alu);
1868848b8605Smrg		}
1869848b8605Smrg
1870848b8605Smrg		LIST_INITHEAD(&cf->alu);
1871848b8605Smrg
1872848b8605Smrg		LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
1873848b8605Smrg			free(tex);
1874848b8605Smrg		}
1875848b8605Smrg
1876848b8605Smrg		LIST_INITHEAD(&cf->tex);
1877848b8605Smrg
1878848b8605Smrg		LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
1879848b8605Smrg			free(vtx);
1880848b8605Smrg		}
1881848b8605Smrg
1882848b8605Smrg		LIST_INITHEAD(&cf->vtx);
1883848b8605Smrg
1884b8e80941Smrg		LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
1885b8e80941Smrg			free(gds);
1886b8e80941Smrg		}
1887b8e80941Smrg
1888b8e80941Smrg		LIST_INITHEAD(&cf->gds);
1889b8e80941Smrg
1890848b8605Smrg		free(cf);
1891848b8605Smrg	}
1892848b8605Smrg
1893848b8605Smrg	LIST_INITHEAD(&cf->list);
1894848b8605Smrg}
1895848b8605Smrg
/*
 * Print the one-character name of swizzle selector swz to stderr and
 * return fprintf's result.  Valid selectors are 0-5 and 7 ("xyzw01" and
 * "_"); selector 6 is rejected by the assertion.
 */
static int print_swizzle(unsigned swz)
{
	static const char names[] = "xyzw01?_";

	assert(swz<8 && swz != 6);
	return fprintf(stderr, "%c", names[swz]);
}
1902848b8605Smrg
/*
 * Print a register/constant index to stderr and return the number of
 * characters written.
 *
 * The index is wrapped in "[...]" when it is relative (rel) or when the
 * caller asks for brackets.  A relative index gets a "G" prefix when
 * index_mode >= 5 and sel < 128, and a "+AR" (index_mode 0 or 6) or
 * "+AL" (index_mode 4) suffix inside the brackets.
 */
static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
		unsigned need_brackets)
{
	const int bracketed = rel || need_brackets;
	int n = 0;

	if (rel && index_mode >= 5 && sel < 128)
		n += fprintf(stderr, "G");
	if (bracketed)
		n += fprintf(stderr, "[");

	n += fprintf(stderr, "%d", sel);

	if (rel) {
		switch (index_mode) {
		case 0:
		case 6:
			n += fprintf(stderr, "+AR");
			break;
		case 4:
			n += fprintf(stderr, "+AL");
			break;
		default:
			break;
		}
	}

	if (bracketed)
		n += fprintf(stderr, "]");
	return n;
}
1924848b8605Smrg
1925848b8605Smrgstatic int print_dst(struct r600_bytecode_alu *alu)
1926848b8605Smrg{
1927848b8605Smrg	int o = 0;
1928848b8605Smrg	unsigned sel = alu->dst.sel;
1929848b8605Smrg	char reg_char = 'R';
1930848b8605Smrg	if (sel > 128 - 4) { /* clause temporary gpr */
1931848b8605Smrg		sel -= 128 - 4;
1932848b8605Smrg		reg_char = 'T';
1933848b8605Smrg	}
1934848b8605Smrg
1935b8e80941Smrg	if (alu_writes(alu)) {
1936848b8605Smrg		o += fprintf(stderr, "%c", reg_char);
1937848b8605Smrg		o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0);
1938848b8605Smrg	} else {
1939848b8605Smrg		o += fprintf(stderr, "__");
1940848b8605Smrg	}
1941848b8605Smrg	o += fprintf(stderr, ".");
1942848b8605Smrg	o += print_swizzle(alu->dst.chan);
1943848b8605Smrg	return o;
1944848b8605Smrg}
1945848b8605Smrg
1946848b8605Smrgstatic int print_src(struct r600_bytecode_alu *alu, unsigned idx)
1947848b8605Smrg{
1948848b8605Smrg	int o = 0;
1949848b8605Smrg	struct r600_bytecode_alu_src *src = &alu->src[idx];
1950848b8605Smrg	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
1951848b8605Smrg
1952848b8605Smrg	if (src->neg)
1953848b8605Smrg		o += fprintf(stderr,"-");
1954848b8605Smrg	if (src->abs)
1955848b8605Smrg		o += fprintf(stderr,"|");
1956848b8605Smrg
1957848b8605Smrg	if (sel < 128 - 4) {
1958848b8605Smrg		o += fprintf(stderr, "R");
1959848b8605Smrg	} else if (sel < 128) {
1960848b8605Smrg		o += fprintf(stderr, "T");
1961848b8605Smrg		sel -= 128 - 4;
1962848b8605Smrg	} else if (sel < 160) {
1963848b8605Smrg		o += fprintf(stderr, "KC0");
1964848b8605Smrg		need_brackets = 1;
1965848b8605Smrg		sel -= 128;
1966848b8605Smrg	} else if (sel < 192) {
1967848b8605Smrg		o += fprintf(stderr, "KC1");
1968848b8605Smrg		need_brackets = 1;
1969848b8605Smrg		sel -= 160;
1970848b8605Smrg	} else if (sel >= 512) {
1971848b8605Smrg		o += fprintf(stderr, "C%d", src->kc_bank);
1972848b8605Smrg		need_brackets = 1;
1973848b8605Smrg		sel -= 512;
1974848b8605Smrg	} else if (sel >= 448) {
1975848b8605Smrg		o += fprintf(stderr, "Param");
1976848b8605Smrg		sel -= 448;
1977848b8605Smrg		need_chan = 0;
1978848b8605Smrg	} else if (sel >= 288) {
1979848b8605Smrg		o += fprintf(stderr, "KC3");
1980848b8605Smrg		need_brackets = 1;
1981848b8605Smrg		sel -= 288;
1982848b8605Smrg	} else if (sel >= 256) {
1983848b8605Smrg		o += fprintf(stderr, "KC2");
1984848b8605Smrg		need_brackets = 1;
1985848b8605Smrg		sel -= 256;
1986848b8605Smrg	} else {
1987848b8605Smrg		need_sel = 0;
1988848b8605Smrg		need_chan = 0;
1989848b8605Smrg		switch (sel) {
1990b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
1991b8e80941Smrg			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
1992b8e80941Smrg			break;
1993b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
1994b8e80941Smrg			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
1995b8e80941Smrg			break;
1996b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
1997b8e80941Smrg			o += fprintf(stderr, "LDS_OQ_A");
1998b8e80941Smrg			need_chan = 1;
1999b8e80941Smrg			break;
2000b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
2001b8e80941Smrg			o += fprintf(stderr, "LDS_OQ_B");
2002b8e80941Smrg			need_chan = 1;
2003b8e80941Smrg			break;
2004b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
2005b8e80941Smrg			o += fprintf(stderr, "LDS_OQ_A_POP");
2006b8e80941Smrg			need_chan = 1;
2007b8e80941Smrg			break;
2008b8e80941Smrg		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
2009b8e80941Smrg			o += fprintf(stderr, "LDS_OQ_B_POP");
2010b8e80941Smrg			need_chan = 1;
2011b8e80941Smrg			break;
2012b8e80941Smrg		case EG_V_SQ_ALU_SRC_TIME_LO:
2013b8e80941Smrg			o += fprintf(stderr, "TIME_LO");
2014b8e80941Smrg			break;
2015b8e80941Smrg		case EG_V_SQ_ALU_SRC_TIME_HI:
2016b8e80941Smrg			o += fprintf(stderr, "TIME_HI");
2017b8e80941Smrg			break;
2018b8e80941Smrg		case EG_V_SQ_ALU_SRC_SE_ID:
2019b8e80941Smrg			o += fprintf(stderr, "SE_ID");
2020b8e80941Smrg			break;
2021b8e80941Smrg		case EG_V_SQ_ALU_SRC_SIMD_ID:
2022b8e80941Smrg			o += fprintf(stderr, "SIMD_ID");
2023b8e80941Smrg			break;
2024b8e80941Smrg		case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
2025b8e80941Smrg			o += fprintf(stderr, "HW_WAVE_ID");
2026b8e80941Smrg			break;
2027848b8605Smrg		case V_SQ_ALU_SRC_PS:
2028848b8605Smrg			o += fprintf(stderr, "PS");
2029848b8605Smrg			break;
2030848b8605Smrg		case V_SQ_ALU_SRC_PV:
2031848b8605Smrg			o += fprintf(stderr, "PV");
2032848b8605Smrg			need_chan = 1;
2033848b8605Smrg			break;
2034848b8605Smrg		case V_SQ_ALU_SRC_LITERAL:
2035b8e80941Smrg			o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
2036848b8605Smrg			break;
2037848b8605Smrg		case V_SQ_ALU_SRC_0_5:
2038848b8605Smrg			o += fprintf(stderr, "0.5");
2039848b8605Smrg			break;
2040848b8605Smrg		case V_SQ_ALU_SRC_M_1_INT:
2041848b8605Smrg			o += fprintf(stderr, "-1");
2042848b8605Smrg			break;
2043848b8605Smrg		case V_SQ_ALU_SRC_1_INT:
2044848b8605Smrg			o += fprintf(stderr, "1");
2045848b8605Smrg			break;
2046848b8605Smrg		case V_SQ_ALU_SRC_1:
2047848b8605Smrg			o += fprintf(stderr, "1.0");
2048848b8605Smrg			break;
2049848b8605Smrg		case V_SQ_ALU_SRC_0:
2050848b8605Smrg			o += fprintf(stderr, "0");
2051848b8605Smrg			break;
2052848b8605Smrg		default:
2053848b8605Smrg			o += fprintf(stderr, "??IMM_%d", sel);
2054848b8605Smrg			break;
2055848b8605Smrg		}
2056848b8605Smrg	}
2057848b8605Smrg
2058848b8605Smrg	if (need_sel)
2059848b8605Smrg		o += print_sel(sel, src->rel, alu->index_mode, need_brackets);
2060848b8605Smrg
2061848b8605Smrg	if (need_chan) {
2062848b8605Smrg		o += fprintf(stderr, ".");
2063848b8605Smrg		o += print_swizzle(src->chan);
2064848b8605Smrg	}
2065848b8605Smrg
2066848b8605Smrg	if (src->abs)
2067848b8605Smrg		o += fprintf(stderr,"|");
2068848b8605Smrg
2069848b8605Smrg	return o;
2070848b8605Smrg}
2071848b8605Smrg
/*
 * Pad stderr output with spaces from column p up to column c.
 * Returns the number of characters written (0 when p >= c).
 */
static int print_indent(int p, int c)
{
	int written = 0;
	int col;

	for (col = p; col < c; col++)
		written += fprintf(stderr, " ");
	return written;
}
2079848b8605Smrg
2080848b8605Smrgvoid r600_bytecode_disasm(struct r600_bytecode *bc)
2081848b8605Smrg{
2082b8e80941Smrg	const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
2083848b8605Smrg	static int index = 0;
2084848b8605Smrg	struct r600_bytecode_cf *cf = NULL;
2085848b8605Smrg	struct r600_bytecode_alu *alu = NULL;
2086848b8605Smrg	struct r600_bytecode_vtx *vtx = NULL;
2087848b8605Smrg	struct r600_bytecode_tex *tex = NULL;
2088b8e80941Smrg	struct r600_bytecode_gds *gds = NULL;
2089848b8605Smrg
2090848b8605Smrg	unsigned i, id, ngr = 0, last;
2091848b8605Smrg	uint32_t literal[4];
2092848b8605Smrg	unsigned nliteral;
2093848b8605Smrg	char chip = '6';
2094848b8605Smrg
2095848b8605Smrg	switch (bc->chip_class) {
2096848b8605Smrg	case R700:
2097848b8605Smrg		chip = '7';
2098848b8605Smrg		break;
2099848b8605Smrg	case EVERGREEN:
2100848b8605Smrg		chip = 'E';
2101848b8605Smrg		break;
2102848b8605Smrg	case CAYMAN:
2103848b8605Smrg		chip = 'C';
2104848b8605Smrg		break;
2105848b8605Smrg	case R600:
2106848b8605Smrg	default:
2107848b8605Smrg		chip = '6';
2108848b8605Smrg		break;
2109848b8605Smrg	}
2110848b8605Smrg	fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
2111848b8605Smrg	        bc->ndw, bc->ngpr, bc->nstack);
2112848b8605Smrg	fprintf(stderr, "shader %d -- %c\n", index++, chip);
2113848b8605Smrg
2114848b8605Smrg	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
2115848b8605Smrg		id = cf->id;
2116848b8605Smrg		if (cf->op == CF_NATIVE) {
2117848b8605Smrg			fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
2118848b8605Smrg					bc->bytecode[id + 1]);
2119848b8605Smrg		} else {
2120848b8605Smrg			const struct cf_op_info *cfop = r600_isa_cf(cf->op);
2121848b8605Smrg			if (cfop->flags & CF_ALU) {
2122848b8605Smrg				if (cf->eg_alu_extended) {
2123848b8605Smrg					fprintf(stderr, "%04d %08X %08X  %s\n", id, bc->bytecode[id],
2124848b8605Smrg							bc->bytecode[id + 1], "ALU_EXT");
2125848b8605Smrg					id += 2;
2126848b8605Smrg				}
2127848b8605Smrg				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2128848b8605Smrg						bc->bytecode[id + 1], cfop->name);
2129848b8605Smrg				fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
2130848b8605Smrg				for (i = 0; i < 4; ++i) {
2131848b8605Smrg					if (cf->kcache[i].mode) {
2132848b8605Smrg						int c_start = (cf->kcache[i].addr << 4);
2133848b8605Smrg						int c_end = c_start + (cf->kcache[i].mode << 4);
2134b8e80941Smrg						fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
2135b8e80941Smrg						        i, cf->kcache[i].bank, c_start, c_end,
2136b8e80941Smrg						        cf->kcache[i].index_mode ? " " : "",
2137b8e80941Smrg						        cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
2138848b8605Smrg					}
2139848b8605Smrg				}
2140848b8605Smrg				fprintf(stderr, "\n");
2141848b8605Smrg			} else if (cfop->flags & CF_FETCH) {
2142848b8605Smrg				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2143848b8605Smrg						bc->bytecode[id + 1], cfop->name);
2144848b8605Smrg				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
2145b8e80941Smrg				if (cf->vpm)
2146b8e80941Smrg					fprintf(stderr, "VPM ");
2147b8e80941Smrg				if (cf->end_of_program)
2148b8e80941Smrg					fprintf(stderr, "EOP ");
2149848b8605Smrg				fprintf(stderr, "\n");
2150b8e80941Smrg
2151848b8605Smrg			} else if (cfop->flags & CF_EXP) {
2152848b8605Smrg				int o = 0;
2153848b8605Smrg				const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
2154848b8605Smrg				o += fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2155848b8605Smrg						bc->bytecode[id + 1], cfop->name);
2156848b8605Smrg				o += print_indent(o, 43);
2157848b8605Smrg				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2158848b8605Smrg				if (cf->output.burst_count > 1) {
2159848b8605Smrg					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2160848b8605Smrg							cf->output.array_base + cf->output.burst_count - 1);
2161848b8605Smrg
2162848b8605Smrg					o += print_indent(o, 55);
2163848b8605Smrg					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2164848b8605Smrg							cf->output.gpr + cf->output.burst_count - 1);
2165848b8605Smrg				} else {
2166848b8605Smrg					o += fprintf(stderr, "%d ", cf->output.array_base);
2167848b8605Smrg					o += print_indent(o, 55);
2168848b8605Smrg					o += fprintf(stderr, "R%d.", cf->output.gpr);
2169848b8605Smrg				}
2170848b8605Smrg
2171848b8605Smrg				o += print_swizzle(cf->output.swizzle_x);
2172848b8605Smrg				o += print_swizzle(cf->output.swizzle_y);
2173848b8605Smrg				o += print_swizzle(cf->output.swizzle_z);
2174848b8605Smrg				o += print_swizzle(cf->output.swizzle_w);
2175848b8605Smrg
2176848b8605Smrg				print_indent(o, 67);
2177848b8605Smrg
2178848b8605Smrg				fprintf(stderr, " ES:%X ", cf->output.elem_size);
2179b8e80941Smrg				if (cf->mark)
2180b8e80941Smrg					fprintf(stderr, "MARK ");
2181848b8605Smrg				if (!cf->barrier)
2182848b8605Smrg					fprintf(stderr, "NO_BARRIER ");
2183848b8605Smrg				if (cf->end_of_program)
2184848b8605Smrg					fprintf(stderr, "EOP ");
2185848b8605Smrg				fprintf(stderr, "\n");
2186848b8605Smrg			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
2187848b8605Smrg				int o = 0;
2188848b8605Smrg				const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
2189848b8605Smrg						"WRITE_IND_ACK"};
2190848b8605Smrg				o += fprintf(stderr, "%04d %08X %08X  %s ", id,
2191848b8605Smrg						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
2192848b8605Smrg				o += print_indent(o, 43);
2193848b8605Smrg				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2194b8e80941Smrg
2195b8e80941Smrg				if (r600_isa_cf(cf->op)->flags & CF_RAT) {
2196b8e80941Smrg					o += fprintf(stderr, "RAT%d", cf->rat.id);
2197b8e80941Smrg					if (cf->rat.index_mode) {
2198b8e80941Smrg						o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
2199b8e80941Smrg					}
2200b8e80941Smrg					o += fprintf(stderr, " INST: %d ", cf->rat.inst);
2201b8e80941Smrg				}
2202b8e80941Smrg
2203848b8605Smrg				if (cf->output.burst_count > 1) {
2204848b8605Smrg					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2205848b8605Smrg							cf->output.array_base + cf->output.burst_count - 1);
2206848b8605Smrg					o += print_indent(o, 55);
2207848b8605Smrg					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2208848b8605Smrg							cf->output.gpr + cf->output.burst_count - 1);
2209848b8605Smrg				} else {
2210848b8605Smrg					o += fprintf(stderr, "%d ", cf->output.array_base);
2211848b8605Smrg					o += print_indent(o, 55);
2212848b8605Smrg					o += fprintf(stderr, "R%d.", cf->output.gpr);
2213848b8605Smrg				}
2214848b8605Smrg				for (i = 0; i < 4; ++i) {
2215848b8605Smrg					if (cf->output.comp_mask & (1 << i))
2216848b8605Smrg						o += print_swizzle(i);
2217848b8605Smrg					else
2218848b8605Smrg						o += print_swizzle(7);
2219848b8605Smrg				}
2220848b8605Smrg
2221b8e80941Smrg				if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
2222b8e80941Smrg				    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
2223848b8605Smrg					o += fprintf(stderr, " R%d", cf->output.index_gpr);
2224848b8605Smrg
2225848b8605Smrg				o += print_indent(o, 67);
2226848b8605Smrg
2227848b8605Smrg				fprintf(stderr, " ES:%i ", cf->output.elem_size);
2228848b8605Smrg				if (cf->output.array_size != 0xFFF)
2229848b8605Smrg					fprintf(stderr, "AS:%i ", cf->output.array_size);
2230b8e80941Smrg				if (cf->mark)
2231b8e80941Smrg					fprintf(stderr, "MARK ");
2232848b8605Smrg				if (!cf->barrier)
2233848b8605Smrg					fprintf(stderr, "NO_BARRIER ");
2234848b8605Smrg				if (cf->end_of_program)
2235848b8605Smrg					fprintf(stderr, "EOP ");
2236b8e80941Smrg
2237b8e80941Smrg				if (cf->output.mark)
2238b8e80941Smrg					fprintf(stderr, "MARK ");
2239b8e80941Smrg
2240848b8605Smrg				fprintf(stderr, "\n");
2241848b8605Smrg			} else {
2242848b8605Smrg				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2243848b8605Smrg						bc->bytecode[id + 1], cfop->name);
2244848b8605Smrg				fprintf(stderr, "@%d ", cf->cf_addr);
2245848b8605Smrg				if (cf->cond)
2246848b8605Smrg					fprintf(stderr, "CND:%X ", cf->cond);
2247848b8605Smrg				if (cf->pop_count)
2248848b8605Smrg					fprintf(stderr, "POP:%X ", cf->pop_count);
2249b8e80941Smrg				if (cf->count && (cfop->flags & CF_EMIT))
2250b8e80941Smrg					fprintf(stderr, "STREAM%d ", cf->count);
2251b8e80941Smrg				if (cf->vpm)
2252b8e80941Smrg					fprintf(stderr, "VPM ");
2253b8e80941Smrg				if (cf->end_of_program)
2254b8e80941Smrg					fprintf(stderr, "EOP ");
2255848b8605Smrg				fprintf(stderr, "\n");
2256848b8605Smrg			}
2257848b8605Smrg		}
2258848b8605Smrg
2259848b8605Smrg		id = cf->addr;
2260848b8605Smrg		nliteral = 0;
2261848b8605Smrg		last = 1;
2262848b8605Smrg		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
2263848b8605Smrg			const char *omod_str[] = {"","*2","*4","/2"};
2264848b8605Smrg			const struct alu_op_info *aop = r600_isa_alu(alu->op);
2265848b8605Smrg			int o = 0;
2266848b8605Smrg
2267b8e80941Smrg			r600_bytecode_alu_nliterals(alu, literal, &nliteral);
2268848b8605Smrg			o += fprintf(stderr, " %04d %08X %08X  ", id, bc->bytecode[id], bc->bytecode[id+1]);
2269848b8605Smrg			if (last)
2270848b8605Smrg				o += fprintf(stderr, "%4d ", ++ngr);
2271848b8605Smrg			else
2272848b8605Smrg				o += fprintf(stderr, "     ");
2273848b8605Smrg			o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
2274848b8605Smrg					alu->update_pred ? 'P':' ',
2275848b8605Smrg					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
2276848b8605Smrg
2277848b8605Smrg			o += fprintf(stderr, "%s%s%s ", aop->name,
2278848b8605Smrg					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
2279848b8605Smrg
2280848b8605Smrg			o += print_indent(o,60);
2281848b8605Smrg			o += print_dst(alu);
2282848b8605Smrg			for (i = 0; i < aop->src_count; ++i) {
2283848b8605Smrg				o += fprintf(stderr, i == 0 ? ",  ": ", ");
2284848b8605Smrg				o += print_src(alu, i);
2285848b8605Smrg			}
2286848b8605Smrg
2287848b8605Smrg			if (alu->bank_swizzle) {
2288848b8605Smrg				o += print_indent(o,75);
2289848b8605Smrg				o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
2290848b8605Smrg			}
2291848b8605Smrg
2292848b8605Smrg			fprintf(stderr, "\n");
2293848b8605Smrg			id += 2;
2294848b8605Smrg
2295848b8605Smrg			if (alu->last) {
2296848b8605Smrg				for (i = 0; i < nliteral; i++, id++) {
2297848b8605Smrg					float *f = (float*)(bc->bytecode + id);
2298848b8605Smrg					o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
2299848b8605Smrg					print_indent(o, 60);
2300848b8605Smrg					fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id));
2301848b8605Smrg				}
2302848b8605Smrg				id += nliteral & 1;
2303848b8605Smrg				nliteral = 0;
2304848b8605Smrg			}
2305848b8605Smrg			last = alu->last;
2306848b8605Smrg		}
2307848b8605Smrg
2308848b8605Smrg		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
2309848b8605Smrg			int o = 0;
2310848b8605Smrg			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2311848b8605Smrg					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2312848b8605Smrg
2313848b8605Smrg			o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);
2314848b8605Smrg
2315848b8605Smrg			o += print_indent(o, 50);
2316848b8605Smrg
2317848b8605Smrg			o += fprintf(stderr, "R%d.", tex->dst_gpr);
2318848b8605Smrg			o += print_swizzle(tex->dst_sel_x);
2319848b8605Smrg			o += print_swizzle(tex->dst_sel_y);
2320848b8605Smrg			o += print_swizzle(tex->dst_sel_z);
2321848b8605Smrg			o += print_swizzle(tex->dst_sel_w);
2322848b8605Smrg
2323848b8605Smrg			o += fprintf(stderr, ", R%d.", tex->src_gpr);
2324848b8605Smrg			o += print_swizzle(tex->src_sel_x);
2325848b8605Smrg			o += print_swizzle(tex->src_sel_y);
2326848b8605Smrg			o += print_swizzle(tex->src_sel_z);
2327848b8605Smrg			o += print_swizzle(tex->src_sel_w);
2328848b8605Smrg
2329848b8605Smrg			o += fprintf(stderr, ",  RID:%d", tex->resource_id);
2330848b8605Smrg			o += fprintf(stderr, ", SID:%d  ", tex->sampler_id);
2331848b8605Smrg
2332b8e80941Smrg			if (tex->sampler_index_mode)
2333b8e80941Smrg				fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);
2334b8e80941Smrg
2335848b8605Smrg			if (tex->lod_bias)
2336848b8605Smrg				fprintf(stderr, "LB:%d ", tex->lod_bias);
2337848b8605Smrg
2338848b8605Smrg			fprintf(stderr, "CT:%c%c%c%c ",
2339848b8605Smrg					tex->coord_type_x ? 'N' : 'U',
2340848b8605Smrg					tex->coord_type_y ? 'N' : 'U',
2341848b8605Smrg					tex->coord_type_z ? 'N' : 'U',
2342848b8605Smrg					tex->coord_type_w ? 'N' : 'U');
2343848b8605Smrg
2344848b8605Smrg			if (tex->offset_x)
2345848b8605Smrg				fprintf(stderr, "OX:%d ", tex->offset_x);
2346848b8605Smrg			if (tex->offset_y)
2347848b8605Smrg				fprintf(stderr, "OY:%d ", tex->offset_y);
2348848b8605Smrg			if (tex->offset_z)
2349848b8605Smrg				fprintf(stderr, "OZ:%d ", tex->offset_z);
2350848b8605Smrg
2351848b8605Smrg			id += 4;
2352848b8605Smrg			fprintf(stderr, "\n");
2353848b8605Smrg		}
2354848b8605Smrg
2355848b8605Smrg		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
2356848b8605Smrg			int o = 0;
2357848b8605Smrg			const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
2358848b8605Smrg			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2359848b8605Smrg					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2360848b8605Smrg
2361848b8605Smrg			o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);
2362848b8605Smrg
2363848b8605Smrg			o += print_indent(o, 50);
2364848b8605Smrg
2365848b8605Smrg			o += fprintf(stderr, "R%d.", vtx->dst_gpr);
2366848b8605Smrg			o += print_swizzle(vtx->dst_sel_x);
2367848b8605Smrg			o += print_swizzle(vtx->dst_sel_y);
2368848b8605Smrg			o += print_swizzle(vtx->dst_sel_z);
2369848b8605Smrg			o += print_swizzle(vtx->dst_sel_w);
2370848b8605Smrg
2371848b8605Smrg			o += fprintf(stderr, ", R%d.", vtx->src_gpr);
2372848b8605Smrg			o += print_swizzle(vtx->src_sel_x);
2373b8e80941Smrg			if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
2374b8e80941Smrg				o += print_swizzle(vtx->src_sel_y);
2375848b8605Smrg
2376848b8605Smrg			if (vtx->offset)
2377848b8605Smrg				fprintf(stderr, " +%db", vtx->offset);
2378848b8605Smrg
2379848b8605Smrg			o += print_indent(o, 55);
2380848b8605Smrg
2381848b8605Smrg			fprintf(stderr, ",  RID:%d ", vtx->buffer_id);
2382848b8605Smrg
2383848b8605Smrg			fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);
2384848b8605Smrg
2385848b8605Smrg			if (bc->chip_class < CAYMAN && vtx->mega_fetch_count)
2386848b8605Smrg				fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);
2387848b8605Smrg
2388b8e80941Smrg			if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
2389b8e80941Smrg				fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
2390b8e80941Smrg
2391b8e80941Smrg			if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
2392b8e80941Smrg				if (vtx->uncached)
2393b8e80941Smrg					fprintf(stderr, "UNCACHED ");
2394b8e80941Smrg				if (vtx->indexed)
2395b8e80941Smrg					fprintf(stderr, "INDEXED:%d ", vtx->indexed);
2396b8e80941Smrg
2397b8e80941Smrg				fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
2398b8e80941Smrg				if (vtx->burst_count)
2399b8e80941Smrg					fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
2400b8e80941Smrg				fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
2401b8e80941Smrg				fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
2402b8e80941Smrg			}
2403b8e80941Smrg
2404848b8605Smrg			fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
2405848b8605Smrg			fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
2406848b8605Smrg			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
2407848b8605Smrg			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
2408848b8605Smrg			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
2409848b8605Smrg
2410848b8605Smrg			id += 4;
2411848b8605Smrg		}
2412b8e80941Smrg
2413b8e80941Smrg		LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
2414b8e80941Smrg			int o = 0;
2415b8e80941Smrg			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2416b8e80941Smrg					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2417b8e80941Smrg
2418b8e80941Smrg			o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);
2419b8e80941Smrg
2420b8e80941Smrg			if (gds->op != FETCH_OP_TF_WRITE) {
2421b8e80941Smrg				o += fprintf(stderr, "R%d.", gds->dst_gpr);
2422b8e80941Smrg				o += print_swizzle(gds->dst_sel_x);
2423b8e80941Smrg				o += print_swizzle(gds->dst_sel_y);
2424b8e80941Smrg				o += print_swizzle(gds->dst_sel_z);
2425b8e80941Smrg				o += print_swizzle(gds->dst_sel_w);
2426b8e80941Smrg			}
2427b8e80941Smrg
2428b8e80941Smrg			o += fprintf(stderr, ", R%d.", gds->src_gpr);
2429b8e80941Smrg			o += print_swizzle(gds->src_sel_x);
2430b8e80941Smrg			o += print_swizzle(gds->src_sel_y);
2431b8e80941Smrg			o += print_swizzle(gds->src_sel_z);
2432b8e80941Smrg
2433b8e80941Smrg			if (gds->op != FETCH_OP_TF_WRITE) {
2434b8e80941Smrg				o += fprintf(stderr, ", R%d.", gds->src_gpr2);
2435b8e80941Smrg			}
2436b8e80941Smrg			if (gds->alloc_consume) {
2437b8e80941Smrg				o += fprintf(stderr, " UAV: %d", gds->uav_id);
2438b8e80941Smrg				if (gds->uav_index_mode)
2439b8e80941Smrg					o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
2440b8e80941Smrg			}
2441b8e80941Smrg			fprintf(stderr, "\n");
2442b8e80941Smrg			id += 4;
2443b8e80941Smrg		}
2444848b8605Smrg	}
2445848b8605Smrg
2446848b8605Smrg	fprintf(stderr, "--------------------------------------\n");
2447848b8605Smrg}
2448848b8605Smrg
2449848b8605Smrgvoid r600_vertex_data_type(enum pipe_format pformat,
2450848b8605Smrg				  unsigned *format,
2451848b8605Smrg				  unsigned *num_format, unsigned *format_comp, unsigned *endian)
2452848b8605Smrg{
2453848b8605Smrg	const struct util_format_description *desc;
2454848b8605Smrg	unsigned i;
2455848b8605Smrg
2456848b8605Smrg	*format = 0;
2457848b8605Smrg	*num_format = 0;
2458848b8605Smrg	*format_comp = 0;
2459848b8605Smrg	*endian = ENDIAN_NONE;
2460848b8605Smrg
2461848b8605Smrg	if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) {
2462848b8605Smrg		*format = FMT_10_11_11_FLOAT;
2463848b8605Smrg		*endian = r600_endian_swap(32);
2464848b8605Smrg		return;
2465848b8605Smrg	}
2466848b8605Smrg
2467b8e80941Smrg	if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
2468b8e80941Smrg		*format = FMT_5_6_5;
2469b8e80941Smrg		*endian = r600_endian_swap(16);
2470b8e80941Smrg		return;
2471b8e80941Smrg	}
2472b8e80941Smrg
2473b8e80941Smrg	if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
2474b8e80941Smrg		*format = FMT_1_5_5_5;
2475b8e80941Smrg		*endian = r600_endian_swap(16);
2476b8e80941Smrg		return;
2477b8e80941Smrg	}
2478b8e80941Smrg
2479b8e80941Smrg	if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
2480b8e80941Smrg		*format = FMT_5_5_5_1;
2481b8e80941Smrg		return;
2482b8e80941Smrg	}
2483b8e80941Smrg
2484848b8605Smrg	desc = util_format_description(pformat);
2485848b8605Smrg	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2486848b8605Smrg		goto out_unknown;
2487848b8605Smrg	}
2488848b8605Smrg
2489848b8605Smrg	/* Find the first non-VOID channel. */
2490848b8605Smrg	for (i = 0; i < 4; i++) {
2491848b8605Smrg		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2492848b8605Smrg			break;
2493848b8605Smrg		}
2494848b8605Smrg	}
2495848b8605Smrg
2496848b8605Smrg	*endian = r600_endian_swap(desc->channel[i].size);
2497848b8605Smrg
2498848b8605Smrg	switch (desc->channel[i].type) {
2499848b8605Smrg	/* Half-floats, floats, ints */
2500848b8605Smrg	case UTIL_FORMAT_TYPE_FLOAT:
2501848b8605Smrg		switch (desc->channel[i].size) {
2502848b8605Smrg		case 16:
2503848b8605Smrg			switch (desc->nr_channels) {
2504848b8605Smrg			case 1:
2505848b8605Smrg				*format = FMT_16_FLOAT;
2506848b8605Smrg				break;
2507848b8605Smrg			case 2:
2508848b8605Smrg				*format = FMT_16_16_FLOAT;
2509848b8605Smrg				break;
2510848b8605Smrg			case 3:
2511848b8605Smrg			case 4:
2512848b8605Smrg				*format = FMT_16_16_16_16_FLOAT;
2513848b8605Smrg				break;
2514848b8605Smrg			}
2515848b8605Smrg			break;
2516848b8605Smrg		case 32:
2517848b8605Smrg			switch (desc->nr_channels) {
2518848b8605Smrg			case 1:
2519848b8605Smrg				*format = FMT_32_FLOAT;
2520848b8605Smrg				break;
2521848b8605Smrg			case 2:
2522848b8605Smrg				*format = FMT_32_32_FLOAT;
2523848b8605Smrg				break;
2524848b8605Smrg			case 3:
2525848b8605Smrg				*format = FMT_32_32_32_FLOAT;
2526848b8605Smrg				break;
2527848b8605Smrg			case 4:
2528848b8605Smrg				*format = FMT_32_32_32_32_FLOAT;
2529848b8605Smrg				break;
2530848b8605Smrg			}
2531848b8605Smrg			break;
2532848b8605Smrg		default:
2533848b8605Smrg			goto out_unknown;
2534848b8605Smrg		}
2535848b8605Smrg		break;
2536848b8605Smrg		/* Unsigned ints */
2537848b8605Smrg	case UTIL_FORMAT_TYPE_UNSIGNED:
2538848b8605Smrg		/* Signed ints */
2539848b8605Smrg	case UTIL_FORMAT_TYPE_SIGNED:
2540848b8605Smrg		switch (desc->channel[i].size) {
2541b8e80941Smrg		case 4:
2542b8e80941Smrg			switch (desc->nr_channels) {
2543b8e80941Smrg			case 2:
2544b8e80941Smrg				*format = FMT_4_4;
2545b8e80941Smrg				break;
2546b8e80941Smrg			case 4:
2547b8e80941Smrg				*format = FMT_4_4_4_4;
2548b8e80941Smrg				break;
2549b8e80941Smrg			}
2550b8e80941Smrg			break;
2551848b8605Smrg		case 8:
2552848b8605Smrg			switch (desc->nr_channels) {
2553848b8605Smrg			case 1:
2554848b8605Smrg				*format = FMT_8;
2555848b8605Smrg				break;
2556848b8605Smrg			case 2:
2557848b8605Smrg				*format = FMT_8_8;
2558848b8605Smrg				break;
2559848b8605Smrg			case 3:
2560848b8605Smrg			case 4:
2561848b8605Smrg				*format = FMT_8_8_8_8;
2562848b8605Smrg				break;
2563848b8605Smrg			}
2564848b8605Smrg			break;
2565848b8605Smrg		case 10:
2566848b8605Smrg			if (desc->nr_channels != 4)
2567848b8605Smrg				goto out_unknown;
2568848b8605Smrg
2569848b8605Smrg			*format = FMT_2_10_10_10;
2570848b8605Smrg			break;
2571848b8605Smrg		case 16:
2572848b8605Smrg			switch (desc->nr_channels) {
2573848b8605Smrg			case 1:
2574848b8605Smrg				*format = FMT_16;
2575848b8605Smrg				break;
2576848b8605Smrg			case 2:
2577848b8605Smrg				*format = FMT_16_16;
2578848b8605Smrg				break;
2579848b8605Smrg			case 3:
2580848b8605Smrg			case 4:
2581848b8605Smrg				*format = FMT_16_16_16_16;
2582848b8605Smrg				break;
2583848b8605Smrg			}
2584848b8605Smrg			break;
2585848b8605Smrg		case 32:
2586848b8605Smrg			switch (desc->nr_channels) {
2587848b8605Smrg			case 1:
2588848b8605Smrg				*format = FMT_32;
2589848b8605Smrg				break;
2590848b8605Smrg			case 2:
2591848b8605Smrg				*format = FMT_32_32;
2592848b8605Smrg				break;
2593848b8605Smrg			case 3:
2594848b8605Smrg				*format = FMT_32_32_32;
2595848b8605Smrg				break;
2596848b8605Smrg			case 4:
2597848b8605Smrg				*format = FMT_32_32_32_32;
2598848b8605Smrg				break;
2599848b8605Smrg			}
2600848b8605Smrg			break;
2601848b8605Smrg		default:
2602848b8605Smrg			goto out_unknown;
2603848b8605Smrg		}
2604848b8605Smrg		break;
2605848b8605Smrg	default:
2606848b8605Smrg		goto out_unknown;
2607848b8605Smrg	}
2608848b8605Smrg
2609848b8605Smrg	if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2610848b8605Smrg		*format_comp = 1;
2611848b8605Smrg	}
2612848b8605Smrg
2613848b8605Smrg	*num_format = 0;
2614848b8605Smrg	if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2615848b8605Smrg	    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2616848b8605Smrg		if (!desc->channel[i].normalized) {
2617848b8605Smrg			if (desc->channel[i].pure_integer)
2618848b8605Smrg				*num_format = 1;
2619848b8605Smrg			else
2620848b8605Smrg				*num_format = 2;
2621848b8605Smrg		}
2622848b8605Smrg	}
2623848b8605Smrg	return;
2624848b8605Smrgout_unknown:
2625848b8605Smrg	R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2626848b8605Smrg}
2627848b8605Smrg
2628848b8605Smrgvoid *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
2629848b8605Smrg				      unsigned count,
2630848b8605Smrg				      const struct pipe_vertex_element *elements)
2631848b8605Smrg{
2632848b8605Smrg	struct r600_context *rctx = (struct r600_context *)ctx;
2633848b8605Smrg	struct r600_bytecode bc;
2634848b8605Smrg	struct r600_bytecode_vtx vtx;
2635848b8605Smrg	const struct util_format_description *desc;
2636848b8605Smrg	unsigned fetch_resource_start = rctx->b.chip_class >= EVERGREEN ? 0 : 160;
2637848b8605Smrg	unsigned format, num_format, format_comp, endian;
2638848b8605Smrg	uint32_t *bytecode;
2639848b8605Smrg	int i, j, r, fs_size;
2640848b8605Smrg	struct r600_fetch_shader *shader;
2641848b8605Smrg	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB;
2642848b8605Smrg	unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
2643848b8605Smrg
2644848b8605Smrg	assert(count < 32);
2645848b8605Smrg
2646848b8605Smrg	memset(&bc, 0, sizeof(bc));
2647848b8605Smrg	r600_bytecode_init(&bc, rctx->b.chip_class, rctx->b.family,
2648848b8605Smrg			   rctx->screen->has_compressed_msaa_texturing);
2649848b8605Smrg
2650848b8605Smrg	bc.isa = rctx->isa;
2651848b8605Smrg
2652848b8605Smrg	for (i = 0; i < count; i++) {
2653848b8605Smrg		if (elements[i].instance_divisor > 1) {
2654848b8605Smrg			if (rctx->b.chip_class == CAYMAN) {
2655848b8605Smrg				for (j = 0; j < 4; j++) {
2656848b8605Smrg					struct r600_bytecode_alu alu;
2657848b8605Smrg					memset(&alu, 0, sizeof(alu));
2658848b8605Smrg					alu.op = ALU_OP2_MULHI_UINT;
2659848b8605Smrg					alu.src[0].sel = 0;
2660848b8605Smrg					alu.src[0].chan = 3;
2661848b8605Smrg					alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2662848b8605Smrg					alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2663848b8605Smrg					alu.dst.sel = i + 1;
2664848b8605Smrg					alu.dst.chan = j;
2665848b8605Smrg					alu.dst.write = j == 3;
2666848b8605Smrg					alu.last = j == 3;
2667848b8605Smrg					if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2668848b8605Smrg						r600_bytecode_clear(&bc);
2669848b8605Smrg						return NULL;
2670848b8605Smrg					}
2671848b8605Smrg				}
2672848b8605Smrg			} else {
2673848b8605Smrg				struct r600_bytecode_alu alu;
2674848b8605Smrg				memset(&alu, 0, sizeof(alu));
2675848b8605Smrg				alu.op = ALU_OP2_MULHI_UINT;
2676848b8605Smrg				alu.src[0].sel = 0;
2677848b8605Smrg				alu.src[0].chan = 3;
2678848b8605Smrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2679848b8605Smrg				alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2680848b8605Smrg				alu.dst.sel = i + 1;
2681848b8605Smrg				alu.dst.chan = 3;
2682848b8605Smrg				alu.dst.write = 1;
2683848b8605Smrg				alu.last = 1;
2684848b8605Smrg				if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2685848b8605Smrg					r600_bytecode_clear(&bc);
2686848b8605Smrg					return NULL;
2687848b8605Smrg				}
2688848b8605Smrg			}
2689848b8605Smrg		}
2690848b8605Smrg	}
2691848b8605Smrg
2692848b8605Smrg	for (i = 0; i < count; i++) {
2693848b8605Smrg		r600_vertex_data_type(elements[i].src_format,
2694848b8605Smrg				      &format, &num_format, &format_comp, &endian);
2695848b8605Smrg
2696848b8605Smrg		desc = util_format_description(elements[i].src_format);
2697b8e80941Smrg		if (!desc) {
2698848b8605Smrg			r600_bytecode_clear(&bc);
2699848b8605Smrg			R600_ERR("unknown format %d\n", elements[i].src_format);
2700848b8605Smrg			return NULL;
2701848b8605Smrg		}
2702848b8605Smrg
2703848b8605Smrg		if (elements[i].src_offset > 65535) {
2704848b8605Smrg			r600_bytecode_clear(&bc);
2705848b8605Smrg			R600_ERR("too big src_offset: %u\n", elements[i].src_offset);
2706848b8605Smrg			return NULL;
2707848b8605Smrg		}
2708848b8605Smrg
2709848b8605Smrg		memset(&vtx, 0, sizeof(vtx));
2710848b8605Smrg		vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start;
2711b8e80941Smrg		vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA;
2712848b8605Smrg		vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
2713848b8605Smrg		vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
2714848b8605Smrg		vtx.mega_fetch_count = 0x1F;
2715848b8605Smrg		vtx.dst_gpr = i + 1;
2716848b8605Smrg		vtx.dst_sel_x = desc->swizzle[0];
2717848b8605Smrg		vtx.dst_sel_y = desc->swizzle[1];
2718848b8605Smrg		vtx.dst_sel_z = desc->swizzle[2];
2719848b8605Smrg		vtx.dst_sel_w = desc->swizzle[3];
2720848b8605Smrg		vtx.data_format = format;
2721848b8605Smrg		vtx.num_format_all = num_format;
2722848b8605Smrg		vtx.format_comp_all = format_comp;
2723848b8605Smrg		vtx.offset = elements[i].src_offset;
2724848b8605Smrg		vtx.endian = endian;
2725848b8605Smrg
2726848b8605Smrg		if ((r = r600_bytecode_add_vtx(&bc, &vtx))) {
2727848b8605Smrg			r600_bytecode_clear(&bc);
2728848b8605Smrg			return NULL;
2729848b8605Smrg		}
2730848b8605Smrg	}
2731848b8605Smrg
2732848b8605Smrg	r600_bytecode_add_cfinst(&bc, CF_OP_RET);
2733848b8605Smrg
2734848b8605Smrg	if ((r = r600_bytecode_build(&bc))) {
2735848b8605Smrg		r600_bytecode_clear(&bc);
2736848b8605Smrg		return NULL;
2737848b8605Smrg	}
2738848b8605Smrg
2739848b8605Smrg	if (rctx->screen->b.debug_flags & DBG_FS) {
2740848b8605Smrg		fprintf(stderr, "--------------------------------------------------------------\n");
2741848b8605Smrg		fprintf(stderr, "Vertex elements state:\n");
2742848b8605Smrg		for (i = 0; i < count; i++) {
2743848b8605Smrg			fprintf(stderr, "   ");
2744848b8605Smrg			util_dump_vertex_element(stderr, elements+i);
2745848b8605Smrg			fprintf(stderr, "\n");
2746848b8605Smrg		}
2747848b8605Smrg
2748848b8605Smrg		if (!sb_disasm) {
2749848b8605Smrg			r600_bytecode_disasm(&bc);
2750848b8605Smrg
2751848b8605Smrg			fprintf(stderr, "______________________________________________________________\n");
2752848b8605Smrg		} else {
2753848b8605Smrg			r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/);
2754848b8605Smrg		}
2755848b8605Smrg	}
2756848b8605Smrg
2757848b8605Smrg	fs_size = bc.ndw*4;
2758848b8605Smrg
2759848b8605Smrg	/* Allocate the CSO. */
2760848b8605Smrg	shader = CALLOC_STRUCT(r600_fetch_shader);
2761848b8605Smrg	if (!shader) {
2762848b8605Smrg		r600_bytecode_clear(&bc);
2763848b8605Smrg		return NULL;
2764848b8605Smrg	}
2765848b8605Smrg
2766b8e80941Smrg	u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
2767b8e80941Smrg			     &shader->offset,
2768848b8605Smrg			     (struct pipe_resource**)&shader->buffer);
2769848b8605Smrg	if (!shader->buffer) {
2770848b8605Smrg		r600_bytecode_clear(&bc);
2771848b8605Smrg		FREE(shader);
2772848b8605Smrg		return NULL;
2773848b8605Smrg	}
2774848b8605Smrg
2775b8e80941Smrg	bytecode = r600_buffer_map_sync_with_rings
2776b8e80941Smrg		(&rctx->b, shader->buffer,
2777b8e80941Smrg		PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
2778848b8605Smrg	bytecode += shader->offset / 4;
2779848b8605Smrg
2780848b8605Smrg	if (R600_BIG_ENDIAN) {
2781848b8605Smrg		for (i = 0; i < fs_size / 4; ++i) {
2782848b8605Smrg			bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
2783848b8605Smrg		}
2784848b8605Smrg	} else {
2785848b8605Smrg		memcpy(bytecode, bc.bytecode, fs_size);
2786848b8605Smrg	}
2787b8e80941Smrg	rctx->b.ws->buffer_unmap(shader->buffer->buf);
2788848b8605Smrg
2789848b8605Smrg	r600_bytecode_clear(&bc);
2790848b8605Smrg	return shader;
2791848b8605Smrg}
2792848b8605Smrg
2793848b8605Smrgvoid r600_bytecode_alu_read(struct r600_bytecode *bc,
2794848b8605Smrg		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
2795848b8605Smrg{
2796848b8605Smrg	/* WORD0 */
2797848b8605Smrg	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
2798848b8605Smrg	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
2799848b8605Smrg	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
2800848b8605Smrg	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
2801848b8605Smrg	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
2802848b8605Smrg	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
2803848b8605Smrg	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
2804848b8605Smrg	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
2805848b8605Smrg	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
2806848b8605Smrg	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
2807848b8605Smrg	alu->last = G_SQ_ALU_WORD0_LAST(word0);
2808848b8605Smrg
2809848b8605Smrg	/* WORD1 */
2810848b8605Smrg	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
2811848b8605Smrg	if (alu->bank_swizzle)
2812848b8605Smrg		alu->bank_swizzle_force = alu->bank_swizzle;
2813848b8605Smrg	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
2814848b8605Smrg	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
2815848b8605Smrg	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
2816848b8605Smrg	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
2817848b8605Smrg	if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
2818848b8605Smrg	{
2819848b8605Smrg		alu->is_op3 = 1;
2820848b8605Smrg		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
2821848b8605Smrg		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
2822848b8605Smrg		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
2823848b8605Smrg		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
2824848b8605Smrg		alu->op = r600_isa_alu_by_opcode(bc->isa,
2825848b8605Smrg				G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1);
2826848b8605Smrg
2827848b8605Smrg	}
2828848b8605Smrg	else /*ALU_DWORD1_OP2*/
2829848b8605Smrg	{
2830848b8605Smrg		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
2831848b8605Smrg		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
2832848b8605Smrg		alu->op = r600_isa_alu_by_opcode(bc->isa,
2833848b8605Smrg				G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0);
2834848b8605Smrg		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
2835848b8605Smrg		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
2836848b8605Smrg		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
2837848b8605Smrg		alu->execute_mask =
2838848b8605Smrg			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
2839848b8605Smrg	}
2840848b8605Smrg}
2841848b8605Smrg
#if 0
/*
 * Compiled out: decode the two dwords of an export/alloc CF instruction
 * into *output.  bc is used only for its isa pointer when translating the
 * CF opcode.
 */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
	/* WORD0 */
	output->type       = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
	output->gpr        = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
	output->elem_size  = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);

	/* WORD1: swizzle selects */
	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);

	/* WORD1: buffer variant fields */
	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
	output->comp_mask  = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);

	/* WORD1: control fields */
	output->op = r600_isa_cf_by_opcode(bc->isa,
			G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0);
	output->burst_count    = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
	output->barrier        = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
}
#endif
2864