r600_shader.c revision 3464ebd5
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39/* CAYMAN notes
40Why CAYMAN got loops for lots of instructions is explained here.
41
42-These 8xx t-slot only ops are implemented in all vector slots.
43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44These 8xx t-slot only opcodes become vector ops, with all four
45slots expecting the arguments on sources a and b. Result is
46broadcast to all channels.
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48These 8xx t-slot only opcodes become vector ops in the z, y, and
49x slots.
50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52SQRT_IEEE/_64
53SIN/COS
54The w slot may have an independent co-issued operation, or if the
55result is required to be in the w slot, the opcode above may be
56issued in the w slot as well.
57The compiler must issue the source argument to slots z, y, and x
58*/
59
60
61int r600_find_vs_semantic_index(struct r600_shader *vs,
62				struct r600_shader *ps, int id)
63{
64	struct r600_shader_io *input = &ps->input[id];
65
66	for (int i = 0; i < vs->noutput; i++) {
67		if (input->name == vs->output[i].name &&
68			input->sid == vs->output[i].sid) {
69			return i - 1;
70		}
71	}
72	return 0;
73}
74
75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76{
77	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78	struct r600_shader *rshader = &shader->shader;
79	uint32_t *ptr;
80	int	i;
81
82	/* copy new shader */
83	if (shader->bo == NULL) {
84		/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86		if (shader->bo == NULL) {
87			return -ENOMEM;
88		}
89		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
90		if (R600_BIG_ENDIAN) {
91			for (i = 0; i < rshader->bc.ndw; ++i) {
92				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93			}
94		} else {
95			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96		}
97		r600_bo_unmap(rctx->radeon, shader->bo);
98	}
99	/* build state */
100	switch (rshader->processor_type) {
101	case TGSI_PROCESSOR_VERTEX:
102		if (rshader->family >= CHIP_CEDAR) {
103			evergreen_pipe_shader_vs(ctx, shader);
104		} else {
105			r600_pipe_shader_vs(ctx, shader);
106		}
107		break;
108	case TGSI_PROCESSOR_FRAGMENT:
109		if (rshader->family >= CHIP_CEDAR) {
110			evergreen_pipe_shader_ps(ctx, shader);
111		} else {
112			r600_pipe_shader_ps(ctx, shader);
113		}
114		break;
115	default:
116		return -EINVAL;
117	}
118	return 0;
119}
120
121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124{
125	static int dump_shaders = -1;
126	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127	int r;
128
129	/* Would like some magic "get_bool_option_once" routine.
130	*/
131	if (dump_shaders == -1)
132		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134	if (dump_shaders) {
135		fprintf(stderr, "--------------------------------------------------------------\n");
136		tgsi_dump(shader->tokens, 0);
137	}
138	shader->shader.family = r600_get_family(rctx->radeon);
139	r = r600_shader_from_tgsi(rctx, shader);
140	if (r) {
141		R600_ERR("translation from TGSI failed !\n");
142		return r;
143	}
144	r = r600_bc_build(&shader->shader.bc);
145	if (r) {
146		R600_ERR("building bytecode failed !\n");
147		return r;
148	}
149	if (dump_shaders) {
150		r600_bc_dump(&shader->shader.bc);
151		fprintf(stderr, "______________________________________________________________\n");
152	}
153	return r600_pipe_shader(ctx, shader);
154}
155
156void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157{
158	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
159
160	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
161	r600_bc_clear(&shader->shader.bc);
162
163	memset(&shader->shader,0,sizeof(struct r600_shader));
164}
165
166/*
167 * tgsi -> r600 shader
168 */
struct r600_shader_tgsi_instruction;

/* One TGSI source operand after translation by tgsi_src(). */
struct r600_shader_src {
	unsigned sel;		/* register / special source selector */
	unsigned swizzle[4];	/* source channel read for x, y, z, w */
	unsigned neg;		/* negate modifier */
	unsigned abs;		/* absolute-value modifier */
	unsigned rel;		/* relative (indirect) addressing mode */
	uint32_t value[4];	/* literal values, one per channel */
};
179
180struct r600_shader_ctx {
181	struct tgsi_shader_info			info;
182	struct tgsi_parse_context		parse;
183	const struct tgsi_token			*tokens;
184	unsigned				type;
185	unsigned				file_offset[TGSI_FILE_COUNT];
186	unsigned				temp_reg;
187	unsigned				ar_reg;
188	struct r600_shader_tgsi_instruction	*inst_info;
189	struct r600_bc				*bc;
190	struct r600_shader			*shader;
191	struct r600_shader_src			src[4];
192	u32					*literals;
193	u32					nliterals;
194	u32					max_driver_temp_used;
195	/* needed for evergreen interpolation */
196	boolean                                 input_centroid;
197	boolean                                 input_linear;
198	boolean                                 input_perspective;
199	int					num_interp_gpr;
200};
201
/* One entry of the per-chip TGSI opcode translation tables. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;	/* TGSI opcode this entry describes */
	unsigned is_op3;	/* NOTE(review): presumably marks three-source ALU ops - confirm */
	unsigned r600_opcode;	/* hardware opcode placed into the ALU instruction */
	/* handler that emits the bytecode for the current instruction */
	int (*process)(struct r600_shader_ctx *ctx);
};
208
209static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
210static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
211
212static int tgsi_is_supported(struct r600_shader_ctx *ctx)
213{
214	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
215	int j;
216
217	if (i->Instruction.NumDstRegs > 1) {
218		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
219		return -EINVAL;
220	}
221	if (i->Instruction.Predicate) {
222		R600_ERR("predicate unsupported\n");
223		return -EINVAL;
224	}
225#if 0
226	if (i->Instruction.Label) {
227		R600_ERR("label unsupported\n");
228		return -EINVAL;
229	}
230#endif
231	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
232		if (i->Src[j].Register.Dimension) {
233			R600_ERR("unsupported src %d (dimension %d)\n", j,
234				 i->Src[j].Register.Dimension);
235			return -EINVAL;
236		}
237	}
238	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
239		if (i->Dst[j].Register.Dimension) {
240			R600_ERR("unsupported dst (dimension)\n");
241			return -EINVAL;
242		}
243	}
244	return 0;
245}
246
247static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
248{
249	int i, r;
250	struct r600_bc_alu alu;
251	int gpr = 0, base_chan = 0;
252	int ij_index = 0;
253
254	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
255		ij_index = 0;
256		if (ctx->shader->input[input].centroid)
257			ij_index++;
258	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
259		ij_index = 0;
260		/* if we have perspective add one */
261		if (ctx->input_perspective)  {
262			ij_index++;
263			/* if we have perspective centroid */
264			if (ctx->input_centroid)
265				ij_index++;
266		}
267		if (ctx->shader->input[input].centroid)
268			ij_index++;
269	}
270
271	/* work out gpr and base_chan from index */
272	gpr = ij_index / 2;
273	base_chan = (2 * (ij_index % 2)) + 1;
274
275	for (i = 0; i < 8; i++) {
276		memset(&alu, 0, sizeof(struct r600_bc_alu));
277
278		if (i < 4)
279			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
280		else
281			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
282
283		if ((i > 1) && (i < 6)) {
284			alu.dst.sel = ctx->shader->input[input].gpr;
285			alu.dst.write = 1;
286		}
287
288		alu.dst.chan = i % 4;
289
290		alu.src[0].sel = gpr;
291		alu.src[0].chan = (base_chan - (i % 2));
292
293		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
294
295		alu.bank_swizzle_force = SQ_ALU_VEC_210;
296		if ((i % 4) == 3)
297			alu.last = 1;
298		r = r600_bc_add_alu(ctx->bc, &alu);
299		if (r)
300			return r;
301	}
302	return 0;
303}
304
305
306static int tgsi_declaration(struct r600_shader_ctx *ctx)
307{
308	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
309	unsigned i;
310	int r;
311
312	switch (d->Declaration.File) {
313	case TGSI_FILE_INPUT:
314		i = ctx->shader->ninput++;
315		ctx->shader->input[i].name = d->Semantic.Name;
316		ctx->shader->input[i].sid = d->Semantic.Index;
317		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
318		ctx->shader->input[i].centroid = d->Declaration.Centroid;
319		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
320		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) {
321			/* turn input into interpolate on EG */
322			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
323				if (ctx->shader->input[i].interpolate > 0) {
324					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
325					evergreen_interp_alu(ctx, i);
326				}
327			}
328		}
329		break;
330	case TGSI_FILE_OUTPUT:
331		i = ctx->shader->noutput++;
332		ctx->shader->output[i].name = d->Semantic.Name;
333		ctx->shader->output[i].sid = d->Semantic.Index;
334		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
335		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
336		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
337			/* these don't count as vertex param exports */
338			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
339			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
340				ctx->shader->npos++;
341		}
342		break;
343	case TGSI_FILE_CONSTANT:
344	case TGSI_FILE_TEMPORARY:
345	case TGSI_FILE_SAMPLER:
346	case TGSI_FILE_ADDRESS:
347		break;
348
349	case TGSI_FILE_SYSTEM_VALUE:
350		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
351			struct r600_bc_alu alu;
352			memset(&alu, 0, sizeof(struct r600_bc_alu));
353
354			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
355			alu.src[0].sel = 0;
356			alu.src[0].chan = 3;
357
358			alu.dst.sel = 0;
359			alu.dst.chan = 3;
360			alu.dst.write = 1;
361			alu.last = 1;
362
363			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
364				return r;
365			break;
366		}
367
368	default:
369		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
370		return -EINVAL;
371	}
372	return 0;
373}
374
375static int r600_get_temp(struct r600_shader_ctx *ctx)
376{
377	return ctx->temp_reg + ctx->max_driver_temp_used++;
378}
379
380/*
381 * for evergreen we need to scan the shader to find the number of GPRs we need to
382 * reserve for interpolation.
383 *
384 * we need to know if we are going to emit
385 * any centroid inputs
386 * if perspective and linear are required
387*/
388static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
389{
390	int i;
391	int num_baryc;
392
393	ctx->input_linear = FALSE;
394	ctx->input_perspective = FALSE;
395	ctx->input_centroid = FALSE;
396	ctx->num_interp_gpr = 1;
397
398	/* any centroid inputs */
399	for (i = 0; i < ctx->info.num_inputs; i++) {
400		/* skip position/face */
401		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
402		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
403			continue;
404		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
405			ctx->input_linear = TRUE;
406		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
407			ctx->input_perspective = TRUE;
408		if (ctx->info.input_centroid[i])
409			ctx->input_centroid = TRUE;
410	}
411
412	num_baryc = 0;
413	/* ignoring sample for now */
414	if (ctx->input_perspective)
415		num_baryc++;
416	if (ctx->input_linear)
417		num_baryc++;
418	if (ctx->input_centroid)
419		num_baryc *= 2;
420
421	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
422
423	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
424	return ctx->num_interp_gpr;
425}
426
427static void tgsi_src(struct r600_shader_ctx *ctx,
428		     const struct tgsi_full_src_register *tgsi_src,
429		     struct r600_shader_src *r600_src)
430{
431	memset(r600_src, 0, sizeof(*r600_src));
432	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
433	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
434	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
435	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
436	r600_src->neg = tgsi_src->Register.Negate;
437	r600_src->abs = tgsi_src->Register.Absolute;
438
439	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
440		int index;
441		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
442			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
443			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
444
445			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
446			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
447			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
448				return;
449		}
450		index = tgsi_src->Register.Index;
451		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
452		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
453	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
454		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
455		r600_src->swizzle[0] = 3;
456		r600_src->swizzle[1] = 3;
457		r600_src->swizzle[2] = 3;
458		r600_src->swizzle[3] = 3;
459		r600_src->sel = 0;
460	} else {
461		if (tgsi_src->Register.Indirect)
462			r600_src->rel = V_SQ_REL_RELATIVE;
463		r600_src->sel = tgsi_src->Register.Index;
464		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
465	}
466}
467
468static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
469{
470	struct r600_bc_vtx vtx;
471	unsigned int ar_reg;
472	int r;
473
474	if (offset) {
475		struct r600_bc_alu alu;
476
477		memset(&alu, 0, sizeof(alu));
478
479		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
480		alu.src[0].sel = ctx->ar_reg;
481
482		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
483		alu.src[1].value = offset;
484
485		alu.dst.sel = dst_reg;
486		alu.dst.write = 1;
487		alu.last = 1;
488
489		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
490			return r;
491
492		ar_reg = dst_reg;
493	} else {
494		ar_reg = ctx->ar_reg;
495	}
496
497	memset(&vtx, 0, sizeof(vtx));
498	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
499	vtx.src_gpr = ar_reg;
500	vtx.mega_fetch_count = 16;
501	vtx.dst_gpr = dst_reg;
502	vtx.dst_sel_x = 0;		/* SEL_X */
503	vtx.dst_sel_y = 1;		/* SEL_Y */
504	vtx.dst_sel_z = 2;		/* SEL_Z */
505	vtx.dst_sel_w = 3;		/* SEL_W */
506	vtx.data_format = FMT_32_32_32_32_FLOAT;
507	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
508	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
509	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
510	vtx.endian = r600_endian_swap(32);
511
512	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
513		return r;
514
515	return 0;
516}
517
518static int tgsi_split_constant(struct r600_shader_ctx *ctx)
519{
520	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
521	struct r600_bc_alu alu;
522	int i, j, k, nconst, r;
523
524	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
525		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
526			nconst++;
527		}
528		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
529	}
530	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
531		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
532			continue;
533		}
534
535		if (ctx->src[i].rel) {
536			int treg = r600_get_temp(ctx);
537			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
538				return r;
539
540			ctx->src[i].sel = treg;
541			ctx->src[i].rel = 0;
542			j--;
543		} else if (j > 0) {
544			int treg = r600_get_temp(ctx);
545			for (k = 0; k < 4; k++) {
546				memset(&alu, 0, sizeof(struct r600_bc_alu));
547				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
548				alu.src[0].sel = ctx->src[i].sel;
549				alu.src[0].chan = k;
550				alu.src[0].rel = ctx->src[i].rel;
551				alu.dst.sel = treg;
552				alu.dst.chan = k;
553				alu.dst.write = 1;
554				if (k == 3)
555					alu.last = 1;
556				r = r600_bc_add_alu(ctx->bc, &alu);
557				if (r)
558					return r;
559			}
560			ctx->src[i].sel = treg;
561			ctx->src[i].rel =0;
562			j--;
563		}
564	}
565	return 0;
566}
567
568/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
569static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
570{
571	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
572	struct r600_bc_alu alu;
573	int i, j, k, nliteral, r;
574
575	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
576		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
577			nliteral++;
578		}
579	}
580	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
581		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
582			int treg = r600_get_temp(ctx);
583			for (k = 0; k < 4; k++) {
584				memset(&alu, 0, sizeof(struct r600_bc_alu));
585				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
586				alu.src[0].sel = ctx->src[i].sel;
587				alu.src[0].chan = k;
588				alu.src[0].value = ctx->src[i].value[k];
589				alu.dst.sel = treg;
590				alu.dst.chan = k;
591				alu.dst.write = 1;
592				if (k == 3)
593					alu.last = 1;
594				r = r600_bc_add_alu(ctx->bc, &alu);
595				if (r)
596					return r;
597			}
598			ctx->src[i].sel = treg;
599			j--;
600		}
601	}
602	return 0;
603}
604
605static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
606{
607	struct r600_shader *shader = &pipeshader->shader;
608	struct tgsi_token *tokens = pipeshader->tokens;
609	struct tgsi_full_immediate *immediate;
610	struct tgsi_full_property *property;
611	struct r600_shader_ctx ctx;
612	struct r600_bc_output output[32];
613	unsigned output_done, noutput;
614	unsigned opcode;
615	int i, j, r = 0, pos0;
616
617	ctx.bc = &shader->bc;
618	ctx.shader = shader;
619	r = r600_bc_init(ctx.bc, shader->family);
620	if (r)
621		return r;
622	ctx.tokens = tokens;
623	tgsi_scan_shader(tokens, &ctx.info);
624	tgsi_parse_init(&ctx.parse, tokens);
625	ctx.type = ctx.parse.FullHeader.Processor.Processor;
626	shader->processor_type = ctx.type;
627	ctx.bc->type = shader->processor_type;
628
629	shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
630		((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
631
632	shader->nr_cbufs = rctx->nr_cbufs;
633
634	/* register allocations */
635	/* Values [0,127] correspond to GPR[0..127].
636	 * Values [128,159] correspond to constant buffer bank 0
637	 * Values [160,191] correspond to constant buffer bank 1
638	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
639	 * Values [256,287] correspond to constant buffer bank 2 (EG)
640	 * Values [288,319] correspond to constant buffer bank 3 (EG)
641	 * Other special values are shown in the list below.
642	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
643	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
644	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
645	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
646	 * 248	SQ_ALU_SRC_0: special constant 0.0.
647	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
648	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
649	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
650	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
651	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
652	 * 254	SQ_ALU_SRC_PV: previous vector result.
653	 * 255	SQ_ALU_SRC_PS: previous scalar result.
654	 */
655	for (i = 0; i < TGSI_FILE_COUNT; i++) {
656		ctx.file_offset[i] = 0;
657	}
658	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
659		ctx.file_offset[TGSI_FILE_INPUT] = 1;
660		if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
661			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
662		} else {
663			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
664		}
665	}
666	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
667		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
668	}
669	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
670						ctx.info.file_count[TGSI_FILE_INPUT];
671	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
672						ctx.info.file_count[TGSI_FILE_OUTPUT];
673
674	/* Outside the GPR range. This will be translated to one of the
675	 * kcache banks later. */
676	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
677
678	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
679	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
680			ctx.info.file_count[TGSI_FILE_TEMPORARY];
681	ctx.temp_reg = ctx.ar_reg + 1;
682
683	ctx.nliterals = 0;
684	ctx.literals = NULL;
685	shader->fs_write_all = FALSE;
686	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
687		tgsi_parse_token(&ctx.parse);
688		switch (ctx.parse.FullToken.Token.Type) {
689		case TGSI_TOKEN_TYPE_IMMEDIATE:
690			immediate = &ctx.parse.FullToken.FullImmediate;
691			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
692			if(ctx.literals == NULL) {
693				r = -ENOMEM;
694				goto out_err;
695			}
696			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
697			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
698			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
699			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
700			ctx.nliterals++;
701			break;
702		case TGSI_TOKEN_TYPE_DECLARATION:
703			r = tgsi_declaration(&ctx);
704			if (r)
705				goto out_err;
706			break;
707		case TGSI_TOKEN_TYPE_INSTRUCTION:
708			r = tgsi_is_supported(&ctx);
709			if (r)
710				goto out_err;
711			ctx.max_driver_temp_used = 0;
712			/* reserve first tmp for everyone */
713			r600_get_temp(&ctx);
714
715			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
716			if ((r = tgsi_split_constant(&ctx)))
717				goto out_err;
718			if ((r = tgsi_split_literal_constant(&ctx)))
719				goto out_err;
720			if (ctx.bc->chiprev == CHIPREV_CAYMAN)
721				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
722			else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN)
723				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
724			else
725				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
726			r = ctx.inst_info->process(&ctx);
727			if (r)
728				goto out_err;
729			break;
730		case TGSI_TOKEN_TYPE_PROPERTY:
731			property = &ctx.parse.FullToken.FullProperty;
732			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
733				if (property->u[0].Data == 1)
734					shader->fs_write_all = TRUE;
735			}
736			break;
737		default:
738			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
739			r = -EINVAL;
740			goto out_err;
741		}
742	}
743
744	noutput = shader->noutput;
745
746	/* clamp color outputs */
747	if (shader->clamp_color) {
748		for (i = 0; i < noutput; i++) {
749			if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
750				shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
751
752				int j;
753				for (j = 0; j < 4; j++) {
754					struct r600_bc_alu alu;
755					memset(&alu, 0, sizeof(struct r600_bc_alu));
756
757					/* MOV_SAT R, R */
758					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
759					alu.dst.sel = shader->output[i].gpr;
760					alu.dst.chan = j;
761					alu.dst.write = 1;
762					alu.dst.clamp = 1;
763					alu.src[0].sel = alu.dst.sel;
764					alu.src[0].chan = j;
765
766					if (j == 3) {
767						alu.last = 1;
768					}
769					r = r600_bc_add_alu(ctx.bc, &alu);
770					if (r)
771						return r;
772				}
773			}
774		}
775	}
776
777	/* export output */
778	j = 0;
779	for (i = 0, pos0 = 0; i < noutput; i++) {
780		memset(&output[i], 0, sizeof(struct r600_bc_output));
781		output[i + j].gpr = shader->output[i].gpr;
782		output[i + j].elem_size = 3;
783		output[i + j].swizzle_x = 0;
784		output[i + j].swizzle_y = 1;
785		output[i + j].swizzle_z = 2;
786		output[i + j].swizzle_w = 3;
787		output[i + j].burst_count = 1;
788		output[i + j].barrier = 1;
789		output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
790		output[i + j].array_base = i - pos0;
791		output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
792		switch (ctx.type) {
793		case TGSI_PROCESSOR_VERTEX:
794			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
795				output[i + j].array_base = 60;
796				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
797				/* position doesn't count in array_base */
798				pos0++;
799			}
800			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
801				output[i + j].array_base = 61;
802				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
803				/* position doesn't count in array_base */
804				pos0++;
805			}
806			break;
807		case TGSI_PROCESSOR_FRAGMENT:
808			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
809				output[i + j].array_base = shader->output[i].sid;
810				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
811				if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) {
812					for (j = 1; j < shader->nr_cbufs; j++) {
813						memset(&output[i + j], 0, sizeof(struct r600_bc_output));
814						output[i + j].gpr = shader->output[i].gpr;
815						output[i + j].elem_size = 3;
816						output[i + j].swizzle_x = 0;
817						output[i + j].swizzle_y = 1;
818						output[i + j].swizzle_z = 2;
819						output[i + j].swizzle_w = 3;
820						output[i + j].burst_count = 1;
821						output[i + j].barrier = 1;
822						output[i + j].array_base = shader->output[i].sid + j;
823						output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
824						output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
825					}
826					j--;
827				}
828			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
829				output[i + j].array_base = 61;
830				output[i + j].swizzle_x = 2;
831				output[i + j].swizzle_y = 7;
832				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
833				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
834			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
835				output[i + j].array_base = 61;
836				output[i + j].swizzle_x = 7;
837				output[i + j].swizzle_y = 1;
838				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
839				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
840			} else {
841				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
842				r = -EINVAL;
843				goto out_err;
844			}
845			break;
846		default:
847			R600_ERR("unsupported processor type %d\n", ctx.type);
848			r = -EINVAL;
849			goto out_err;
850		}
851	}
852	noutput += j;
853	/* add fake param output for vertex shader if no param is exported */
854	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
855		for (i = 0, pos0 = 0; i < noutput; i++) {
856			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
857				pos0 = 1;
858				break;
859			}
860		}
861		if (!pos0) {
862			memset(&output[i], 0, sizeof(struct r600_bc_output));
863			output[i].gpr = 0;
864			output[i].elem_size = 3;
865			output[i].swizzle_x = 0;
866			output[i].swizzle_y = 1;
867			output[i].swizzle_z = 2;
868			output[i].swizzle_w = 3;
869			output[i].burst_count = 1;
870			output[i].barrier = 1;
871			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
872			output[i].array_base = 0;
873			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
874			noutput++;
875		}
876	}
877	/* add fake pixel export */
878	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
879		memset(&output[0], 0, sizeof(struct r600_bc_output));
880		output[0].gpr = 0;
881		output[0].elem_size = 3;
882		output[0].swizzle_x = 7;
883		output[0].swizzle_y = 7;
884		output[0].swizzle_z = 7;
885		output[0].swizzle_w = 7;
886		output[0].burst_count = 1;
887		output[0].barrier = 1;
888		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
889		output[0].array_base = 0;
890		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
891		noutput++;
892	}
893	/* set export done on last export of each type */
894	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
895		if (ctx.bc->chiprev < CHIPREV_CAYMAN) {
896			if (i == (noutput - 1)) {
897				output[i].end_of_program = 1;
898			}
899		}
900		if (!(output_done & (1 << output[i].type))) {
901			output_done |= (1 << output[i].type);
902			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
903		}
904	}
905	/* add output to bytecode */
906	for (i = 0; i < noutput; i++) {
907		r = r600_bc_add_output(ctx.bc, &output[i]);
908		if (r)
909			goto out_err;
910	}
911	/* add program end */
912	if (ctx.bc->chiprev == CHIPREV_CAYMAN)
913		cm_bc_add_cf_end(ctx.bc);
914
915	free(ctx.literals);
916	tgsi_parse_free(&ctx.parse);
917	return 0;
918out_err:
919	free(ctx.literals);
920	tgsi_parse_free(&ctx.parse);
921	return r;
922}
923
924static int tgsi_unsupported(struct r600_shader_ctx *ctx)
925{
926	R600_ERR("%s tgsi opcode unsupported\n",
927		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
928	return -EINVAL;
929}
930
/* Handler that emits nothing and always succeeds; `ctx` is not used. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;

	return 0;
}
935
936static void r600_bc_src(struct r600_bc_alu_src *bc_src,
937			const struct r600_shader_src *shader_src,
938			unsigned chan)
939{
940	bc_src->sel = shader_src->sel;
941	bc_src->chan = shader_src->swizzle[chan];
942	bc_src->neg = shader_src->neg;
943	bc_src->abs = shader_src->abs;
944	bc_src->rel = shader_src->rel;
945	bc_src->value = shader_src->value[bc_src->chan];
946}
947
948static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src)
949{
950	bc_src->abs = 1;
951	bc_src->neg = 0;
952}
953
954static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
955{
956	bc_src->neg = !bc_src->neg;
957}
958
959static void tgsi_dst(struct r600_shader_ctx *ctx,
960		     const struct tgsi_full_dst_register *tgsi_dst,
961		     unsigned swizzle,
962		     struct r600_bc_alu_dst *r600_dst)
963{
964	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
965
966	r600_dst->sel = tgsi_dst->Register.Index;
967	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
968	r600_dst->chan = swizzle;
969	r600_dst->write = 1;
970	if (tgsi_dst->Register.Indirect)
971		r600_dst->rel = V_SQ_REL_RELATIVE;
972	if (inst->Instruction.Saturate) {
973		r600_dst->clamp = 1;
974	}
975}
976
/**
 * Find the highest channel enabled in a 4-bit write mask.
 *
 * Only bits 0..3 are examined. An empty mask yields 0, the same value as a
 * mask that enables just channel 0.
 *
 * @param writemask  channel write mask (bit i enables channel i)
 * @return index (0..3) of the last enabled channel, or 0 if none is set
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* walk down from W; stop at the first enabled channel or at X */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			break;
	}
	return chan;
}
988
989static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
990{
991	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
992	struct r600_bc_alu alu;
993	int i, j, r;
994	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
995
996	for (i = 0; i < lasti + 1; i++) {
997		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
998			continue;
999
1000		memset(&alu, 0, sizeof(struct r600_bc_alu));
1001		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1002
1003		alu.inst = ctx->inst_info->r600_opcode;
1004		if (!swap) {
1005			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1006				r600_bc_src(&alu.src[j], &ctx->src[j], i);
1007			}
1008		} else {
1009			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1010			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1011		}
1012		/* handle some special cases */
1013		switch (ctx->inst_info->tgsi_opcode) {
1014		case TGSI_OPCODE_SUB:
1015			r600_bc_src_toggle_neg(&alu.src[1]);
1016			break;
1017		case TGSI_OPCODE_ABS:
1018			r600_bc_src_set_abs(&alu.src[0]);
1019			break;
1020		default:
1021			break;
1022		}
1023		if (i == lasti) {
1024			alu.last = 1;
1025		}
1026		r = r600_bc_add_alu(ctx->bc, &alu);
1027		if (r)
1028			return r;
1029	}
1030	return 0;
1031}
1032
/**
 * Per-channel ALU translation with sources passed through in TGSI order.
 *
 * @param ctx  translation context
 * @return result of tgsi_op2_s() with swapping disabled
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1037
/**
 * Per-channel ALU translation with the first two sources exchanged.
 *
 * @param ctx  translation context
 * @return result of tgsi_op2_s() with swapping enabled
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1042
1043static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1044{
1045	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1046	int i, j, r;
1047	struct r600_bc_alu alu;
1048	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1049
1050	for (i = 0 ; i < last_slot; i++) {
1051		memset(&alu, 0, sizeof(struct r600_bc_alu));
1052		alu.inst = ctx->inst_info->r600_opcode;
1053		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1054			r600_bc_src(&alu.src[j], &ctx->src[j], 0);
1055		}
1056		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1057		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1058
1059		if (i == last_slot - 1)
1060			alu.last = 1;
1061		r = r600_bc_add_alu(ctx->bc, &alu);
1062		if (r)
1063			return r;
1064	}
1065	return 0;
1066}
1067
1068/*
1069 * r600 - trunc to -PI..PI range
1070 * r700 - normalize by dividing by 2PI
1071 * see fdo bug 27901
1072 */
1073static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1074{
1075	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1076	static float double_pi = 3.1415926535 * 2;
1077	static float neg_pi = -3.1415926535;
1078
1079	int r;
1080	struct r600_bc_alu alu;
1081
1082	memset(&alu, 0, sizeof(struct r600_bc_alu));
1083	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1084	alu.is_op3 = 1;
1085
1086	alu.dst.chan = 0;
1087	alu.dst.sel = ctx->temp_reg;
1088	alu.dst.write = 1;
1089
1090	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1091
1092	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1093	alu.src[1].chan = 0;
1094	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1095	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1096	alu.src[2].chan = 0;
1097	alu.last = 1;
1098	r = r600_bc_add_alu(ctx->bc, &alu);
1099	if (r)
1100		return r;
1101
1102	memset(&alu, 0, sizeof(struct r600_bc_alu));
1103	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1104
1105	alu.dst.chan = 0;
1106	alu.dst.sel = ctx->temp_reg;
1107	alu.dst.write = 1;
1108
1109	alu.src[0].sel = ctx->temp_reg;
1110	alu.src[0].chan = 0;
1111	alu.last = 1;
1112	r = r600_bc_add_alu(ctx->bc, &alu);
1113	if (r)
1114		return r;
1115
1116	memset(&alu, 0, sizeof(struct r600_bc_alu));
1117	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1118	alu.is_op3 = 1;
1119
1120	alu.dst.chan = 0;
1121	alu.dst.sel = ctx->temp_reg;
1122	alu.dst.write = 1;
1123
1124	alu.src[0].sel = ctx->temp_reg;
1125	alu.src[0].chan = 0;
1126
1127	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1128	alu.src[1].chan = 0;
1129	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1130	alu.src[2].chan = 0;
1131
1132	if (ctx->bc->chiprev == CHIPREV_R600) {
1133		alu.src[1].value = *(uint32_t *)&double_pi;
1134		alu.src[2].value = *(uint32_t *)&neg_pi;
1135	} else {
1136		alu.src[1].sel = V_SQ_ALU_SRC_1;
1137		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1138		alu.src[2].neg = 1;
1139	}
1140
1141	alu.last = 1;
1142	r = r600_bc_add_alu(ctx->bc, &alu);
1143	if (r)
1144		return r;
1145	return 0;
1146}
1147
1148static int cayman_trig(struct r600_shader_ctx *ctx)
1149{
1150	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1151	struct r600_bc_alu alu;
1152	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1153	int i, r;
1154
1155	r = tgsi_setup_trig(ctx);
1156	if (r)
1157		return r;
1158
1159
1160	for (i = 0; i < last_slot; i++) {
1161		memset(&alu, 0, sizeof(struct r600_bc_alu));
1162		alu.inst = ctx->inst_info->r600_opcode;
1163		alu.dst.chan = i;
1164
1165		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1166		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1167
1168		alu.src[0].sel = ctx->temp_reg;
1169		alu.src[0].chan = 0;
1170		if (i == last_slot - 1)
1171			alu.last = 1;
1172		r = r600_bc_add_alu(ctx->bc, &alu);
1173		if (r)
1174			return r;
1175	}
1176	return 0;
1177}
1178
1179static int tgsi_trig(struct r600_shader_ctx *ctx)
1180{
1181	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1182	struct r600_bc_alu alu;
1183	int i, r;
1184	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1185
1186	r = tgsi_setup_trig(ctx);
1187	if (r)
1188		return r;
1189
1190	memset(&alu, 0, sizeof(struct r600_bc_alu));
1191	alu.inst = ctx->inst_info->r600_opcode;
1192	alu.dst.chan = 0;
1193	alu.dst.sel = ctx->temp_reg;
1194	alu.dst.write = 1;
1195
1196	alu.src[0].sel = ctx->temp_reg;
1197	alu.src[0].chan = 0;
1198	alu.last = 1;
1199	r = r600_bc_add_alu(ctx->bc, &alu);
1200	if (r)
1201		return r;
1202
1203	/* replicate result */
1204	for (i = 0; i < lasti + 1; i++) {
1205		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1206			continue;
1207
1208		memset(&alu, 0, sizeof(struct r600_bc_alu));
1209		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1210
1211		alu.src[0].sel = ctx->temp_reg;
1212		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1213		if (i == lasti)
1214			alu.last = 1;
1215		r = r600_bc_add_alu(ctx->bc, &alu);
1216		if (r)
1217			return r;
1218	}
1219	return 0;
1220}
1221
1222static int tgsi_scs(struct r600_shader_ctx *ctx)
1223{
1224	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1225	struct r600_bc_alu alu;
1226	int i, r;
1227
1228	/* We'll only need the trig stuff if we are going to write to the
1229	 * X or Y components of the destination vector.
1230	 */
1231	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1232		r = tgsi_setup_trig(ctx);
1233		if (r)
1234			return r;
1235	}
1236
1237	/* dst.x = COS */
1238	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1239		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1240			for (i = 0 ; i < 3; i++) {
1241				memset(&alu, 0, sizeof(struct r600_bc_alu));
1242				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1243				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1244
1245				if (i == 0)
1246					alu.dst.write = 1;
1247				else
1248					alu.dst.write = 0;
1249				alu.src[0].sel = ctx->temp_reg;
1250				alu.src[0].chan = 0;
1251				if (i == 2)
1252					alu.last = 1;
1253				r = r600_bc_add_alu(ctx->bc, &alu);
1254				if (r)
1255					return r;
1256			}
1257		} else {
1258			memset(&alu, 0, sizeof(struct r600_bc_alu));
1259			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1260			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1261
1262			alu.src[0].sel = ctx->temp_reg;
1263			alu.src[0].chan = 0;
1264			alu.last = 1;
1265			r = r600_bc_add_alu(ctx->bc, &alu);
1266			if (r)
1267				return r;
1268		}
1269	}
1270
1271	/* dst.y = SIN */
1272	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1273		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1274			for (i = 0 ; i < 3; i++) {
1275				memset(&alu, 0, sizeof(struct r600_bc_alu));
1276				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1277				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1278				if (i == 1)
1279					alu.dst.write = 1;
1280				else
1281					alu.dst.write = 0;
1282				alu.src[0].sel = ctx->temp_reg;
1283				alu.src[0].chan = 0;
1284				if (i == 2)
1285					alu.last = 1;
1286				r = r600_bc_add_alu(ctx->bc, &alu);
1287				if (r)
1288					return r;
1289			}
1290		} else {
1291			memset(&alu, 0, sizeof(struct r600_bc_alu));
1292			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1293			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1294
1295			alu.src[0].sel = ctx->temp_reg;
1296			alu.src[0].chan = 0;
1297			alu.last = 1;
1298			r = r600_bc_add_alu(ctx->bc, &alu);
1299			if (r)
1300				return r;
1301		}
1302	}
1303
1304	/* dst.z = 0.0; */
1305	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1306		memset(&alu, 0, sizeof(struct r600_bc_alu));
1307
1308		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1309
1310		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1311
1312		alu.src[0].sel = V_SQ_ALU_SRC_0;
1313		alu.src[0].chan = 0;
1314
1315		alu.last = 1;
1316
1317		r = r600_bc_add_alu(ctx->bc, &alu);
1318		if (r)
1319			return r;
1320	}
1321
1322	/* dst.w = 1.0; */
1323	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1324		memset(&alu, 0, sizeof(struct r600_bc_alu));
1325
1326		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1327
1328		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1329
1330		alu.src[0].sel = V_SQ_ALU_SRC_1;
1331		alu.src[0].chan = 0;
1332
1333		alu.last = 1;
1334
1335		r = r600_bc_add_alu(ctx->bc, &alu);
1336		if (r)
1337			return r;
1338	}
1339
1340	return 0;
1341}
1342
1343static int tgsi_kill(struct r600_shader_ctx *ctx)
1344{
1345	struct r600_bc_alu alu;
1346	int i, r;
1347
1348	for (i = 0; i < 4; i++) {
1349		memset(&alu, 0, sizeof(struct r600_bc_alu));
1350		alu.inst = ctx->inst_info->r600_opcode;
1351
1352		alu.dst.chan = i;
1353
1354		alu.src[0].sel = V_SQ_ALU_SRC_0;
1355
1356		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1357			alu.src[1].sel = V_SQ_ALU_SRC_1;
1358			alu.src[1].neg = 1;
1359		} else {
1360			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1361		}
1362		if (i == 3) {
1363			alu.last = 1;
1364		}
1365		r = r600_bc_add_alu(ctx->bc, &alu);
1366		if (r)
1367			return r;
1368	}
1369
1370	/* kill must be last in ALU */
1371	ctx->bc->force_add_cf = 1;
1372	ctx->shader->uses_kill = TRUE;
1373	return 0;
1374}
1375
1376static int tgsi_lit(struct r600_shader_ctx *ctx)
1377{
1378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1379	struct r600_bc_alu alu;
1380	int r;
1381
1382	/* tmp.x = max(src.y, 0.0) */
1383	memset(&alu, 0, sizeof(struct r600_bc_alu));
1384	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1385	r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1386	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1387	alu.src[1].chan = 1;
1388
1389	alu.dst.sel = ctx->temp_reg;
1390	alu.dst.chan = 0;
1391	alu.dst.write = 1;
1392
1393	alu.last = 1;
1394	r = r600_bc_add_alu(ctx->bc, &alu);
1395	if (r)
1396		return r;
1397
1398	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1399	{
1400		int chan;
1401		int sel;
1402		int i;
1403
1404		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1405			for (i = 0; i < 3; i++) {
1406				/* tmp.z = log(tmp.x) */
1407				memset(&alu, 0, sizeof(struct r600_bc_alu));
1408				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1409				alu.src[0].sel = ctx->temp_reg;
1410				alu.src[0].chan = 0;
1411				alu.dst.sel = ctx->temp_reg;
1412				alu.dst.chan = i;
1413				if (i == 2) {
1414					alu.dst.write = 1;
1415					alu.last = 1;
1416				} else
1417					alu.dst.write = 0;
1418
1419				r = r600_bc_add_alu(ctx->bc, &alu);
1420				if (r)
1421					return r;
1422			}
1423		} else {
1424			/* tmp.z = log(tmp.x) */
1425			memset(&alu, 0, sizeof(struct r600_bc_alu));
1426			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1427			alu.src[0].sel = ctx->temp_reg;
1428			alu.src[0].chan = 0;
1429			alu.dst.sel = ctx->temp_reg;
1430			alu.dst.chan = 2;
1431			alu.dst.write = 1;
1432			alu.last = 1;
1433			r = r600_bc_add_alu(ctx->bc, &alu);
1434			if (r)
1435				return r;
1436		}
1437
1438		chan = alu.dst.chan;
1439		sel = alu.dst.sel;
1440
1441		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1442		memset(&alu, 0, sizeof(struct r600_bc_alu));
1443		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1444		alu.src[0].sel  = sel;
1445		alu.src[0].chan = chan;
1446		r600_bc_src(&alu.src[1], &ctx->src[0], 3);
1447		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1448		alu.dst.sel = ctx->temp_reg;
1449		alu.dst.chan = 0;
1450		alu.dst.write = 1;
1451		alu.is_op3 = 1;
1452		alu.last = 1;
1453		r = r600_bc_add_alu(ctx->bc, &alu);
1454		if (r)
1455			return r;
1456
1457		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1458			for (i = 0; i < 3; i++) {
1459				/* dst.z = exp(tmp.x) */
1460				memset(&alu, 0, sizeof(struct r600_bc_alu));
1461				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1462				alu.src[0].sel = ctx->temp_reg;
1463				alu.src[0].chan = 0;
1464				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1465				if (i == 2) {
1466					alu.dst.write = 1;
1467					alu.last = 1;
1468				} else
1469					alu.dst.write = 0;
1470				r = r600_bc_add_alu(ctx->bc, &alu);
1471				if (r)
1472					return r;
1473			}
1474		} else {
1475			/* dst.z = exp(tmp.x) */
1476			memset(&alu, 0, sizeof(struct r600_bc_alu));
1477			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1478			alu.src[0].sel = ctx->temp_reg;
1479			alu.src[0].chan = 0;
1480			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1481			alu.last = 1;
1482			r = r600_bc_add_alu(ctx->bc, &alu);
1483			if (r)
1484				return r;
1485		}
1486	}
1487
1488	/* dst.x, <- 1.0  */
1489	memset(&alu, 0, sizeof(struct r600_bc_alu));
1490	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1491	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1492	alu.src[0].chan = 0;
1493	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1494	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1495	r = r600_bc_add_alu(ctx->bc, &alu);
1496	if (r)
1497		return r;
1498
1499	/* dst.y = max(src.x, 0.0) */
1500	memset(&alu, 0, sizeof(struct r600_bc_alu));
1501	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1502	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1503	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1504	alu.src[1].chan = 0;
1505	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1506	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1507	r = r600_bc_add_alu(ctx->bc, &alu);
1508	if (r)
1509		return r;
1510
1511	/* dst.w, <- 1.0  */
1512	memset(&alu, 0, sizeof(struct r600_bc_alu));
1513	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1514	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1515	alu.src[0].chan = 0;
1516	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1517	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1518	alu.last = 1;
1519	r = r600_bc_add_alu(ctx->bc, &alu);
1520	if (r)
1521		return r;
1522
1523	return 0;
1524}
1525
1526static int tgsi_rsq(struct r600_shader_ctx *ctx)
1527{
1528	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1529	struct r600_bc_alu alu;
1530	int i, r;
1531
1532	memset(&alu, 0, sizeof(struct r600_bc_alu));
1533
1534	/* FIXME:
1535	 * For state trackers other than OpenGL, we'll want to use
1536	 * _RECIPSQRT_IEEE instead.
1537	 */
1538	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1539
1540	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1541		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1542		r600_bc_src_set_abs(&alu.src[i]);
1543	}
1544	alu.dst.sel = ctx->temp_reg;
1545	alu.dst.write = 1;
1546	alu.last = 1;
1547	r = r600_bc_add_alu(ctx->bc, &alu);
1548	if (r)
1549		return r;
1550	/* replicate result */
1551	return tgsi_helper_tempx_replicate(ctx);
1552}
1553
1554static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1555{
1556	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1557	struct r600_bc_alu alu;
1558	int i, r;
1559
1560	for (i = 0; i < 4; i++) {
1561		memset(&alu, 0, sizeof(struct r600_bc_alu));
1562		alu.src[0].sel = ctx->temp_reg;
1563		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1564		alu.dst.chan = i;
1565		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1566		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1567		if (i == 3)
1568			alu.last = 1;
1569		r = r600_bc_add_alu(ctx->bc, &alu);
1570		if (r)
1571			return r;
1572	}
1573	return 0;
1574}
1575
1576static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1577{
1578	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1579	struct r600_bc_alu alu;
1580	int i, r;
1581
1582	memset(&alu, 0, sizeof(struct r600_bc_alu));
1583	alu.inst = ctx->inst_info->r600_opcode;
1584	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1585		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1586	}
1587	alu.dst.sel = ctx->temp_reg;
1588	alu.dst.write = 1;
1589	alu.last = 1;
1590	r = r600_bc_add_alu(ctx->bc, &alu);
1591	if (r)
1592		return r;
1593	/* replicate result */
1594	return tgsi_helper_tempx_replicate(ctx);
1595}
1596
1597static int cayman_pow(struct r600_shader_ctx *ctx)
1598{
1599	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1600	int i, r;
1601	struct r600_bc_alu alu;
1602	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1603
1604	for (i = 0; i < 3; i++) {
1605		memset(&alu, 0, sizeof(struct r600_bc_alu));
1606		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1607		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1608		alu.dst.sel = ctx->temp_reg;
1609		alu.dst.chan = i;
1610		alu.dst.write = 1;
1611		if (i == 2)
1612			alu.last = 1;
1613		r = r600_bc_add_alu(ctx->bc, &alu);
1614		if (r)
1615			return r;
1616	}
1617
1618	/* b * LOG2(a) */
1619	memset(&alu, 0, sizeof(struct r600_bc_alu));
1620	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1621	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1622	alu.src[1].sel = ctx->temp_reg;
1623	alu.dst.sel = ctx->temp_reg;
1624	alu.dst.write = 1;
1625	alu.last = 1;
1626	r = r600_bc_add_alu(ctx->bc, &alu);
1627	if (r)
1628		return r;
1629
1630	for (i = 0; i < last_slot; i++) {
1631		/* POW(a,b) = EXP2(b * LOG2(a))*/
1632		memset(&alu, 0, sizeof(struct r600_bc_alu));
1633		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1634		alu.src[0].sel = ctx->temp_reg;
1635
1636		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1637		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1638		if (i == last_slot - 1)
1639			alu.last = 1;
1640		r = r600_bc_add_alu(ctx->bc, &alu);
1641		if (r)
1642			return r;
1643	}
1644	return 0;
1645}
1646
1647static int tgsi_pow(struct r600_shader_ctx *ctx)
1648{
1649	struct r600_bc_alu alu;
1650	int r;
1651
1652	/* LOG2(a) */
1653	memset(&alu, 0, sizeof(struct r600_bc_alu));
1654	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1655	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1656	alu.dst.sel = ctx->temp_reg;
1657	alu.dst.write = 1;
1658	alu.last = 1;
1659	r = r600_bc_add_alu(ctx->bc, &alu);
1660	if (r)
1661		return r;
1662	/* b * LOG2(a) */
1663	memset(&alu, 0, sizeof(struct r600_bc_alu));
1664	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1665	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1666	alu.src[1].sel = ctx->temp_reg;
1667	alu.dst.sel = ctx->temp_reg;
1668	alu.dst.write = 1;
1669	alu.last = 1;
1670	r = r600_bc_add_alu(ctx->bc, &alu);
1671	if (r)
1672		return r;
1673	/* POW(a,b) = EXP2(b * LOG2(a))*/
1674	memset(&alu, 0, sizeof(struct r600_bc_alu));
1675	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1676	alu.src[0].sel = ctx->temp_reg;
1677	alu.dst.sel = ctx->temp_reg;
1678	alu.dst.write = 1;
1679	alu.last = 1;
1680	r = r600_bc_add_alu(ctx->bc, &alu);
1681	if (r)
1682		return r;
1683	return tgsi_helper_tempx_replicate(ctx);
1684}
1685
1686static int tgsi_ssg(struct r600_shader_ctx *ctx)
1687{
1688	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1689	struct r600_bc_alu alu;
1690	int i, r;
1691
1692	/* tmp = (src > 0 ? 1 : src) */
1693	for (i = 0; i < 4; i++) {
1694		memset(&alu, 0, sizeof(struct r600_bc_alu));
1695		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1696		alu.is_op3 = 1;
1697
1698		alu.dst.sel = ctx->temp_reg;
1699		alu.dst.chan = i;
1700
1701		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1702		alu.src[1].sel = V_SQ_ALU_SRC_1;
1703		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1704
1705		if (i == 3)
1706			alu.last = 1;
1707		r = r600_bc_add_alu(ctx->bc, &alu);
1708		if (r)
1709			return r;
1710	}
1711
1712	/* dst = (-tmp > 0 ? -1 : tmp) */
1713	for (i = 0; i < 4; i++) {
1714		memset(&alu, 0, sizeof(struct r600_bc_alu));
1715		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1716		alu.is_op3 = 1;
1717		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1718
1719		alu.src[0].sel = ctx->temp_reg;
1720		alu.src[0].chan = i;
1721		alu.src[0].neg = 1;
1722
1723		alu.src[1].sel = V_SQ_ALU_SRC_1;
1724		alu.src[1].neg = 1;
1725
1726		alu.src[2].sel = ctx->temp_reg;
1727		alu.src[2].chan = i;
1728
1729		if (i == 3)
1730			alu.last = 1;
1731		r = r600_bc_add_alu(ctx->bc, &alu);
1732		if (r)
1733			return r;
1734	}
1735	return 0;
1736}
1737
1738static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1739{
1740	struct r600_bc_alu alu;
1741	int i, r;
1742
1743	for (i = 0; i < 4; i++) {
1744		memset(&alu, 0, sizeof(struct r600_bc_alu));
1745		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1746			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1747			alu.dst.chan = i;
1748		} else {
1749			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1750			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1751			alu.src[0].sel = ctx->temp_reg;
1752			alu.src[0].chan = i;
1753		}
1754		if (i == 3) {
1755			alu.last = 1;
1756		}
1757		r = r600_bc_add_alu(ctx->bc, &alu);
1758		if (r)
1759			return r;
1760	}
1761	return 0;
1762}
1763
1764static int tgsi_op3(struct r600_shader_ctx *ctx)
1765{
1766	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1767	struct r600_bc_alu alu;
1768	int i, j, r;
1769	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1770
1771	for (i = 0; i < lasti + 1; i++) {
1772		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1773			continue;
1774
1775		memset(&alu, 0, sizeof(struct r600_bc_alu));
1776		alu.inst = ctx->inst_info->r600_opcode;
1777		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1778			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1779		}
1780
1781		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1782		alu.dst.chan = i;
1783		alu.dst.write = 1;
1784		alu.is_op3 = 1;
1785		if (i == lasti) {
1786			alu.last = 1;
1787		}
1788		r = r600_bc_add_alu(ctx->bc, &alu);
1789		if (r)
1790			return r;
1791	}
1792	return 0;
1793}
1794
1795static int tgsi_dp(struct r600_shader_ctx *ctx)
1796{
1797	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1798	struct r600_bc_alu alu;
1799	int i, j, r;
1800
1801	for (i = 0; i < 4; i++) {
1802		memset(&alu, 0, sizeof(struct r600_bc_alu));
1803		alu.inst = ctx->inst_info->r600_opcode;
1804		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1805			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1806		}
1807
1808		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1809		alu.dst.chan = i;
1810		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1811		/* handle some special cases */
1812		switch (ctx->inst_info->tgsi_opcode) {
1813		case TGSI_OPCODE_DP2:
1814			if (i > 1) {
1815				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1816				alu.src[0].chan = alu.src[1].chan = 0;
1817			}
1818			break;
1819		case TGSI_OPCODE_DP3:
1820			if (i > 2) {
1821				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1822				alu.src[0].chan = alu.src[1].chan = 0;
1823			}
1824			break;
1825		case TGSI_OPCODE_DPH:
1826			if (i == 3) {
1827				alu.src[0].sel = V_SQ_ALU_SRC_1;
1828				alu.src[0].chan = 0;
1829				alu.src[0].neg = 0;
1830			}
1831			break;
1832		default:
1833			break;
1834		}
1835		if (i == 3) {
1836			alu.last = 1;
1837		}
1838		r = r600_bc_add_alu(ctx->bc, &alu);
1839		if (r)
1840			return r;
1841	}
1842	return 0;
1843}
1844
1845static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1846						    unsigned index)
1847{
1848	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1849	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1850		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1851		ctx->src[index].neg || ctx->src[index].abs;
1852}
1853
1854static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1855					unsigned index)
1856{
1857	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1858	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1859}
1860
1861static int tgsi_tex(struct r600_shader_ctx *ctx)
1862{
1863	static float one_point_five = 1.5f;
1864	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1865	struct r600_bc_tex tex;
1866	struct r600_bc_alu alu;
1867	unsigned src_gpr;
1868	int r, i, j;
1869	int opcode;
1870	/* Texture fetch instructions can only use gprs as source.
1871	 * Also they cannot negate the source or take the absolute value */
1872	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1873	boolean src_loaded = FALSE;
1874	unsigned sampler_src_reg = 1;
1875
1876	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1877
1878	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1879		/* TGSI moves the sampler to src reg 3 for TXD */
1880		sampler_src_reg = 3;
1881
1882		for (i = 1; i < 3; i++) {
1883			/* set gradients h/v */
1884			memset(&tex, 0, sizeof(struct r600_bc_tex));
1885			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1886				SQ_TEX_INST_SET_GRADIENTS_V;
1887			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1888			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1889
1890			if (tgsi_tex_src_requires_loading(ctx, i)) {
1891				tex.src_gpr = r600_get_temp(ctx);
1892				tex.src_sel_x = 0;
1893				tex.src_sel_y = 1;
1894				tex.src_sel_z = 2;
1895				tex.src_sel_w = 3;
1896
1897				for (j = 0; j < 4; j++) {
1898					memset(&alu, 0, sizeof(struct r600_bc_alu));
1899					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1900                                        r600_bc_src(&alu.src[0], &ctx->src[i], j);
1901                                        alu.dst.sel = tex.src_gpr;
1902                                        alu.dst.chan = j;
1903                                        if (j == 3)
1904                                                alu.last = 1;
1905                                        alu.dst.write = 1;
1906                                        r = r600_bc_add_alu(ctx->bc, &alu);
1907                                        if (r)
1908                                                return r;
1909				}
1910
1911			} else {
1912				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
1913				tex.src_sel_x = ctx->src[i].swizzle[0];
1914				tex.src_sel_y = ctx->src[i].swizzle[1];
1915				tex.src_sel_z = ctx->src[i].swizzle[2];
1916				tex.src_sel_w = ctx->src[i].swizzle[3];
1917				tex.src_rel = ctx->src[i].rel;
1918			}
1919			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
1920			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
1921			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1922				tex.coord_type_x = 1;
1923				tex.coord_type_y = 1;
1924				tex.coord_type_z = 1;
1925				tex.coord_type_w = 1;
1926			}
1927			r = r600_bc_add_tex(ctx->bc, &tex);
1928			if (r)
1929				return r;
1930		}
1931	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1932		int out_chan;
1933		/* Add perspective divide */
1934		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1935			out_chan = 2;
1936			for (i = 0; i < 3; i++) {
1937				memset(&alu, 0, sizeof(struct r600_bc_alu));
1938				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1939				r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1940
1941				alu.dst.sel = ctx->temp_reg;
1942				alu.dst.chan = i;
1943				if (i == 2)
1944					alu.last = 1;
1945				if (out_chan == i)
1946					alu.dst.write = 1;
1947				r = r600_bc_add_alu(ctx->bc, &alu);
1948				if (r)
1949					return r;
1950			}
1951
1952		} else {
1953			out_chan = 3;
1954			memset(&alu, 0, sizeof(struct r600_bc_alu));
1955			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1956			r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1957
1958			alu.dst.sel = ctx->temp_reg;
1959			alu.dst.chan = out_chan;
1960			alu.last = 1;
1961			alu.dst.write = 1;
1962			r = r600_bc_add_alu(ctx->bc, &alu);
1963			if (r)
1964				return r;
1965		}
1966
1967		for (i = 0; i < 3; i++) {
1968			memset(&alu, 0, sizeof(struct r600_bc_alu));
1969			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1970			alu.src[0].sel = ctx->temp_reg;
1971			alu.src[0].chan = out_chan;
1972			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1973			alu.dst.sel = ctx->temp_reg;
1974			alu.dst.chan = i;
1975			alu.dst.write = 1;
1976			r = r600_bc_add_alu(ctx->bc, &alu);
1977			if (r)
1978				return r;
1979		}
1980		memset(&alu, 0, sizeof(struct r600_bc_alu));
1981		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1982		alu.src[0].sel = V_SQ_ALU_SRC_1;
1983		alu.src[0].chan = 0;
1984		alu.dst.sel = ctx->temp_reg;
1985		alu.dst.chan = 3;
1986		alu.last = 1;
1987		alu.dst.write = 1;
1988		r = r600_bc_add_alu(ctx->bc, &alu);
1989		if (r)
1990			return r;
1991		src_loaded = TRUE;
1992		src_gpr = ctx->temp_reg;
1993	}
1994
1995	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1996		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1997		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1998
1999		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2000		for (i = 0; i < 4; i++) {
2001			memset(&alu, 0, sizeof(struct r600_bc_alu));
2002			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2003			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2004			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2005			alu.dst.sel = ctx->temp_reg;
2006			alu.dst.chan = i;
2007			if (i == 3)
2008				alu.last = 1;
2009			alu.dst.write = 1;
2010			r = r600_bc_add_alu(ctx->bc, &alu);
2011			if (r)
2012				return r;
2013		}
2014
2015		/* tmp1.z = RCP_e(|tmp1.z|) */
2016		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2017			for (i = 0; i < 3; i++) {
2018				memset(&alu, 0, sizeof(struct r600_bc_alu));
2019				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2020				alu.src[0].sel = ctx->temp_reg;
2021				alu.src[0].chan = 2;
2022				alu.src[0].abs = 1;
2023				alu.dst.sel = ctx->temp_reg;
2024				alu.dst.chan = i;
2025				if (i == 2)
2026					alu.dst.write = 1;
2027				if (i == 2)
2028					alu.last = 1;
2029				r = r600_bc_add_alu(ctx->bc, &alu);
2030				if (r)
2031					return r;
2032			}
2033		} else {
2034			memset(&alu, 0, sizeof(struct r600_bc_alu));
2035			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2036			alu.src[0].sel = ctx->temp_reg;
2037			alu.src[0].chan = 2;
2038			alu.src[0].abs = 1;
2039			alu.dst.sel = ctx->temp_reg;
2040			alu.dst.chan = 2;
2041			alu.dst.write = 1;
2042			alu.last = 1;
2043			r = r600_bc_add_alu(ctx->bc, &alu);
2044			if (r)
2045				return r;
2046		}
2047
2048		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
2049		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
2050		 * muladd has no writemask, have to use another temp
2051		 */
2052		memset(&alu, 0, sizeof(struct r600_bc_alu));
2053		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2054		alu.is_op3 = 1;
2055
2056		alu.src[0].sel = ctx->temp_reg;
2057		alu.src[0].chan = 0;
2058		alu.src[1].sel = ctx->temp_reg;
2059		alu.src[1].chan = 2;
2060
2061		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2062		alu.src[2].chan = 0;
2063		alu.src[2].value = *(uint32_t *)&one_point_five;
2064
2065		alu.dst.sel = ctx->temp_reg;
2066		alu.dst.chan = 0;
2067		alu.dst.write = 1;
2068
2069		r = r600_bc_add_alu(ctx->bc, &alu);
2070		if (r)
2071			return r;
2072
2073		memset(&alu, 0, sizeof(struct r600_bc_alu));
2074		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2075		alu.is_op3 = 1;
2076
2077		alu.src[0].sel = ctx->temp_reg;
2078		alu.src[0].chan = 1;
2079		alu.src[1].sel = ctx->temp_reg;
2080		alu.src[1].chan = 2;
2081
2082		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2083		alu.src[2].chan = 0;
2084		alu.src[2].value = *(uint32_t *)&one_point_five;
2085
2086		alu.dst.sel = ctx->temp_reg;
2087		alu.dst.chan = 1;
2088		alu.dst.write = 1;
2089
2090		alu.last = 1;
2091		r = r600_bc_add_alu(ctx->bc, &alu);
2092		if (r)
2093			return r;
2094
2095		src_loaded = TRUE;
2096		src_gpr = ctx->temp_reg;
2097	}
2098
2099	if (src_requires_loading && !src_loaded) {
2100		for (i = 0; i < 4; i++) {
2101			memset(&alu, 0, sizeof(struct r600_bc_alu));
2102			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2103			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2104			alu.dst.sel = ctx->temp_reg;
2105			alu.dst.chan = i;
2106			if (i == 3)
2107				alu.last = 1;
2108			alu.dst.write = 1;
2109			r = r600_bc_add_alu(ctx->bc, &alu);
2110			if (r)
2111				return r;
2112		}
2113		src_loaded = TRUE;
2114		src_gpr = ctx->temp_reg;
2115	}
2116
2117	opcode = ctx->inst_info->r600_opcode;
2118	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
2119		switch (opcode) {
2120		case SQ_TEX_INST_SAMPLE:
2121			opcode = SQ_TEX_INST_SAMPLE_C;
2122			break;
2123		case SQ_TEX_INST_SAMPLE_L:
2124			opcode = SQ_TEX_INST_SAMPLE_C_L;
2125			break;
2126		case SQ_TEX_INST_SAMPLE_G:
2127			opcode = SQ_TEX_INST_SAMPLE_C_G;
2128			break;
2129		}
2130	}
2131
2132	memset(&tex, 0, sizeof(struct r600_bc_tex));
2133	tex.inst = opcode;
2134
2135	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2136	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2137	tex.src_gpr = src_gpr;
2138	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2139	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2140	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2141	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2142	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2143	if (src_loaded) {
2144		tex.src_sel_x = 0;
2145		tex.src_sel_y = 1;
2146		tex.src_sel_z = 2;
2147		tex.src_sel_w = 3;
2148	} else {
2149		tex.src_sel_x = ctx->src[0].swizzle[0];
2150		tex.src_sel_y = ctx->src[0].swizzle[1];
2151		tex.src_sel_z = ctx->src[0].swizzle[2];
2152		tex.src_sel_w = ctx->src[0].swizzle[3];
2153		tex.src_rel = ctx->src[0].rel;
2154	}
2155
2156	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2157		tex.src_sel_x = 1;
2158		tex.src_sel_y = 0;
2159		tex.src_sel_z = 3;
2160		tex.src_sel_w = 1;
2161	}
2162
2163	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2164		tex.coord_type_x = 1;
2165		tex.coord_type_y = 1;
2166		tex.coord_type_z = 1;
2167		tex.coord_type_w = 1;
2168	}
2169
2170	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
2171		tex.coord_type_z = 0;
2172		tex.src_sel_z = tex.src_sel_y;
2173	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
2174		tex.coord_type_z = 0;
2175
2176	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2177		tex.src_sel_w = tex.src_sel_z;
2178
2179	r = r600_bc_add_tex(ctx->bc, &tex);
2180	if (r)
2181		return r;
2182
2183	/* add shadow ambient support  - gallium doesn't do it yet */
2184	return 0;
2185}
2186
2187static int tgsi_lrp(struct r600_shader_ctx *ctx)
2188{
2189	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2190	struct r600_bc_alu alu;
2191	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2192	unsigned i;
2193	int r;
2194
2195	/* optimize if it's just an equal balance */
2196	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2197		for (i = 0; i < lasti + 1; i++) {
2198			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2199				continue;
2200
2201			memset(&alu, 0, sizeof(struct r600_bc_alu));
2202			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2203			r600_bc_src(&alu.src[0], &ctx->src[1], i);
2204			r600_bc_src(&alu.src[1], &ctx->src[2], i);
2205			alu.omod = 3;
2206			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2207			alu.dst.chan = i;
2208			if (i == lasti) {
2209				alu.last = 1;
2210			}
2211			r = r600_bc_add_alu(ctx->bc, &alu);
2212			if (r)
2213				return r;
2214		}
2215		return 0;
2216	}
2217
2218	/* 1 - src0 */
2219	for (i = 0; i < lasti + 1; i++) {
2220		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2221			continue;
2222
2223		memset(&alu, 0, sizeof(struct r600_bc_alu));
2224		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2225		alu.src[0].sel = V_SQ_ALU_SRC_1;
2226		alu.src[0].chan = 0;
2227		r600_bc_src(&alu.src[1], &ctx->src[0], i);
2228		r600_bc_src_toggle_neg(&alu.src[1]);
2229		alu.dst.sel = ctx->temp_reg;
2230		alu.dst.chan = i;
2231		if (i == lasti) {
2232			alu.last = 1;
2233		}
2234		alu.dst.write = 1;
2235		r = r600_bc_add_alu(ctx->bc, &alu);
2236		if (r)
2237			return r;
2238	}
2239
2240	/* (1 - src0) * src2 */
2241	for (i = 0; i < lasti + 1; i++) {
2242		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2243			continue;
2244
2245		memset(&alu, 0, sizeof(struct r600_bc_alu));
2246		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2247		alu.src[0].sel = ctx->temp_reg;
2248		alu.src[0].chan = i;
2249		r600_bc_src(&alu.src[1], &ctx->src[2], i);
2250		alu.dst.sel = ctx->temp_reg;
2251		alu.dst.chan = i;
2252		if (i == lasti) {
2253			alu.last = 1;
2254		}
2255		alu.dst.write = 1;
2256		r = r600_bc_add_alu(ctx->bc, &alu);
2257		if (r)
2258			return r;
2259	}
2260
2261	/* src0 * src1 + (1 - src0) * src2 */
2262	for (i = 0; i < lasti + 1; i++) {
2263		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2264			continue;
2265
2266		memset(&alu, 0, sizeof(struct r600_bc_alu));
2267		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2268		alu.is_op3 = 1;
2269		r600_bc_src(&alu.src[0], &ctx->src[0], i);
2270		r600_bc_src(&alu.src[1], &ctx->src[1], i);
2271		alu.src[2].sel = ctx->temp_reg;
2272		alu.src[2].chan = i;
2273
2274		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2275		alu.dst.chan = i;
2276		if (i == lasti) {
2277			alu.last = 1;
2278		}
2279		r = r600_bc_add_alu(ctx->bc, &alu);
2280		if (r)
2281			return r;
2282	}
2283	return 0;
2284}
2285
2286static int tgsi_cmp(struct r600_shader_ctx *ctx)
2287{
2288	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2289	struct r600_bc_alu alu;
2290	int i, r;
2291	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2292
2293	for (i = 0; i < lasti + 1; i++) {
2294		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2295			continue;
2296
2297		memset(&alu, 0, sizeof(struct r600_bc_alu));
2298		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2299		r600_bc_src(&alu.src[0], &ctx->src[0], i);
2300		r600_bc_src(&alu.src[1], &ctx->src[2], i);
2301		r600_bc_src(&alu.src[2], &ctx->src[1], i);
2302		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2303		alu.dst.chan = i;
2304		alu.dst.write = 1;
2305		alu.is_op3 = 1;
2306		if (i == lasti)
2307			alu.last = 1;
2308		r = r600_bc_add_alu(ctx->bc, &alu);
2309		if (r)
2310			return r;
2311	}
2312	return 0;
2313}
2314
2315static int tgsi_xpd(struct r600_shader_ctx *ctx)
2316{
2317	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2318	static const unsigned int src0_swizzle[] = {2, 0, 1};
2319	static const unsigned int src1_swizzle[] = {1, 2, 0};
2320	struct r600_bc_alu alu;
2321	uint32_t use_temp = 0;
2322	int i, r;
2323
2324	if (inst->Dst[0].Register.WriteMask != 0xf)
2325		use_temp = 1;
2326
2327	for (i = 0; i < 4; i++) {
2328		memset(&alu, 0, sizeof(struct r600_bc_alu));
2329		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2330		if (i < 3) {
2331			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2332			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2333		} else {
2334			alu.src[0].sel = V_SQ_ALU_SRC_0;
2335			alu.src[0].chan = i;
2336			alu.src[1].sel = V_SQ_ALU_SRC_0;
2337			alu.src[1].chan = i;
2338		}
2339
2340		alu.dst.sel = ctx->temp_reg;
2341		alu.dst.chan = i;
2342		alu.dst.write = 1;
2343
2344		if (i == 3)
2345			alu.last = 1;
2346		r = r600_bc_add_alu(ctx->bc, &alu);
2347		if (r)
2348			return r;
2349	}
2350
2351	for (i = 0; i < 4; i++) {
2352		memset(&alu, 0, sizeof(struct r600_bc_alu));
2353		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2354
2355		if (i < 3) {
2356			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2357			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2358		} else {
2359			alu.src[0].sel = V_SQ_ALU_SRC_0;
2360			alu.src[0].chan = i;
2361			alu.src[1].sel = V_SQ_ALU_SRC_0;
2362			alu.src[1].chan = i;
2363		}
2364
2365		alu.src[2].sel = ctx->temp_reg;
2366		alu.src[2].neg = 1;
2367		alu.src[2].chan = i;
2368
2369		if (use_temp)
2370			alu.dst.sel = ctx->temp_reg;
2371		else
2372			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2373		alu.dst.chan = i;
2374		alu.dst.write = 1;
2375		alu.is_op3 = 1;
2376		if (i == 3)
2377			alu.last = 1;
2378		r = r600_bc_add_alu(ctx->bc, &alu);
2379		if (r)
2380			return r;
2381	}
2382	if (use_temp)
2383		return tgsi_helper_copy(ctx, inst);
2384	return 0;
2385}
2386
2387static int tgsi_exp(struct r600_shader_ctx *ctx)
2388{
2389	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2390	struct r600_bc_alu alu;
2391	int r;
2392	int i;
2393
2394	/* result.x = 2^floor(src); */
2395	if (inst->Dst[0].Register.WriteMask & 1) {
2396		memset(&alu, 0, sizeof(struct r600_bc_alu));
2397
2398		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2399		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2400
2401		alu.dst.sel = ctx->temp_reg;
2402		alu.dst.chan = 0;
2403		alu.dst.write = 1;
2404		alu.last = 1;
2405		r = r600_bc_add_alu(ctx->bc, &alu);
2406		if (r)
2407			return r;
2408
2409		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2410			for (i = 0; i < 3; i++) {
2411				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2412				alu.src[0].sel = ctx->temp_reg;
2413				alu.src[0].chan = 0;
2414
2415				alu.dst.sel = ctx->temp_reg;
2416				alu.dst.chan = i;
2417				if (i == 0)
2418					alu.dst.write = 1;
2419				if (i == 2)
2420					alu.last = 1;
2421				r = r600_bc_add_alu(ctx->bc, &alu);
2422				if (r)
2423					return r;
2424			}
2425		} else {
2426			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2427			alu.src[0].sel = ctx->temp_reg;
2428			alu.src[0].chan = 0;
2429
2430			alu.dst.sel = ctx->temp_reg;
2431			alu.dst.chan = 0;
2432			alu.dst.write = 1;
2433			alu.last = 1;
2434			r = r600_bc_add_alu(ctx->bc, &alu);
2435			if (r)
2436				return r;
2437		}
2438	}
2439
2440	/* result.y = tmp - floor(tmp); */
2441	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2442		memset(&alu, 0, sizeof(struct r600_bc_alu));
2443
2444		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2445		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2446
2447		alu.dst.sel = ctx->temp_reg;
2448#if 0
2449		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2450		if (r)
2451			return r;
2452#endif
2453		alu.dst.write = 1;
2454		alu.dst.chan = 1;
2455
2456		alu.last = 1;
2457
2458		r = r600_bc_add_alu(ctx->bc, &alu);
2459		if (r)
2460			return r;
2461	}
2462
2463	/* result.z = RoughApprox2ToX(tmp);*/
2464	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2465		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2466			for (i = 0; i < 3; i++) {
2467				memset(&alu, 0, sizeof(struct r600_bc_alu));
2468				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2469				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2470
2471				alu.dst.sel = ctx->temp_reg;
2472				alu.dst.chan = i;
2473				if (i == 2) {
2474					alu.dst.write = 1;
2475					alu.last = 1;
2476				}
2477
2478				r = r600_bc_add_alu(ctx->bc, &alu);
2479				if (r)
2480					return r;
2481			}
2482		} else {
2483			memset(&alu, 0, sizeof(struct r600_bc_alu));
2484			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2485			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2486
2487			alu.dst.sel = ctx->temp_reg;
2488			alu.dst.write = 1;
2489			alu.dst.chan = 2;
2490
2491			alu.last = 1;
2492
2493			r = r600_bc_add_alu(ctx->bc, &alu);
2494			if (r)
2495				return r;
2496		}
2497	}
2498
2499	/* result.w = 1.0;*/
2500	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2501		memset(&alu, 0, sizeof(struct r600_bc_alu));
2502
2503		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2504		alu.src[0].sel = V_SQ_ALU_SRC_1;
2505		alu.src[0].chan = 0;
2506
2507		alu.dst.sel = ctx->temp_reg;
2508		alu.dst.chan = 3;
2509		alu.dst.write = 1;
2510		alu.last = 1;
2511		r = r600_bc_add_alu(ctx->bc, &alu);
2512		if (r)
2513			return r;
2514	}
2515	return tgsi_helper_copy(ctx, inst);
2516}
2517
2518static int tgsi_log(struct r600_shader_ctx *ctx)
2519{
2520	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2521	struct r600_bc_alu alu;
2522	int r;
2523	int i;
2524
2525	/* result.x = floor(log2(|src|)); */
2526	if (inst->Dst[0].Register.WriteMask & 1) {
2527		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2528			for (i = 0; i < 3; i++) {
2529				memset(&alu, 0, sizeof(struct r600_bc_alu));
2530
2531				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2532				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2533				r600_bc_src_set_abs(&alu.src[0]);
2534
2535				alu.dst.sel = ctx->temp_reg;
2536				alu.dst.chan = i;
2537				if (i == 0)
2538					alu.dst.write = 1;
2539				if (i == 2)
2540					alu.last = 1;
2541				r = r600_bc_add_alu(ctx->bc, &alu);
2542				if (r)
2543					return r;
2544			}
2545
2546		} else {
2547			memset(&alu, 0, sizeof(struct r600_bc_alu));
2548
2549			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2550			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2551			r600_bc_src_set_abs(&alu.src[0]);
2552
2553			alu.dst.sel = ctx->temp_reg;
2554			alu.dst.chan = 0;
2555			alu.dst.write = 1;
2556			alu.last = 1;
2557			r = r600_bc_add_alu(ctx->bc, &alu);
2558			if (r)
2559				return r;
2560		}
2561
2562		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2563		alu.src[0].sel = ctx->temp_reg;
2564		alu.src[0].chan = 0;
2565
2566		alu.dst.sel = ctx->temp_reg;
2567		alu.dst.chan = 0;
2568		alu.dst.write = 1;
2569		alu.last = 1;
2570
2571		r = r600_bc_add_alu(ctx->bc, &alu);
2572		if (r)
2573			return r;
2574	}
2575
2576	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2577	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2578
2579		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2580			for (i = 0; i < 3; i++) {
2581				memset(&alu, 0, sizeof(struct r600_bc_alu));
2582
2583				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2584				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2585				r600_bc_src_set_abs(&alu.src[0]);
2586
2587				alu.dst.sel = ctx->temp_reg;
2588				alu.dst.chan = i;
2589				if (i == 1)
2590					alu.dst.write = 1;
2591				if (i == 2)
2592					alu.last = 1;
2593
2594				r = r600_bc_add_alu(ctx->bc, &alu);
2595				if (r)
2596					return r;
2597			}
2598		} else {
2599			memset(&alu, 0, sizeof(struct r600_bc_alu));
2600
2601			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2602			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2603			r600_bc_src_set_abs(&alu.src[0]);
2604
2605			alu.dst.sel = ctx->temp_reg;
2606			alu.dst.chan = 1;
2607			alu.dst.write = 1;
2608			alu.last = 1;
2609
2610			r = r600_bc_add_alu(ctx->bc, &alu);
2611			if (r)
2612				return r;
2613		}
2614
2615		memset(&alu, 0, sizeof(struct r600_bc_alu));
2616
2617		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2618		alu.src[0].sel = ctx->temp_reg;
2619		alu.src[0].chan = 1;
2620
2621		alu.dst.sel = ctx->temp_reg;
2622		alu.dst.chan = 1;
2623		alu.dst.write = 1;
2624		alu.last = 1;
2625
2626		r = r600_bc_add_alu(ctx->bc, &alu);
2627		if (r)
2628			return r;
2629
2630		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2631			for (i = 0; i < 3; i++) {
2632				memset(&alu, 0, sizeof(struct r600_bc_alu));
2633				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2634				alu.src[0].sel = ctx->temp_reg;
2635				alu.src[0].chan = 1;
2636
2637				alu.dst.sel = ctx->temp_reg;
2638				alu.dst.chan = i;
2639				if (i == 1)
2640					alu.dst.write = 1;
2641				if (i == 2)
2642					alu.last = 1;
2643
2644				r = r600_bc_add_alu(ctx->bc, &alu);
2645				if (r)
2646					return r;
2647			}
2648		} else {
2649			memset(&alu, 0, sizeof(struct r600_bc_alu));
2650			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2651			alu.src[0].sel = ctx->temp_reg;
2652			alu.src[0].chan = 1;
2653
2654			alu.dst.sel = ctx->temp_reg;
2655			alu.dst.chan = 1;
2656			alu.dst.write = 1;
2657			alu.last = 1;
2658
2659			r = r600_bc_add_alu(ctx->bc, &alu);
2660			if (r)
2661				return r;
2662		}
2663
2664		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2665			for (i = 0; i < 3; i++) {
2666				memset(&alu, 0, sizeof(struct r600_bc_alu));
2667				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2668				alu.src[0].sel = ctx->temp_reg;
2669				alu.src[0].chan = 1;
2670
2671				alu.dst.sel = ctx->temp_reg;
2672				alu.dst.chan = i;
2673				if (i == 1)
2674					alu.dst.write = 1;
2675				if (i == 2)
2676					alu.last = 1;
2677
2678				r = r600_bc_add_alu(ctx->bc, &alu);
2679				if (r)
2680					return r;
2681			}
2682		} else {
2683			memset(&alu, 0, sizeof(struct r600_bc_alu));
2684			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2685			alu.src[0].sel = ctx->temp_reg;
2686			alu.src[0].chan = 1;
2687
2688			alu.dst.sel = ctx->temp_reg;
2689			alu.dst.chan = 1;
2690			alu.dst.write = 1;
2691			alu.last = 1;
2692
2693			r = r600_bc_add_alu(ctx->bc, &alu);
2694			if (r)
2695				return r;
2696		}
2697
2698		memset(&alu, 0, sizeof(struct r600_bc_alu));
2699
2700		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2701
2702		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2703		r600_bc_src_set_abs(&alu.src[0]);
2704
2705		alu.src[1].sel = ctx->temp_reg;
2706		alu.src[1].chan = 1;
2707
2708		alu.dst.sel = ctx->temp_reg;
2709		alu.dst.chan = 1;
2710		alu.dst.write = 1;
2711		alu.last = 1;
2712
2713		r = r600_bc_add_alu(ctx->bc, &alu);
2714		if (r)
2715			return r;
2716	}
2717
2718	/* result.z = log2(|src|);*/
2719	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2720		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2721			for (i = 0; i < 3; i++) {
2722				memset(&alu, 0, sizeof(struct r600_bc_alu));
2723
2724				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2725				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2726				r600_bc_src_set_abs(&alu.src[0]);
2727
2728				alu.dst.sel = ctx->temp_reg;
2729				if (i == 2)
2730					alu.dst.write = 1;
2731				alu.dst.chan = i;
2732				if (i == 2)
2733					alu.last = 1;
2734
2735				r = r600_bc_add_alu(ctx->bc, &alu);
2736				if (r)
2737					return r;
2738			}
2739		} else {
2740			memset(&alu, 0, sizeof(struct r600_bc_alu));
2741
2742			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2743			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2744			r600_bc_src_set_abs(&alu.src[0]);
2745
2746			alu.dst.sel = ctx->temp_reg;
2747			alu.dst.write = 1;
2748			alu.dst.chan = 2;
2749			alu.last = 1;
2750
2751			r = r600_bc_add_alu(ctx->bc, &alu);
2752			if (r)
2753				return r;
2754		}
2755	}
2756
2757	/* result.w = 1.0; */
2758	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2759		memset(&alu, 0, sizeof(struct r600_bc_alu));
2760
2761		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2762		alu.src[0].sel = V_SQ_ALU_SRC_1;
2763		alu.src[0].chan = 0;
2764
2765		alu.dst.sel = ctx->temp_reg;
2766		alu.dst.chan = 3;
2767		alu.dst.write = 1;
2768		alu.last = 1;
2769
2770		r = r600_bc_add_alu(ctx->bc, &alu);
2771		if (r)
2772			return r;
2773	}
2774
2775	return tgsi_helper_copy(ctx, inst);
2776}
2777
2778static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2779{
2780	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2781	struct r600_bc_alu alu;
2782	int r;
2783
2784	memset(&alu, 0, sizeof(struct r600_bc_alu));
2785
2786	switch (inst->Instruction.Opcode) {
2787	case TGSI_OPCODE_ARL:
2788		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2789		break;
2790	case TGSI_OPCODE_ARR:
2791		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2792		break;
2793	default:
2794		assert(0);
2795		return -1;
2796	}
2797
2798	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2799	alu.last = 1;
2800	alu.dst.sel = ctx->ar_reg;
2801	alu.dst.write = 1;
2802	r = r600_bc_add_alu(ctx->bc, &alu);
2803	if (r)
2804		return r;
2805
2806	/* TODO: Note that the MOVA can be avoided if we never use AR for
2807	 * indexing non-CB registers in the current ALU clause. Similarly, we
2808	 * need to load AR from ar_reg again if we started a new clause
2809	 * between ARL and AR usage. The easy way to do that is to remove
2810	 * the MOVA here, and load it for the first AR access after ar_reg
2811	 * has been modified in each clause. */
2812	memset(&alu, 0, sizeof(struct r600_bc_alu));
2813	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2814	alu.src[0].sel = ctx->ar_reg;
2815	alu.src[0].chan = 0;
2816	alu.last = 1;
2817	r = r600_bc_add_alu(ctx->bc, &alu);
2818	if (r)
2819		return r;
2820	return 0;
2821}
2822static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2823{
2824	/* TODO from r600c, ar values don't persist between clauses */
2825	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2826	struct r600_bc_alu alu;
2827	int r;
2828
2829	switch (inst->Instruction.Opcode) {
2830	case TGSI_OPCODE_ARL:
2831		memset(&alu, 0, sizeof(alu));
2832		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2833		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2834		alu.dst.sel = ctx->ar_reg;
2835		alu.dst.write = 1;
2836		alu.last = 1;
2837
2838		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2839			return r;
2840
2841		memset(&alu, 0, sizeof(alu));
2842		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2843		alu.src[0].sel = ctx->ar_reg;
2844		alu.dst.sel = ctx->ar_reg;
2845		alu.dst.write = 1;
2846		alu.last = 1;
2847
2848		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2849			return r;
2850		break;
2851	case TGSI_OPCODE_ARR:
2852		memset(&alu, 0, sizeof(alu));
2853		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2854		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2855		alu.dst.sel = ctx->ar_reg;
2856		alu.dst.write = 1;
2857		alu.last = 1;
2858
2859		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2860			return r;
2861		break;
2862	default:
2863		assert(0);
2864		return -1;
2865	}
2866
2867	memset(&alu, 0, sizeof(alu));
2868	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2869	alu.src[0].sel = ctx->ar_reg;
2870	alu.last = 1;
2871
2872	r = r600_bc_add_alu(ctx->bc, &alu);
2873	if (r)
2874		return r;
2875	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2876	return 0;
2877}
2878
2879static int tgsi_opdst(struct r600_shader_ctx *ctx)
2880{
2881	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2882	struct r600_bc_alu alu;
2883	int i, r = 0;
2884
2885	for (i = 0; i < 4; i++) {
2886		memset(&alu, 0, sizeof(struct r600_bc_alu));
2887
2888		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2889		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2890
2891		if (i == 0 || i == 3) {
2892			alu.src[0].sel = V_SQ_ALU_SRC_1;
2893		} else {
2894			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2895		}
2896
2897		if (i == 0 || i == 2) {
2898			alu.src[1].sel = V_SQ_ALU_SRC_1;
2899		} else {
2900			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2901		}
2902		if (i == 3)
2903			alu.last = 1;
2904		r = r600_bc_add_alu(ctx->bc, &alu);
2905		if (r)
2906			return r;
2907	}
2908	return 0;
2909}
2910
2911static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2912{
2913	struct r600_bc_alu alu;
2914	int r;
2915
2916	memset(&alu, 0, sizeof(struct r600_bc_alu));
2917	alu.inst = opcode;
2918	alu.predicate = 1;
2919
2920	alu.dst.sel = ctx->temp_reg;
2921	alu.dst.write = 1;
2922	alu.dst.chan = 0;
2923
2924	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2925	alu.src[1].sel = V_SQ_ALU_SRC_0;
2926	alu.src[1].chan = 0;
2927
2928	alu.last = 1;
2929
2930	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2931	if (r)
2932		return r;
2933	return 0;
2934}
2935
2936static int pops(struct r600_shader_ctx *ctx, int pops)
2937{
2938	unsigned force_pop = ctx->bc->force_add_cf;
2939
2940	if (!force_pop) {
2941		int alu_pop = 3;
2942		if (ctx->bc->cf_last) {
2943			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2944				alu_pop = 0;
2945			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2946				alu_pop = 1;
2947		}
2948		alu_pop += pops;
2949		if (alu_pop == 1) {
2950			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2951			ctx->bc->force_add_cf = 1;
2952		} else if (alu_pop == 2) {
2953			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2954			ctx->bc->force_add_cf = 1;
2955		} else {
2956			force_pop = 1;
2957		}
2958	}
2959
2960	if (force_pop) {
2961		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2962		ctx->bc->cf_last->pop_count = pops;
2963		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2964	}
2965
2966	return 0;
2967}
2968
2969static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2970{
2971	switch(reason) {
2972	case FC_PUSH_VPM:
2973		ctx->bc->callstack[ctx->bc->call_sp].current--;
2974		break;
2975	case FC_PUSH_WQM:
2976	case FC_LOOP:
2977		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2978		break;
2979	case FC_REP:
2980		/* TOODO : for 16 vp asic should -= 2; */
2981		ctx->bc->callstack[ctx->bc->call_sp].current --;
2982		break;
2983	}
2984}
2985
2986static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2987{
2988	if (check_max_only) {
2989		int diff;
2990		switch (reason) {
2991		case FC_PUSH_VPM:
2992			diff = 1;
2993			break;
2994		case FC_PUSH_WQM:
2995			diff = 4;
2996			break;
2997		default:
2998			assert(0);
2999			diff = 0;
3000		}
3001		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3002		    ctx->bc->callstack[ctx->bc->call_sp].max) {
3003			ctx->bc->callstack[ctx->bc->call_sp].max =
3004				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3005		}
3006		return;
3007	}
3008	switch (reason) {
3009	case FC_PUSH_VPM:
3010		ctx->bc->callstack[ctx->bc->call_sp].current++;
3011		break;
3012	case FC_PUSH_WQM:
3013	case FC_LOOP:
3014		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3015		break;
3016	case FC_REP:
3017		ctx->bc->callstack[ctx->bc->call_sp].current++;
3018		break;
3019	}
3020
3021	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3022	    ctx->bc->callstack[ctx->bc->call_sp].max) {
3023		ctx->bc->callstack[ctx->bc->call_sp].max =
3024			ctx->bc->callstack[ctx->bc->call_sp].current;
3025	}
3026}
3027
3028static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3029{
3030	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3031
3032	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
3033						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
3034	sp->mid[sp->num_mid] = ctx->bc->cf_last;
3035	sp->num_mid++;
3036}
3037
3038static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3039{
3040	ctx->bc->fc_sp++;
3041	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3042	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3043}
3044
3045static void fc_poplevel(struct r600_shader_ctx *ctx)
3046{
3047	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3048	if (sp->mid) {
3049		free(sp->mid);
3050		sp->mid = NULL;
3051	}
3052	sp->num_mid = 0;
3053	sp->start = NULL;
3054	sp->type = 0;
3055	ctx->bc->fc_sp--;
3056}
3057
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for RETURN / JUMP emission and flag-based
 * early exit from loops; emit_setret_in_loop_flag() and emit_testflag() are
 * empty stubs and emit_jump_to_offset() never sets a target address.
 */
3058#if 0
3059static int emit_return(struct r600_shader_ctx *ctx)
3060{
3061	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
3062	return 0;
3063}
3064
3065static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
3066{
3067
3068	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
3069	ctx->bc->cf_last->pop_count = pops;
3070	/* TODO work out offset */
3071	return 0;
3072}
3073
3074static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
3075{
3076	return 0;
3077}
3078
3079static void emit_testflag(struct r600_shader_ctx *ctx)
3080{
3081
3082}
3083
3084static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
3085{
3086	emit_testflag(ctx);
3087	emit_jump_to_offset(ctx, 1, 4);
3088	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
3089	pops(ctx, ifidx + 1);
3090	emit_return(ctx);
3091}
3092
3093static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
3094{
3095	emit_testflag(ctx);
3096
3097	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3098	ctx->bc->cf_last->pop_count = 1;
3099
3100	fc_set_mid(ctx, fc_sp);
3101
3102	pops(ctx, 1);
3103}
3104#endif
3105
3106static int tgsi_if(struct r600_shader_ctx *ctx)
3107{
3108	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3109
3110	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3111
3112	fc_pushlevel(ctx, FC_IF);
3113
3114	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3115	return 0;
3116}
3117
3118static int tgsi_else(struct r600_shader_ctx *ctx)
3119{
3120	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3121	ctx->bc->cf_last->pop_count = 1;
3122
3123	fc_set_mid(ctx, ctx->bc->fc_sp);
3124	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3125	return 0;
3126}
3127
3128static int tgsi_endif(struct r600_shader_ctx *ctx)
3129{
3130	pops(ctx, 1);
3131	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3132		R600_ERR("if/endif unbalanced in shader\n");
3133		return -1;
3134	}
3135
3136	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3137		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3138		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3139	} else {
3140		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3141	}
3142	fc_poplevel(ctx);
3143
3144	callstack_decrease_current(ctx, FC_PUSH_VPM);
3145	return 0;
3146}
3147
3148static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3149{
3150	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3151
3152	fc_pushlevel(ctx, FC_LOOP);
3153
3154	/* check stack depth */
3155	callstack_check_depth(ctx, FC_LOOP, 0);
3156	return 0;
3157}
3158
3159static int tgsi_endloop(struct r600_shader_ctx *ctx)
3160{
3161	int i;
3162
3163	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3164
3165	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3166		R600_ERR("loop/endloop in shader code are not paired.\n");
3167		return -EINVAL;
3168	}
3169
3170	/* fixup loop pointers - from r600isa
3171	   LOOP END points to CF after LOOP START,
3172	   LOOP START point to CF after LOOP END
3173	   BRK/CONT point to LOOP END CF
3174	*/
3175	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3176
3177	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3178
3179	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3180		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3181	}
3182	/* TODO add LOOPRET support */
3183	fc_poplevel(ctx);
3184	callstack_decrease_current(ctx, FC_LOOP);
3185	return 0;
3186}
3187
3188static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3189{
3190	unsigned int fscp;
3191
3192	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3193	{
3194		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3195			break;
3196	}
3197
3198	if (fscp == 0) {
3199		R600_ERR("Break not inside loop/endloop pair\n");
3200		return -EINVAL;
3201	}
3202
3203	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3204	ctx->bc->cf_last->pop_count = 1;
3205
3206	fc_set_mid(ctx, fscp);
3207
3208	pops(ctx, 1);
3209	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3210	return 0;
3211}
3212
/*
 * TGSI translation table that uses the unprefixed hardware opcode values
 * (V_SQ_ALU_*, V_SQ_CF_* and SQ_TEX_*).
 *
 * Each row is { TGSI opcode, flag, hardware opcode, handler }:
 *  - the flag is 1 only on the MAD row, the single row with an OP3 opcode;
 *  - handlers can read the hardware opcode through
 *    ctx->inst_info->r600_opcode (tgsi_loop_brk_cont() does); rows that
 *    carry NOP presumably have handlers that choose their own opcodes;
 *  - tgsi_unsupported marks TGSI opcodes with no translation here.
 *
 * Rows appear to be kept in ascending TGSI opcode order, with bare numeric
 * rows (marked "gap") standing in for opcode numbers that have no name,
 * presumably so the table can be indexed directly by opcode.
 * NOTE(review): confirm against the lookup site before reordering rows.
 */
3213static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3214	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3215	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3216	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3217
3218	/* FIXME:
3219	 * For state trackers other than OpenGL, we'll want to use
3220	 * _RECIP_IEEE instead.
3221	 */
3222	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3223
3224	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3225	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3226	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3227	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3228	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3229	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3230	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3231	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3232	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3233	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3234	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3235	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3236	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3237	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3238	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3239	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	/* gap */
3241	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	/* gap */
3244	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3247	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3249	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3251	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3252	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3253	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3254	/* gap */
3255	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3257	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3259	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3260	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3261	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3262	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3263	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3269	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3271	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3272	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3273	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3274	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3275	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3276	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3277	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3278	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3285	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3289	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3290	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3291	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3292	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3295	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3296	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3297	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3298	/* gap */
3299	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3302	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3303	/* gap */
3304	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3312	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	/* gap */
3314	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3323	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3326	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3328	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3329	/* gap */
3330	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335	/* gap */
3336	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3345	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3346	/* gap */
3347	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3373	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3375};
3376
/*
 * TGSI translation table that uses the EG_-prefixed hardware opcode values
 * (presumably the Evergreen encodings); texture rows use the same
 * SQ_TEX_* values as r600_shader_tgsi_instruction.
 *
 * Row layout is { TGSI opcode, flag, hardware opcode, handler }, with the
 * flag set to 1 only on the MAD (OP3) row. Compared with
 * r600_shader_tgsi_instruction, this table routes ARL/ARR to tgsi_eg_arl
 * and gives RCP and RSQ the RECIP_IEEE / RECIPSQRT_IEEE opcodes.
 *
 * Rows appear to be kept in ascending TGSI opcode order, with bare numeric
 * rows (marked "gap") standing in for opcode numbers that have no name,
 * presumably so the table can be indexed directly by opcode.
 * NOTE(review): confirm against the lookup site before reordering rows.
 */
3377static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3378	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3379	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3380	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3381	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3382	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3383	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3384	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3385	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3386	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3387	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3388	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3389	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3390	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3391	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3392	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3393	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3394	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3395	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3396	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3397	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3398	/* gap */
3399	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3400	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3401	/* gap */
3402	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3403	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3404	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3405	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3406	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3407	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3408	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3409	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3410	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3411	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3412	/* gap */
3413	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3414	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3415	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3416	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3417	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3418	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3419	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3420	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3421	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3422	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3423	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3424	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3425	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3426	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3427	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3428	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3429	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3430	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3431	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3432	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3433	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3434	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3435	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3436	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3437	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3438	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3439	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3440	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3441	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3442	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3443	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3444	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3445	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3446	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3447	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3448	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3449	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3450	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3451	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3452	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3453	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3454	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3455	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3456	/* gap */
3457	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3458	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3459	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3460	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3461	/* gap */
3462	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3463	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3464	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3465	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3466	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3467	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3468	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3469	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3470	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3471	/* gap */
3472	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3473	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3474	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3475	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3476	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3477	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3478	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3479	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3480	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3481	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3483	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3484	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3485	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3486	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3487	/* gap */
3488	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3489	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3490	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3491	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3492	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3493	/* gap */
3494	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3495	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3496	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3498	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3499	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3500	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3501	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3502	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3503	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3504	/* gap */
3505	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3506	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3507	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3508	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3509	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3512	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3513	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3515	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3518	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3521	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3523	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3524	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3525	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3526	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3527	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3528	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3529	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3531	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3533};
3534
3535static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3536	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3537	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3538	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3539	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3540	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3541	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3542	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3543	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3544	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3545	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3546	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3547	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3548	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3549	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3550	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3551	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3552	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3553	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3554	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3555	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556	/* gap */
3557	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3559	/* gap */
3560	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3562	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3563	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3564	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3565	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3566	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3567	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3568	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3569	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3570	/* gap */
3571	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3572	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3573	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3574	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3575	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3576	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3577	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3578	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3579	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3582	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3583	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3584	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3585	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3586	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3587	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3588	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3589	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3590	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3591	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3592	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3593	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3594	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3595	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3596	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3597	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3598	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3600	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3601	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3602	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3604	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3605	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3606	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3607	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3608	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3609	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3610	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3611	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3612	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3613	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3614	/* gap */
3615	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3616	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3618	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3619	/* gap */
3620	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3621	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3623	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3624	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3625	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3626	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3627	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3628	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629	/* gap */
3630	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3631	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3632	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3633	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3636	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3637	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3638	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3639	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3640	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3641	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3642	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3643	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3644	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645	/* gap */
3646	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3649	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3650	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3651	/* gap */
3652	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3657	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3658	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3659	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3660	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3661	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3662	/* gap */
3663	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3664	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3665	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3666	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3668	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3670	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3671	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3673	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3674	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3675	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3677	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3678	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3679	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3680	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3681	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3682	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3683	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3684	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3685	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3687	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3690	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3691};
3692