1848b8605Smrg/*
2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5848b8605Smrg * copy of this software and associated documentation files (the "Software"),
6848b8605Smrg * to deal in the Software without restriction, including without limitation
7848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
8848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom
9848b8605Smrg * the Software is furnished to do so, subject to the following conditions:
10848b8605Smrg *
11848b8605Smrg * The above copyright notice and this permission notice (including the next
12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
13848b8605Smrg * Software.
14848b8605Smrg *
15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
22848b8605Smrg */
23848b8605Smrg#include "r600_sq.h"
24848b8605Smrg#include "r600_formats.h"
25848b8605Smrg#include "r600_opcodes.h"
26848b8605Smrg#include "r600_shader.h"
27848b8605Smrg#include "r600d.h"
28848b8605Smrg
29848b8605Smrg#include "sb/sb_public.h"
30848b8605Smrg
31848b8605Smrg#include "pipe/p_shader_tokens.h"
32848b8605Smrg#include "tgsi/tgsi_info.h"
33848b8605Smrg#include "tgsi/tgsi_parse.h"
34848b8605Smrg#include "tgsi/tgsi_scan.h"
35848b8605Smrg#include "tgsi/tgsi_dump.h"
36b8e80941Smrg#include "util/u_bitcast.h"
37848b8605Smrg#include "util/u_memory.h"
38848b8605Smrg#include "util/u_math.h"
39848b8605Smrg#include <stdio.h>
40848b8605Smrg#include <errno.h>
41848b8605Smrg
42b8e80941Smrg/* CAYMAN notes
43848b8605SmrgWhy CAYMAN got loops for lots of instructions is explained here.
44848b8605Smrg
45848b8605Smrg-These 8xx t-slot only ops are implemented in all vector slots.
46848b8605SmrgMUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
47b8e80941SmrgThese 8xx t-slot only opcodes become vector ops, with all four
48b8e80941Smrgslots expecting the arguments on sources a and b. Result is
49848b8605Smrgbroadcast to all channels.
50b8e80941SmrgMULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
51b8e80941SmrgThese 8xx t-slot only opcodes become vector ops in the z, y, and
52848b8605Smrgx slots.
53848b8605SmrgEXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
54848b8605SmrgRECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
55848b8605SmrgSQRT_IEEE/_64
56848b8605SmrgSIN/COS
57b8e80941SmrgThe w slot may have an independent co-issued operation, or if the
58b8e80941Smrgresult is required to be in the w slot, the opcode above may be
59848b8605Smrgissued in the w slot as well.
60848b8605SmrgThe compiler must issue the source argument to slots z, y, and x
61848b8605Smrg*/
62848b8605Smrg
63b8e80941Smrg/* Contents of r0 on entry to various shaders
64b8e80941Smrg
65b8e80941Smrg VS - .x = VertexID
66b8e80941Smrg      .y = RelVertexID (??)
67b8e80941Smrg      .w = InstanceID
68b8e80941Smrg
69b8e80941Smrg GS - r0.xyw, r1.xyz = per-vertex offsets
70b8e80941Smrg      r0.z = PrimitiveID
71b8e80941Smrg
72b8e80941Smrg TCS - .x = PatchID
73b8e80941Smrg       .y = RelPatchID (??)
74b8e80941Smrg       .z = InvocationID
75b8e80941Smrg       .w = tess factor base.
76b8e80941Smrg
77b8e80941Smrg TES - .x = TessCoord.x
78b8e80941Smrg     - .y = TessCoord.y
79b8e80941Smrg     - .z = RelPatchID (??)
80b8e80941Smrg     - .w = PrimitiveID
81b8e80941Smrg
82b8e80941Smrg PS - face_gpr.z = SampleMask
83b8e80941Smrg      face_gpr.w = SampleID
84b8e80941Smrg*/
/*
 * Selector value derived from R600_BUFFER_INFO_OFFSET: the offset is divided
 * by 16 and biased by 512.
 * NOTE(review): presumably the offset is in bytes and the result is a vec4
 * (16-byte) index inside the constant selector range -- confirm against the
 * users of this macro.
 */
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
/*
 * Forward declaration; the definition lives further down in this file.
 * r600_pipe_shader_create() calls it and reports "translation from TGSI
 * failed" when it returns non-zero.
 */
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key);
89848b8605Smrg
90848b8605Smrgstatic void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
91848b8605Smrg                           int size, unsigned comp_mask) {
92848b8605Smrg
93848b8605Smrg	if (!size)
94848b8605Smrg		return;
95848b8605Smrg
96848b8605Smrg	if (ps->num_arrays == ps->max_arrays) {
97848b8605Smrg		ps->max_arrays += 64;
98848b8605Smrg		ps->arrays = realloc(ps->arrays, ps->max_arrays *
99848b8605Smrg		                     sizeof(struct r600_shader_array));
100848b8605Smrg	}
101848b8605Smrg
102848b8605Smrg	int n = ps->num_arrays;
103848b8605Smrg	++ps->num_arrays;
104848b8605Smrg
105848b8605Smrg	ps->arrays[n].comp_mask = comp_mask;
106848b8605Smrg	ps->arrays[n].gpr_start = start_gpr;
107848b8605Smrg	ps->arrays[n].gpr_count = size;
108848b8605Smrg}
109848b8605Smrg
110848b8605Smrgstatic void r600_dump_streamout(struct pipe_stream_output_info *so)
111848b8605Smrg{
112848b8605Smrg	unsigned i;
113848b8605Smrg
114848b8605Smrg	fprintf(stderr, "STREAMOUT\n");
115848b8605Smrg	for (i = 0; i < so->num_outputs; i++) {
116848b8605Smrg		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
117848b8605Smrg				so->output[i].start_component;
118b8e80941Smrg		fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
119b8e80941Smrg			i,
120b8e80941Smrg			so->output[i].stream,
121b8e80941Smrg			so->output[i].output_buffer,
122848b8605Smrg			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
123848b8605Smrg			so->output[i].register_index,
124848b8605Smrg			mask & 1 ? "x" : "",
125848b8605Smrg		        mask & 2 ? "y" : "",
126848b8605Smrg		        mask & 4 ? "z" : "",
127848b8605Smrg		        mask & 8 ? "w" : "",
128848b8605Smrg			so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : "");
129848b8605Smrg	}
130848b8605Smrg}
131848b8605Smrg
132848b8605Smrgstatic int store_shader(struct pipe_context *ctx,
133848b8605Smrg			struct r600_pipe_shader *shader)
134848b8605Smrg{
135848b8605Smrg	struct r600_context *rctx = (struct r600_context *)ctx;
136848b8605Smrg	uint32_t *ptr, i;
137848b8605Smrg
138848b8605Smrg	if (shader->bo == NULL) {
139848b8605Smrg		shader->bo = (struct r600_resource*)
140b8e80941Smrg			pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
141848b8605Smrg		if (shader->bo == NULL) {
142848b8605Smrg			return -ENOMEM;
143848b8605Smrg		}
144b8e80941Smrg		ptr = r600_buffer_map_sync_with_rings(
145b8e80941Smrg			&rctx->b, shader->bo,
146b8e80941Smrg			PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
147848b8605Smrg		if (R600_BIG_ENDIAN) {
148848b8605Smrg			for (i = 0; i < shader->shader.bc.ndw; ++i) {
149848b8605Smrg				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
150848b8605Smrg			}
151848b8605Smrg		} else {
152848b8605Smrg			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
153848b8605Smrg		}
154b8e80941Smrg		rctx->b.ws->buffer_unmap(shader->bo->buf);
155848b8605Smrg	}
156848b8605Smrg
157848b8605Smrg	return 0;
158848b8605Smrg}
159848b8605Smrg
160848b8605Smrgint r600_pipe_shader_create(struct pipe_context *ctx,
161848b8605Smrg			    struct r600_pipe_shader *shader,
162b8e80941Smrg			    union r600_shader_key key)
163848b8605Smrg{
164848b8605Smrg	struct r600_context *rctx = (struct r600_context *)ctx;
165848b8605Smrg	struct r600_pipe_shader_selector *sel = shader->selector;
166848b8605Smrg	int r;
167b8e80941Smrg	bool dump = r600_can_dump_shader(&rctx->screen->b,
168b8e80941Smrg					 tgsi_get_processor_type(sel->tokens));
169848b8605Smrg	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
170b8e80941Smrg	unsigned sb_disasm;
171b8e80941Smrg	unsigned export_shader;
172848b8605Smrg
173848b8605Smrg	shader->shader.bc.isa = rctx->isa;
174848b8605Smrg
175848b8605Smrg	if (dump) {
176848b8605Smrg		fprintf(stderr, "--------------------------------------------------------------\n");
177848b8605Smrg		tgsi_dump(sel->tokens, 0);
178848b8605Smrg
179848b8605Smrg		if (sel->so.num_outputs) {
180848b8605Smrg			r600_dump_streamout(&sel->so);
181848b8605Smrg		}
182848b8605Smrg	}
183848b8605Smrg	r = r600_shader_from_tgsi(rctx, shader, key);
184848b8605Smrg	if (r) {
185848b8605Smrg		R600_ERR("translation from TGSI failed !\n");
186848b8605Smrg		goto error;
187848b8605Smrg	}
188b8e80941Smrg	if (shader->shader.processor_type == PIPE_SHADER_VERTEX) {
189b8e80941Smrg		/* only disable for vertex shaders in tess paths */
190b8e80941Smrg		if (key.vs.as_ls)
191b8e80941Smrg			use_sb = 0;
192b8e80941Smrg	}
193b8e80941Smrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL);
194b8e80941Smrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL);
195b8e80941Smrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_COMPUTE);
196848b8605Smrg
197b8e80941Smrg	/* disable SB for shaders using doubles */
198b8e80941Smrg	use_sb &= !shader->shader.uses_doubles;
199848b8605Smrg
200b8e80941Smrg	use_sb &= !shader->shader.uses_atomics;
201b8e80941Smrg	use_sb &= !shader->shader.uses_images;
202b8e80941Smrg	use_sb &= !shader->shader.uses_helper_invocation;
203b8e80941Smrg
204b8e80941Smrg	/* Check if the bytecode has already been built. */
205848b8605Smrg	if (!shader->shader.bc.bytecode) {
206848b8605Smrg		r = r600_bytecode_build(&shader->shader.bc);
207848b8605Smrg		if (r) {
208848b8605Smrg			R600_ERR("building bytecode failed !\n");
209848b8605Smrg			goto error;
210848b8605Smrg		}
211848b8605Smrg	}
212848b8605Smrg
213b8e80941Smrg	sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
214848b8605Smrg	if (dump && !sb_disasm) {
215848b8605Smrg		fprintf(stderr, "--------------------------------------------------------------\n");
216848b8605Smrg		r600_bytecode_disasm(&shader->shader.bc);
217848b8605Smrg		fprintf(stderr, "______________________________________________________________\n");
218848b8605Smrg	} else if ((dump && sb_disasm) || use_sb) {
219848b8605Smrg		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
220848b8605Smrg		                             dump, use_sb);
221848b8605Smrg		if (r) {
222848b8605Smrg			R600_ERR("r600_sb_bytecode_process failed !\n");
223848b8605Smrg			goto error;
224848b8605Smrg		}
225848b8605Smrg	}
226848b8605Smrg
227848b8605Smrg	if (shader->gs_copy_shader) {
228848b8605Smrg		if (dump) {
229848b8605Smrg			// dump copy shader
230848b8605Smrg			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
231848b8605Smrg						     &shader->gs_copy_shader->shader, dump, 0);
232848b8605Smrg			if (r)
233848b8605Smrg				goto error;
234848b8605Smrg		}
235848b8605Smrg
236848b8605Smrg		if ((r = store_shader(ctx, shader->gs_copy_shader)))
237848b8605Smrg			goto error;
238848b8605Smrg	}
239848b8605Smrg
240848b8605Smrg	/* Store the shader in a buffer. */
241848b8605Smrg	if ((r = store_shader(ctx, shader)))
242848b8605Smrg		goto error;
243848b8605Smrg
244848b8605Smrg	/* Build state. */
245848b8605Smrg	switch (shader->shader.processor_type) {
246b8e80941Smrg	case PIPE_SHADER_TESS_CTRL:
247b8e80941Smrg		evergreen_update_hs_state(ctx, shader);
248b8e80941Smrg		break;
249b8e80941Smrg	case PIPE_SHADER_TESS_EVAL:
250b8e80941Smrg		if (key.tes.as_es)
251b8e80941Smrg			evergreen_update_es_state(ctx, shader);
252b8e80941Smrg		else
253b8e80941Smrg			evergreen_update_vs_state(ctx, shader);
254b8e80941Smrg		break;
255b8e80941Smrg	case PIPE_SHADER_GEOMETRY:
256848b8605Smrg		if (rctx->b.chip_class >= EVERGREEN) {
257848b8605Smrg			evergreen_update_gs_state(ctx, shader);
258848b8605Smrg			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
259848b8605Smrg		} else {
260848b8605Smrg			r600_update_gs_state(ctx, shader);
261848b8605Smrg			r600_update_vs_state(ctx, shader->gs_copy_shader);
262848b8605Smrg		}
263848b8605Smrg		break;
264b8e80941Smrg	case PIPE_SHADER_VERTEX:
265b8e80941Smrg		export_shader = key.vs.as_es;
266848b8605Smrg		if (rctx->b.chip_class >= EVERGREEN) {
267b8e80941Smrg			if (key.vs.as_ls)
268b8e80941Smrg				evergreen_update_ls_state(ctx, shader);
269b8e80941Smrg			else if (key.vs.as_es)
270848b8605Smrg				evergreen_update_es_state(ctx, shader);
271848b8605Smrg			else
272848b8605Smrg				evergreen_update_vs_state(ctx, shader);
273848b8605Smrg		} else {
274848b8605Smrg			if (export_shader)
275848b8605Smrg				r600_update_es_state(ctx, shader);
276848b8605Smrg			else
277848b8605Smrg				r600_update_vs_state(ctx, shader);
278848b8605Smrg		}
279848b8605Smrg		break;
280b8e80941Smrg	case PIPE_SHADER_FRAGMENT:
281848b8605Smrg		if (rctx->b.chip_class >= EVERGREEN) {
282848b8605Smrg			evergreen_update_ps_state(ctx, shader);
283848b8605Smrg		} else {
284848b8605Smrg			r600_update_ps_state(ctx, shader);
285848b8605Smrg		}
286848b8605Smrg		break;
287b8e80941Smrg	case PIPE_SHADER_COMPUTE:
288b8e80941Smrg		evergreen_update_ls_state(ctx, shader);
289b8e80941Smrg		break;
290848b8605Smrg	default:
291848b8605Smrg		r = -EINVAL;
292848b8605Smrg		goto error;
293848b8605Smrg	}
294848b8605Smrg	return 0;
295848b8605Smrg
296848b8605Smrgerror:
297848b8605Smrg	r600_pipe_shader_destroy(ctx, shader);
298848b8605Smrg	return r;
299848b8605Smrg}
300848b8605Smrg
/*
 * Release what a shader variant holds: its buffer object reference, its
 * bytecode and its command buffer.  The context argument is not used.
 * r600_pipe_shader_create() calls this on every failure path.
 */
void r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader)
{
	/* Passing NULL as the new reference drops the one held in shader->bo. */
	r600_resource_reference(&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}
307848b8605Smrg
308848b8605Smrg/*
309848b8605Smrg * tgsi -> r600 shader
310848b8605Smrg */
311848b8605Smrgstruct r600_shader_tgsi_instruction;
312848b8605Smrg
/*
 * Description of one source operand while an instruction is translated;
 * r600_shader_ctx keeps up to four of them in its src[] array.
 * NOTE(review): field meanings below are inferred from their names and from
 * the r600_bytecode_src() prototype -- confirm against the code that fills
 * them in.
 */
struct r600_shader_src {
	unsigned				sel;		/* presumably the source selector */
	unsigned				swizzle[4];	/* one entry per channel */
	unsigned				neg;
	unsigned				abs;
	unsigned				rel;
	unsigned				kc_bank;
	boolean					kc_rel; /* true if cache bank is indexed */
	uint32_t				value[4];	/* one entry per channel */
};
323848b8605Smrg
/*
 * One evergreen interpolator slot.  r600_shader_ctx holds six of these and
 * evergreen_interp_assign_ij_index() copies ij_index from the selected slot
 * into the shader input.
 */
struct eg_interp {
	boolean					enabled;
	unsigned				ij_index;
};
328b8e80941Smrg
/*
 * Working state carried through the translation of one TGSI shader into
 * r600 bytecode; every instruction handler (see struct
 * r600_shader_tgsi_instruction) receives a pointer to it.
 */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	struct tgsi_array_info			*array_infos;
	/* flag for each tgsi temp array telling whether it has been spilled */
	bool					*spilled_arrays;
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	unsigned				type;
	unsigned				file_offset[TGSI_FILE_COUNT];
	unsigned				temp_reg;
	const struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	uint32_t				*literals;
	uint32_t				nliterals;
	uint32_t				max_driver_temp_used;
	/* needed for evergreen interpolation */
	struct eg_interp		eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int					face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int					fixed_pt_position_gpr;
	int					colors_used;
	boolean                 clip_vertex_write;
	unsigned                cv_output;
	unsigned		edgeflag_output;
	int					helper_invoc_reg;
	int                                     cs_block_size_reg;
	int                                     cs_grid_size_reg;
	bool cs_block_size_loaded, cs_grid_size_loaded;
	int					fragcoord_input;
	int					next_ring_offset;
	int					gs_out_ring_offset;
	int					gs_next_vertex;
	struct r600_shader	*gs_for_vs;
	int					gs_export_gpr_tregs[4];
	int                                     gs_rotated_input[2];
	const struct pipe_stream_output_info	*gs_stream_output_info;
	unsigned				enabled_stream_buffers_mask;
	unsigned                                tess_input_info; /* temp with tess input offsets */
	unsigned                                tess_output_info; /* temp with tess output offsets */
	unsigned                                thread_id_gpr; /* temp with thread id calculated for images */
};
373848b8605Smrg
/*
 * One entry of the per-chip instruction tables (r600_, eg_ and
 * cm_shader_tgsi_instruction[]): an opcode value plus the handler that
 * emits code for it.
 * NOTE(review): op is presumably the hardware opcode handed to the handler
 * through ctx->inst_info -- confirm against the table definitions.
 */
struct r600_shader_tgsi_instruction {
	unsigned	op;
	/* handler; the visible handlers return 0 on success, non-zero on error */
	int (*process)(struct r600_shader_ctx *ctx);
};
378848b8605Smrg
/*
 * Forward declarations of file-local helpers and of the three per-chip
 * instruction tables.  All of them have internal linkage, so their
 * definitions are elsewhere in this file.
 */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
                                unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan);
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
			       unsigned dst_reg, unsigned mask);
397b8e80941Smrg
398b8e80941Smrgstatic bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
399b8e80941Smrg{
400b8e80941Smrg	if (ctx->bc->family == CHIP_HEMLOCK ||
401b8e80941Smrg	    ctx->bc->family == CHIP_CYPRESS ||
402b8e80941Smrg	    ctx->bc->family == CHIP_JUNIPER)
403b8e80941Smrg		return false;
404b8e80941Smrg	return true;
405b8e80941Smrg}
406b8e80941Smrg
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask.  Returns 0 when none of those bits is set; bits above
 * bit 3 are ignored.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* Scan from w down to y; x and "nothing set" both yield 0. */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
418848b8605Smrg
419848b8605Smrgstatic int tgsi_is_supported(struct r600_shader_ctx *ctx)
420848b8605Smrg{
421848b8605Smrg	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
422b8e80941Smrg	unsigned j;
423848b8605Smrg
424b8e80941Smrg	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
425848b8605Smrg		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
426848b8605Smrg		return -EINVAL;
427848b8605Smrg	}
428848b8605Smrg#if 0
429848b8605Smrg	if (i->Instruction.Label) {
430848b8605Smrg		R600_ERR("label unsupported\n");
431848b8605Smrg		return -EINVAL;
432848b8605Smrg	}
433848b8605Smrg#endif
434848b8605Smrg	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
435848b8605Smrg		if (i->Src[j].Register.Dimension) {
436848b8605Smrg		   switch (i->Src[j].Register.File) {
437848b8605Smrg		   case TGSI_FILE_CONSTANT:
438b8e80941Smrg		   case TGSI_FILE_HW_ATOMIC:
439848b8605Smrg			   break;
440848b8605Smrg		   case TGSI_FILE_INPUT:
441b8e80941Smrg			   if (ctx->type == PIPE_SHADER_GEOMETRY ||
442b8e80941Smrg			       ctx->type == PIPE_SHADER_TESS_CTRL ||
443b8e80941Smrg			       ctx->type == PIPE_SHADER_TESS_EVAL)
444b8e80941Smrg				   break;
445b8e80941Smrg		   case TGSI_FILE_OUTPUT:
446b8e80941Smrg			   if (ctx->type == PIPE_SHADER_TESS_CTRL)
447848b8605Smrg				   break;
448848b8605Smrg		   default:
449b8e80941Smrg			   R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
450b8e80941Smrg				    i->Src[j].Register.File,
451848b8605Smrg				    i->Src[j].Register.Dimension);
452848b8605Smrg			   return -EINVAL;
453848b8605Smrg		   }
454848b8605Smrg		}
455848b8605Smrg	}
456848b8605Smrg	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
457848b8605Smrg		if (i->Dst[j].Register.Dimension) {
458b8e80941Smrg			if (ctx->type == PIPE_SHADER_TESS_CTRL)
459b8e80941Smrg				continue;
460848b8605Smrg			R600_ERR("unsupported dst (dimension)\n");
461848b8605Smrg			return -EINVAL;
462848b8605Smrg		}
463848b8605Smrg	}
464848b8605Smrg	return 0;
465848b8605Smrg}
466848b8605Smrg
467b8e80941Smrgint eg_get_interpolator_index(unsigned interpolate, unsigned location)
468848b8605Smrg{
469b8e80941Smrg	if (interpolate == TGSI_INTERPOLATE_COLOR ||
470b8e80941Smrg		interpolate == TGSI_INTERPOLATE_LINEAR ||
471b8e80941Smrg		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
472b8e80941Smrg	{
473b8e80941Smrg		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
474b8e80941Smrg		int loc;
475848b8605Smrg
476b8e80941Smrg		switch(location) {
477b8e80941Smrg		case TGSI_INTERPOLATE_LOC_CENTER:
478b8e80941Smrg			loc = 1;
479b8e80941Smrg			break;
480b8e80941Smrg		case TGSI_INTERPOLATE_LOC_CENTROID:
481b8e80941Smrg			loc = 2;
482b8e80941Smrg			break;
483b8e80941Smrg		case TGSI_INTERPOLATE_LOC_SAMPLE:
484b8e80941Smrg		default:
485b8e80941Smrg			loc = 0; break;
486848b8605Smrg		}
487b8e80941Smrg
488b8e80941Smrg		return is_linear * 3 + loc;
489848b8605Smrg	}
490848b8605Smrg
491b8e80941Smrg	return -1;
492b8e80941Smrg}
493b8e80941Smrg
494b8e80941Smrgstatic void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
495b8e80941Smrg		int input)
496b8e80941Smrg{
497b8e80941Smrg	int i = eg_get_interpolator_index(
498b8e80941Smrg		ctx->shader->input[input].interpolate,
499b8e80941Smrg		ctx->shader->input[input].interpolate_location);
500b8e80941Smrg	assert(i >= 0);
501b8e80941Smrg	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
502848b8605Smrg}
503848b8605Smrg
504848b8605Smrgstatic int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
505848b8605Smrg{
506848b8605Smrg	int i, r;
507848b8605Smrg	struct r600_bytecode_alu alu;
508848b8605Smrg	int gpr = 0, base_chan = 0;
509848b8605Smrg	int ij_index = ctx->shader->input[input].ij_index;
510848b8605Smrg
511848b8605Smrg	/* work out gpr and base_chan from index */
512848b8605Smrg	gpr = ij_index / 2;
513848b8605Smrg	base_chan = (2 * (ij_index % 2)) + 1;
514848b8605Smrg
515848b8605Smrg	for (i = 0; i < 8; i++) {
516848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
517848b8605Smrg
518848b8605Smrg		if (i < 4)
519848b8605Smrg			alu.op = ALU_OP2_INTERP_ZW;
520848b8605Smrg		else
521848b8605Smrg			alu.op = ALU_OP2_INTERP_XY;
522848b8605Smrg
523848b8605Smrg		if ((i > 1) && (i < 6)) {
524848b8605Smrg			alu.dst.sel = ctx->shader->input[input].gpr;
525848b8605Smrg			alu.dst.write = 1;
526848b8605Smrg		}
527848b8605Smrg
528848b8605Smrg		alu.dst.chan = i % 4;
529848b8605Smrg
530848b8605Smrg		alu.src[0].sel = gpr;
531848b8605Smrg		alu.src[0].chan = (base_chan - (i % 2));
532848b8605Smrg
533848b8605Smrg		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
534848b8605Smrg
535848b8605Smrg		alu.bank_swizzle_force = SQ_ALU_VEC_210;
536848b8605Smrg		if ((i % 4) == 3)
537848b8605Smrg			alu.last = 1;
538848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
539848b8605Smrg		if (r)
540848b8605Smrg			return r;
541848b8605Smrg	}
542848b8605Smrg	return 0;
543848b8605Smrg}
544848b8605Smrg
545848b8605Smrgstatic int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
546848b8605Smrg{
547848b8605Smrg	int i, r;
548848b8605Smrg	struct r600_bytecode_alu alu;
549848b8605Smrg
550848b8605Smrg	for (i = 0; i < 4; i++) {
551848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
552848b8605Smrg
553848b8605Smrg		alu.op = ALU_OP1_INTERP_LOAD_P0;
554848b8605Smrg
555848b8605Smrg		alu.dst.sel = ctx->shader->input[input].gpr;
556848b8605Smrg		alu.dst.write = 1;
557848b8605Smrg
558848b8605Smrg		alu.dst.chan = i;
559848b8605Smrg
560848b8605Smrg		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
561848b8605Smrg		alu.src[0].chan = i;
562848b8605Smrg
563848b8605Smrg		if (i == 3)
564848b8605Smrg			alu.last = 1;
565848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
566848b8605Smrg		if (r)
567848b8605Smrg			return r;
568848b8605Smrg	}
569848b8605Smrg	return 0;
570848b8605Smrg}
571848b8605Smrg
572848b8605Smrg/*
573848b8605Smrg * Special export handling in shaders
574848b8605Smrg *
575848b8605Smrg * shader export ARRAY_BASE for EXPORT_POS:
576848b8605Smrg * 60 is position
577848b8605Smrg * 61 is misc vector
578848b8605Smrg * 62, 63 are clip distance vectors
579848b8605Smrg *
580848b8605Smrg * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
581848b8605Smrg * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
582848b8605Smrg * USE_VTX_POINT_SIZE - point size in the X channel of export 61
583848b8605Smrg * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
584848b8605Smrg * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
585848b8605Smrg * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
586848b8605Smrg * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
587848b8605Smrg * exclusive from render target index)
588848b8605Smrg * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
589848b8605Smrg *
590848b8605Smrg *
591848b8605Smrg * shader export ARRAY_BASE for EXPORT_PIXEL:
592848b8605Smrg * 0-7 CB targets
593848b8605Smrg * 61 computed Z vector
594848b8605Smrg *
595848b8605Smrg * The use of the values exported in the computed Z vector are controlled
596848b8605Smrg * by DB_SHADER_CONTROL:
597848b8605Smrg * Z_EXPORT_ENABLE - Z as a float in RED
598848b8605Smrg * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
599848b8605Smrg * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
600848b8605Smrg * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
601848b8605Smrg * DB_SOURCE_FORMAT - export control restrictions
602848b8605Smrg *
603848b8605Smrg */
604848b8605Smrg
605848b8605Smrg
606848b8605Smrg/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
607848b8605Smrgstatic int r600_spi_sid(struct r600_shader_io * io)
608848b8605Smrg{
609848b8605Smrg	int index, name = io->name;
610848b8605Smrg
611848b8605Smrg	/* These params are handled differently, they don't need
612848b8605Smrg	 * semantic indices, so we'll use 0 for them.
613848b8605Smrg	 */
614848b8605Smrg	if (name == TGSI_SEMANTIC_POSITION ||
615848b8605Smrg	    name == TGSI_SEMANTIC_PSIZE ||
616848b8605Smrg	    name == TGSI_SEMANTIC_EDGEFLAG ||
617848b8605Smrg	    name == TGSI_SEMANTIC_FACE ||
618848b8605Smrg	    name == TGSI_SEMANTIC_SAMPLEMASK)
619848b8605Smrg		index = 0;
620848b8605Smrg	else {
621848b8605Smrg		if (name == TGSI_SEMANTIC_GENERIC) {
622848b8605Smrg			/* For generic params simply use sid from tgsi */
623848b8605Smrg			index = io->sid;
624848b8605Smrg		} else {
625848b8605Smrg			/* For non-generic params - pack name and sid into 8 bits */
626848b8605Smrg			index = 0x80 | (name<<3) | (io->sid);
627848b8605Smrg		}
628848b8605Smrg
629848b8605Smrg		/* Make sure that all really used indices have nonzero value, so
630848b8605Smrg		 * we can just compare it to 0 later instead of comparing the name
631848b8605Smrg		 * with different values to detect special cases. */
632848b8605Smrg		index++;
633848b8605Smrg	}
634848b8605Smrg
635848b8605Smrg	return index;
636848b8605Smrg};
637848b8605Smrg
638b8e80941Smrg/* we need this to get a common lds index for vs/tcs/tes input/outputs */
639b8e80941Smrgint r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
640b8e80941Smrg{
641b8e80941Smrg	switch (semantic_name) {
642b8e80941Smrg	case TGSI_SEMANTIC_POSITION:
643b8e80941Smrg		return 0;
644b8e80941Smrg	case TGSI_SEMANTIC_PSIZE:
645b8e80941Smrg		return 1;
646b8e80941Smrg	case TGSI_SEMANTIC_CLIPDIST:
647b8e80941Smrg		assert(index <= 1);
648b8e80941Smrg		return 2 + index;
649b8e80941Smrg	case TGSI_SEMANTIC_GENERIC:
650b8e80941Smrg		if (index <= 63-4)
651b8e80941Smrg			return 4 + index - 9;
652b8e80941Smrg		else
653b8e80941Smrg			/* same explanation as in the default statement,
654b8e80941Smrg			 * the only user hitting this is st/nine.
655b8e80941Smrg			 */
656b8e80941Smrg			return 0;
657b8e80941Smrg
658b8e80941Smrg	/* patch indices are completely separate and thus start from 0 */
659b8e80941Smrg	case TGSI_SEMANTIC_TESSOUTER:
660b8e80941Smrg		return 0;
661b8e80941Smrg	case TGSI_SEMANTIC_TESSINNER:
662b8e80941Smrg		return 1;
663b8e80941Smrg	case TGSI_SEMANTIC_PATCH:
664b8e80941Smrg		return 2 + index;
665b8e80941Smrg
666b8e80941Smrg	default:
667b8e80941Smrg		/* Don't fail here. The result of this function is only used
668b8e80941Smrg		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
669b8e80941Smrg		 * occur, but this function is called for all vertex shaders
670b8e80941Smrg		 * before it's known whether LS will be compiled or not.
671b8e80941Smrg		 */
672b8e80941Smrg		return 0;
673b8e80941Smrg	}
674b8e80941Smrg}
675b8e80941Smrg
676848b8605Smrg/* turn input into interpolate on EG */
677848b8605Smrgstatic int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
678848b8605Smrg{
679848b8605Smrg	int r = 0;
680848b8605Smrg
681848b8605Smrg	if (ctx->shader->input[index].spi_sid) {
682848b8605Smrg		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
683848b8605Smrg		if (ctx->shader->input[index].interpolate > 0) {
684848b8605Smrg			evergreen_interp_assign_ij_index(ctx, index);
685b8e80941Smrg			r = evergreen_interp_alu(ctx, index);
686848b8605Smrg		} else {
687b8e80941Smrg			r = evergreen_interp_flat(ctx, index);
688848b8605Smrg		}
689848b8605Smrg	}
690848b8605Smrg	return r;
691848b8605Smrg}
692848b8605Smrg
693848b8605Smrgstatic int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
694848b8605Smrg{
695848b8605Smrg	struct r600_bytecode_alu alu;
696848b8605Smrg	int i, r;
697848b8605Smrg	int gpr_front = ctx->shader->input[front].gpr;
698848b8605Smrg	int gpr_back = ctx->shader->input[back].gpr;
699848b8605Smrg
700848b8605Smrg	for (i = 0; i < 4; i++) {
701848b8605Smrg		memset(&alu, 0, sizeof(alu));
702848b8605Smrg		alu.op = ALU_OP3_CNDGT;
703848b8605Smrg		alu.is_op3 = 1;
704848b8605Smrg		alu.dst.write = 1;
705848b8605Smrg		alu.dst.sel = gpr_front;
706848b8605Smrg		alu.src[0].sel = ctx->face_gpr;
707848b8605Smrg		alu.src[1].sel = gpr_front;
708848b8605Smrg		alu.src[2].sel = gpr_back;
709848b8605Smrg
710848b8605Smrg		alu.dst.chan = i;
711848b8605Smrg		alu.src[1].chan = i;
712848b8605Smrg		alu.src[2].chan = i;
713848b8605Smrg		alu.last = (i==3);
714848b8605Smrg
715848b8605Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
716848b8605Smrg			return r;
717848b8605Smrg	}
718848b8605Smrg
719848b8605Smrg	return 0;
720848b8605Smrg}
721848b8605Smrg
722b8e80941Smrg/* execute a single slot ALU calculation */
723b8e80941Smrgstatic int single_alu_op2(struct r600_shader_ctx *ctx, int op,
724b8e80941Smrg			  int dst_sel, int dst_chan,
725b8e80941Smrg			  int src0_sel, unsigned src0_chan_val,
726b8e80941Smrg			  int src1_sel, unsigned src1_chan_val)
727b8e80941Smrg{
728b8e80941Smrg	struct r600_bytecode_alu alu;
729b8e80941Smrg	int r, i;
730b8e80941Smrg
731b8e80941Smrg	if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
732b8e80941Smrg		for (i = 0; i < 4; i++) {
733b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
734b8e80941Smrg			alu.op = op;
735b8e80941Smrg			alu.src[0].sel = src0_sel;
736b8e80941Smrg			if (src0_sel == V_SQ_ALU_SRC_LITERAL)
737b8e80941Smrg				alu.src[0].value = src0_chan_val;
738b8e80941Smrg			else
739b8e80941Smrg				alu.src[0].chan = src0_chan_val;
740b8e80941Smrg			alu.src[1].sel = src1_sel;
741b8e80941Smrg			if (src1_sel == V_SQ_ALU_SRC_LITERAL)
742b8e80941Smrg				alu.src[1].value = src1_chan_val;
743b8e80941Smrg			else
744b8e80941Smrg				alu.src[1].chan = src1_chan_val;
745b8e80941Smrg			alu.dst.sel = dst_sel;
746b8e80941Smrg			alu.dst.chan = i;
747b8e80941Smrg			alu.dst.write = i == dst_chan;
748b8e80941Smrg			alu.last = (i == 3);
749b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
750b8e80941Smrg			if (r)
751b8e80941Smrg				return r;
752b8e80941Smrg		}
753b8e80941Smrg		return 0;
754b8e80941Smrg	}
755b8e80941Smrg
756b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
757b8e80941Smrg	alu.op = op;
758b8e80941Smrg	alu.src[0].sel = src0_sel;
759b8e80941Smrg	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
760b8e80941Smrg		alu.src[0].value = src0_chan_val;
761b8e80941Smrg	else
762b8e80941Smrg		alu.src[0].chan = src0_chan_val;
763b8e80941Smrg	alu.src[1].sel = src1_sel;
764b8e80941Smrg	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
765b8e80941Smrg		alu.src[1].value = src1_chan_val;
766b8e80941Smrg	else
767b8e80941Smrg		alu.src[1].chan = src1_chan_val;
768b8e80941Smrg	alu.dst.sel = dst_sel;
769b8e80941Smrg	alu.dst.chan = dst_chan;
770b8e80941Smrg	alu.dst.write = 1;
771b8e80941Smrg	alu.last = 1;
772b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
773b8e80941Smrg	if (r)
774b8e80941Smrg		return r;
775b8e80941Smrg	return 0;
776b8e80941Smrg}
777b8e80941Smrg
778b8e80941Smrg/* execute a single slot ALU calculation */
779b8e80941Smrgstatic int single_alu_op3(struct r600_shader_ctx *ctx, int op,
780b8e80941Smrg			  int dst_sel, int dst_chan,
781b8e80941Smrg			  int src0_sel, unsigned src0_chan_val,
782b8e80941Smrg			  int src1_sel, unsigned src1_chan_val,
783b8e80941Smrg			  int src2_sel, unsigned src2_chan_val)
784b8e80941Smrg{
785b8e80941Smrg	struct r600_bytecode_alu alu;
786b8e80941Smrg	int r;
787b8e80941Smrg
788b8e80941Smrg	/* validate this for other ops */
789b8e80941Smrg	assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT);
790b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
791b8e80941Smrg	alu.op = op;
792b8e80941Smrg	alu.src[0].sel = src0_sel;
793b8e80941Smrg	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
794b8e80941Smrg		alu.src[0].value = src0_chan_val;
795b8e80941Smrg	else
796b8e80941Smrg		alu.src[0].chan = src0_chan_val;
797b8e80941Smrg	alu.src[1].sel = src1_sel;
798b8e80941Smrg	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
799b8e80941Smrg		alu.src[1].value = src1_chan_val;
800b8e80941Smrg	else
801b8e80941Smrg		alu.src[1].chan = src1_chan_val;
802b8e80941Smrg	alu.src[2].sel = src2_sel;
803b8e80941Smrg	if (src2_sel == V_SQ_ALU_SRC_LITERAL)
804b8e80941Smrg		alu.src[2].value = src2_chan_val;
805b8e80941Smrg	else
806b8e80941Smrg		alu.src[2].chan = src2_chan_val;
807b8e80941Smrg	alu.dst.sel = dst_sel;
808b8e80941Smrg	alu.dst.chan = dst_chan;
809b8e80941Smrg	alu.is_op3 = 1;
810b8e80941Smrg	alu.last = 1;
811b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
812b8e80941Smrg	if (r)
813b8e80941Smrg		return r;
814b8e80941Smrg	return 0;
815b8e80941Smrg}
816b8e80941Smrg
817b8e80941Smrg/* put it in temp_reg.x */
818b8e80941Smrgstatic int get_lds_offset0(struct r600_shader_ctx *ctx,
819b8e80941Smrg			   int rel_patch_chan,
820b8e80941Smrg			   int temp_reg, bool is_patch_var)
821b8e80941Smrg{
822b8e80941Smrg	int r;
823b8e80941Smrg
824b8e80941Smrg	/* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
825b8e80941Smrg	/* ADD
826b8e80941Smrg	   Dimension - patch0_offset (input_vals.z),
827b8e80941Smrg	   Non-dim - patch0_data_offset (input_vals.w)
828b8e80941Smrg	*/
829b8e80941Smrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
830b8e80941Smrg			   temp_reg, 0,
831b8e80941Smrg			   ctx->tess_output_info, 0,
832b8e80941Smrg			   0, rel_patch_chan,
833b8e80941Smrg			   ctx->tess_output_info, is_patch_var ? 3 : 2);
834b8e80941Smrg	if (r)
835b8e80941Smrg		return r;
836b8e80941Smrg	return 0;
837b8e80941Smrg}
838b8e80941Smrg
839b8e80941Smrgstatic inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
840b8e80941Smrg{
841b8e80941Smrg	return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
842b8e80941Smrg}
843b8e80941Smrg
844b8e80941Smrgstatic int r600_get_temp(struct r600_shader_ctx *ctx)
845b8e80941Smrg{
846b8e80941Smrg	return ctx->temp_reg + ctx->max_driver_temp_used++;
847b8e80941Smrg}
848b8e80941Smrg
849b8e80941Smrgstatic int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
850b8e80941Smrg{
851b8e80941Smrg	int i;
852b8e80941Smrg	i = ctx->shader->noutput++;
853b8e80941Smrg	ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
854b8e80941Smrg	ctx->shader->output[i].sid = 0;
855b8e80941Smrg	ctx->shader->output[i].gpr = 0;
856b8e80941Smrg	ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
857b8e80941Smrg	ctx->shader->output[i].write_mask = 0x4;
858b8e80941Smrg	ctx->shader->output[i].spi_sid = prim_id_sid;
859b8e80941Smrg
860b8e80941Smrg	return 0;
861b8e80941Smrg}
862b8e80941Smrg
863b8e80941Smrgstatic int tgsi_barrier(struct r600_shader_ctx *ctx)
864b8e80941Smrg{
865b8e80941Smrg	struct r600_bytecode_alu alu;
866b8e80941Smrg	int r;
867b8e80941Smrg
868b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
869b8e80941Smrg	alu.op = ctx->inst_info->op;
870b8e80941Smrg	alu.last = 1;
871b8e80941Smrg
872b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
873b8e80941Smrg	if (r)
874b8e80941Smrg		return r;
875b8e80941Smrg	return 0;
876b8e80941Smrg}
877b8e80941Smrg
878b8e80941Smrgstatic void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, unsigned *scratch_space_needed)
879b8e80941Smrg{
880b8e80941Smrg	// pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays
881b8e80941Smrg	unsigned n = ctx->info.array_max[TGSI_FILE_TEMPORARY];
882b8e80941Smrg	unsigned narrays_left = n;
883b8e80941Smrg	bool *spilled = ctx->spilled_arrays; // assumed calloc:ed
884b8e80941Smrg
885b8e80941Smrg	*scratch_space_needed = 0;
886b8e80941Smrg	while (*regno > 124 && narrays_left) {
887b8e80941Smrg		unsigned i;
888b8e80941Smrg		unsigned largest = 0;
889b8e80941Smrg		unsigned largest_index = 0;
890b8e80941Smrg
891b8e80941Smrg		for (i = 0; i < n; i++) {
892b8e80941Smrg			unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
893b8e80941Smrg			if (!spilled[i] && size > largest) {
894b8e80941Smrg				largest = size;
895b8e80941Smrg				largest_index = i;
896b8e80941Smrg			}
897b8e80941Smrg		}
898b8e80941Smrg
899b8e80941Smrg		spilled[largest_index] = true;
900b8e80941Smrg		*regno -= largest;
901b8e80941Smrg		*scratch_space_needed += largest;
902b8e80941Smrg
903b8e80941Smrg		narrays_left --;
904b8e80941Smrg	}
905b8e80941Smrg
906b8e80941Smrg	if (narrays_left == 0) {
907b8e80941Smrg		ctx->info.indirect_files &= ~(1 << TGSI_FILE_TEMPORARY);
908b8e80941Smrg	}
909b8e80941Smrg}
910b8e80941Smrg
911b8e80941Smrg/* Take spilled temp arrays into account when translating tgsi register
912b8e80941Smrg * indexes into r600 gprs if spilled is false, or scratch array offset if
913b8e80941Smrg * spilled is true */
914b8e80941Smrgstatic int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, bool *spilled)
915b8e80941Smrg{
916b8e80941Smrg	unsigned i;
917b8e80941Smrg	unsigned spilled_size = 0;
918b8e80941Smrg
919b8e80941Smrg	for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) {
920b8e80941Smrg		if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) {
921b8e80941Smrg			if (ctx->spilled_arrays[i]) {
922b8e80941Smrg				/* vec4 index into spilled scratch memory */
923b8e80941Smrg				*spilled = true;
924b8e80941Smrg				return tgsi_reg_index - ctx->array_infos[i].range.First + spilled_size;
925b8e80941Smrg			}
926b8e80941Smrg			else {
927b8e80941Smrg				/* regular GPR array */
928b8e80941Smrg				*spilled = false;
929b8e80941Smrg				return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY];
930b8e80941Smrg			}
931b8e80941Smrg		}
932b8e80941Smrg
933b8e80941Smrg		if (tgsi_reg_index < ctx->array_infos[i].range.First)
934b8e80941Smrg			break;
935b8e80941Smrg		if (ctx->spilled_arrays[i]) {
936b8e80941Smrg			spilled_size += ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
937b8e80941Smrg		}
938b8e80941Smrg	}
939b8e80941Smrg
940b8e80941Smrg	/* regular GPR index, minus the holes from spilled arrays */
941b8e80941Smrg	*spilled = false;
942b8e80941Smrg
943b8e80941Smrg	return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY];
944b8e80941Smrg}
945b8e80941Smrg
946b8e80941Smrg/* look up spill area base offset and array size for a spilled temp array */
947b8e80941Smrgstatic void get_spilled_array_base_and_size(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index,
948b8e80941Smrg	unsigned *array_base, unsigned *array_size)
949b8e80941Smrg{
950b8e80941Smrg	unsigned i;
951b8e80941Smrg	unsigned offset = 0;
952b8e80941Smrg
953b8e80941Smrg	for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) {
954b8e80941Smrg		if (ctx->spilled_arrays[i]) {
955b8e80941Smrg			unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
956b8e80941Smrg
957b8e80941Smrg			if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) {
958b8e80941Smrg				*array_base = offset;
959b8e80941Smrg				*array_size = size - 1; /* hw counts from 1 */
960b8e80941Smrg
961b8e80941Smrg				return;
962b8e80941Smrg			}
963b8e80941Smrg
964b8e80941Smrg			offset += size;
965b8e80941Smrg		}
966b8e80941Smrg	}
967b8e80941Smrg}
968b8e80941Smrg
969848b8605Smrgstatic int tgsi_declaration(struct r600_shader_ctx *ctx)
970848b8605Smrg{
971848b8605Smrg	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
972848b8605Smrg	int r, i, j, count = d->Range.Last - d->Range.First + 1;
973848b8605Smrg
974848b8605Smrg	switch (d->Declaration.File) {
975848b8605Smrg	case TGSI_FILE_INPUT:
976b8e80941Smrg		for (j = 0; j < count; j++) {
977b8e80941Smrg			i = ctx->shader->ninput + j;
978b8e80941Smrg			assert(i < ARRAY_SIZE(ctx->shader->input));
979b8e80941Smrg			ctx->shader->input[i].name = d->Semantic.Name;
980b8e80941Smrg			ctx->shader->input[i].sid = d->Semantic.Index + j;
981b8e80941Smrg			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
982b8e80941Smrg			ctx->shader->input[i].interpolate_location = d->Interp.Location;
983b8e80941Smrg			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
984b8e80941Smrg			if (ctx->type == PIPE_SHADER_FRAGMENT) {
985b8e80941Smrg				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
986b8e80941Smrg				switch (ctx->shader->input[i].name) {
987b8e80941Smrg				case TGSI_SEMANTIC_FACE:
988b8e80941Smrg					if (ctx->face_gpr != -1)
989b8e80941Smrg						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
990b8e80941Smrg					else
991b8e80941Smrg						ctx->face_gpr = ctx->shader->input[i].gpr;
992b8e80941Smrg					break;
993b8e80941Smrg				case TGSI_SEMANTIC_COLOR:
994b8e80941Smrg					ctx->colors_used++;
995b8e80941Smrg					break;
996b8e80941Smrg				case TGSI_SEMANTIC_POSITION:
997b8e80941Smrg					ctx->fragcoord_input = i;
998b8e80941Smrg					break;
999b8e80941Smrg				case TGSI_SEMANTIC_PRIMID:
1000b8e80941Smrg					/* set this for now */
1001b8e80941Smrg					ctx->shader->gs_prim_id_input = true;
1002b8e80941Smrg					ctx->shader->ps_prim_id_input = i;
1003b8e80941Smrg					break;
1004b8e80941Smrg				}
1005b8e80941Smrg				if (ctx->bc->chip_class >= EVERGREEN) {
1006b8e80941Smrg					if ((r = evergreen_interp_input(ctx, i)))
1007b8e80941Smrg						return r;
1008b8e80941Smrg				}
1009b8e80941Smrg			} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
1010b8e80941Smrg				/* FIXME probably skip inputs if they aren't passed in the ring */
1011b8e80941Smrg				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
1012b8e80941Smrg				ctx->next_ring_offset += 16;
1013b8e80941Smrg				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
1014b8e80941Smrg					ctx->shader->gs_prim_id_input = true;
1015848b8605Smrg			}
1016848b8605Smrg		}
1017b8e80941Smrg		ctx->shader->ninput += count;
1018848b8605Smrg		break;
1019848b8605Smrg	case TGSI_FILE_OUTPUT:
1020b8e80941Smrg		for (j = 0; j < count; j++) {
1021b8e80941Smrg			i = ctx->shader->noutput + j;
1022b8e80941Smrg			assert(i < ARRAY_SIZE(ctx->shader->output));
1023b8e80941Smrg			ctx->shader->output[i].name = d->Semantic.Name;
1024b8e80941Smrg			ctx->shader->output[i].sid = d->Semantic.Index + j;
1025b8e80941Smrg			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
1026b8e80941Smrg			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
1027b8e80941Smrg			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
1028b8e80941Smrg			if (ctx->type == PIPE_SHADER_VERTEX ||
1029b8e80941Smrg			    ctx->type == PIPE_SHADER_GEOMETRY ||
1030b8e80941Smrg			    ctx->type == PIPE_SHADER_TESS_EVAL) {
1031b8e80941Smrg				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
1032b8e80941Smrg				switch (d->Semantic.Name) {
1033b8e80941Smrg				case TGSI_SEMANTIC_CLIPDIST:
1034b8e80941Smrg					break;
1035b8e80941Smrg				case TGSI_SEMANTIC_PSIZE:
1036b8e80941Smrg					ctx->shader->vs_out_misc_write = 1;
1037b8e80941Smrg					ctx->shader->vs_out_point_size = 1;
1038b8e80941Smrg					break;
1039b8e80941Smrg				case TGSI_SEMANTIC_EDGEFLAG:
1040b8e80941Smrg					ctx->shader->vs_out_misc_write = 1;
1041b8e80941Smrg					ctx->shader->vs_out_edgeflag = 1;
1042b8e80941Smrg					ctx->edgeflag_output = i;
1043b8e80941Smrg					break;
1044b8e80941Smrg				case TGSI_SEMANTIC_VIEWPORT_INDEX:
1045b8e80941Smrg					ctx->shader->vs_out_misc_write = 1;
1046b8e80941Smrg					ctx->shader->vs_out_viewport = 1;
1047b8e80941Smrg					break;
1048b8e80941Smrg				case TGSI_SEMANTIC_LAYER:
1049b8e80941Smrg					ctx->shader->vs_out_misc_write = 1;
1050b8e80941Smrg					ctx->shader->vs_out_layer = 1;
1051b8e80941Smrg					break;
1052b8e80941Smrg				case TGSI_SEMANTIC_CLIPVERTEX:
1053b8e80941Smrg					ctx->clip_vertex_write = TRUE;
1054b8e80941Smrg					ctx->cv_output = i;
1055b8e80941Smrg					break;
1056b8e80941Smrg				}
1057b8e80941Smrg				if (ctx->type == PIPE_SHADER_GEOMETRY) {
1058b8e80941Smrg					ctx->gs_out_ring_offset += 16;
1059b8e80941Smrg				}
1060b8e80941Smrg			} else if (ctx->type == PIPE_SHADER_FRAGMENT) {
1061b8e80941Smrg				switch (d->Semantic.Name) {
1062b8e80941Smrg				case TGSI_SEMANTIC_COLOR:
1063b8e80941Smrg					ctx->shader->nr_ps_max_color_exports++;
1064b8e80941Smrg					break;
1065b8e80941Smrg				}
1066848b8605Smrg			}
1067848b8605Smrg		}
1068b8e80941Smrg		ctx->shader->noutput += count;
1069848b8605Smrg		break;
1070848b8605Smrg	case TGSI_FILE_TEMPORARY:
1071848b8605Smrg		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1072848b8605Smrg			if (d->Array.ArrayID) {
1073b8e80941Smrg				bool spilled;
1074b8e80941Smrg				unsigned idx = map_tgsi_reg_index_to_r600_gpr(ctx,
1075b8e80941Smrg					d->Range.First,
1076b8e80941Smrg					&spilled);
1077b8e80941Smrg
1078b8e80941Smrg				if (!spilled) {
1079b8e80941Smrg					r600_add_gpr_array(ctx->shader, idx,
1080b8e80941Smrg						d->Range.Last - d->Range.First + 1, 0x0F);
1081b8e80941Smrg				}
1082848b8605Smrg			}
1083848b8605Smrg		}
1084848b8605Smrg		break;
1085848b8605Smrg
1086848b8605Smrg	case TGSI_FILE_CONSTANT:
1087848b8605Smrg	case TGSI_FILE_SAMPLER:
1088b8e80941Smrg	case TGSI_FILE_SAMPLER_VIEW:
1089848b8605Smrg	case TGSI_FILE_ADDRESS:
1090b8e80941Smrg	case TGSI_FILE_BUFFER:
1091b8e80941Smrg	case TGSI_FILE_IMAGE:
1092b8e80941Smrg	case TGSI_FILE_MEMORY:
1093b8e80941Smrg		break;
1094b8e80941Smrg
1095b8e80941Smrg	case TGSI_FILE_HW_ATOMIC:
1096b8e80941Smrg		i = ctx->shader->nhwatomic_ranges;
1097b8e80941Smrg		ctx->shader->atomics[i].start = d->Range.First;
1098b8e80941Smrg		ctx->shader->atomics[i].end = d->Range.Last;
1099b8e80941Smrg		ctx->shader->atomics[i].hw_idx = ctx->shader->atomic_base + ctx->shader->nhwatomic;
1100b8e80941Smrg		ctx->shader->atomics[i].array_id = d->Array.ArrayID;
1101b8e80941Smrg		ctx->shader->atomics[i].buffer_id = d->Dim.Index2D;
1102b8e80941Smrg		ctx->shader->nhwatomic_ranges++;
1103b8e80941Smrg		ctx->shader->nhwatomic += count;
1104848b8605Smrg		break;
1105848b8605Smrg
1106848b8605Smrg	case TGSI_FILE_SYSTEM_VALUE:
1107b8e80941Smrg		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
1108b8e80941Smrg			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
1109b8e80941Smrg			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
1110b8e80941Smrg			break; /* Already handled from allocate_system_value_inputs */
1111b8e80941Smrg		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
1112848b8605Smrg			break;
1113b8e80941Smrg		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
1114b8e80941Smrg			break;
1115b8e80941Smrg		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
1116b8e80941Smrg			break;
1117b8e80941Smrg		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ||
1118b8e80941Smrg			 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) {
1119b8e80941Smrg			int param = r600_get_lds_unique_index(d->Semantic.Name, 0);
1120b8e80941Smrg			int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2;
1121b8e80941Smrg			unsigned temp_reg = r600_get_temp(ctx);
1122b8e80941Smrg
1123b8e80941Smrg			r = get_lds_offset0(ctx, 2, temp_reg, true);
1124b8e80941Smrg			if (r)
1125b8e80941Smrg				return r;
1126b8e80941Smrg
1127b8e80941Smrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
1128b8e80941Smrg					   temp_reg, 0,
1129b8e80941Smrg					   temp_reg, 0,
1130b8e80941Smrg					   V_SQ_ALU_SRC_LITERAL, param * 16);
1131b8e80941Smrg			if (r)
1132b8e80941Smrg				return r;
1133b8e80941Smrg
1134b8e80941Smrg			do_lds_fetch_values(ctx, temp_reg, dreg, 0xf);
1135848b8605Smrg		}
1136b8e80941Smrg		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
1137b8e80941Smrg			/* MOV r1.x, r0.x;
1138b8e80941Smrg			   MOV r1.y, r0.y;
1139b8e80941Smrg			*/
1140b8e80941Smrg			for (i = 0; i < 2; i++) {
1141848b8605Smrg				struct r600_bytecode_alu alu;
1142848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1143b8e80941Smrg				alu.op = ALU_OP1_MOV;
1144848b8605Smrg				alu.src[0].sel = 0;
1145b8e80941Smrg				alu.src[0].chan = 0 + i;
1146b8e80941Smrg				alu.dst.sel = 1;
1147b8e80941Smrg				alu.dst.chan = 0 + i;
1148848b8605Smrg				alu.dst.write = 1;
1149b8e80941Smrg				alu.last = (i == 1) ? 1 : 0;
1150848b8605Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1151848b8605Smrg					return r;
1152848b8605Smrg			}
1153b8e80941Smrg			/* ADD r1.z, 1.0f, -r0.x */
1154b8e80941Smrg			struct r600_bytecode_alu alu;
1155b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1156b8e80941Smrg			alu.op = ALU_OP2_ADD;
1157b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_1;
1158b8e80941Smrg			alu.src[1].sel = 1;
1159b8e80941Smrg			alu.src[1].chan = 0;
1160b8e80941Smrg			alu.src[1].neg = 1;
1161b8e80941Smrg			alu.dst.sel = 1;
1162b8e80941Smrg			alu.dst.chan = 2;
1163b8e80941Smrg			alu.dst.write = 1;
1164b8e80941Smrg			alu.last = 1;
1165b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1166b8e80941Smrg				return r;
1167848b8605Smrg
1168b8e80941Smrg			/* ADD r1.z, r1.z, -r1.y */
1169b8e80941Smrg			alu.op = ALU_OP2_ADD;
1170b8e80941Smrg			alu.src[0].sel = 1;
1171b8e80941Smrg			alu.src[0].chan = 2;
1172b8e80941Smrg			alu.src[1].sel = 1;
1173b8e80941Smrg			alu.src[1].chan = 1;
1174b8e80941Smrg			alu.src[1].neg = 1;
1175b8e80941Smrg			alu.dst.sel = 1;
1176b8e80941Smrg			alu.dst.chan = 2;
1177b8e80941Smrg			alu.dst.write = 1;
1178b8e80941Smrg			alu.last = 1;
1179b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1180b8e80941Smrg				return r;
1181b8e80941Smrg			break;
1182b8e80941Smrg		}
1183b8e80941Smrg		break;
1184b8e80941Smrg	default:
1185b8e80941Smrg		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
1186b8e80941Smrg		return -EINVAL;
1187b8e80941Smrg	}
1188b8e80941Smrg	return 0;
1189b8e80941Smrg}
1190b8e80941Smrg
1191b8e80941Smrgstatic int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
1192b8e80941Smrg{
1193b8e80941Smrg	struct tgsi_parse_context parse;
1194b8e80941Smrg	struct {
1195b8e80941Smrg		boolean enabled;
1196b8e80941Smrg		int *reg;
1197b8e80941Smrg		unsigned name, alternate_name;
1198b8e80941Smrg	} inputs[2] = {
1199b8e80941Smrg		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */
1200b8e80941Smrg
1201b8e80941Smrg		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
1202b8e80941Smrg	};
1203b8e80941Smrg	int num_regs = 0;
1204b8e80941Smrg	unsigned k, i;
1205b8e80941Smrg
1206b8e80941Smrg	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
1207b8e80941Smrg		return 0;
1208b8e80941Smrg	}
1209b8e80941Smrg
1210b8e80941Smrg	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
1211b8e80941Smrg	while (!tgsi_parse_end_of_tokens(&parse)) {
1212b8e80941Smrg		tgsi_parse_token(&parse);
1213b8e80941Smrg
1214b8e80941Smrg		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
1215b8e80941Smrg			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1216b8e80941Smrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
1217b8e80941Smrg				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
1218b8e80941Smrg				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
1219b8e80941Smrg			{
1220b8e80941Smrg				int interpolate, location, k;
1221b8e80941Smrg
1222b8e80941Smrg				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
1223b8e80941Smrg					location = TGSI_INTERPOLATE_LOC_CENTER;
1224b8e80941Smrg				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
1225b8e80941Smrg					location = TGSI_INTERPOLATE_LOC_CENTER;
1226b8e80941Smrg					/* Needs sample positions, currently those are always available */
1227b8e80941Smrg				} else {
1228b8e80941Smrg					location = TGSI_INTERPOLATE_LOC_CENTROID;
1229b8e80941Smrg				}
1230b8e80941Smrg
1231b8e80941Smrg				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
1232b8e80941Smrg				k = eg_get_interpolator_index(interpolate, location);
1233b8e80941Smrg				if (k >= 0)
1234b8e80941Smrg					ctx->eg_interpolators[k].enabled = true;
1235b8e80941Smrg			}
1236b8e80941Smrg		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
1237b8e80941Smrg			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
1238b8e80941Smrg			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1239b8e80941Smrg				for (k = 0; k < ARRAY_SIZE(inputs); k++) {
1240b8e80941Smrg					if (d->Semantic.Name == inputs[k].name ||
1241b8e80941Smrg						d->Semantic.Name == inputs[k].alternate_name) {
1242b8e80941Smrg						inputs[k].enabled = true;
1243b8e80941Smrg					}
1244b8e80941Smrg				}
1245b8e80941Smrg			}
1246b8e80941Smrg		}
1247b8e80941Smrg	}
1248b8e80941Smrg
1249b8e80941Smrg	tgsi_parse_free(&parse);
1250b8e80941Smrg
1251b8e80941Smrg	if (ctx->info.reads_samplemask &&
1252b8e80941Smrg	    (ctx->info.uses_linear_sample || ctx->info.uses_persp_sample)) {
1253b8e80941Smrg		inputs[1].enabled = true;
1254b8e80941Smrg	}
1255b8e80941Smrg
1256b8e80941Smrg	if (ctx->bc->chip_class >= EVERGREEN) {
1257b8e80941Smrg		int num_baryc = 0;
1258b8e80941Smrg		/* assign gpr to each interpolator according to priority */
1259b8e80941Smrg		for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) {
1260b8e80941Smrg			if (ctx->eg_interpolators[i].enabled) {
1261b8e80941Smrg				ctx->eg_interpolators[i].ij_index = num_baryc;
1262b8e80941Smrg				num_baryc++;
1263b8e80941Smrg			}
1264b8e80941Smrg		}
1265b8e80941Smrg		num_baryc = (num_baryc + 1) >> 1;
1266b8e80941Smrg		gpr_offset += num_baryc;
1267b8e80941Smrg	}
1268b8e80941Smrg
1269b8e80941Smrg	for (i = 0; i < ARRAY_SIZE(inputs); i++) {
1270b8e80941Smrg		boolean enabled = inputs[i].enabled;
1271b8e80941Smrg		int *reg = inputs[i].reg;
1272b8e80941Smrg		unsigned name = inputs[i].name;
1273b8e80941Smrg
1274b8e80941Smrg		if (enabled) {
1275b8e80941Smrg			int gpr = gpr_offset + num_regs++;
1276b8e80941Smrg			ctx->shader->nsys_inputs++;
1277b8e80941Smrg
1278b8e80941Smrg			// add to inputs, allocate a gpr
1279b8e80941Smrg			k = ctx->shader->ninput++;
1280b8e80941Smrg			ctx->shader->input[k].name = name;
1281b8e80941Smrg			ctx->shader->input[k].sid = 0;
1282b8e80941Smrg			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
1283b8e80941Smrg			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
1284b8e80941Smrg			*reg = ctx->shader->input[k].gpr = gpr;
1285b8e80941Smrg		}
1286b8e80941Smrg	}
1287b8e80941Smrg
1288b8e80941Smrg	return gpr_offset + num_regs;
1289848b8605Smrg}
1290848b8605Smrg
1291848b8605Smrg/*
1292848b8605Smrg * for evergreen we need to scan the shader to find the number of GPRs we need to
1293b8e80941Smrg * reserve for interpolation and system values
1294848b8605Smrg *
1295b8e80941Smrg * we need to know if we are going to emit any sample or centroid inputs
1296848b8605Smrg * if perspective and linear are required
1297848b8605Smrg*/
1298848b8605Smrgstatic int evergreen_gpr_count(struct r600_shader_ctx *ctx)
1299848b8605Smrg{
1300b8e80941Smrg	unsigned i;
1301848b8605Smrg
1302b8e80941Smrg	memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
1303848b8605Smrg
1304b8e80941Smrg	/*
1305b8e80941Smrg	 * Could get this information from the shader info. But right now
1306b8e80941Smrg	 * we interpolate all declared inputs, whereas the shader info will
1307b8e80941Smrg	 * only contain the bits if the inputs are actually used, so it might
1308b8e80941Smrg	 * not be safe...
1309b8e80941Smrg	 */
1310848b8605Smrg	for (i = 0; i < ctx->info.num_inputs; i++) {
1311b8e80941Smrg		int k;
1312b8e80941Smrg		/* skip position/face/mask/sampleid */
1313848b8605Smrg		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
1314848b8605Smrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
1315b8e80941Smrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
1316b8e80941Smrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
1317848b8605Smrg			continue;
1318b8e80941Smrg
1319b8e80941Smrg		k = eg_get_interpolator_index(
1320b8e80941Smrg			ctx->info.input_interpolate[i],
1321b8e80941Smrg			ctx->info.input_interpolate_loc[i]);
1322b8e80941Smrg		if (k >= 0)
1323b8e80941Smrg			ctx->eg_interpolators[k].enabled = TRUE;
1324b8e80941Smrg	}
1325b8e80941Smrg
1326b8e80941Smrg	/* XXX PULL MODEL and LINE STIPPLE */
1327b8e80941Smrg
1328b8e80941Smrg	return allocate_system_value_inputs(ctx, 0);
1329b8e80941Smrg}
1330b8e80941Smrg
1331b8e80941Smrg/* sample_id_sel == NULL means fetch for current sample */
1332b8e80941Smrgstatic int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
1333b8e80941Smrg{
1334b8e80941Smrg	struct r600_bytecode_vtx vtx;
1335b8e80941Smrg	int r, t1;
1336b8e80941Smrg
1337b8e80941Smrg	t1 = r600_get_temp(ctx);
1338b8e80941Smrg
1339b8e80941Smrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
1340b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
1341b8e80941Smrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
1342b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1343b8e80941Smrg	if (sample_id == NULL) {
1344b8e80941Smrg		assert(ctx->fixed_pt_position_gpr != -1);
1345b8e80941Smrg
1346b8e80941Smrg		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
1347b8e80941Smrg		vtx.src_sel_x = 3;
1348b8e80941Smrg	}
1349b8e80941Smrg	else {
1350b8e80941Smrg		struct r600_bytecode_alu alu;
1351b8e80941Smrg
1352b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1353b8e80941Smrg		alu.op = ALU_OP1_MOV;
1354b8e80941Smrg		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
1355b8e80941Smrg		alu.dst.sel = t1;
1356b8e80941Smrg		alu.dst.write = 1;
1357b8e80941Smrg		alu.last = 1;
1358b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1359b8e80941Smrg		if (r)
1360b8e80941Smrg			return r;
1361b8e80941Smrg
1362b8e80941Smrg		vtx.src_gpr = t1;
1363b8e80941Smrg		vtx.src_sel_x = 0;
1364848b8605Smrg	}
1365b8e80941Smrg	vtx.mega_fetch_count = 16;
1366b8e80941Smrg	vtx.dst_gpr = t1;
1367b8e80941Smrg	vtx.dst_sel_x = 0;
1368b8e80941Smrg	vtx.dst_sel_y = 1;
1369b8e80941Smrg	vtx.dst_sel_z = 2;
1370b8e80941Smrg	vtx.dst_sel_w = 3;
1371b8e80941Smrg	vtx.data_format = FMT_32_32_32_32_FLOAT;
1372b8e80941Smrg	vtx.num_format_all = 2;
1373b8e80941Smrg	vtx.format_comp_all = 1;
1374b8e80941Smrg	vtx.use_const_fields = 0;
1375b8e80941Smrg	vtx.offset = 0;
1376b8e80941Smrg	vtx.endian = r600_endian_swap(32);
1377b8e80941Smrg	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1378b8e80941Smrg
1379b8e80941Smrg	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
1380b8e80941Smrg	if (r)
1381b8e80941Smrg		return r;
1382b8e80941Smrg
1383b8e80941Smrg	return t1;
1384b8e80941Smrg}
1385b8e80941Smrg
1386b8e80941Smrgstatic int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
1387b8e80941Smrg{
1388b8e80941Smrg	int r;
1389b8e80941Smrg	struct r600_bytecode_alu alu;
1390b8e80941Smrg
1391b8e80941Smrg	/* do a vtx fetch with wqm set on the vtx fetch */
1392b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1393b8e80941Smrg	alu.op = ALU_OP1_MOV;
1394b8e80941Smrg	alu.dst.sel = ctx->helper_invoc_reg;
1395b8e80941Smrg	alu.dst.chan = 0;
1396b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
1397b8e80941Smrg	alu.src[0].value = 0xffffffff;
1398b8e80941Smrg	alu.dst.write = 1;
1399b8e80941Smrg	alu.last = 1;
1400b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1401b8e80941Smrg	if (r)
1402b8e80941Smrg		return r;
1403b8e80941Smrg
1404b8e80941Smrg	/* do a vtx fetch in VPM mode */
1405b8e80941Smrg	struct r600_bytecode_vtx vtx;
1406b8e80941Smrg	memset(&vtx, 0, sizeof(vtx));
1407b8e80941Smrg	vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
1408b8e80941Smrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
1409b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1410b8e80941Smrg	vtx.src_gpr = 0;
1411b8e80941Smrg	vtx.mega_fetch_count = 16; /* no idea here really... */
1412b8e80941Smrg	vtx.dst_gpr = ctx->helper_invoc_reg;
1413b8e80941Smrg	vtx.dst_sel_x = 4;
1414b8e80941Smrg	vtx.dst_sel_y = 7;		/* SEL_Y */
1415b8e80941Smrg	vtx.dst_sel_z = 7;		/* SEL_Z */
1416b8e80941Smrg	vtx.dst_sel_w = 7;		/* SEL_W */
1417b8e80941Smrg	vtx.data_format = FMT_32;
1418b8e80941Smrg	if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
1419b8e80941Smrg		return r;
1420b8e80941Smrg	ctx->bc->cf_last->vpm = 1;
1421b8e80941Smrg	return 0;
1422b8e80941Smrg}
1423b8e80941Smrg
1424b8e80941Smrgstatic int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
1425b8e80941Smrg{
1426b8e80941Smrg	int r;
1427b8e80941Smrg	struct r600_bytecode_alu alu;
1428b8e80941Smrg
1429b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1430b8e80941Smrg	alu.op = ALU_OP1_MOV;
1431b8e80941Smrg	alu.dst.sel = ctx->helper_invoc_reg;
1432b8e80941Smrg	alu.dst.chan = 0;
1433b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
1434b8e80941Smrg	alu.src[0].value = 0xffffffff;
1435b8e80941Smrg	alu.dst.write = 1;
1436b8e80941Smrg	alu.last = 1;
1437b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1438b8e80941Smrg	if (r)
1439b8e80941Smrg		return r;
1440b8e80941Smrg
1441b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1442b8e80941Smrg	alu.op = ALU_OP1_MOV;
1443b8e80941Smrg	alu.dst.sel = ctx->helper_invoc_reg;
1444b8e80941Smrg	alu.dst.chan = 0;
1445b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
1446b8e80941Smrg	alu.dst.write = 1;
1447b8e80941Smrg	alu.last = 1;
1448b8e80941Smrg	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
1449b8e80941Smrg	if (r)
1450b8e80941Smrg		return r;
1451b8e80941Smrg
1452b8e80941Smrg	return ctx->helper_invoc_reg;
1453b8e80941Smrg}
1454b8e80941Smrg
1455b8e80941Smrgstatic int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
1456b8e80941Smrg{
1457b8e80941Smrg	struct r600_bytecode_vtx vtx;
1458b8e80941Smrg	int r, t1;
1459b8e80941Smrg
1460b8e80941Smrg	if (ctx->cs_block_size_loaded)
1461b8e80941Smrg		return ctx->cs_block_size_reg;
1462b8e80941Smrg	if (ctx->cs_grid_size_loaded)
1463b8e80941Smrg		return ctx->cs_grid_size_reg;
1464b8e80941Smrg
1465b8e80941Smrg	t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg;
1466b8e80941Smrg	struct r600_bytecode_alu alu;
1467b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1468b8e80941Smrg	alu.op = ALU_OP1_MOV;
1469b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
1470b8e80941Smrg	alu.dst.sel = t1;
1471b8e80941Smrg	alu.dst.write = 1;
1472b8e80941Smrg	alu.last = 1;
1473b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1474b8e80941Smrg	if (r)
1475b8e80941Smrg		return r;
1476b8e80941Smrg
1477b8e80941Smrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
1478b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
1479b8e80941Smrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
1480b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1481b8e80941Smrg	vtx.src_gpr = t1;
1482b8e80941Smrg	vtx.src_sel_x = 0;
1483848b8605Smrg
1484b8e80941Smrg	vtx.mega_fetch_count = 16;
1485b8e80941Smrg	vtx.dst_gpr = t1;
1486b8e80941Smrg	vtx.dst_sel_x = 0;
1487b8e80941Smrg	vtx.dst_sel_y = 1;
1488b8e80941Smrg	vtx.dst_sel_z = 2;
1489b8e80941Smrg	vtx.dst_sel_w = 7;
1490b8e80941Smrg	vtx.data_format = FMT_32_32_32_32;
1491b8e80941Smrg	vtx.num_format_all = 1;
1492b8e80941Smrg	vtx.format_comp_all = 0;
1493b8e80941Smrg	vtx.use_const_fields = 0;
1494b8e80941Smrg	vtx.offset = load_block ? 0 : 16; // first element is size of buffer
1495b8e80941Smrg	vtx.endian = r600_endian_swap(32);
1496b8e80941Smrg	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
1497848b8605Smrg
1498b8e80941Smrg	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
1499b8e80941Smrg	if (r)
1500b8e80941Smrg		return r;
1501848b8605Smrg
1502b8e80941Smrg	if (load_block)
1503b8e80941Smrg		ctx->cs_block_size_loaded = true;
1504b8e80941Smrg	else
1505b8e80941Smrg		ctx->cs_grid_size_loaded = true;
1506b8e80941Smrg	return t1;
1507848b8605Smrg}
1508848b8605Smrg
1509848b8605Smrgstatic void tgsi_src(struct r600_shader_ctx *ctx,
1510848b8605Smrg		     const struct tgsi_full_src_register *tgsi_src,
1511848b8605Smrg		     struct r600_shader_src *r600_src)
1512848b8605Smrg{
1513848b8605Smrg	memset(r600_src, 0, sizeof(*r600_src));
1514848b8605Smrg	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
1515848b8605Smrg	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
1516848b8605Smrg	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
1517848b8605Smrg	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
1518848b8605Smrg	r600_src->neg = tgsi_src->Register.Negate;
1519848b8605Smrg	r600_src->abs = tgsi_src->Register.Absolute;
1520848b8605Smrg
1521b8e80941Smrg	if (tgsi_src->Register.File == TGSI_FILE_TEMPORARY) {
1522b8e80941Smrg		bool spilled;
1523b8e80941Smrg		unsigned idx;
1524b8e80941Smrg
1525b8e80941Smrg		idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_src->Register.Index, &spilled);
1526b8e80941Smrg
1527b8e80941Smrg		if (spilled) {
1528b8e80941Smrg			int reg = r600_get_temp(ctx);
1529b8e80941Smrg			int r;
1530b8e80941Smrg
1531b8e80941Smrg			r600_src->sel = reg;
1532b8e80941Smrg
1533b8e80941Smrg			if (ctx->bc->chip_class < R700) {
1534b8e80941Smrg				struct r600_bytecode_output cf;
1535b8e80941Smrg
1536b8e80941Smrg				memset(&cf, 0, sizeof(struct r600_bytecode_output));
1537b8e80941Smrg				cf.op = CF_OP_MEM_SCRATCH;
1538b8e80941Smrg				cf.elem_size = 3;
1539b8e80941Smrg				cf.gpr = reg;
1540b8e80941Smrg				cf.comp_mask = 0xF;
1541b8e80941Smrg				cf.swizzle_x = 0;
1542b8e80941Smrg				cf.swizzle_y = 1;
1543b8e80941Smrg				cf.swizzle_z = 2;
1544b8e80941Smrg				cf.swizzle_w = 3;
1545b8e80941Smrg				cf.burst_count = 1;
1546b8e80941Smrg
1547b8e80941Smrg				get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index,
1548b8e80941Smrg					&cf.array_base, &cf.array_size);
1549b8e80941Smrg
1550b8e80941Smrg				if (tgsi_src->Register.Indirect) {
1551b8e80941Smrg					cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
1552b8e80941Smrg					cf.index_gpr = ctx->bc->ar_reg;
1553b8e80941Smrg				}
1554b8e80941Smrg				else {
1555b8e80941Smrg					cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ;
1556b8e80941Smrg					cf.array_base += idx;
1557b8e80941Smrg					cf.array_size = 0;
1558b8e80941Smrg				}
1559b8e80941Smrg
1560b8e80941Smrg				r = r600_bytecode_add_output(ctx->bc, &cf);
1561b8e80941Smrg			}
1562b8e80941Smrg			else {
1563b8e80941Smrg				struct r600_bytecode_vtx vtx;
1564b8e80941Smrg
1565b8e80941Smrg				if (r600_bytecode_get_need_wait_ack(ctx->bc)) {
1566b8e80941Smrg					r600_bytecode_need_wait_ack(ctx->bc, false);
1567b8e80941Smrg					r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
1568b8e80941Smrg				}
1569b8e80941Smrg
1570b8e80941Smrg				memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
1571b8e80941Smrg				vtx.op = FETCH_OP_READ_SCRATCH;
1572b8e80941Smrg				vtx.dst_gpr = reg;
1573b8e80941Smrg				vtx.uncached = 1; // Must bypass cache since prior spill written in same invocation
1574b8e80941Smrg				vtx.elem_size = 3;
1575b8e80941Smrg				vtx.data_format = FMT_32_32_32_32;
1576b8e80941Smrg				vtx.num_format_all = V_038010_SQ_NUM_FORMAT_INT;
1577b8e80941Smrg				vtx.dst_sel_x = tgsi_src->Register.SwizzleX;
1578b8e80941Smrg				vtx.dst_sel_y = tgsi_src->Register.SwizzleY;
1579b8e80941Smrg				vtx.dst_sel_z = tgsi_src->Register.SwizzleZ;
1580b8e80941Smrg				vtx.dst_sel_w = tgsi_src->Register.SwizzleW;
1581b8e80941Smrg
1582b8e80941Smrg				get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index,
1583b8e80941Smrg					&vtx.array_base, &vtx.array_size);
1584b8e80941Smrg
1585b8e80941Smrg				if (tgsi_src->Register.Indirect) {
1586b8e80941Smrg					vtx.indexed = 1;
1587b8e80941Smrg					vtx.src_gpr = ctx->bc->ar_reg;
1588b8e80941Smrg				}
1589b8e80941Smrg				else {
1590b8e80941Smrg					vtx.array_base += idx;
1591b8e80941Smrg					vtx.array_size = 0;
1592b8e80941Smrg				}
1593b8e80941Smrg
1594b8e80941Smrg				r = r600_bytecode_add_vtx(ctx->bc, &vtx);
1595b8e80941Smrg			}
1596b8e80941Smrg
1597b8e80941Smrg			if (r)
1598b8e80941Smrg				return;
1599b8e80941Smrg		}
1600b8e80941Smrg		else {
1601b8e80941Smrg			if (tgsi_src->Register.Indirect)
1602b8e80941Smrg				r600_src->rel = V_SQ_REL_RELATIVE;
1603b8e80941Smrg
1604b8e80941Smrg			r600_src->sel = idx;
1605b8e80941Smrg		}
1606b8e80941Smrg
1607b8e80941Smrg		return;
1608b8e80941Smrg	}
1609b8e80941Smrg
1610848b8605Smrg	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
1611848b8605Smrg		int index;
1612848b8605Smrg		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
1613848b8605Smrg			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
1614848b8605Smrg			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
1615848b8605Smrg
1616848b8605Smrg			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
1617b8e80941Smrg			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
1618848b8605Smrg			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
1619848b8605Smrg				return;
1620848b8605Smrg		}
1621848b8605Smrg		index = tgsi_src->Register.Index;
1622848b8605Smrg		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
1623848b8605Smrg		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
1624848b8605Smrg	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
1625848b8605Smrg		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
1626848b8605Smrg			r600_src->swizzle[0] = 2; // Z value
1627b8e80941Smrg			r600_src->swizzle[1] = 2;
1628b8e80941Smrg			r600_src->swizzle[2] = 2;
1629b8e80941Smrg			r600_src->swizzle[3] = 2;
1630848b8605Smrg			r600_src->sel = ctx->face_gpr;
1631b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
1632b8e80941Smrg			r600_src->swizzle[0] = 3; // W value
1633b8e80941Smrg			r600_src->swizzle[1] = 3;
1634b8e80941Smrg			r600_src->swizzle[2] = 3;
1635b8e80941Smrg			r600_src->swizzle[3] = 3;
1636b8e80941Smrg			r600_src->sel = ctx->fixed_pt_position_gpr;
1637b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
1638b8e80941Smrg			r600_src->swizzle[0] = 0;
1639b8e80941Smrg			r600_src->swizzle[1] = 1;
1640b8e80941Smrg			r600_src->swizzle[2] = 4;
1641b8e80941Smrg			r600_src->swizzle[3] = 4;
1642b8e80941Smrg			r600_src->sel = load_sample_position(ctx, NULL, -1);
1643848b8605Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
1644848b8605Smrg			r600_src->swizzle[0] = 3;
1645848b8605Smrg			r600_src->swizzle[1] = 3;
1646848b8605Smrg			r600_src->swizzle[2] = 3;
1647848b8605Smrg			r600_src->swizzle[3] = 3;
1648848b8605Smrg			r600_src->sel = 0;
1649848b8605Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
1650848b8605Smrg			r600_src->swizzle[0] = 0;
1651848b8605Smrg			r600_src->swizzle[1] = 0;
1652848b8605Smrg			r600_src->swizzle[2] = 0;
1653848b8605Smrg			r600_src->swizzle[3] = 0;
1654848b8605Smrg			r600_src->sel = 0;
1655b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_THREAD_ID) {
1656b8e80941Smrg			r600_src->sel = 0;
1657b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_ID) {
1658b8e80941Smrg			r600_src->sel = 1;
1659b8e80941Smrg		} else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
1660b8e80941Smrg			r600_src->swizzle[0] = 3;
1661b8e80941Smrg			r600_src->swizzle[1] = 3;
1662b8e80941Smrg			r600_src->swizzle[2] = 3;
1663b8e80941Smrg			r600_src->swizzle[3] = 3;
1664b8e80941Smrg			r600_src->sel = 1;
1665b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
1666b8e80941Smrg			r600_src->swizzle[0] = 2;
1667b8e80941Smrg			r600_src->swizzle[1] = 2;
1668b8e80941Smrg			r600_src->swizzle[2] = 2;
1669b8e80941Smrg			r600_src->swizzle[3] = 2;
1670b8e80941Smrg			r600_src->sel = 0;
1671b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) {
1672b8e80941Smrg			r600_src->sel = 1;
1673b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) {
1674b8e80941Smrg			r600_src->sel = 3;
1675b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) {
1676b8e80941Smrg			r600_src->sel = 2;
1677b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) {
1678b8e80941Smrg			r600_src->sel = ctx->tess_input_info;
1679b8e80941Smrg			r600_src->swizzle[0] = 2;
1680b8e80941Smrg			r600_src->swizzle[1] = 2;
1681b8e80941Smrg			r600_src->swizzle[2] = 2;
1682b8e80941Smrg			r600_src->swizzle[3] = 2;
1683b8e80941Smrg		} else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1684b8e80941Smrg			r600_src->sel = 0;
1685b8e80941Smrg			r600_src->swizzle[0] = 0;
1686b8e80941Smrg			r600_src->swizzle[1] = 0;
1687b8e80941Smrg			r600_src->swizzle[2] = 0;
1688b8e80941Smrg			r600_src->swizzle[3] = 0;
1689b8e80941Smrg		} else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1690b8e80941Smrg			r600_src->sel = 0;
1691b8e80941Smrg			r600_src->swizzle[0] = 3;
1692b8e80941Smrg			r600_src->swizzle[1] = 3;
1693b8e80941Smrg			r600_src->swizzle[2] = 3;
1694b8e80941Smrg			r600_src->swizzle[3] = 3;
1695b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) {
1696b8e80941Smrg			r600_src->sel = load_block_grid_size(ctx, false);
1697b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
1698b8e80941Smrg			r600_src->sel = load_block_grid_size(ctx, true);
1699b8e80941Smrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
1700b8e80941Smrg			r600_src->sel = ctx->helper_invoc_reg;
1701b8e80941Smrg			r600_src->swizzle[0] = 0;
1702b8e80941Smrg			r600_src->swizzle[1] = 0;
1703b8e80941Smrg			r600_src->swizzle[2] = 0;
1704b8e80941Smrg			r600_src->swizzle[3] = 0;
1705848b8605Smrg		}
1706848b8605Smrg	} else {
1707848b8605Smrg		if (tgsi_src->Register.Indirect)
1708848b8605Smrg			r600_src->rel = V_SQ_REL_RELATIVE;
1709848b8605Smrg		r600_src->sel = tgsi_src->Register.Index;
1710848b8605Smrg		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
1711848b8605Smrg	}
1712848b8605Smrg	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
1713848b8605Smrg		if (tgsi_src->Register.Dimension) {
1714848b8605Smrg			r600_src->kc_bank = tgsi_src->Dimension.Index;
1715b8e80941Smrg			if (tgsi_src->Dimension.Indirect) {
1716b8e80941Smrg				r600_src->kc_rel = 1;
1717b8e80941Smrg			}
1718848b8605Smrg		}
1719848b8605Smrg	}
1720848b8605Smrg}
1721848b8605Smrg
1722848b8605Smrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
1723b8e80941Smrg                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
1724848b8605Smrg                                unsigned int dst_reg)
1725848b8605Smrg{
1726848b8605Smrg	struct r600_bytecode_vtx vtx;
1727848b8605Smrg	unsigned int ar_reg;
1728848b8605Smrg	int r;
1729848b8605Smrg
1730848b8605Smrg	if (offset) {
1731848b8605Smrg		struct r600_bytecode_alu alu;
1732848b8605Smrg
1733848b8605Smrg		memset(&alu, 0, sizeof(alu));
1734848b8605Smrg
1735848b8605Smrg		alu.op = ALU_OP2_ADD_INT;
1736848b8605Smrg		alu.src[0].sel = ctx->bc->ar_reg;
1737848b8605Smrg		alu.src[0].chan = ar_chan;
1738848b8605Smrg
1739848b8605Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1740848b8605Smrg		alu.src[1].value = offset;
1741848b8605Smrg
1742848b8605Smrg		alu.dst.sel = dst_reg;
1743848b8605Smrg		alu.dst.chan = ar_chan;
1744848b8605Smrg		alu.dst.write = 1;
1745848b8605Smrg		alu.last = 1;
1746848b8605Smrg
1747848b8605Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1748848b8605Smrg			return r;
1749848b8605Smrg
1750848b8605Smrg		ar_reg = dst_reg;
1751848b8605Smrg	} else {
1752848b8605Smrg		ar_reg = ctx->bc->ar_reg;
1753848b8605Smrg	}
1754848b8605Smrg
1755848b8605Smrg	memset(&vtx, 0, sizeof(vtx));
1756848b8605Smrg	vtx.buffer_id = cb_idx;
1757b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1758848b8605Smrg	vtx.src_gpr = ar_reg;
1759848b8605Smrg	vtx.src_sel_x = ar_chan;
1760848b8605Smrg	vtx.mega_fetch_count = 16;
1761848b8605Smrg	vtx.dst_gpr = dst_reg;
1762848b8605Smrg	vtx.dst_sel_x = 0;		/* SEL_X */
1763848b8605Smrg	vtx.dst_sel_y = 1;		/* SEL_Y */
1764848b8605Smrg	vtx.dst_sel_z = 2;		/* SEL_Z */
1765848b8605Smrg	vtx.dst_sel_w = 3;		/* SEL_W */
1766848b8605Smrg	vtx.data_format = FMT_32_32_32_32_FLOAT;
1767848b8605Smrg	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
1768848b8605Smrg	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
1769848b8605Smrg	vtx.endian = r600_endian_swap(32);
1770b8e80941Smrg	vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;
1771848b8605Smrg
1772848b8605Smrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1773848b8605Smrg		return r;
1774848b8605Smrg
1775848b8605Smrg	return 0;
1776848b8605Smrg}
1777848b8605Smrg
1778848b8605Smrgstatic int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
1779848b8605Smrg{
1780848b8605Smrg	struct r600_bytecode_vtx vtx;
1781848b8605Smrg	int r;
1782848b8605Smrg	unsigned index = src->Register.Index;
1783848b8605Smrg	unsigned vtx_id = src->Dimension.Index;
1784b8e80941Smrg	int offset_reg = ctx->gs_rotated_input[vtx_id / 3];
1785848b8605Smrg	int offset_chan = vtx_id % 3;
1786b8e80941Smrg	int t2 = 0;
1787848b8605Smrg
1788848b8605Smrg	/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
1789848b8605Smrg	 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
1790848b8605Smrg
1791b8e80941Smrg	if (offset_reg == ctx->gs_rotated_input[0] && offset_chan == 2)
1792848b8605Smrg		offset_chan = 3;
1793848b8605Smrg
1794b8e80941Smrg	if (src->Dimension.Indirect || src->Register.Indirect)
1795b8e80941Smrg		t2 = r600_get_temp(ctx);
1796b8e80941Smrg
1797848b8605Smrg	if (src->Dimension.Indirect) {
1798848b8605Smrg		int treg[3];
1799848b8605Smrg		struct r600_bytecode_alu alu;
1800848b8605Smrg		int r, i;
1801b8e80941Smrg		unsigned addr_reg;
1802b8e80941Smrg		addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
1803b8e80941Smrg		if (src->DimIndirect.Index > 0) {
1804b8e80941Smrg			r = single_alu_op2(ctx, ALU_OP1_MOV,
1805b8e80941Smrg					   ctx->bc->ar_reg, 0,
1806b8e80941Smrg					   addr_reg, 0,
1807b8e80941Smrg					   0, 0);
1808b8e80941Smrg			if (r)
1809b8e80941Smrg				return r;
1810b8e80941Smrg		}
1811b8e80941Smrg		/*
1812848b8605Smrg		   we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
1813848b8605Smrg		   at least this is what fglrx seems to do. */
1814848b8605Smrg		for (i = 0; i < 3; i++) {
1815848b8605Smrg			treg[i] = r600_get_temp(ctx);
1816848b8605Smrg		}
1817b8e80941Smrg		r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
1818b8e80941Smrg
1819848b8605Smrg		for (i = 0; i < 3; i++) {
1820848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1821848b8605Smrg			alu.op = ALU_OP1_MOV;
1822b8e80941Smrg			alu.src[0].sel = ctx->gs_rotated_input[0];
1823848b8605Smrg			alu.src[0].chan = i == 2 ? 3 : i;
1824848b8605Smrg			alu.dst.sel = treg[i];
1825848b8605Smrg			alu.dst.chan = 0;
1826848b8605Smrg			alu.dst.write = 1;
1827848b8605Smrg			alu.last = 1;
1828848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
1829848b8605Smrg			if (r)
1830848b8605Smrg				return r;
1831848b8605Smrg		}
1832848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1833848b8605Smrg		alu.op = ALU_OP1_MOV;
1834848b8605Smrg		alu.src[0].sel = treg[0];
1835848b8605Smrg		alu.src[0].rel = 1;
1836848b8605Smrg		alu.dst.sel = t2;
1837848b8605Smrg		alu.dst.write = 1;
1838848b8605Smrg		alu.last = 1;
1839848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1840848b8605Smrg		if (r)
1841848b8605Smrg			return r;
1842848b8605Smrg		offset_reg = t2;
1843b8e80941Smrg		offset_chan = 0;
1844848b8605Smrg	}
1845848b8605Smrg
1846b8e80941Smrg	if (src->Register.Indirect) {
1847b8e80941Smrg		int addr_reg;
1848b8e80941Smrg		unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];
1849b8e80941Smrg
1850b8e80941Smrg		addr_reg = get_address_file_reg(ctx, src->Indirect.Index);
1851b8e80941Smrg
1852b8e80941Smrg		/* pull the value from index_reg */
1853b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
1854b8e80941Smrg				   t2, 1,
1855b8e80941Smrg				   addr_reg, 0,
1856b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, first);
1857b8e80941Smrg		if (r)
1858b8e80941Smrg			return r;
1859b8e80941Smrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
1860b8e80941Smrg				   t2, 0,
1861b8e80941Smrg				   t2, 1,
1862b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 4,
1863b8e80941Smrg				   offset_reg, offset_chan);
1864b8e80941Smrg		if (r)
1865b8e80941Smrg			return r;
1866b8e80941Smrg		offset_reg = t2;
1867b8e80941Smrg		offset_chan = 0;
1868b8e80941Smrg		index = src->Register.Index - first;
1869b8e80941Smrg	}
1870848b8605Smrg
1871848b8605Smrg	memset(&vtx, 0, sizeof(vtx));
1872848b8605Smrg	vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
1873b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1874848b8605Smrg	vtx.src_gpr = offset_reg;
1875848b8605Smrg	vtx.src_sel_x = offset_chan;
1876848b8605Smrg	vtx.offset = index * 16; /*bytes*/
1877848b8605Smrg	vtx.mega_fetch_count = 16;
1878848b8605Smrg	vtx.dst_gpr = dst_reg;
1879848b8605Smrg	vtx.dst_sel_x = 0;		/* SEL_X */
1880848b8605Smrg	vtx.dst_sel_y = 1;		/* SEL_Y */
1881848b8605Smrg	vtx.dst_sel_z = 2;		/* SEL_Z */
1882848b8605Smrg	vtx.dst_sel_w = 3;		/* SEL_W */
1883848b8605Smrg	if (ctx->bc->chip_class >= EVERGREEN) {
1884848b8605Smrg		vtx.use_const_fields = 1;
1885848b8605Smrg	} else {
1886848b8605Smrg		vtx.data_format = FMT_32_32_32_32_FLOAT;
1887848b8605Smrg	}
1888848b8605Smrg
1889848b8605Smrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1890848b8605Smrg		return r;
1891848b8605Smrg
1892848b8605Smrg	return 0;
1893848b8605Smrg}
1894848b8605Smrg
1895848b8605Smrgstatic int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
1896848b8605Smrg{
1897848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1898b8e80941Smrg	unsigned i;
1899848b8605Smrg
1900848b8605Smrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1901848b8605Smrg		struct tgsi_full_src_register *src = &inst->Src[i];
1902848b8605Smrg
1903848b8605Smrg		if (src->Register.File == TGSI_FILE_INPUT) {
1904848b8605Smrg			if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
1905848b8605Smrg				/* primitive id is in R0.z */
1906848b8605Smrg				ctx->src[i].sel = 0;
1907848b8605Smrg				ctx->src[i].swizzle[0] = 2;
1908848b8605Smrg			}
1909848b8605Smrg		}
1910848b8605Smrg		if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
1911848b8605Smrg			int treg = r600_get_temp(ctx);
1912848b8605Smrg
1913848b8605Smrg			fetch_gs_input(ctx, src, treg);
1914848b8605Smrg			ctx->src[i].sel = treg;
1915b8e80941Smrg			ctx->src[i].rel = 0;
1916848b8605Smrg		}
1917848b8605Smrg	}
1918848b8605Smrg	return 0;
1919848b8605Smrg}
1920848b8605Smrg
1921848b8605Smrg
1922b8e80941Smrg/* Tessellation shaders pass outputs to the next shader using LDS.
1923b8e80941Smrg *
1924b8e80941Smrg * LS outputs = TCS(HS) inputs
1925b8e80941Smrg * TCS(HS) outputs = TES(DS) inputs
1926b8e80941Smrg *
1927b8e80941Smrg * The LDS layout is:
1928b8e80941Smrg * - TCS inputs for patch 0
1929b8e80941Smrg * - TCS inputs for patch 1
1930b8e80941Smrg * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
1931b8e80941Smrg * - ...
1932b8e80941Smrg * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
1933b8e80941Smrg * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
1934b8e80941Smrg * - TCS outputs for patch 1
1935b8e80941Smrg * - Per-patch TCS outputs for patch 1
1936b8e80941Smrg * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
1937b8e80941Smrg * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
1938b8e80941Smrg * - ...
1939b8e80941Smrg *
1940b8e80941Smrg * All three shaders VS(LS), TCS, TES share the same LDS space.
1941b8e80941Smrg */
1942b8e80941Smrg/* this will return with the dw address in temp_reg.x */
1943b8e80941Smrgstatic int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg,
1944b8e80941Smrg				 const struct tgsi_full_dst_register *dst,
1945b8e80941Smrg				 const struct tgsi_full_src_register *src,
1946b8e80941Smrg				 int stride_bytes_reg, int stride_bytes_chan)
1947b8e80941Smrg{
1948b8e80941Smrg	struct tgsi_full_dst_register reg;
1949b8e80941Smrg	ubyte *name, *index, *array_first;
1950b8e80941Smrg	int r;
1951b8e80941Smrg	int param;
1952b8e80941Smrg	struct tgsi_shader_info *info = &ctx->info;
1953b8e80941Smrg	/* Set the register description. The address computation is the same
1954b8e80941Smrg	 * for sources and destinations. */
1955b8e80941Smrg	if (src) {
1956b8e80941Smrg		reg.Register.File = src->Register.File;
1957b8e80941Smrg		reg.Register.Index = src->Register.Index;
1958b8e80941Smrg		reg.Register.Indirect = src->Register.Indirect;
1959b8e80941Smrg		reg.Register.Dimension = src->Register.Dimension;
1960b8e80941Smrg		reg.Indirect = src->Indirect;
1961b8e80941Smrg		reg.Dimension = src->Dimension;
1962b8e80941Smrg		reg.DimIndirect = src->DimIndirect;
1963b8e80941Smrg	} else
1964b8e80941Smrg		reg = *dst;
1965b8e80941Smrg
1966b8e80941Smrg	/* If the register is 2-dimensional (e.g. an array of vertices
1967b8e80941Smrg	 * in a primitive), calculate the base address of the vertex. */
1968b8e80941Smrg	if (reg.Register.Dimension) {
1969b8e80941Smrg		int sel, chan;
1970b8e80941Smrg		if (reg.Dimension.Indirect) {
1971b8e80941Smrg			unsigned addr_reg;
1972b8e80941Smrg			assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS);
1973b8e80941Smrg
1974b8e80941Smrg			addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index);
1975b8e80941Smrg			/* pull the value from index_reg */
1976b8e80941Smrg			sel = addr_reg;
1977b8e80941Smrg			chan = 0;
1978b8e80941Smrg		} else {
1979b8e80941Smrg			sel = V_SQ_ALU_SRC_LITERAL;
1980b8e80941Smrg			chan = reg.Dimension.Index;
1981848b8605Smrg		}
1982848b8605Smrg
1983b8e80941Smrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
1984b8e80941Smrg				   temp_reg, 0,
1985b8e80941Smrg				   stride_bytes_reg, stride_bytes_chan,
1986b8e80941Smrg				   sel, chan,
1987b8e80941Smrg				   temp_reg, 0);
1988b8e80941Smrg		if (r)
1989b8e80941Smrg			return r;
1990b8e80941Smrg	}
1991848b8605Smrg
1992b8e80941Smrg	if (reg.Register.File == TGSI_FILE_INPUT) {
1993b8e80941Smrg		name = info->input_semantic_name;
1994b8e80941Smrg		index = info->input_semantic_index;
1995b8e80941Smrg		array_first = info->input_array_first;
1996b8e80941Smrg	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
1997b8e80941Smrg		name = info->output_semantic_name;
1998b8e80941Smrg		index = info->output_semantic_index;
1999b8e80941Smrg		array_first = info->output_array_first;
2000b8e80941Smrg	} else {
2001b8e80941Smrg		assert(0);
2002b8e80941Smrg		return -1;
2003b8e80941Smrg	}
2004b8e80941Smrg	if (reg.Register.Indirect) {
2005b8e80941Smrg		int addr_reg;
2006b8e80941Smrg		int first;
2007b8e80941Smrg		/* Add the relative address of the element. */
2008b8e80941Smrg		if (reg.Indirect.ArrayID)
2009b8e80941Smrg			first = array_first[reg.Indirect.ArrayID];
2010b8e80941Smrg		else
2011b8e80941Smrg			first = reg.Register.Index;
2012b8e80941Smrg
2013b8e80941Smrg		addr_reg = get_address_file_reg(ctx, reg.Indirect.Index);
2014b8e80941Smrg
2015b8e80941Smrg		/* pull the value from index_reg */
2016b8e80941Smrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
2017b8e80941Smrg				   temp_reg, 0,
2018b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 16,
2019b8e80941Smrg				   addr_reg, 0,
2020b8e80941Smrg				   temp_reg, 0);
2021b8e80941Smrg		if (r)
2022b8e80941Smrg			return r;
2023b8e80941Smrg
2024b8e80941Smrg		param = r600_get_lds_unique_index(name[first],
2025b8e80941Smrg						  index[first]);
2026b8e80941Smrg
2027b8e80941Smrg	} else {
2028b8e80941Smrg		param = r600_get_lds_unique_index(name[reg.Register.Index],
2029b8e80941Smrg						  index[reg.Register.Index]);
2030b8e80941Smrg	}
2031b8e80941Smrg
2032b8e80941Smrg	/* add to base_addr - passed in temp_reg.x */
2033b8e80941Smrg	if (param) {
2034b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
2035b8e80941Smrg				   temp_reg, 0,
2036b8e80941Smrg				   temp_reg, 0,
2037b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, param * 16);
2038b8e80941Smrg		if (r)
2039b8e80941Smrg			return r;
2040b8e80941Smrg
2041b8e80941Smrg	}
2042b8e80941Smrg	return 0;
2043b8e80941Smrg}
2044b8e80941Smrg
2045b8e80941Smrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
2046b8e80941Smrg			       unsigned dst_reg, unsigned mask)
2047b8e80941Smrg{
2048b8e80941Smrg	struct r600_bytecode_alu alu;
2049b8e80941Smrg	int r, i, lasti;
2050b8e80941Smrg
2051b8e80941Smrg	if ((ctx->bc->cf_last->ndw>>1) >= 0x60)
2052b8e80941Smrg		ctx->bc->force_add_cf = 1;
2053b8e80941Smrg
2054b8e80941Smrg	lasti = tgsi_last_instruction(mask);
2055b8e80941Smrg	for (i = 1; i <= lasti; i++) {
2056b8e80941Smrg		if (!(mask & (1 << i)))
2057b8e80941Smrg			continue;
2058b8e80941Smrg
2059b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
2060b8e80941Smrg				   temp_reg, i,
2061b8e80941Smrg				   temp_reg, 0,
2062b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
2063b8e80941Smrg		if (r)
2064b8e80941Smrg			return r;
2065b8e80941Smrg	}
2066b8e80941Smrg	for (i = 0; i <= lasti; i++) {
2067b8e80941Smrg		if (!(mask & (1 << i)))
2068b8e80941Smrg			continue;
2069b8e80941Smrg
2070b8e80941Smrg		/* emit an LDS_READ_RET */
2071b8e80941Smrg		memset(&alu, 0, sizeof(alu));
2072b8e80941Smrg		alu.op = LDS_OP1_LDS_READ_RET;
2073b8e80941Smrg		alu.src[0].sel = temp_reg;
2074b8e80941Smrg		alu.src[0].chan = i;
2075b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_0;
2076b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
2077b8e80941Smrg		alu.dst.chan = 0;
2078b8e80941Smrg		alu.is_lds_idx_op = true;
2079b8e80941Smrg		alu.last = 1;
2080b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
2081b8e80941Smrg		if (r)
2082b8e80941Smrg			return r;
2083b8e80941Smrg	}
2084b8e80941Smrg	for (i = 0; i <= lasti; i++) {
2085b8e80941Smrg		if (!(mask & (1 << i)))
2086b8e80941Smrg			continue;
2087b8e80941Smrg
2088b8e80941Smrg		/* then read from LDS_OQ_A_POP */
2089b8e80941Smrg		memset(&alu, 0, sizeof(alu));
2090b8e80941Smrg
2091b8e80941Smrg		alu.op = ALU_OP1_MOV;
2092b8e80941Smrg		alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
2093b8e80941Smrg		alu.src[0].chan = 0;
2094b8e80941Smrg		alu.dst.sel = dst_reg;
2095b8e80941Smrg		alu.dst.chan = i;
2096b8e80941Smrg		alu.dst.write = 1;
2097b8e80941Smrg		alu.last = 1;
2098b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
2099b8e80941Smrg		if (r)
2100b8e80941Smrg			return r;
2101b8e80941Smrg	}
2102b8e80941Smrg	return 0;
2103b8e80941Smrg}
2104b8e80941Smrg
2105b8e80941Smrgstatic int fetch_mask(struct tgsi_src_register *reg)
2106b8e80941Smrg{
2107b8e80941Smrg	int mask = 0;
2108b8e80941Smrg	mask |= 1 << reg->SwizzleX;
2109b8e80941Smrg	mask |= 1 << reg->SwizzleY;
2110b8e80941Smrg	mask |= 1 << reg->SwizzleZ;
2111b8e80941Smrg	mask |= 1 << reg->SwizzleW;
2112b8e80941Smrg	return mask;
2113b8e80941Smrg}
2114b8e80941Smrg
2115b8e80941Smrgstatic int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
2116b8e80941Smrg{
2117b8e80941Smrg	int r;
2118b8e80941Smrg	unsigned temp_reg = r600_get_temp(ctx);
2119b8e80941Smrg
2120b8e80941Smrg	r = get_lds_offset0(ctx, 2, temp_reg,
2121b8e80941Smrg			    src->Register.Dimension ? false : true);
2122b8e80941Smrg	if (r)
2123b8e80941Smrg		return r;
2124b8e80941Smrg
2125b8e80941Smrg	/* the base address is now in temp.x */
2126b8e80941Smrg	r = r600_get_byte_address(ctx, temp_reg,
2127b8e80941Smrg				  NULL, src, ctx->tess_output_info, 1);
2128b8e80941Smrg	if (r)
2129b8e80941Smrg		return r;
2130b8e80941Smrg
2131b8e80941Smrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
2132b8e80941Smrg	if (r)
2133b8e80941Smrg		return r;
2134b8e80941Smrg	return 0;
2135b8e80941Smrg}
2136b8e80941Smrg
2137b8e80941Smrgstatic int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
2138b8e80941Smrg{
2139b8e80941Smrg	int r;
2140b8e80941Smrg	unsigned temp_reg = r600_get_temp(ctx);
2141b8e80941Smrg
2142b8e80941Smrg	/* t.x = ips * r0.y */
2143b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
2144b8e80941Smrg			   temp_reg, 0,
2145b8e80941Smrg			   ctx->tess_input_info, 0,
2146b8e80941Smrg			   0, 1);
2147b8e80941Smrg
2148b8e80941Smrg	if (r)
2149b8e80941Smrg		return r;
2150b8e80941Smrg
2151b8e80941Smrg	/* the base address is now in temp.x */
2152b8e80941Smrg	r = r600_get_byte_address(ctx, temp_reg,
2153b8e80941Smrg				  NULL, src, ctx->tess_input_info, 1);
2154b8e80941Smrg	if (r)
2155b8e80941Smrg		return r;
2156b8e80941Smrg
2157b8e80941Smrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
2158b8e80941Smrg	if (r)
2159b8e80941Smrg		return r;
2160b8e80941Smrg	return 0;
2161b8e80941Smrg}
2162b8e80941Smrg
2163b8e80941Smrgstatic int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
2164b8e80941Smrg{
2165b8e80941Smrg	int r;
2166b8e80941Smrg	unsigned temp_reg = r600_get_temp(ctx);
2167b8e80941Smrg
2168b8e80941Smrg	r = get_lds_offset0(ctx, 1, temp_reg,
2169b8e80941Smrg			    src->Register.Dimension ? false : true);
2170b8e80941Smrg	if (r)
2171b8e80941Smrg		return r;
2172b8e80941Smrg	/* the base address is now in temp.x */
2173b8e80941Smrg	r = r600_get_byte_address(ctx, temp_reg,
2174b8e80941Smrg				  NULL, src,
2175b8e80941Smrg				  ctx->tess_output_info, 1);
2176b8e80941Smrg	if (r)
2177b8e80941Smrg		return r;
2178b8e80941Smrg
2179b8e80941Smrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
2180b8e80941Smrg	if (r)
2181b8e80941Smrg		return r;
2182b8e80941Smrg	return 0;
2183b8e80941Smrg}
2184b8e80941Smrg
2185b8e80941Smrgstatic int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx)
2186b8e80941Smrg{
2187b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2188b8e80941Smrg	unsigned i;
2189b8e80941Smrg
2190b8e80941Smrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2191b8e80941Smrg		struct tgsi_full_src_register *src = &inst->Src[i];
2192b8e80941Smrg
2193b8e80941Smrg		if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) {
2194b8e80941Smrg			int treg = r600_get_temp(ctx);
2195b8e80941Smrg			fetch_tes_input(ctx, src, treg);
2196b8e80941Smrg			ctx->src[i].sel = treg;
2197b8e80941Smrg			ctx->src[i].rel = 0;
2198b8e80941Smrg		}
2199b8e80941Smrg		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) {
2200b8e80941Smrg			int treg = r600_get_temp(ctx);
2201b8e80941Smrg			fetch_tcs_input(ctx, src, treg);
2202b8e80941Smrg			ctx->src[i].sel = treg;
2203b8e80941Smrg			ctx->src[i].rel = 0;
2204b8e80941Smrg		}
2205b8e80941Smrg		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) {
2206b8e80941Smrg			int treg = r600_get_temp(ctx);
2207b8e80941Smrg			fetch_tcs_output(ctx, src, treg);
2208b8e80941Smrg			ctx->src[i].sel = treg;
2209b8e80941Smrg			ctx->src[i].rel = 0;
2210b8e80941Smrg		}
2211b8e80941Smrg	}
2212b8e80941Smrg	return 0;
2213b8e80941Smrg}
2214b8e80941Smrg
2215b8e80941Smrgstatic int tgsi_split_constant(struct r600_shader_ctx *ctx)
2216b8e80941Smrg{
2217b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2218b8e80941Smrg	struct r600_bytecode_alu alu;
2219b8e80941Smrg	int i, j, k, nconst, r;
2220b8e80941Smrg
2221b8e80941Smrg	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
2222b8e80941Smrg		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
2223b8e80941Smrg			nconst++;
2224b8e80941Smrg		}
2225b8e80941Smrg		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
2226b8e80941Smrg	}
2227b8e80941Smrg	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
2228b8e80941Smrg		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
2229b8e80941Smrg			continue;
2230b8e80941Smrg		}
2231b8e80941Smrg
2232b8e80941Smrg		if (ctx->src[i].rel) {
2233b8e80941Smrg			int chan = inst->Src[i].Indirect.Swizzle;
2234b8e80941Smrg			int treg = r600_get_temp(ctx);
2235b8e80941Smrg			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg)))
2236b8e80941Smrg				return r;
2237b8e80941Smrg
2238b8e80941Smrg			ctx->src[i].kc_bank = 0;
2239b8e80941Smrg			ctx->src[i].kc_rel = 0;
2240b8e80941Smrg			ctx->src[i].sel = treg;
2241b8e80941Smrg			ctx->src[i].rel = 0;
2242848b8605Smrg			j--;
2243848b8605Smrg		} else if (j > 0) {
2244848b8605Smrg			int treg = r600_get_temp(ctx);
2245848b8605Smrg			for (k = 0; k < 4; k++) {
2246848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2247848b8605Smrg				alu.op = ALU_OP1_MOV;
2248848b8605Smrg				alu.src[0].sel = ctx->src[i].sel;
2249848b8605Smrg				alu.src[0].chan = k;
2250848b8605Smrg				alu.src[0].rel = ctx->src[i].rel;
2251848b8605Smrg				alu.src[0].kc_bank = ctx->src[i].kc_bank;
2252b8e80941Smrg				alu.src[0].kc_rel = ctx->src[i].kc_rel;
2253848b8605Smrg				alu.dst.sel = treg;
2254848b8605Smrg				alu.dst.chan = k;
2255848b8605Smrg				alu.dst.write = 1;
2256848b8605Smrg				if (k == 3)
2257848b8605Smrg					alu.last = 1;
2258848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
2259848b8605Smrg				if (r)
2260848b8605Smrg					return r;
2261848b8605Smrg			}
2262848b8605Smrg			ctx->src[i].sel = treg;
2263848b8605Smrg			ctx->src[i].rel =0;
2264848b8605Smrg			j--;
2265848b8605Smrg		}
2266848b8605Smrg	}
2267848b8605Smrg	return 0;
2268848b8605Smrg}
2269848b8605Smrg
2270848b8605Smrg/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
2271848b8605Smrgstatic int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
2272848b8605Smrg{
2273848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2274848b8605Smrg	struct r600_bytecode_alu alu;
2275848b8605Smrg	int i, j, k, nliteral, r;
2276848b8605Smrg
2277848b8605Smrg	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
2278848b8605Smrg		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
2279848b8605Smrg			nliteral++;
2280848b8605Smrg		}
2281848b8605Smrg	}
2282848b8605Smrg	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
2283848b8605Smrg		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
2284848b8605Smrg			int treg = r600_get_temp(ctx);
2285848b8605Smrg			for (k = 0; k < 4; k++) {
2286848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2287848b8605Smrg				alu.op = ALU_OP1_MOV;
2288848b8605Smrg				alu.src[0].sel = ctx->src[i].sel;
2289848b8605Smrg				alu.src[0].chan = k;
2290848b8605Smrg				alu.src[0].value = ctx->src[i].value[k];
2291848b8605Smrg				alu.dst.sel = treg;
2292848b8605Smrg				alu.dst.chan = k;
2293848b8605Smrg				alu.dst.write = 1;
2294848b8605Smrg				if (k == 3)
2295848b8605Smrg					alu.last = 1;
2296848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
2297848b8605Smrg				if (r)
2298848b8605Smrg					return r;
2299848b8605Smrg			}
2300848b8605Smrg			ctx->src[i].sel = treg;
2301848b8605Smrg			j--;
2302848b8605Smrg		}
2303848b8605Smrg	}
2304848b8605Smrg	return 0;
2305848b8605Smrg}
2306848b8605Smrg
2307848b8605Smrgstatic int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
2308848b8605Smrg{
2309848b8605Smrg	int i, r, count = ctx->shader->ninput;
2310848b8605Smrg
2311848b8605Smrg	for (i = 0; i < count; i++) {
2312848b8605Smrg		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
2313848b8605Smrg			r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
2314848b8605Smrg			if (r)
2315848b8605Smrg				return r;
2316848b8605Smrg		}
2317848b8605Smrg	}
2318848b8605Smrg	return 0;
2319848b8605Smrg}
2320848b8605Smrg
2321b8e80941Smrgstatic int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
2322b8e80941Smrg						  int stream, unsigned *stream_item_size UNUSED)
2323848b8605Smrg{
2324848b8605Smrg	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
2325b8e80941Smrg	unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
2326b8e80941Smrg	int j, r;
2327b8e80941Smrg	unsigned i;
2328848b8605Smrg
2329848b8605Smrg	/* Sanity checking. */
2330b8e80941Smrg	if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
2331848b8605Smrg		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
2332848b8605Smrg		r = -EINVAL;
2333848b8605Smrg		goto out_err;
2334848b8605Smrg	}
2335848b8605Smrg	for (i = 0; i < so->num_outputs; i++) {
2336848b8605Smrg		if (so->output[i].output_buffer >= 4) {
2337848b8605Smrg			R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
2338848b8605Smrg				 so->output[i].output_buffer);
2339848b8605Smrg			r = -EINVAL;
2340848b8605Smrg			goto out_err;
2341848b8605Smrg		}
2342848b8605Smrg	}
2343848b8605Smrg
2344848b8605Smrg	/* Initialize locations where the outputs are stored. */
2345848b8605Smrg	for (i = 0; i < so->num_outputs; i++) {
2346848b8605Smrg
2347b8e80941Smrg		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
2348b8e80941Smrg		start_comp[i] = so->output[i].start_component;
2349848b8605Smrg		/* Lower outputs with dst_offset < start_component.
2350848b8605Smrg		 *
2351848b8605Smrg		 * We can only output 4D vectors with a write mask, e.g. we can
2352848b8605Smrg		 * only output the W component at offset 3, etc. If we want
2353848b8605Smrg		 * to store Y, Z, or W at buffer offset 0, we need to use MOV
2354848b8605Smrg		 * to move it to X and output X. */
2355848b8605Smrg		if (so->output[i].dst_offset < so->output[i].start_component) {
2356848b8605Smrg			unsigned tmp = r600_get_temp(ctx);
2357848b8605Smrg
2358848b8605Smrg			for (j = 0; j < so->output[i].num_components; j++) {
2359848b8605Smrg				struct r600_bytecode_alu alu;
2360848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2361848b8605Smrg				alu.op = ALU_OP1_MOV;
2362848b8605Smrg				alu.src[0].sel = so_gpr[i];
2363848b8605Smrg				alu.src[0].chan = so->output[i].start_component + j;
2364848b8605Smrg
2365848b8605Smrg				alu.dst.sel = tmp;
2366848b8605Smrg				alu.dst.chan = j;
2367848b8605Smrg				alu.dst.write = 1;
2368848b8605Smrg				if (j == so->output[i].num_components - 1)
2369848b8605Smrg					alu.last = 1;
2370848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
2371848b8605Smrg				if (r)
2372848b8605Smrg					return r;
2373848b8605Smrg			}
2374b8e80941Smrg			start_comp[i] = 0;
2375848b8605Smrg			so_gpr[i] = tmp;
2376848b8605Smrg		}
2377848b8605Smrg	}
2378848b8605Smrg
2379848b8605Smrg	/* Write outputs to buffers. */
2380848b8605Smrg	for (i = 0; i < so->num_outputs; i++) {
2381848b8605Smrg		struct r600_bytecode_output output;
2382848b8605Smrg
2383b8e80941Smrg		if (stream != -1 && stream != so->output[i].stream)
2384b8e80941Smrg			continue;
2385b8e80941Smrg
2386848b8605Smrg		memset(&output, 0, sizeof(struct r600_bytecode_output));
2387848b8605Smrg		output.gpr = so_gpr[i];
2388b8e80941Smrg		output.elem_size = so->output[i].num_components - 1;
2389b8e80941Smrg		if (output.elem_size == 2)
2390b8e80941Smrg			output.elem_size = 3; // 3 not supported, write 4 with junk at end
2391b8e80941Smrg		output.array_base = so->output[i].dst_offset - start_comp[i];
2392848b8605Smrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
2393848b8605Smrg		output.burst_count = 1;
2394848b8605Smrg		/* array_size is an upper limit for the burst_count
2395848b8605Smrg		 * with MEM_STREAM instructions */
2396848b8605Smrg		output.array_size = 0xFFF;
2397b8e80941Smrg		output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
2398b8e80941Smrg
2399848b8605Smrg		if (ctx->bc->chip_class >= EVERGREEN) {
2400848b8605Smrg			switch (so->output[i].output_buffer) {
2401848b8605Smrg			case 0:
2402848b8605Smrg				output.op = CF_OP_MEM_STREAM0_BUF0;
2403848b8605Smrg				break;
2404848b8605Smrg			case 1:
2405848b8605Smrg				output.op = CF_OP_MEM_STREAM0_BUF1;
2406848b8605Smrg				break;
2407848b8605Smrg			case 2:
2408848b8605Smrg				output.op = CF_OP_MEM_STREAM0_BUF2;
2409848b8605Smrg				break;
2410848b8605Smrg			case 3:
2411848b8605Smrg				output.op = CF_OP_MEM_STREAM0_BUF3;
2412848b8605Smrg				break;
2413848b8605Smrg			}
2414b8e80941Smrg			output.op += so->output[i].stream * 4;
2415b8e80941Smrg			assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
2416b8e80941Smrg			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
2417848b8605Smrg		} else {
2418848b8605Smrg			switch (so->output[i].output_buffer) {
2419848b8605Smrg			case 0:
2420848b8605Smrg				output.op = CF_OP_MEM_STREAM0;
2421848b8605Smrg				break;
2422848b8605Smrg			case 1:
2423848b8605Smrg				output.op = CF_OP_MEM_STREAM1;
2424848b8605Smrg				break;
2425848b8605Smrg			case 2:
2426848b8605Smrg				output.op = CF_OP_MEM_STREAM2;
2427848b8605Smrg				break;
2428848b8605Smrg			case 3:
2429848b8605Smrg				output.op = CF_OP_MEM_STREAM3;
2430848b8605Smrg					break;
2431848b8605Smrg			}
2432b8e80941Smrg			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
2433848b8605Smrg		}
2434848b8605Smrg		r = r600_bytecode_add_output(ctx->bc, &output);
2435848b8605Smrg		if (r)
2436848b8605Smrg			goto out_err;
2437848b8605Smrg	}
2438848b8605Smrg	return 0;
2439848b8605Smrgout_err:
2440848b8605Smrg	return r;
2441848b8605Smrg}
2442848b8605Smrg
2443848b8605Smrgstatic void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
2444848b8605Smrg{
2445848b8605Smrg	struct r600_bytecode_alu alu;
2446848b8605Smrg	unsigned reg;
2447848b8605Smrg
2448848b8605Smrg	if (!ctx->shader->vs_out_edgeflag)
2449848b8605Smrg		return;
2450848b8605Smrg
2451848b8605Smrg	reg = ctx->shader->output[ctx->edgeflag_output].gpr;
2452848b8605Smrg
2453848b8605Smrg	/* clamp(x, 0, 1) */
2454848b8605Smrg	memset(&alu, 0, sizeof(alu));
2455848b8605Smrg	alu.op = ALU_OP1_MOV;
2456848b8605Smrg	alu.src[0].sel = reg;
2457848b8605Smrg	alu.dst.sel = reg;
2458848b8605Smrg	alu.dst.write = 1;
2459848b8605Smrg	alu.dst.clamp = 1;
2460848b8605Smrg	alu.last = 1;
2461848b8605Smrg	r600_bytecode_add_alu(ctx->bc, &alu);
2462848b8605Smrg
2463848b8605Smrg	memset(&alu, 0, sizeof(alu));
2464848b8605Smrg	alu.op = ALU_OP1_FLT_TO_INT;
2465848b8605Smrg	alu.src[0].sel = reg;
2466848b8605Smrg	alu.dst.sel = reg;
2467848b8605Smrg	alu.dst.write = 1;
2468848b8605Smrg	alu.last = 1;
2469848b8605Smrg	r600_bytecode_add_alu(ctx->bc, &alu);
2470848b8605Smrg}
2471848b8605Smrg
2472848b8605Smrgstatic int generate_gs_copy_shader(struct r600_context *rctx,
2473848b8605Smrg				   struct r600_pipe_shader *gs,
2474848b8605Smrg				   struct pipe_stream_output_info *so)
2475848b8605Smrg{
2476848b8605Smrg	struct r600_shader_ctx ctx = {};
2477848b8605Smrg	struct r600_shader *gs_shader = &gs->shader;
2478848b8605Smrg	struct r600_pipe_shader *cshader;
2479b8e80941Smrg	unsigned ocnt = gs_shader->noutput;
2480848b8605Smrg	struct r600_bytecode_alu alu;
2481848b8605Smrg	struct r600_bytecode_vtx vtx;
2482848b8605Smrg	struct r600_bytecode_output output;
2483848b8605Smrg	struct r600_bytecode_cf *cf_jump, *cf_pop,
2484848b8605Smrg		*last_exp_pos = NULL, *last_exp_param = NULL;
2485b8e80941Smrg	int next_clip_pos = 61, next_param = 0;
2486b8e80941Smrg	unsigned i, j;
2487b8e80941Smrg	int ring;
2488b8e80941Smrg	bool only_ring_0 = true;
2489848b8605Smrg	cshader = calloc(1, sizeof(struct r600_pipe_shader));
2490848b8605Smrg	if (!cshader)
2491848b8605Smrg		return 0;
2492848b8605Smrg
2493848b8605Smrg	memcpy(cshader->shader.output, gs_shader->output, ocnt *
2494848b8605Smrg	       sizeof(struct r600_shader_io));
2495848b8605Smrg
2496848b8605Smrg	cshader->shader.noutput = ocnt;
2497848b8605Smrg
2498848b8605Smrg	ctx.shader = &cshader->shader;
2499848b8605Smrg	ctx.bc = &ctx.shader->bc;
2500b8e80941Smrg	ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX;
2501848b8605Smrg
2502848b8605Smrg	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
2503848b8605Smrg			   rctx->screen->has_compressed_msaa_texturing);
2504848b8605Smrg
2505848b8605Smrg	ctx.bc->isa = rctx->isa;
2506848b8605Smrg
2507b8e80941Smrg	cf_jump = NULL;
2508b8e80941Smrg	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
2509b8e80941Smrg
2510848b8605Smrg	/* R0.x = R0.x & 0x3fffffff */
2511848b8605Smrg	memset(&alu, 0, sizeof(alu));
2512848b8605Smrg	alu.op = ALU_OP2_AND_INT;
2513848b8605Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2514848b8605Smrg	alu.src[1].value = 0x3fffffff;
2515848b8605Smrg	alu.dst.write = 1;
2516848b8605Smrg	r600_bytecode_add_alu(ctx.bc, &alu);
2517848b8605Smrg
2518848b8605Smrg	/* R0.y = R0.x >> 30 */
2519848b8605Smrg	memset(&alu, 0, sizeof(alu));
2520848b8605Smrg	alu.op = ALU_OP2_LSHR_INT;
2521848b8605Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2522848b8605Smrg	alu.src[1].value = 0x1e;
2523848b8605Smrg	alu.dst.chan = 1;
2524848b8605Smrg	alu.dst.write = 1;
2525848b8605Smrg	alu.last = 1;
2526848b8605Smrg	r600_bytecode_add_alu(ctx.bc, &alu);
2527848b8605Smrg
2528848b8605Smrg	/* fetch vertex data from GSVS ring */
2529848b8605Smrg	for (i = 0; i < ocnt; ++i) {
2530848b8605Smrg		struct r600_shader_io *out = &ctx.shader->output[i];
2531b8e80941Smrg
2532848b8605Smrg		out->gpr = i + 1;
2533848b8605Smrg		out->ring_offset = i * 16;
2534848b8605Smrg
2535848b8605Smrg		memset(&vtx, 0, sizeof(vtx));
2536848b8605Smrg		vtx.op = FETCH_OP_VFETCH;
2537848b8605Smrg		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
2538b8e80941Smrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
2539b8e80941Smrg		vtx.mega_fetch_count = 16;
2540848b8605Smrg		vtx.offset = out->ring_offset;
2541848b8605Smrg		vtx.dst_gpr = out->gpr;
2542b8e80941Smrg		vtx.src_gpr = 0;
2543848b8605Smrg		vtx.dst_sel_x = 0;
2544848b8605Smrg		vtx.dst_sel_y = 1;
2545848b8605Smrg		vtx.dst_sel_z = 2;
2546848b8605Smrg		vtx.dst_sel_w = 3;
2547848b8605Smrg		if (rctx->b.chip_class >= EVERGREEN) {
2548848b8605Smrg			vtx.use_const_fields = 1;
2549848b8605Smrg		} else {
2550848b8605Smrg			vtx.data_format = FMT_32_32_32_32_FLOAT;
2551848b8605Smrg		}
2552848b8605Smrg
2553848b8605Smrg		r600_bytecode_add_vtx(ctx.bc, &vtx);
2554848b8605Smrg	}
2555b8e80941Smrg	ctx.temp_reg = i + 1;
2556b8e80941Smrg	for (ring = 3; ring >= 0; --ring) {
2557b8e80941Smrg		bool enabled = false;
2558b8e80941Smrg		for (i = 0; i < so->num_outputs; i++) {
2559b8e80941Smrg			if (so->output[i].stream == ring) {
2560b8e80941Smrg				enabled = true;
2561b8e80941Smrg				if (ring > 0)
2562b8e80941Smrg					only_ring_0 = false;
2563b8e80941Smrg				break;
2564b8e80941Smrg			}
2565b8e80941Smrg		}
2566b8e80941Smrg		if (ring != 0 && !enabled) {
2567b8e80941Smrg			cshader->shader.ring_item_sizes[ring] = 0;
2568b8e80941Smrg			continue;
2569b8e80941Smrg		}
2570b8e80941Smrg
2571b8e80941Smrg		if (cf_jump) {
2572b8e80941Smrg			// Patch up jump label
2573b8e80941Smrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
2574b8e80941Smrg			cf_pop = ctx.bc->cf_last;
2575b8e80941Smrg
2576b8e80941Smrg			cf_jump->cf_addr = cf_pop->id + 2;
2577b8e80941Smrg			cf_jump->pop_count = 1;
2578b8e80941Smrg			cf_pop->cf_addr = cf_pop->id + 2;
2579b8e80941Smrg			cf_pop->pop_count = 1;
2580b8e80941Smrg		}
2581b8e80941Smrg
2582b8e80941Smrg		/* PRED_SETE_INT __, R0.y, ring */
2583b8e80941Smrg		memset(&alu, 0, sizeof(alu));
2584b8e80941Smrg		alu.op = ALU_OP2_PRED_SETE_INT;
2585b8e80941Smrg		alu.src[0].chan = 1;
2586b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2587b8e80941Smrg		alu.src[1].value = ring;
2588b8e80941Smrg		alu.execute_mask = 1;
2589b8e80941Smrg		alu.update_pred = 1;
2590b8e80941Smrg		alu.last = 1;
2591b8e80941Smrg		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
2592b8e80941Smrg
2593b8e80941Smrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
2594b8e80941Smrg		cf_jump = ctx.bc->cf_last;
2595b8e80941Smrg
2596b8e80941Smrg		if (enabled)
2597b8e80941Smrg			emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]);
2598b8e80941Smrg		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
2599b8e80941Smrg	}
2600b8e80941Smrg
2601b8e80941Smrg	/* bc adds nops - copy it */
2602b8e80941Smrg	if (ctx.bc->chip_class == R600) {
2603b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2604b8e80941Smrg		alu.op = ALU_OP0_NOP;
2605b8e80941Smrg		alu.last = 1;
2606b8e80941Smrg		r600_bytecode_add_alu(ctx.bc, &alu);
2607848b8605Smrg
2608b8e80941Smrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
2609b8e80941Smrg	}
2610848b8605Smrg
2611848b8605Smrg	/* export vertex data */
2612848b8605Smrg	/* XXX factor out common code with r600_shader_from_tgsi ? */
2613848b8605Smrg	for (i = 0; i < ocnt; ++i) {
2614848b8605Smrg		struct r600_shader_io *out = &ctx.shader->output[i];
2615b8e80941Smrg		bool instream0 = true;
2616848b8605Smrg		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
2617848b8605Smrg			continue;
2618848b8605Smrg
2619b8e80941Smrg		for (j = 0; j < so->num_outputs; j++) {
2620b8e80941Smrg			if (so->output[j].register_index == i) {
2621b8e80941Smrg				if (so->output[j].stream == 0)
2622b8e80941Smrg					break;
2623b8e80941Smrg				if (so->output[j].stream > 0)
2624b8e80941Smrg					instream0 = false;
2625b8e80941Smrg			}
2626b8e80941Smrg		}
2627b8e80941Smrg		if (!instream0)
2628b8e80941Smrg			continue;
2629848b8605Smrg		memset(&output, 0, sizeof(output));
2630848b8605Smrg		output.gpr = out->gpr;
2631848b8605Smrg		output.elem_size = 3;
2632848b8605Smrg		output.swizzle_x = 0;
2633848b8605Smrg		output.swizzle_y = 1;
2634848b8605Smrg		output.swizzle_z = 2;
2635848b8605Smrg		output.swizzle_w = 3;
2636848b8605Smrg		output.burst_count = 1;
2637848b8605Smrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2638848b8605Smrg		output.op = CF_OP_EXPORT;
2639848b8605Smrg		switch (out->name) {
2640848b8605Smrg		case TGSI_SEMANTIC_POSITION:
2641848b8605Smrg			output.array_base = 60;
2642848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2643848b8605Smrg			break;
2644848b8605Smrg
2645848b8605Smrg		case TGSI_SEMANTIC_PSIZE:
2646848b8605Smrg			output.array_base = 61;
2647848b8605Smrg			if (next_clip_pos == 61)
2648848b8605Smrg				next_clip_pos = 62;
2649848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2650848b8605Smrg			output.swizzle_y = 7;
2651848b8605Smrg			output.swizzle_z = 7;
2652848b8605Smrg			output.swizzle_w = 7;
2653848b8605Smrg			ctx.shader->vs_out_misc_write = 1;
2654848b8605Smrg			ctx.shader->vs_out_point_size = 1;
2655848b8605Smrg			break;
2656848b8605Smrg		case TGSI_SEMANTIC_LAYER:
2657848b8605Smrg			if (out->spi_sid) {
2658848b8605Smrg				/* duplicate it as PARAM to pass to the pixel shader */
2659848b8605Smrg				output.array_base = next_param++;
2660848b8605Smrg				r600_bytecode_add_output(ctx.bc, &output);
2661848b8605Smrg				last_exp_param = ctx.bc->cf_last;
2662848b8605Smrg			}
2663848b8605Smrg			output.array_base = 61;
2664848b8605Smrg			if (next_clip_pos == 61)
2665848b8605Smrg				next_clip_pos = 62;
2666848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2667848b8605Smrg			output.swizzle_x = 7;
2668848b8605Smrg			output.swizzle_y = 7;
2669848b8605Smrg			output.swizzle_z = 0;
2670848b8605Smrg			output.swizzle_w = 7;
2671848b8605Smrg			ctx.shader->vs_out_misc_write = 1;
2672848b8605Smrg			ctx.shader->vs_out_layer = 1;
2673848b8605Smrg			break;
2674848b8605Smrg		case TGSI_SEMANTIC_VIEWPORT_INDEX:
2675848b8605Smrg			if (out->spi_sid) {
2676848b8605Smrg				/* duplicate it as PARAM to pass to the pixel shader */
2677848b8605Smrg				output.array_base = next_param++;
2678848b8605Smrg				r600_bytecode_add_output(ctx.bc, &output);
2679848b8605Smrg				last_exp_param = ctx.bc->cf_last;
2680848b8605Smrg			}
2681848b8605Smrg			output.array_base = 61;
2682848b8605Smrg			if (next_clip_pos == 61)
2683848b8605Smrg				next_clip_pos = 62;
2684848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2685848b8605Smrg			ctx.shader->vs_out_misc_write = 1;
2686848b8605Smrg			ctx.shader->vs_out_viewport = 1;
2687848b8605Smrg			output.swizzle_x = 7;
2688848b8605Smrg			output.swizzle_y = 7;
2689848b8605Smrg			output.swizzle_z = 7;
2690848b8605Smrg			output.swizzle_w = 0;
2691848b8605Smrg			break;
2692848b8605Smrg		case TGSI_SEMANTIC_CLIPDIST:
2693848b8605Smrg			/* spi_sid is 0 for clipdistance outputs that were generated
2694848b8605Smrg			 * for clipvertex - we don't need to pass them to PS */
2695848b8605Smrg			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
2696b8e80941Smrg			ctx.shader->cull_dist_write = gs->shader.cull_dist_write;
2697b8e80941Smrg			ctx.shader->cc_dist_mask = gs->shader.cc_dist_mask;
2698848b8605Smrg			if (out->spi_sid) {
2699848b8605Smrg				/* duplicate it as PARAM to pass to the pixel shader */
2700848b8605Smrg				output.array_base = next_param++;
2701848b8605Smrg				r600_bytecode_add_output(ctx.bc, &output);
2702848b8605Smrg				last_exp_param = ctx.bc->cf_last;
2703848b8605Smrg			}
2704848b8605Smrg			output.array_base = next_clip_pos++;
2705848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2706848b8605Smrg			break;
2707848b8605Smrg		case TGSI_SEMANTIC_FOG:
2708848b8605Smrg			output.swizzle_y = 4; /* 0 */
2709848b8605Smrg			output.swizzle_z = 4; /* 0 */
2710848b8605Smrg			output.swizzle_w = 5; /* 1 */
2711848b8605Smrg			break;
2712848b8605Smrg		default:
2713848b8605Smrg			output.array_base = next_param++;
2714848b8605Smrg			break;
2715848b8605Smrg		}
2716848b8605Smrg		r600_bytecode_add_output(ctx.bc, &output);
2717848b8605Smrg		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
2718848b8605Smrg			last_exp_param = ctx.bc->cf_last;
2719848b8605Smrg		else
2720848b8605Smrg			last_exp_pos = ctx.bc->cf_last;
2721848b8605Smrg	}
2722848b8605Smrg
2723848b8605Smrg	if (!last_exp_pos) {
2724848b8605Smrg		memset(&output, 0, sizeof(output));
2725848b8605Smrg		output.gpr = 0;
2726848b8605Smrg		output.elem_size = 3;
2727848b8605Smrg		output.swizzle_x = 7;
2728848b8605Smrg		output.swizzle_y = 7;
2729848b8605Smrg		output.swizzle_z = 7;
2730848b8605Smrg		output.swizzle_w = 7;
2731848b8605Smrg		output.burst_count = 1;
2732848b8605Smrg		output.type = 2;
2733848b8605Smrg		output.op = CF_OP_EXPORT;
2734848b8605Smrg		output.array_base = 60;
2735848b8605Smrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2736848b8605Smrg		r600_bytecode_add_output(ctx.bc, &output);
2737848b8605Smrg		last_exp_pos = ctx.bc->cf_last;
2738848b8605Smrg	}
2739848b8605Smrg
2740848b8605Smrg	if (!last_exp_param) {
2741848b8605Smrg		memset(&output, 0, sizeof(output));
2742848b8605Smrg		output.gpr = 0;
2743848b8605Smrg		output.elem_size = 3;
2744848b8605Smrg		output.swizzle_x = 7;
2745848b8605Smrg		output.swizzle_y = 7;
2746848b8605Smrg		output.swizzle_z = 7;
2747848b8605Smrg		output.swizzle_w = 7;
2748848b8605Smrg		output.burst_count = 1;
2749848b8605Smrg		output.type = 2;
2750848b8605Smrg		output.op = CF_OP_EXPORT;
2751848b8605Smrg		output.array_base = next_param++;
2752848b8605Smrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2753848b8605Smrg		r600_bytecode_add_output(ctx.bc, &output);
2754848b8605Smrg		last_exp_param = ctx.bc->cf_last;
2755848b8605Smrg	}
2756848b8605Smrg
2757848b8605Smrg	last_exp_pos->op = CF_OP_EXPORT_DONE;
2758848b8605Smrg	last_exp_param->op = CF_OP_EXPORT_DONE;
2759848b8605Smrg
2760848b8605Smrg	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
2761848b8605Smrg	cf_pop = ctx.bc->cf_last;
2762848b8605Smrg
2763848b8605Smrg	cf_jump->cf_addr = cf_pop->id + 2;
2764848b8605Smrg	cf_jump->pop_count = 1;
2765848b8605Smrg	cf_pop->cf_addr = cf_pop->id + 2;
2766848b8605Smrg	cf_pop->pop_count = 1;
2767848b8605Smrg
2768848b8605Smrg	if (ctx.bc->chip_class == CAYMAN)
2769848b8605Smrg		cm_bytecode_add_cf_end(ctx.bc);
2770848b8605Smrg	else {
2771848b8605Smrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
2772848b8605Smrg		ctx.bc->cf_last->end_of_program = 1;
2773848b8605Smrg	}
2774848b8605Smrg
2775848b8605Smrg	gs->gs_copy_shader = cshader;
2776b8e80941Smrg	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
2777848b8605Smrg
2778848b8605Smrg	ctx.bc->nstack = 1;
2779848b8605Smrg
2780848b8605Smrg	return r600_bytecode_build(ctx.bc);
2781848b8605Smrg}
2782848b8605Smrg
2783b8e80941Smrgstatic int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind)
2784b8e80941Smrg{
2785b8e80941Smrg	if (ind) {
2786b8e80941Smrg		struct r600_bytecode_alu alu;
2787b8e80941Smrg		int r;
2788b8e80941Smrg
2789b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2790b8e80941Smrg		alu.op = ALU_OP2_ADD_INT;
2791b8e80941Smrg		alu.src[0].sel = ctx->gs_export_gpr_tregs[idx];
2792b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2793b8e80941Smrg		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
2794b8e80941Smrg		alu.dst.sel = ctx->gs_export_gpr_tregs[idx];
2795b8e80941Smrg		alu.dst.write = 1;
2796b8e80941Smrg		alu.last = 1;
2797b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
2798b8e80941Smrg		if (r)
2799b8e80941Smrg			return r;
2800b8e80941Smrg	}
2801b8e80941Smrg	return 0;
2802b8e80941Smrg}
2803b8e80941Smrg
2804b8e80941Smrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so UNUSED, int stream, bool ind)
2805848b8605Smrg{
2806848b8605Smrg	struct r600_bytecode_output output;
2807b8e80941Smrg	int ring_offset;
2808b8e80941Smrg	unsigned i, k;
2809b8e80941Smrg	int effective_stream = stream == -1 ? 0 : stream;
2810b8e80941Smrg	int idx = 0;
2811848b8605Smrg
2812848b8605Smrg	for (i = 0; i < ctx->shader->noutput; i++) {
2813848b8605Smrg		if (ctx->gs_for_vs) {
2814848b8605Smrg			/* for ES we need to lookup corresponding ring offset expected by GS
2815848b8605Smrg			 * (map this output to GS input by name and sid) */
2816848b8605Smrg			/* FIXME precompute offsets */
2817848b8605Smrg			ring_offset = -1;
2818848b8605Smrg			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
2819848b8605Smrg				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
2820848b8605Smrg				struct r600_shader_io *out = &ctx->shader->output[i];
2821848b8605Smrg				if (in->name == out->name && in->sid == out->sid)
2822848b8605Smrg					ring_offset = in->ring_offset;
2823848b8605Smrg			}
2824848b8605Smrg
2825848b8605Smrg			if (ring_offset == -1)
2826848b8605Smrg				continue;
2827b8e80941Smrg		} else {
2828b8e80941Smrg			ring_offset = idx * 16;
2829b8e80941Smrg			idx++;
2830b8e80941Smrg		}
2831848b8605Smrg
2832b8e80941Smrg		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
2833b8e80941Smrg			continue;
2834848b8605Smrg		/* next_ring_offset after parsing input decls contains total size of
2835848b8605Smrg		 * single vertex data, gs_next_vertex - current vertex index */
2836848b8605Smrg		if (!ind)
2837848b8605Smrg			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
2838848b8605Smrg
2839848b8605Smrg		memset(&output, 0, sizeof(struct r600_bytecode_output));
2840848b8605Smrg		output.gpr = ctx->shader->output[i].gpr;
2841848b8605Smrg		output.elem_size = 3;
2842848b8605Smrg		output.comp_mask = 0xF;
2843848b8605Smrg		output.burst_count = 1;
2844848b8605Smrg
2845848b8605Smrg		if (ind)
2846848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
2847848b8605Smrg		else
2848848b8605Smrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
2849848b8605Smrg
2850b8e80941Smrg		switch (stream) {
2851b8e80941Smrg		default:
2852b8e80941Smrg		case 0:
2853b8e80941Smrg			output.op = CF_OP_MEM_RING; break;
2854b8e80941Smrg		case 1:
2855b8e80941Smrg			output.op = CF_OP_MEM_RING1; break;
2856b8e80941Smrg		case 2:
2857b8e80941Smrg			output.op = CF_OP_MEM_RING2; break;
2858b8e80941Smrg		case 3:
2859b8e80941Smrg			output.op = CF_OP_MEM_RING3; break;
2860b8e80941Smrg		}
2861848b8605Smrg
2862848b8605Smrg		if (ind) {
2863848b8605Smrg			output.array_base = ring_offset >> 2; /* in dwords */
2864848b8605Smrg			output.array_size = 0xfff;
2865b8e80941Smrg			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
2866848b8605Smrg		} else
2867848b8605Smrg			output.array_base = ring_offset >> 2; /* in dwords */
2868848b8605Smrg		r600_bytecode_add_output(ctx->bc, &output);
2869848b8605Smrg	}
2870848b8605Smrg
2871848b8605Smrg	++ctx->gs_next_vertex;
2872848b8605Smrg	return 0;
2873848b8605Smrg}
2874848b8605Smrg
2875b8e80941Smrg
2876b8e80941Smrgstatic int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
2877848b8605Smrg{
2878b8e80941Smrg	int r;
2879b8e80941Smrg	struct r600_bytecode_vtx vtx;
2880b8e80941Smrg	int temp_val = ctx->temp_reg;
2881b8e80941Smrg	/* need to store the TCS output somewhere */
2882b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
2883b8e80941Smrg			   temp_val, 0,
2884b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 0,
2885b8e80941Smrg			   0, 0);
2886b8e80941Smrg	if (r)
2887b8e80941Smrg		return r;
2888848b8605Smrg
2889b8e80941Smrg	/* used by VS/TCS */
2890b8e80941Smrg	if (ctx->tess_input_info) {
2891b8e80941Smrg		/* fetch tcs input values into resv space */
2892b8e80941Smrg		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
2893b8e80941Smrg		vtx.op = FETCH_OP_VFETCH;
2894b8e80941Smrg		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
2895b8e80941Smrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
2896b8e80941Smrg		vtx.mega_fetch_count = 16;
2897b8e80941Smrg		vtx.data_format = FMT_32_32_32_32;
2898b8e80941Smrg		vtx.num_format_all = 2;
2899b8e80941Smrg		vtx.format_comp_all = 1;
2900b8e80941Smrg		vtx.use_const_fields = 0;
2901b8e80941Smrg		vtx.endian = r600_endian_swap(32);
2902b8e80941Smrg		vtx.srf_mode_all = 1;
2903b8e80941Smrg		vtx.offset = 0;
2904b8e80941Smrg		vtx.dst_gpr = ctx->tess_input_info;
2905b8e80941Smrg		vtx.dst_sel_x = 0;
2906b8e80941Smrg		vtx.dst_sel_y = 1;
2907b8e80941Smrg		vtx.dst_sel_z = 2;
2908b8e80941Smrg		vtx.dst_sel_w = 3;
2909b8e80941Smrg		vtx.src_gpr = temp_val;
2910b8e80941Smrg		vtx.src_sel_x = 0;
2911b8e80941Smrg
2912b8e80941Smrg		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
2913b8e80941Smrg		if (r)
2914b8e80941Smrg			return r;
2915b8e80941Smrg	}
2916b8e80941Smrg
2917b8e80941Smrg	/* used by TCS/TES */
2918b8e80941Smrg	if (ctx->tess_output_info) {
2919b8e80941Smrg		/* fetch tcs output values into resv space */
2920b8e80941Smrg		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
2921b8e80941Smrg		vtx.op = FETCH_OP_VFETCH;
2922b8e80941Smrg		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
2923b8e80941Smrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
2924b8e80941Smrg		vtx.mega_fetch_count = 16;
2925b8e80941Smrg		vtx.data_format = FMT_32_32_32_32;
2926b8e80941Smrg		vtx.num_format_all = 2;
2927b8e80941Smrg		vtx.format_comp_all = 1;
2928b8e80941Smrg		vtx.use_const_fields = 0;
2929b8e80941Smrg		vtx.endian = r600_endian_swap(32);
2930b8e80941Smrg		vtx.srf_mode_all = 1;
2931b8e80941Smrg		vtx.offset = 16;
2932b8e80941Smrg		vtx.dst_gpr = ctx->tess_output_info;
2933b8e80941Smrg		vtx.dst_sel_x = 0;
2934b8e80941Smrg		vtx.dst_sel_y = 1;
2935b8e80941Smrg		vtx.dst_sel_z = 2;
2936b8e80941Smrg		vtx.dst_sel_w = 3;
2937b8e80941Smrg		vtx.src_gpr = temp_val;
2938b8e80941Smrg		vtx.src_sel_x = 0;
2939b8e80941Smrg
2940b8e80941Smrg		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
2941b8e80941Smrg		if (r)
2942b8e80941Smrg			return r;
2943b8e80941Smrg	}
2944b8e80941Smrg	return 0;
2945b8e80941Smrg}
2946b8e80941Smrg
2947b8e80941Smrgstatic int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
2948b8e80941Smrg{
2949b8e80941Smrg	int j, r;
2950b8e80941Smrg	int temp_reg;
2951b8e80941Smrg	unsigned i;
2952b8e80941Smrg
2953b8e80941Smrg	/* fetch tcs input values into input_vals */
2954b8e80941Smrg	ctx->tess_input_info = r600_get_temp(ctx);
2955b8e80941Smrg	ctx->tess_output_info = 0;
2956b8e80941Smrg	r = r600_fetch_tess_io_info(ctx);
2957b8e80941Smrg	if (r)
2958b8e80941Smrg		return r;
2959b8e80941Smrg
2960b8e80941Smrg	temp_reg = r600_get_temp(ctx);
2961b8e80941Smrg	/* dst reg contains LDS address stride * idx */
2962b8e80941Smrg	/* MUL vertexID, vertex_dw_stride */
2963b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
2964b8e80941Smrg			   temp_reg, 0,
2965b8e80941Smrg			   ctx->tess_input_info, 1,
2966b8e80941Smrg			   0, 1); /* rel id in r0.y? */
2967b8e80941Smrg	if (r)
2968b8e80941Smrg		return r;
2969b8e80941Smrg
2970b8e80941Smrg	for (i = 0; i < ctx->shader->noutput; i++) {
2971b8e80941Smrg		struct r600_bytecode_alu alu;
2972b8e80941Smrg		int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);
2973b8e80941Smrg
2974b8e80941Smrg		if (param) {
2975b8e80941Smrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
2976b8e80941Smrg					   temp_reg, 1,
2977b8e80941Smrg					   temp_reg, 0,
2978b8e80941Smrg					   V_SQ_ALU_SRC_LITERAL, param * 16);
2979b8e80941Smrg			if (r)
2980b8e80941Smrg				return r;
2981b8e80941Smrg		}
2982b8e80941Smrg
2983b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
2984b8e80941Smrg				   temp_reg, 2,
2985b8e80941Smrg				   temp_reg, param ? 1 : 0,
2986b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 8);
2987b8e80941Smrg		if (r)
2988b8e80941Smrg			return r;
2989b8e80941Smrg
2990b8e80941Smrg
2991b8e80941Smrg		for (j = 0; j < 2; j++) {
2992b8e80941Smrg			int chan = (j == 1) ? 2 : (param ? 1 : 0);
2993b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2994b8e80941Smrg			alu.op = LDS_OP3_LDS_WRITE_REL;
2995b8e80941Smrg			alu.src[0].sel = temp_reg;
2996b8e80941Smrg			alu.src[0].chan = chan;
2997b8e80941Smrg			alu.src[1].sel = ctx->shader->output[i].gpr;
2998b8e80941Smrg			alu.src[1].chan = j * 2;
2999b8e80941Smrg			alu.src[2].sel = ctx->shader->output[i].gpr;
3000b8e80941Smrg			alu.src[2].chan = (j * 2) + 1;
3001b8e80941Smrg			alu.last = 1;
3002b8e80941Smrg			alu.dst.chan = 0;
3003b8e80941Smrg			alu.lds_idx = 1;
3004b8e80941Smrg			alu.is_lds_idx_op = true;
3005b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
3006b8e80941Smrg			if (r)
3007b8e80941Smrg				return r;
3008b8e80941Smrg		}
3009b8e80941Smrg	}
3010b8e80941Smrg	return 0;
3011b8e80941Smrg}
3012b8e80941Smrg
3013b8e80941Smrgstatic int r600_store_tcs_output(struct r600_shader_ctx *ctx)
3014b8e80941Smrg{
3015b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3016b8e80941Smrg	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
3017b8e80941Smrg	int i, r, lasti;
3018b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
3019b8e80941Smrg	struct r600_bytecode_alu alu;
3020b8e80941Smrg	unsigned write_mask = dst->Register.WriteMask;
3021b8e80941Smrg
3022b8e80941Smrg	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
3023b8e80941Smrg		return 0;
3024b8e80941Smrg
3025b8e80941Smrg	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true);
3026b8e80941Smrg	if (r)
3027b8e80941Smrg		return r;
3028b8e80941Smrg
3029b8e80941Smrg	/* the base address is now in temp.x */
3030b8e80941Smrg	r = r600_get_byte_address(ctx, temp_reg,
3031b8e80941Smrg				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
3032b8e80941Smrg	if (r)
3033b8e80941Smrg		return r;
3034b8e80941Smrg
3035b8e80941Smrg	/* LDS write */
3036b8e80941Smrg	lasti = tgsi_last_instruction(write_mask);
3037b8e80941Smrg	for (i = 1; i <= lasti; i++) {
3038b8e80941Smrg
3039b8e80941Smrg		if (!(write_mask & (1 << i)))
3040b8e80941Smrg			continue;
3041b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
3042b8e80941Smrg				   temp_reg, i,
3043b8e80941Smrg				   temp_reg, 0,
3044b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
3045b8e80941Smrg		if (r)
3046b8e80941Smrg			return r;
3047b8e80941Smrg	}
3048b8e80941Smrg
3049b8e80941Smrg	for (i = 0; i <= lasti; i++) {
3050b8e80941Smrg		if (!(write_mask & (1 << i)))
3051b8e80941Smrg			continue;
3052b8e80941Smrg
3053b8e80941Smrg		if ((i == 0 && ((write_mask & 3) == 3)) ||
3054b8e80941Smrg		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
3055b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056b8e80941Smrg			alu.op = LDS_OP3_LDS_WRITE_REL;
3057b8e80941Smrg			alu.src[0].sel = temp_reg;
3058b8e80941Smrg			alu.src[0].chan = i;
3059b8e80941Smrg
3060b8e80941Smrg			alu.src[1].sel = dst->Register.Index;
3061b8e80941Smrg			alu.src[1].sel += ctx->file_offset[dst->Register.File];
3062b8e80941Smrg			alu.src[1].chan = i;
3063b8e80941Smrg
3064b8e80941Smrg			alu.src[2].sel = dst->Register.Index;
3065b8e80941Smrg			alu.src[2].sel += ctx->file_offset[dst->Register.File];
3066b8e80941Smrg			alu.src[2].chan = i + 1;
3067b8e80941Smrg			alu.lds_idx = 1;
3068b8e80941Smrg			alu.dst.chan = 0;
3069b8e80941Smrg			alu.last = 1;
3070b8e80941Smrg			alu.is_lds_idx_op = true;
3071b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
3072b8e80941Smrg			if (r)
3073b8e80941Smrg				return r;
3074b8e80941Smrg			i += 1;
3075b8e80941Smrg			continue;
3076b8e80941Smrg		}
3077b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3078b8e80941Smrg		alu.op = LDS_OP2_LDS_WRITE;
3079b8e80941Smrg		alu.src[0].sel = temp_reg;
3080b8e80941Smrg		alu.src[0].chan = i;
3081b8e80941Smrg
3082b8e80941Smrg		alu.src[1].sel = dst->Register.Index;
3083b8e80941Smrg		alu.src[1].sel += ctx->file_offset[dst->Register.File];
3084b8e80941Smrg		alu.src[1].chan = i;
3085b8e80941Smrg
3086b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
3087b8e80941Smrg		alu.dst.chan = 0;
3088b8e80941Smrg		alu.last = 1;
3089b8e80941Smrg		alu.is_lds_idx_op = true;
3090b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
3091b8e80941Smrg		if (r)
3092b8e80941Smrg			return r;
3093b8e80941Smrg	}
3094b8e80941Smrg	return 0;
3095b8e80941Smrg}
3096b8e80941Smrg
3097b8e80941Smrgstatic int r600_tess_factor_read(struct r600_shader_ctx *ctx,
3098b8e80941Smrg				 int output_idx, int nc)
3099b8e80941Smrg{
3100b8e80941Smrg	int param;
3101b8e80941Smrg	unsigned temp_reg = r600_get_temp(ctx);
3102b8e80941Smrg	unsigned name = ctx->shader->output[output_idx].name;
3103b8e80941Smrg	int dreg = ctx->shader->output[output_idx].gpr;
3104b8e80941Smrg	int r;
3105b8e80941Smrg
3106b8e80941Smrg	param = r600_get_lds_unique_index(name, 0);
3107b8e80941Smrg	r = get_lds_offset0(ctx, 1, temp_reg, true);
3108b8e80941Smrg	if (r)
3109b8e80941Smrg		return r;
3110b8e80941Smrg
3111b8e80941Smrg	if (param) {
3112b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
3113b8e80941Smrg				   temp_reg, 0,
3114b8e80941Smrg				   temp_reg, 0,
3115b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, param * 16);
3116b8e80941Smrg		if (r)
3117b8e80941Smrg			return r;
3118b8e80941Smrg	}
3119b8e80941Smrg
3120b8e80941Smrg	do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1));
3121b8e80941Smrg	return 0;
3122b8e80941Smrg}
3123b8e80941Smrg
3124b8e80941Smrgstatic int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
3125b8e80941Smrg{
3126b8e80941Smrg	int stride, outer_comps, inner_comps;
3127b8e80941Smrg	int tessinner_idx = -1, tessouter_idx = -1;
3128b8e80941Smrg	int i, r;
3129b8e80941Smrg	unsigned j;
3130b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
3131b8e80941Smrg	int treg[3] = {-1, -1, -1};
3132b8e80941Smrg	struct r600_bytecode_alu alu;
3133b8e80941Smrg	struct r600_bytecode_cf *cf_jump, *cf_pop;
3134b8e80941Smrg
3135b8e80941Smrg	/* only execute factor emission for invocation 0 */
3136b8e80941Smrg	/* PRED_SETE_INT __, R0.x, 0 */
3137b8e80941Smrg	memset(&alu, 0, sizeof(alu));
3138b8e80941Smrg	alu.op = ALU_OP2_PRED_SETE_INT;
3139b8e80941Smrg	alu.src[0].chan = 2;
3140b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
3141b8e80941Smrg	alu.execute_mask = 1;
3142b8e80941Smrg	alu.update_pred = 1;
3143b8e80941Smrg	alu.last = 1;
3144b8e80941Smrg	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
3145b8e80941Smrg
3146b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
3147b8e80941Smrg	cf_jump = ctx->bc->cf_last;
3148b8e80941Smrg
3149b8e80941Smrg	treg[0] = r600_get_temp(ctx);
3150b8e80941Smrg	switch (ctx->shader->tcs_prim_mode) {
3151b8e80941Smrg	case PIPE_PRIM_LINES:
3152b8e80941Smrg		stride = 8; /* 2 dwords, 1 vec2 store */
3153b8e80941Smrg		outer_comps = 2;
3154b8e80941Smrg		inner_comps = 0;
3155b8e80941Smrg		break;
3156b8e80941Smrg	case PIPE_PRIM_TRIANGLES:
3157b8e80941Smrg		stride = 16; /* 4 dwords, 1 vec4 store */
3158b8e80941Smrg		outer_comps = 3;
3159b8e80941Smrg		inner_comps = 1;
3160b8e80941Smrg		treg[1] = r600_get_temp(ctx);
3161b8e80941Smrg		break;
3162b8e80941Smrg	case PIPE_PRIM_QUADS:
3163b8e80941Smrg		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
3164b8e80941Smrg		outer_comps = 4;
3165b8e80941Smrg		inner_comps = 2;
3166b8e80941Smrg		treg[1] = r600_get_temp(ctx);
3167b8e80941Smrg		treg[2] = r600_get_temp(ctx);
3168b8e80941Smrg		break;
3169b8e80941Smrg	default:
3170b8e80941Smrg		assert(0);
3171b8e80941Smrg		return -1;
3172b8e80941Smrg	}
3173b8e80941Smrg
3174b8e80941Smrg	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
3175b8e80941Smrg	/* TF_WRITE takes index in R.x, value in R.y */
3176b8e80941Smrg	for (j = 0; j < ctx->shader->noutput; j++) {
3177b8e80941Smrg		if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSINNER)
3178b8e80941Smrg			tessinner_idx = j;
3179b8e80941Smrg		if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSOUTER)
3180b8e80941Smrg			tessouter_idx = j;
3181b8e80941Smrg	}
3182b8e80941Smrg
3183b8e80941Smrg	if (tessouter_idx == -1)
3184b8e80941Smrg		return -1;
3185b8e80941Smrg
3186b8e80941Smrg	if (tessinner_idx == -1 && inner_comps)
3187b8e80941Smrg		return -1;
3188b8e80941Smrg
3189b8e80941Smrg	if (tessouter_idx != -1) {
3190b8e80941Smrg		r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps);
3191b8e80941Smrg		if (r)
3192b8e80941Smrg			return r;
3193b8e80941Smrg	}
3194b8e80941Smrg
3195b8e80941Smrg	if (tessinner_idx != -1) {
3196b8e80941Smrg		r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps);
3197b8e80941Smrg		if (r)
3198b8e80941Smrg			return r;
3199b8e80941Smrg	}
3200b8e80941Smrg
3201b8e80941Smrg	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
3202b8e80941Smrg	/* r.x = relpatchid(r0.y) * tf_stride */
3203b8e80941Smrg
3204b8e80941Smrg	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
3205b8e80941Smrg	/* add incoming r0.w to it: t.x = t.x + r0.w */
3206b8e80941Smrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
3207b8e80941Smrg			   temp_reg, 0,
3208b8e80941Smrg			   0, 1,
3209b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, stride,
3210b8e80941Smrg			   0, 3);
3211b8e80941Smrg	if (r)
3212b8e80941Smrg		return r;
3213b8e80941Smrg
3214b8e80941Smrg	for (i = 0; i < outer_comps + inner_comps; i++) {
3215b8e80941Smrg		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
3216b8e80941Smrg		int out_comp = i >= outer_comps ? i - outer_comps : i;
3217b8e80941Smrg
3218b8e80941Smrg		if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) {
3219b8e80941Smrg			if (out_comp == 1)
3220b8e80941Smrg				out_comp = 0;
3221b8e80941Smrg			else if (out_comp == 0)
3222b8e80941Smrg				out_comp = 1;
3223b8e80941Smrg		}
3224b8e80941Smrg
3225b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
3226b8e80941Smrg				   treg[i / 2], (2 * (i % 2)),
3227b8e80941Smrg				   temp_reg, 0,
3228b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
3229b8e80941Smrg		if (r)
3230b8e80941Smrg			return r;
3231b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
3232b8e80941Smrg				   treg[i / 2], 1 + (2 * (i%2)),
3233b8e80941Smrg				   ctx->shader->output[out_idx].gpr, out_comp,
3234b8e80941Smrg				   0, 0);
3235b8e80941Smrg		if (r)
3236b8e80941Smrg			return r;
3237b8e80941Smrg	}
3238b8e80941Smrg	for (i = 0; i < outer_comps + inner_comps; i++) {
3239b8e80941Smrg		struct r600_bytecode_gds gds;
3240b8e80941Smrg
3241b8e80941Smrg		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
3242b8e80941Smrg		gds.src_gpr = treg[i / 2];
3243b8e80941Smrg		gds.src_sel_x = 2 * (i % 2);
3244b8e80941Smrg		gds.src_sel_y = 1 + (2 * (i % 2));
3245b8e80941Smrg		gds.src_sel_z = 4;
3246b8e80941Smrg		gds.dst_sel_x = 7;
3247b8e80941Smrg		gds.dst_sel_y = 7;
3248b8e80941Smrg		gds.dst_sel_z = 7;
3249b8e80941Smrg		gds.dst_sel_w = 7;
3250b8e80941Smrg		gds.op = FETCH_OP_TF_WRITE;
3251b8e80941Smrg		r = r600_bytecode_add_gds(ctx->bc, &gds);
3252b8e80941Smrg		if (r)
3253b8e80941Smrg			return r;
3254b8e80941Smrg	}
3255b8e80941Smrg
3256b8e80941Smrg	// Patch up jump label
3257b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
3258b8e80941Smrg	cf_pop = ctx->bc->cf_last;
3259b8e80941Smrg
3260b8e80941Smrg	cf_jump->cf_addr = cf_pop->id + 2;
3261b8e80941Smrg	cf_jump->pop_count = 1;
3262b8e80941Smrg	cf_pop->cf_addr = cf_pop->id + 2;
3263b8e80941Smrg	cf_pop->pop_count = 1;
3264b8e80941Smrg
3265b8e80941Smrg	return 0;
3266b8e80941Smrg}
3267b8e80941Smrg
3268b8e80941Smrg/*
3269b8e80941Smrg * We have to work out the thread ID for load and atomic
3270b8e80941Smrg * operations, which store the returned value to an index
3271b8e80941Smrg * in an intermediate buffer.
3272b8e80941Smrg * The index is calculated by taking the thread id,
3273b8e80941Smrg * calculated from the MBCNT instructions.
3274b8e80941Smrg * Then the shader engine ID is multiplied by 256,
3275b8e80941Smrg * and the wave id is added.
3276b8e80941Smrg * Then the result is multipled by 64 and thread id is
3277b8e80941Smrg * added.
3278b8e80941Smrg */
3279b8e80941Smrgstatic int load_thread_id_gpr(struct r600_shader_ctx *ctx)
3280b8e80941Smrg{
3281b8e80941Smrg	struct r600_bytecode_alu alu;
3282b8e80941Smrg	int r;
3283b8e80941Smrg
3284b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3285b8e80941Smrg	alu.op = ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT;
3286b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
3287b8e80941Smrg	alu.dst.chan = 0;
3288b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
3289b8e80941Smrg	alu.src[0].value = 0xffffffff;
3290b8e80941Smrg	alu.dst.write = 1;
3291b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
3292b8e80941Smrg	if (r)
3293b8e80941Smrg		return r;
3294b8e80941Smrg
3295b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3296b8e80941Smrg	alu.op = ALU_OP1_MBCNT_32HI_INT;
3297b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
3298b8e80941Smrg	alu.dst.chan = 1;
3299b8e80941Smrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
3300b8e80941Smrg	alu.src[0].value = 0xffffffff;
3301b8e80941Smrg	alu.dst.write = 1;
3302b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
3303b8e80941Smrg	if (r)
3304b8e80941Smrg		return r;
3305b8e80941Smrg
3306b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3307b8e80941Smrg	alu.op = ALU_OP3_MULADD_UINT24;
3308b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
3309b8e80941Smrg	alu.dst.chan = 2;
3310b8e80941Smrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_SE_ID;
3311b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
3312b8e80941Smrg	alu.src[1].value = 256;
3313b8e80941Smrg	alu.src[2].sel = EG_V_SQ_ALU_SRC_HW_WAVE_ID;
3314b8e80941Smrg	alu.dst.write = 1;
3315b8e80941Smrg	alu.is_op3 = 1;
3316b8e80941Smrg	alu.last = 1;
3317b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
3318b8e80941Smrg	if (r)
3319b8e80941Smrg		return r;
3320b8e80941Smrg
3321b8e80941Smrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
3322b8e80941Smrg			   ctx->thread_id_gpr, 1,
3323b8e80941Smrg			   ctx->temp_reg, 2,
3324b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 0x40,
3325b8e80941Smrg			   ctx->temp_reg, 0);
3326b8e80941Smrg	if (r)
3327b8e80941Smrg		return r;
3328b8e80941Smrg	return 0;
3329b8e80941Smrg}
3330b8e80941Smrg
3331b8e80941Smrgstatic int r600_shader_from_tgsi(struct r600_context *rctx,
3332b8e80941Smrg				 struct r600_pipe_shader *pipeshader,
3333b8e80941Smrg				 union r600_shader_key key)
3334b8e80941Smrg{
3335b8e80941Smrg	struct r600_screen *rscreen = rctx->screen;
3336b8e80941Smrg	struct r600_shader *shader = &pipeshader->shader;
3337b8e80941Smrg	struct tgsi_token *tokens = pipeshader->selector->tokens;
3338b8e80941Smrg	struct pipe_stream_output_info so = pipeshader->selector->so;
3339b8e80941Smrg	struct tgsi_full_immediate *immediate;
3340b8e80941Smrg	struct r600_shader_ctx ctx;
3341b8e80941Smrg	struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
3342b8e80941Smrg	unsigned output_done, noutput;
3343b8e80941Smrg	unsigned opcode;
3344b8e80941Smrg	int j, k, r = 0;
3345b8e80941Smrg	unsigned i;
3346b8e80941Smrg	int next_param_base = 0, next_clip_base;
3347b8e80941Smrg	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
3348b8e80941Smrg	bool indirect_gprs;
3349b8e80941Smrg	bool ring_outputs = false;
3350b8e80941Smrg	bool lds_outputs = false;
3351b8e80941Smrg	bool lds_inputs = false;
3352b8e80941Smrg	bool pos_emitted = false;
3353848b8605Smrg
3354b8e80941Smrg	ctx.bc = &shader->bc;
3355b8e80941Smrg	ctx.shader = shader;
3356848b8605Smrg
3357848b8605Smrg	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
3358848b8605Smrg			   rscreen->has_compressed_msaa_texturing);
3359848b8605Smrg	ctx.tokens = tokens;
3360848b8605Smrg	tgsi_scan_shader(tokens, &ctx.info);
3361848b8605Smrg	shader->indirect_files = ctx.info.indirect_files;
3362b8e80941Smrg
3363b8e80941Smrg	int narrays = ctx.info.array_max[TGSI_FILE_TEMPORARY];
3364b8e80941Smrg	ctx.array_infos = calloc(narrays, sizeof(*ctx.array_infos));
3365b8e80941Smrg	ctx.spilled_arrays = calloc(narrays, sizeof(bool));
3366b8e80941Smrg	tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, narrays, ctx.array_infos);
3367b8e80941Smrg
3368b8e80941Smrg	shader->uses_helper_invocation = false;
3369b8e80941Smrg	shader->uses_doubles = ctx.info.uses_doubles;
3370b8e80941Smrg	shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
3371b8e80941Smrg	shader->nsys_inputs = 0;
3372b8e80941Smrg
3373b8e80941Smrg	shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 ||
3374b8e80941Smrg		ctx.info.file_count[TGSI_FILE_BUFFER] > 0;
3375b8e80941Smrg	indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
3376848b8605Smrg	tgsi_parse_init(&ctx.parse, tokens);
3377b8e80941Smrg	ctx.type = ctx.info.processor;
3378848b8605Smrg	shader->processor_type = ctx.type;
3379848b8605Smrg	ctx.bc->type = shader->processor_type;
3380848b8605Smrg
3381b8e80941Smrg	switch (ctx.type) {
3382b8e80941Smrg	case PIPE_SHADER_VERTEX:
3383b8e80941Smrg		shader->vs_as_gs_a = key.vs.as_gs_a;
3384b8e80941Smrg		shader->vs_as_es = key.vs.as_es;
3385b8e80941Smrg		shader->vs_as_ls = key.vs.as_ls;
3386b8e80941Smrg		shader->atomic_base = key.vs.first_atomic_counter;
3387b8e80941Smrg		if (shader->vs_as_es)
3388b8e80941Smrg			ring_outputs = true;
3389b8e80941Smrg		if (shader->vs_as_ls)
3390b8e80941Smrg			lds_outputs = true;
3391b8e80941Smrg		break;
3392b8e80941Smrg	case PIPE_SHADER_GEOMETRY:
3393b8e80941Smrg		ring_outputs = true;
3394b8e80941Smrg		shader->atomic_base = key.gs.first_atomic_counter;
3395b8e80941Smrg		shader->gs_tri_strip_adj_fix = key.gs.tri_strip_adj_fix;
3396b8e80941Smrg		break;
3397b8e80941Smrg	case PIPE_SHADER_TESS_CTRL:
3398b8e80941Smrg		shader->tcs_prim_mode = key.tcs.prim_mode;
3399b8e80941Smrg		shader->atomic_base = key.tcs.first_atomic_counter;
3400b8e80941Smrg		lds_outputs = true;
3401b8e80941Smrg		lds_inputs = true;
3402b8e80941Smrg		break;
3403b8e80941Smrg	case PIPE_SHADER_TESS_EVAL:
3404b8e80941Smrg		shader->tes_as_es = key.tes.as_es;
3405b8e80941Smrg		shader->atomic_base = key.tes.first_atomic_counter;
3406b8e80941Smrg		lds_inputs = true;
3407b8e80941Smrg		if (shader->tes_as_es)
3408b8e80941Smrg			ring_outputs = true;
3409b8e80941Smrg		break;
3410b8e80941Smrg	case PIPE_SHADER_FRAGMENT:
3411b8e80941Smrg		shader->two_side = key.ps.color_two_side;
3412b8e80941Smrg		shader->atomic_base = key.ps.first_atomic_counter;
3413b8e80941Smrg		shader->rat_base = key.ps.nr_cbufs;
3414b8e80941Smrg		shader->image_size_const_offset = key.ps.image_size_const_offset;
3415b8e80941Smrg		break;
3416b8e80941Smrg	case PIPE_SHADER_COMPUTE:
3417b8e80941Smrg		shader->rat_base = 0;
3418b8e80941Smrg		shader->image_size_const_offset = ctx.info.file_count[TGSI_FILE_SAMPLER];
3419b8e80941Smrg		break;
3420b8e80941Smrg	default:
3421b8e80941Smrg		break;
3422b8e80941Smrg	}
3423848b8605Smrg
3424b8e80941Smrg	if (shader->vs_as_es || shader->tes_as_es) {
3425848b8605Smrg		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
3426848b8605Smrg	} else {
3427848b8605Smrg		ctx.gs_for_vs = NULL;
3428848b8605Smrg	}
3429848b8605Smrg
3430848b8605Smrg	ctx.next_ring_offset = 0;
3431848b8605Smrg	ctx.gs_out_ring_offset = 0;
3432848b8605Smrg	ctx.gs_next_vertex = 0;
3433b8e80941Smrg	ctx.gs_stream_output_info = &so;
3434848b8605Smrg
3435b8e80941Smrg	ctx.thread_id_gpr = -1;
3436848b8605Smrg	ctx.face_gpr = -1;
3437b8e80941Smrg	ctx.fixed_pt_position_gpr = -1;
3438848b8605Smrg	ctx.fragcoord_input = -1;
3439848b8605Smrg	ctx.colors_used = 0;
3440848b8605Smrg	ctx.clip_vertex_write = 0;
3441848b8605Smrg
3442b8e80941Smrg	ctx.helper_invoc_reg = -1;
3443b8e80941Smrg	ctx.cs_block_size_reg = -1;
3444b8e80941Smrg	ctx.cs_grid_size_reg = -1;
3445b8e80941Smrg	ctx.cs_block_size_loaded = false;
3446b8e80941Smrg	ctx.cs_grid_size_loaded = false;
3447b8e80941Smrg
3448848b8605Smrg	shader->nr_ps_color_exports = 0;
3449848b8605Smrg	shader->nr_ps_max_color_exports = 0;
3450848b8605Smrg
3451848b8605Smrg
3452848b8605Smrg	/* register allocations */
3453848b8605Smrg	/* Values [0,127] correspond to GPR[0..127].
3454848b8605Smrg	 * Values [128,159] correspond to constant buffer bank 0
3455848b8605Smrg	 * Values [160,191] correspond to constant buffer bank 1
3456848b8605Smrg	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
3457848b8605Smrg	 * Values [256,287] correspond to constant buffer bank 2 (EG)
3458848b8605Smrg	 * Values [288,319] correspond to constant buffer bank 3 (EG)
3459848b8605Smrg	 * Other special values are shown in the list below.
3460848b8605Smrg	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
3461848b8605Smrg	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
3462848b8605Smrg	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
3463848b8605Smrg	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
3464848b8605Smrg	 * 248	SQ_ALU_SRC_0: special constant 0.0.
3465848b8605Smrg	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
3466848b8605Smrg	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
3467848b8605Smrg	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
3468848b8605Smrg	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
3469848b8605Smrg	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
3470848b8605Smrg	 * 254	SQ_ALU_SRC_PV: previous vector result.
3471848b8605Smrg	 * 255	SQ_ALU_SRC_PS: previous scalar result.
3472848b8605Smrg	 */
3473848b8605Smrg	for (i = 0; i < TGSI_FILE_COUNT; i++) {
3474848b8605Smrg		ctx.file_offset[i] = 0;
3475848b8605Smrg	}
3476848b8605Smrg
3477b8e80941Smrg	if (ctx.type == PIPE_SHADER_VERTEX)  {
3478b8e80941Smrg
3479848b8605Smrg		ctx.file_offset[TGSI_FILE_INPUT] = 1;
3480b8e80941Smrg		if (ctx.info.num_inputs)
3481848b8605Smrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
3482848b8605Smrg	}
3483b8e80941Smrg	if (ctx.type == PIPE_SHADER_FRAGMENT) {
3484b8e80941Smrg		if (ctx.bc->chip_class >= EVERGREEN)
3485b8e80941Smrg			ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
3486b8e80941Smrg		else
3487b8e80941Smrg			ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
3488b8e80941Smrg
3489b8e80941Smrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
3490b8e80941Smrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
3491b8e80941Smrg				ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
3492b8e80941Smrg				shader->uses_helper_invocation = true;
3493b8e80941Smrg			}
3494b8e80941Smrg		}
3495848b8605Smrg	}
3496b8e80941Smrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
3497848b8605Smrg		/* FIXME 1 would be enough in some cases (3 or less input vertices) */
3498848b8605Smrg		ctx.file_offset[TGSI_FILE_INPUT] = 2;
3499848b8605Smrg	}
3500b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
3501b8e80941Smrg		ctx.file_offset[TGSI_FILE_INPUT] = 1;
3502b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_EVAL) {
3503b8e80941Smrg		bool add_tesscoord = false, add_tess_inout = false;
3504b8e80941Smrg		ctx.file_offset[TGSI_FILE_INPUT] = 1;
3505b8e80941Smrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
3506b8e80941Smrg			/* if we have tesscoord save one reg */
3507b8e80941Smrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD)
3508b8e80941Smrg				add_tesscoord = true;
3509b8e80941Smrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER ||
3510b8e80941Smrg			    ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER)
3511b8e80941Smrg				add_tess_inout = true;
3512b8e80941Smrg		}
3513b8e80941Smrg		if (add_tesscoord || add_tess_inout)
3514b8e80941Smrg			ctx.file_offset[TGSI_FILE_INPUT]++;
3515b8e80941Smrg		if (add_tess_inout)
3516b8e80941Smrg			ctx.file_offset[TGSI_FILE_INPUT]+=2;
3517b8e80941Smrg	}
3518b8e80941Smrg	if (ctx.type == PIPE_SHADER_COMPUTE) {
3519b8e80941Smrg		ctx.file_offset[TGSI_FILE_INPUT] = 2;
3520b8e80941Smrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
3521b8e80941Smrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE)
3522b8e80941Smrg				ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
3523b8e80941Smrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE)
3524b8e80941Smrg				ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
3525b8e80941Smrg		}
3526b8e80941Smrg	}
3527848b8605Smrg
3528b8e80941Smrg	ctx.file_offset[TGSI_FILE_OUTPUT] =
3529848b8605Smrg			ctx.file_offset[TGSI_FILE_INPUT] +
3530848b8605Smrg			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
3531848b8605Smrg	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
3532848b8605Smrg						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
3533848b8605Smrg
3534848b8605Smrg	/* Outside the GPR range. This will be translated to one of the
3535848b8605Smrg	 * kcache banks later. */
3536848b8605Smrg	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
3537848b8605Smrg	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
3538848b8605Smrg
3539b8e80941Smrg	pipeshader->scratch_space_needed = 0;
3540b8e80941Smrg	int regno = ctx.file_offset[TGSI_FILE_TEMPORARY] +
3541b8e80941Smrg			ctx.info.file_max[TGSI_FILE_TEMPORARY];
3542b8e80941Smrg	if (regno > 124) {
3543b8e80941Smrg		choose_spill_arrays(&ctx, &regno, &pipeshader->scratch_space_needed);
3544b8e80941Smrg		shader->indirect_files = ctx.info.indirect_files;
3545b8e80941Smrg	}
3546b8e80941Smrg	shader->needs_scratch_space = pipeshader->scratch_space_needed != 0;
3547b8e80941Smrg
3548b8e80941Smrg	ctx.bc->ar_reg = ++regno;
3549b8e80941Smrg	ctx.bc->index_reg[0] = ++regno;
3550b8e80941Smrg	ctx.bc->index_reg[1] = ++regno;
3551b8e80941Smrg
3552b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_CTRL) {
3553b8e80941Smrg		ctx.tess_input_info = ++regno;
3554b8e80941Smrg		ctx.tess_output_info = ++regno;
3555b8e80941Smrg	} else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
3556b8e80941Smrg		ctx.tess_input_info = ++regno;
3557b8e80941Smrg		ctx.tess_output_info = ++regno;
3558b8e80941Smrg	} else if (ctx.type == PIPE_SHADER_GEOMETRY) {
3559b8e80941Smrg		ctx.gs_export_gpr_tregs[0] = ++regno;
3560b8e80941Smrg		ctx.gs_export_gpr_tregs[1] = ++regno;
3561b8e80941Smrg		ctx.gs_export_gpr_tregs[2] = ++regno;
3562b8e80941Smrg		ctx.gs_export_gpr_tregs[3] = ++regno;
3563b8e80941Smrg		if (ctx.shader->gs_tri_strip_adj_fix) {
3564b8e80941Smrg			ctx.gs_rotated_input[0] = ++regno;
3565b8e80941Smrg			ctx.gs_rotated_input[1] = ++regno;
3566b8e80941Smrg		} else {
3567b8e80941Smrg			ctx.gs_rotated_input[0] = 0;
3568b8e80941Smrg			ctx.gs_rotated_input[1] = 1;
3569b8e80941Smrg		}
3570b8e80941Smrg	}
3571b8e80941Smrg
3572b8e80941Smrg	if (shader->uses_images) {
3573b8e80941Smrg		ctx.thread_id_gpr = ++regno;
3574b8e80941Smrg	}
3575b8e80941Smrg	ctx.temp_reg = ++regno;
3576b8e80941Smrg
3577b8e80941Smrg	shader->max_arrays = 0;
3578b8e80941Smrg	shader->num_arrays = 0;
3579848b8605Smrg	if (indirect_gprs) {
3580848b8605Smrg
3581848b8605Smrg		if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
3582848b8605Smrg			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
3583848b8605Smrg			                   ctx.file_offset[TGSI_FILE_OUTPUT] -
3584848b8605Smrg			                   ctx.file_offset[TGSI_FILE_INPUT],
3585848b8605Smrg			                   0x0F);
3586848b8605Smrg		}
3587848b8605Smrg		if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3588848b8605Smrg			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
3589848b8605Smrg			                   ctx.file_offset[TGSI_FILE_TEMPORARY] -
3590848b8605Smrg			                   ctx.file_offset[TGSI_FILE_OUTPUT],
3591848b8605Smrg			                   0x0F);
3592848b8605Smrg		}
3593848b8605Smrg	}
3594848b8605Smrg
3595848b8605Smrg	ctx.nliterals = 0;
3596848b8605Smrg	ctx.literals = NULL;
3597b8e80941Smrg	ctx.max_driver_temp_used = 0;
3598b8e80941Smrg
3599b8e80941Smrg	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
3600b8e80941Smrg			       ctx.info.colors_written == 1;
3601b8e80941Smrg	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
3602b8e80941Smrg	shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
3603b8e80941Smrg
3604b8e80941Smrg	if (ctx.type == PIPE_SHADER_VERTEX ||
3605b8e80941Smrg	    ctx.type == PIPE_SHADER_GEOMETRY ||
3606b8e80941Smrg	    ctx.type == PIPE_SHADER_TESS_EVAL) {
3607b8e80941Smrg		shader->cc_dist_mask = (1 << (ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED] +
3608b8e80941Smrg					      ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED])) - 1;
3609b8e80941Smrg		shader->clip_dist_write = (1 << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]) - 1;
3610b8e80941Smrg		shader->cull_dist_write = ((1 << ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED]) - 1) << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED];
3611b8e80941Smrg	}
3612b8e80941Smrg
3613b8e80941Smrg	if (shader->vs_as_gs_a)
3614b8e80941Smrg		vs_add_primid_output(&ctx, key.vs.prim_id_out);
3615b8e80941Smrg
3616b8e80941Smrg	if (ctx.thread_id_gpr != -1) {
3617b8e80941Smrg		r = load_thread_id_gpr(&ctx);
3618b8e80941Smrg		if (r)
3619b8e80941Smrg			return r;
3620b8e80941Smrg	}
3621b8e80941Smrg
3622b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_EVAL)
3623b8e80941Smrg		r600_fetch_tess_io_info(&ctx);
3624b8e80941Smrg
3625848b8605Smrg	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
3626848b8605Smrg		tgsi_parse_token(&ctx.parse);
3627848b8605Smrg		switch (ctx.parse.FullToken.Token.Type) {
3628848b8605Smrg		case TGSI_TOKEN_TYPE_IMMEDIATE:
3629848b8605Smrg			immediate = &ctx.parse.FullToken.FullImmediate;
3630848b8605Smrg			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
3631848b8605Smrg			if(ctx.literals == NULL) {
3632848b8605Smrg				r = -ENOMEM;
3633848b8605Smrg				goto out_err;
3634848b8605Smrg			}
3635848b8605Smrg			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
3636848b8605Smrg			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
3637848b8605Smrg			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
3638848b8605Smrg			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
3639848b8605Smrg			ctx.nliterals++;
3640848b8605Smrg			break;
3641848b8605Smrg		case TGSI_TOKEN_TYPE_DECLARATION:
3642848b8605Smrg			r = tgsi_declaration(&ctx);
3643848b8605Smrg			if (r)
3644848b8605Smrg				goto out_err;
3645848b8605Smrg			break;
3646848b8605Smrg		case TGSI_TOKEN_TYPE_INSTRUCTION:
3647848b8605Smrg		case TGSI_TOKEN_TYPE_PROPERTY:
3648848b8605Smrg			break;
3649848b8605Smrg		default:
3650848b8605Smrg			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
3651848b8605Smrg			r = -EINVAL;
3652848b8605Smrg			goto out_err;
3653848b8605Smrg		}
3654848b8605Smrg	}
3655848b8605Smrg
3656b8e80941Smrg	shader->ring_item_sizes[0] = ctx.next_ring_offset;
3657b8e80941Smrg	shader->ring_item_sizes[1] = 0;
3658b8e80941Smrg	shader->ring_item_sizes[2] = 0;
3659b8e80941Smrg	shader->ring_item_sizes[3] = 0;
3660848b8605Smrg
3661848b8605Smrg	/* Process two side if needed */
3662848b8605Smrg	if (shader->two_side && ctx.colors_used) {
3663848b8605Smrg		int i, count = ctx.shader->ninput;
3664848b8605Smrg		unsigned next_lds_loc = ctx.shader->nlds;
3665848b8605Smrg
3666848b8605Smrg		/* additional inputs will be allocated right after the existing inputs,
3667848b8605Smrg		 * we won't need them after the color selection, so we don't need to
3668848b8605Smrg		 * reserve these gprs for the rest of the shader code and to adjust
3669848b8605Smrg		 * output offsets etc. */
3670848b8605Smrg		int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
3671848b8605Smrg				ctx.info.file_max[TGSI_FILE_INPUT] + 1;
3672848b8605Smrg
3673b8e80941Smrg		/* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */
3674848b8605Smrg		if (ctx.face_gpr == -1) {
3675848b8605Smrg			i = ctx.shader->ninput++;
3676848b8605Smrg			ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
3677848b8605Smrg			ctx.shader->input[i].spi_sid = 0;
3678848b8605Smrg			ctx.shader->input[i].gpr = gpr++;
3679848b8605Smrg			ctx.face_gpr = ctx.shader->input[i].gpr;
3680848b8605Smrg		}
3681848b8605Smrg
3682848b8605Smrg		for (i = 0; i < count; i++) {
3683848b8605Smrg			if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
3684848b8605Smrg				int ni = ctx.shader->ninput++;
3685848b8605Smrg				memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
3686848b8605Smrg				ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
3687848b8605Smrg				ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
3688848b8605Smrg				ctx.shader->input[ni].gpr = gpr++;
3689848b8605Smrg				// TGSI to LLVM needs to know the lds position of inputs.
3690848b8605Smrg				// Non LLVM path computes it later (in process_twoside_color)
3691848b8605Smrg				ctx.shader->input[ni].lds_pos = next_lds_loc++;
3692848b8605Smrg				ctx.shader->input[i].back_color_input = ni;
3693848b8605Smrg				if (ctx.bc->chip_class >= EVERGREEN) {
3694848b8605Smrg					if ((r = evergreen_interp_input(&ctx, ni)))
3695848b8605Smrg						return r;
3696848b8605Smrg				}
3697848b8605Smrg			}
3698848b8605Smrg		}
3699848b8605Smrg	}
3700848b8605Smrg
3701848b8605Smrg	if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
3702848b8605Smrg		shader->nr_ps_max_color_exports = 8;
3703848b8605Smrg
3704b8e80941Smrg	if (ctx.shader->uses_helper_invocation) {
3705b8e80941Smrg		if (ctx.bc->chip_class == CAYMAN)
3706b8e80941Smrg			r = cm_load_helper_invocation(&ctx);
3707b8e80941Smrg		else
3708b8e80941Smrg			r = eg_load_helper_invocation(&ctx);
3709b8e80941Smrg		if (r)
3710b8e80941Smrg			return r;
3711b8e80941Smrg	}
3712848b8605Smrg
3713b8e80941Smrg	/*
3714b8e80941Smrg	 * XXX this relies on fixed_pt_position_gpr only being present when
3715b8e80941Smrg	 * this shader should be executed per sample. Should be the case for now...
3716b8e80941Smrg	 */
3717b8e80941Smrg	if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) {
3718b8e80941Smrg		/*
3719b8e80941Smrg		 * Fix up sample mask. The hw always gives us coverage mask for
3720b8e80941Smrg		 * the pixel. However, for per-sample shading, we need the
3721b8e80941Smrg		 * coverage for the shader invocation only.
3722b8e80941Smrg		 * Also, with disabled msaa, only the first bit should be set
3723b8e80941Smrg		 * (luckily the same fixup works for both problems).
3724b8e80941Smrg		 * For now, we can only do it if we know this shader is always
3725b8e80941Smrg		 * executed per sample (due to usage of bits in the shader
3726b8e80941Smrg		 * forcing per-sample execution).
3727b8e80941Smrg		 * If the fb is not multisampled, we'd do unnecessary work but
3728b8e80941Smrg		 * it should still be correct.
3729b8e80941Smrg		 * It will however do nothing for sample shading according
3730b8e80941Smrg		 * to MinSampleShading.
3731b8e80941Smrg		 */
3732b8e80941Smrg		struct r600_bytecode_alu alu;
3733b8e80941Smrg		int tmp = r600_get_temp(&ctx);
3734b8e80941Smrg		assert(ctx.face_gpr != -1);
3735b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3736b8e80941Smrg
3737b8e80941Smrg		alu.op = ALU_OP2_LSHL_INT;
3738b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
3739b8e80941Smrg		alu.src[0].value = 0x1;
3740b8e80941Smrg		alu.src[1].sel = ctx.fixed_pt_position_gpr;
3741b8e80941Smrg		alu.src[1].chan = 3;
3742b8e80941Smrg		alu.dst.sel = tmp;
3743b8e80941Smrg		alu.dst.chan = 0;
3744b8e80941Smrg		alu.dst.write = 1;
3745b8e80941Smrg		alu.last = 1;
3746b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
3747b8e80941Smrg			return r;
3748b8e80941Smrg
3749b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3750b8e80941Smrg		alu.op = ALU_OP2_AND_INT;
3751b8e80941Smrg		alu.src[0].sel = tmp;
3752b8e80941Smrg		alu.src[1].sel = ctx.face_gpr;
3753b8e80941Smrg		alu.src[1].chan = 2;
3754b8e80941Smrg		alu.dst.sel = ctx.face_gpr;
3755b8e80941Smrg		alu.dst.chan = 2;
3756b8e80941Smrg		alu.dst.write = 1;
3757b8e80941Smrg		alu.last = 1;
3758b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
3759b8e80941Smrg			return r;
3760b8e80941Smrg	}
3761b8e80941Smrg
3762b8e80941Smrg	if (ctx.fragcoord_input >= 0) {
3763b8e80941Smrg		if (ctx.bc->chip_class == CAYMAN) {
3764b8e80941Smrg			for (j = 0 ; j < 4; j++) {
3765848b8605Smrg				struct r600_bytecode_alu alu;
3766848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3767848b8605Smrg				alu.op = ALU_OP1_RECIP_IEEE;
3768848b8605Smrg				alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
3769848b8605Smrg				alu.src[0].chan = 3;
3770848b8605Smrg
3771848b8605Smrg				alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
3772b8e80941Smrg				alu.dst.chan = j;
3773b8e80941Smrg				alu.dst.write = (j == 3);
3774b8e80941Smrg				alu.last = (j == 3);
3775848b8605Smrg				if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
3776848b8605Smrg					return r;
3777848b8605Smrg			}
3778b8e80941Smrg		} else {
3779b8e80941Smrg			struct r600_bytecode_alu alu;
3780b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3781b8e80941Smrg			alu.op = ALU_OP1_RECIP_IEEE;
3782b8e80941Smrg			alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
3783b8e80941Smrg			alu.src[0].chan = 3;
3784b8e80941Smrg
3785b8e80941Smrg			alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
3786b8e80941Smrg			alu.dst.chan = 3;
3787b8e80941Smrg			alu.dst.write = 1;
3788b8e80941Smrg			alu.last = 1;
3789b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
3790b8e80941Smrg				return r;
3791848b8605Smrg		}
3792b8e80941Smrg	}
3793848b8605Smrg
3794b8e80941Smrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
3795b8e80941Smrg		struct r600_bytecode_alu alu;
3796b8e80941Smrg		int r;
3797848b8605Smrg
3798b8e80941Smrg		/* GS thread with no output workaround - emit a cut at start of GS */
3799b8e80941Smrg		if (ctx.bc->chip_class == R600)
3800b8e80941Smrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
3801b8e80941Smrg
3802b8e80941Smrg		for (j = 0; j < 4; j++) {
3803848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3804848b8605Smrg			alu.op = ALU_OP1_MOV;
3805848b8605Smrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
3806848b8605Smrg			alu.src[0].value = 0;
3807b8e80941Smrg			alu.dst.sel = ctx.gs_export_gpr_tregs[j];
3808848b8605Smrg			alu.dst.write = 1;
3809848b8605Smrg			alu.last = 1;
3810848b8605Smrg			r = r600_bytecode_add_alu(ctx.bc, &alu);
3811848b8605Smrg			if (r)
3812848b8605Smrg				return r;
3813848b8605Smrg		}
3814b8e80941Smrg
3815b8e80941Smrg		if (ctx.shader->gs_tri_strip_adj_fix) {
3816b8e80941Smrg			r = single_alu_op2(&ctx, ALU_OP2_AND_INT,
3817b8e80941Smrg					   ctx.gs_rotated_input[0], 2,
3818b8e80941Smrg					   0, 2,
3819b8e80941Smrg					   V_SQ_ALU_SRC_LITERAL, 1);
3820b8e80941Smrg			if (r)
3821848b8605Smrg				return r;
3822848b8605Smrg
3823b8e80941Smrg			for (i = 0; i < 6; i++) {
3824b8e80941Smrg				int rotated = (i + 4) % 6;
3825b8e80941Smrg				int offset_reg = i / 3;
3826b8e80941Smrg				int offset_chan = i % 3;
3827b8e80941Smrg				int rotated_offset_reg = rotated / 3;
3828b8e80941Smrg				int rotated_offset_chan = rotated % 3;
3829b8e80941Smrg
3830b8e80941Smrg				if (offset_reg == 0 && offset_chan == 2)
3831b8e80941Smrg					offset_chan = 3;
3832b8e80941Smrg				if (rotated_offset_reg == 0 && rotated_offset_chan == 2)
3833b8e80941Smrg					rotated_offset_chan = 3;
3834b8e80941Smrg
3835b8e80941Smrg				r = single_alu_op3(&ctx, ALU_OP3_CNDE_INT,
3836b8e80941Smrg						   ctx.gs_rotated_input[offset_reg], offset_chan,
3837b8e80941Smrg						   ctx.gs_rotated_input[0], 2,
3838b8e80941Smrg						   offset_reg, offset_chan,
3839b8e80941Smrg						   rotated_offset_reg, rotated_offset_chan);
3840848b8605Smrg				if (r)
3841b8e80941Smrg					return r;
3842b8e80941Smrg			}
3843b8e80941Smrg		}
3844b8e80941Smrg	}
3845b8e80941Smrg
3846b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
3847b8e80941Smrg		r600_fetch_tess_io_info(&ctx);
3848b8e80941Smrg
3849b8e80941Smrg	if (shader->two_side && ctx.colors_used) {
3850b8e80941Smrg		if ((r = process_twoside_color_inputs(&ctx)))
3851b8e80941Smrg			return r;
3852b8e80941Smrg	}
3853b8e80941Smrg
3854b8e80941Smrg	tgsi_parse_init(&ctx.parse, tokens);
3855b8e80941Smrg	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
3856b8e80941Smrg		tgsi_parse_token(&ctx.parse);
3857b8e80941Smrg		switch (ctx.parse.FullToken.Token.Type) {
3858b8e80941Smrg		case TGSI_TOKEN_TYPE_INSTRUCTION:
3859b8e80941Smrg			r = tgsi_is_supported(&ctx);
3860b8e80941Smrg			if (r)
3861b8e80941Smrg				goto out_err;
3862b8e80941Smrg			ctx.max_driver_temp_used = 0;
3863b8e80941Smrg			/* reserve first tmp for everyone */
3864b8e80941Smrg			r600_get_temp(&ctx);
3865848b8605Smrg
3866b8e80941Smrg			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
3867b8e80941Smrg			if ((r = tgsi_split_constant(&ctx)))
3868b8e80941Smrg				goto out_err;
3869b8e80941Smrg			if ((r = tgsi_split_literal_constant(&ctx)))
3870b8e80941Smrg				goto out_err;
3871b8e80941Smrg			if (ctx.type == PIPE_SHADER_GEOMETRY) {
3872b8e80941Smrg				if ((r = tgsi_split_gs_inputs(&ctx)))
3873848b8605Smrg					goto out_err;
3874b8e80941Smrg			} else if (lds_inputs) {
3875b8e80941Smrg				if ((r = tgsi_split_lds_inputs(&ctx)))
3876848b8605Smrg					goto out_err;
3877b8e80941Smrg			}
3878b8e80941Smrg			if (ctx.bc->chip_class == CAYMAN)
3879b8e80941Smrg				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
3880b8e80941Smrg			else if (ctx.bc->chip_class >= EVERGREEN)
3881b8e80941Smrg				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
3882b8e80941Smrg			else
3883b8e80941Smrg				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
3884b8e80941Smrg
3885b8e80941Smrg			ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise;
3886b8e80941Smrg
3887b8e80941Smrg			r = ctx.inst_info->process(&ctx);
3888b8e80941Smrg			if (r)
3889b8e80941Smrg				goto out_err;
3890b8e80941Smrg
3891b8e80941Smrg			if (ctx.type == PIPE_SHADER_TESS_CTRL) {
3892b8e80941Smrg				r = r600_store_tcs_output(&ctx);
3893848b8605Smrg				if (r)
3894848b8605Smrg					goto out_err;
3895848b8605Smrg			}
3896b8e80941Smrg			break;
3897b8e80941Smrg		default:
3898b8e80941Smrg			break;
3899848b8605Smrg		}
3900848b8605Smrg	}
3901848b8605Smrg
3902848b8605Smrg	/* Reset the temporary register counter. */
3903848b8605Smrg	ctx.max_driver_temp_used = 0;
3904848b8605Smrg
3905848b8605Smrg	noutput = shader->noutput;
3906848b8605Smrg
3907848b8605Smrg	if (!ring_outputs && ctx.clip_vertex_write) {
3908848b8605Smrg		unsigned clipdist_temp[2];
3909848b8605Smrg
3910848b8605Smrg		clipdist_temp[0] = r600_get_temp(&ctx);
3911848b8605Smrg		clipdist_temp[1] = r600_get_temp(&ctx);
3912848b8605Smrg
3913848b8605Smrg		/* need to convert a clipvertex write into clipdistance writes and not export
3914848b8605Smrg		   the clip vertex anymore */
3915848b8605Smrg
3916848b8605Smrg		memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
3917848b8605Smrg		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
3918848b8605Smrg		shader->output[noutput].gpr = clipdist_temp[0];
3919848b8605Smrg		noutput++;
3920848b8605Smrg		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
3921848b8605Smrg		shader->output[noutput].gpr = clipdist_temp[1];
3922848b8605Smrg		noutput++;
3923848b8605Smrg
3924848b8605Smrg		/* reset spi_sid for clipvertex output to avoid confusing spi */
3925848b8605Smrg		shader->output[ctx.cv_output].spi_sid = 0;
3926848b8605Smrg
3927848b8605Smrg		shader->clip_dist_write = 0xFF;
3928b8e80941Smrg		shader->cc_dist_mask = 0xFF;
3929848b8605Smrg
3930848b8605Smrg		for (i = 0; i < 8; i++) {
3931848b8605Smrg			int oreg = i >> 2;
3932848b8605Smrg			int ochan = i & 3;
3933848b8605Smrg
3934848b8605Smrg			for (j = 0; j < 4; j++) {
3935848b8605Smrg				struct r600_bytecode_alu alu;
3936848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3937848b8605Smrg				alu.op = ALU_OP2_DOT4;
3938848b8605Smrg				alu.src[0].sel = shader->output[ctx.cv_output].gpr;
3939848b8605Smrg				alu.src[0].chan = j;
3940848b8605Smrg
3941848b8605Smrg				alu.src[1].sel = 512 + i;
3942b8e80941Smrg				alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
3943848b8605Smrg				alu.src[1].chan = j;
3944848b8605Smrg
3945848b8605Smrg				alu.dst.sel = clipdist_temp[oreg];
3946848b8605Smrg				alu.dst.chan = j;
3947848b8605Smrg				alu.dst.write = (j == ochan);
3948848b8605Smrg				if (j == 3)
3949848b8605Smrg					alu.last = 1;
3950b8e80941Smrg				r = r600_bytecode_add_alu(ctx.bc, &alu);
3951848b8605Smrg				if (r)
3952848b8605Smrg					return r;
3953848b8605Smrg			}
3954848b8605Smrg		}
3955848b8605Smrg	}
3956848b8605Smrg
3957848b8605Smrg	/* Add stream outputs. */
3958b8e80941Smrg	if (so.num_outputs) {
3959b8e80941Smrg		bool emit = false;
3960b8e80941Smrg		if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX)
3961b8e80941Smrg			emit = true;
3962b8e80941Smrg		if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL)
3963b8e80941Smrg			emit = true;
3964b8e80941Smrg		if (emit)
3965b8e80941Smrg			emit_streamout(&ctx, &so, -1, NULL);
3966b8e80941Smrg	}
3967b8e80941Smrg	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
3968848b8605Smrg	convert_edgeflag_to_int(&ctx);
3969848b8605Smrg
3970b8e80941Smrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
3971b8e80941Smrg		r600_emit_tess_factor(&ctx);
3972b8e80941Smrg
3973b8e80941Smrg	if (lds_outputs) {
3974b8e80941Smrg		if (ctx.type == PIPE_SHADER_VERTEX) {
3975b8e80941Smrg			if (ctx.shader->noutput)
3976b8e80941Smrg				emit_lds_vs_writes(&ctx);
3977b8e80941Smrg		}
3978b8e80941Smrg	} else if (ring_outputs) {
3979b8e80941Smrg		if (shader->vs_as_es || shader->tes_as_es) {
3980b8e80941Smrg			ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
3981b8e80941Smrg			ctx.gs_export_gpr_tregs[1] = -1;
3982b8e80941Smrg			ctx.gs_export_gpr_tregs[2] = -1;
3983b8e80941Smrg			ctx.gs_export_gpr_tregs[3] = -1;
3984b8e80941Smrg
3985b8e80941Smrg			emit_gs_ring_writes(&ctx, &so, -1, FALSE);
3986b8e80941Smrg		}
3987848b8605Smrg	} else {
3988848b8605Smrg		/* Export output */
3989848b8605Smrg		next_clip_base = shader->vs_out_misc_write ? 62 : 61;
3990848b8605Smrg
3991848b8605Smrg		for (i = 0, j = 0; i < noutput; i++, j++) {
3992848b8605Smrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
3993848b8605Smrg			output[j].gpr = shader->output[i].gpr;
3994848b8605Smrg			output[j].elem_size = 3;
3995848b8605Smrg			output[j].swizzle_x = 0;
3996848b8605Smrg			output[j].swizzle_y = 1;
3997848b8605Smrg			output[j].swizzle_z = 2;
3998848b8605Smrg			output[j].swizzle_w = 3;
3999848b8605Smrg			output[j].burst_count = 1;
4000b8e80941Smrg			output[j].type = 0xffffffff;
4001848b8605Smrg			output[j].op = CF_OP_EXPORT;
4002848b8605Smrg			switch (ctx.type) {
4003b8e80941Smrg			case PIPE_SHADER_VERTEX:
4004b8e80941Smrg			case PIPE_SHADER_TESS_EVAL:
4005848b8605Smrg				switch (shader->output[i].name) {
4006848b8605Smrg				case TGSI_SEMANTIC_POSITION:
4007848b8605Smrg					output[j].array_base = 60;
4008848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4009848b8605Smrg					pos_emitted = true;
4010848b8605Smrg					break;
4011848b8605Smrg
4012848b8605Smrg				case TGSI_SEMANTIC_PSIZE:
4013848b8605Smrg					output[j].array_base = 61;
4014848b8605Smrg					output[j].swizzle_y = 7;
4015848b8605Smrg					output[j].swizzle_z = 7;
4016848b8605Smrg					output[j].swizzle_w = 7;
4017848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4018848b8605Smrg					pos_emitted = true;
4019848b8605Smrg					break;
4020848b8605Smrg				case TGSI_SEMANTIC_EDGEFLAG:
4021848b8605Smrg					output[j].array_base = 61;
4022848b8605Smrg					output[j].swizzle_x = 7;
4023848b8605Smrg					output[j].swizzle_y = 0;
4024848b8605Smrg					output[j].swizzle_z = 7;
4025848b8605Smrg					output[j].swizzle_w = 7;
4026848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4027848b8605Smrg					pos_emitted = true;
4028848b8605Smrg					break;
4029848b8605Smrg				case TGSI_SEMANTIC_LAYER:
4030848b8605Smrg					/* spi_sid is 0 for outputs that are
4031848b8605Smrg					 * not consumed by PS */
4032848b8605Smrg					if (shader->output[i].spi_sid) {
4033848b8605Smrg						output[j].array_base = next_param_base++;
4034848b8605Smrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4035848b8605Smrg						j++;
4036848b8605Smrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4037848b8605Smrg					}
4038848b8605Smrg					output[j].array_base = 61;
4039848b8605Smrg					output[j].swizzle_x = 7;
4040848b8605Smrg					output[j].swizzle_y = 7;
4041848b8605Smrg					output[j].swizzle_z = 0;
4042848b8605Smrg					output[j].swizzle_w = 7;
4043848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4044848b8605Smrg					pos_emitted = true;
4045848b8605Smrg					break;
4046848b8605Smrg				case TGSI_SEMANTIC_VIEWPORT_INDEX:
4047848b8605Smrg					/* spi_sid is 0 for outputs that are
4048848b8605Smrg					 * not consumed by PS */
4049848b8605Smrg					if (shader->output[i].spi_sid) {
4050848b8605Smrg						output[j].array_base = next_param_base++;
4051848b8605Smrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4052848b8605Smrg						j++;
4053848b8605Smrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4054848b8605Smrg					}
4055848b8605Smrg					output[j].array_base = 61;
4056848b8605Smrg					output[j].swizzle_x = 7;
4057848b8605Smrg					output[j].swizzle_y = 7;
4058848b8605Smrg					output[j].swizzle_z = 7;
4059848b8605Smrg					output[j].swizzle_w = 0;
4060848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4061848b8605Smrg					pos_emitted = true;
4062848b8605Smrg					break;
4063848b8605Smrg				case TGSI_SEMANTIC_CLIPVERTEX:
4064848b8605Smrg					j--;
4065848b8605Smrg					break;
4066848b8605Smrg				case TGSI_SEMANTIC_CLIPDIST:
4067848b8605Smrg					output[j].array_base = next_clip_base++;
4068848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4069848b8605Smrg					pos_emitted = true;
4070848b8605Smrg					/* spi_sid is 0 for clipdistance outputs that were generated
4071848b8605Smrg					 * for clipvertex - we don't need to pass them to PS */
4072848b8605Smrg					if (shader->output[i].spi_sid) {
4073848b8605Smrg						j++;
4074848b8605Smrg						/* duplicate it as PARAM to pass to the pixel shader */
4075848b8605Smrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4076848b8605Smrg						output[j].array_base = next_param_base++;
4077848b8605Smrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4078848b8605Smrg					}
4079848b8605Smrg					break;
4080848b8605Smrg				case TGSI_SEMANTIC_FOG:
4081848b8605Smrg					output[j].swizzle_y = 4; /* 0 */
4082848b8605Smrg					output[j].swizzle_z = 4; /* 0 */
4083848b8605Smrg					output[j].swizzle_w = 5; /* 1 */
4084848b8605Smrg					break;
4085b8e80941Smrg				case TGSI_SEMANTIC_PRIMID:
4086b8e80941Smrg					output[j].swizzle_x = 2;
4087b8e80941Smrg					output[j].swizzle_y = 4; /* 0 */
4088b8e80941Smrg					output[j].swizzle_z = 4; /* 0 */
4089b8e80941Smrg					output[j].swizzle_w = 4; /* 0 */
4090b8e80941Smrg					break;
4091848b8605Smrg				}
4092b8e80941Smrg
4093848b8605Smrg				break;
4094b8e80941Smrg			case PIPE_SHADER_FRAGMENT:
4095848b8605Smrg				if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
4096848b8605Smrg					/* never export more colors than the number of CBs */
4097848b8605Smrg					if (shader->output[i].sid >= max_color_exports) {
4098848b8605Smrg						/* skip export */
4099848b8605Smrg						j--;
4100848b8605Smrg						continue;
4101848b8605Smrg					}
4102b8e80941Smrg					output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
4103848b8605Smrg					output[j].array_base = shader->output[i].sid;
4104848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4105848b8605Smrg					shader->nr_ps_color_exports++;
4106b8e80941Smrg					shader->ps_color_export_mask |= (0xf << (shader->output[i].sid * 4));
4107b8e80941Smrg
4108b8e80941Smrg					/* If the i-th target format is set, all previous target formats must
4109b8e80941Smrg					 * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well.
4110b8e80941Smrg					 */
4111b8e80941Smrg					if (shader->output[i].sid > 0)
4112b8e80941Smrg						for (unsigned x = 0; x < shader->output[i].sid; x++)
4113b8e80941Smrg							shader->ps_color_export_mask |= (1 << (x*4));
4114b8e80941Smrg
4115b8e80941Smrg					if (shader->output[i].sid > shader->ps_export_highest)
4116b8e80941Smrg						shader->ps_export_highest = shader->output[i].sid;
4117848b8605Smrg					if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
4118848b8605Smrg						for (k = 1; k < max_color_exports; k++) {
4119848b8605Smrg							j++;
4120848b8605Smrg							memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4121848b8605Smrg							output[j].gpr = shader->output[i].gpr;
4122848b8605Smrg							output[j].elem_size = 3;
4123848b8605Smrg							output[j].swizzle_x = 0;
4124848b8605Smrg							output[j].swizzle_y = 1;
4125848b8605Smrg							output[j].swizzle_z = 2;
4126b8e80941Smrg							output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
4127848b8605Smrg							output[j].burst_count = 1;
4128848b8605Smrg							output[j].array_base = k;
4129848b8605Smrg							output[j].op = CF_OP_EXPORT;
4130848b8605Smrg							output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4131848b8605Smrg							shader->nr_ps_color_exports++;
4132b8e80941Smrg							if (k > shader->ps_export_highest)
4133b8e80941Smrg								shader->ps_export_highest = k;
4134b8e80941Smrg							shader->ps_color_export_mask |= (0xf << (j * 4));
4135848b8605Smrg						}
4136848b8605Smrg					}
4137848b8605Smrg				} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
4138848b8605Smrg					output[j].array_base = 61;
4139848b8605Smrg					output[j].swizzle_x = 2;
4140848b8605Smrg					output[j].swizzle_y = 7;
4141848b8605Smrg					output[j].swizzle_z = output[j].swizzle_w = 7;
4142848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4143848b8605Smrg				} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
4144848b8605Smrg					output[j].array_base = 61;
4145848b8605Smrg					output[j].swizzle_x = 7;
4146848b8605Smrg					output[j].swizzle_y = 1;
4147848b8605Smrg					output[j].swizzle_z = output[j].swizzle_w = 7;
4148848b8605Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4149b8e80941Smrg				} else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
4150b8e80941Smrg					output[j].array_base = 61;
4151b8e80941Smrg					output[j].swizzle_x = 7;
4152b8e80941Smrg					output[j].swizzle_y = 7;
4153b8e80941Smrg					output[j].swizzle_z = 0;
4154b8e80941Smrg					output[j].swizzle_w = 7;
4155b8e80941Smrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4156848b8605Smrg				} else {
4157848b8605Smrg					R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
4158848b8605Smrg					r = -EINVAL;
4159848b8605Smrg					goto out_err;
4160848b8605Smrg				}
4161848b8605Smrg				break;
4162b8e80941Smrg			case PIPE_SHADER_TESS_CTRL:
4163b8e80941Smrg				break;
4164848b8605Smrg			default:
4165848b8605Smrg				R600_ERR("unsupported processor type %d\n", ctx.type);
4166848b8605Smrg				r = -EINVAL;
4167848b8605Smrg				goto out_err;
4168848b8605Smrg			}
4169848b8605Smrg
4170b8e80941Smrg			if (output[j].type == 0xffffffff) {
4171848b8605Smrg				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4172848b8605Smrg				output[j].array_base = next_param_base++;
4173848b8605Smrg			}
4174848b8605Smrg		}
4175848b8605Smrg
4176848b8605Smrg		/* add fake position export */
4177b8e80941Smrg		if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) {
4178848b8605Smrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4179848b8605Smrg			output[j].gpr = 0;
4180848b8605Smrg			output[j].elem_size = 3;
4181848b8605Smrg			output[j].swizzle_x = 7;
4182848b8605Smrg			output[j].swizzle_y = 7;
4183848b8605Smrg			output[j].swizzle_z = 7;
4184848b8605Smrg			output[j].swizzle_w = 7;
4185848b8605Smrg			output[j].burst_count = 1;
4186848b8605Smrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4187848b8605Smrg			output[j].array_base = 60;
4188848b8605Smrg			output[j].op = CF_OP_EXPORT;
4189848b8605Smrg			j++;
4190848b8605Smrg		}
4191848b8605Smrg
4192848b8605Smrg		/* add fake param output for vertex shader if no param is exported */
4193b8e80941Smrg		if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) {
4194848b8605Smrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4195848b8605Smrg			output[j].gpr = 0;
4196848b8605Smrg			output[j].elem_size = 3;
4197848b8605Smrg			output[j].swizzle_x = 7;
4198848b8605Smrg			output[j].swizzle_y = 7;
4199848b8605Smrg			output[j].swizzle_z = 7;
4200848b8605Smrg			output[j].swizzle_w = 7;
4201848b8605Smrg			output[j].burst_count = 1;
4202848b8605Smrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4203848b8605Smrg			output[j].array_base = 0;
4204848b8605Smrg			output[j].op = CF_OP_EXPORT;
4205848b8605Smrg			j++;
4206848b8605Smrg		}
4207848b8605Smrg
4208848b8605Smrg		/* add fake pixel export */
4209b8e80941Smrg		if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) {
4210848b8605Smrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4211848b8605Smrg			output[j].gpr = 0;
4212848b8605Smrg			output[j].elem_size = 3;
4213848b8605Smrg			output[j].swizzle_x = 7;
4214848b8605Smrg			output[j].swizzle_y = 7;
4215848b8605Smrg			output[j].swizzle_z = 7;
4216848b8605Smrg			output[j].swizzle_w = 7;
4217848b8605Smrg			output[j].burst_count = 1;
4218848b8605Smrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4219848b8605Smrg			output[j].array_base = 0;
4220848b8605Smrg			output[j].op = CF_OP_EXPORT;
4221848b8605Smrg			j++;
4222b8e80941Smrg			shader->nr_ps_color_exports++;
4223b8e80941Smrg			shader->ps_color_export_mask = 0xf;
4224848b8605Smrg		}
4225848b8605Smrg
4226848b8605Smrg		noutput = j;
4227848b8605Smrg
4228848b8605Smrg		/* set export done on last export of each type */
4229b8e80941Smrg		for (k = noutput - 1, output_done = 0; k >= 0; k--) {
4230b8e80941Smrg			if (!(output_done & (1 << output[k].type))) {
4231b8e80941Smrg				output_done |= (1 << output[k].type);
4232b8e80941Smrg				output[k].op = CF_OP_EXPORT_DONE;
4233848b8605Smrg			}
4234848b8605Smrg		}
4235848b8605Smrg		/* add output to bytecode */
4236b8e80941Smrg		for (i = 0; i < noutput; i++) {
4237b8e80941Smrg			r = r600_bytecode_add_output(ctx.bc, &output[i]);
4238b8e80941Smrg			if (r)
4239b8e80941Smrg				goto out_err;
4240848b8605Smrg		}
4241848b8605Smrg	}
4242848b8605Smrg
4243848b8605Smrg	/* add program end */
4244b8e80941Smrg	if (ctx.bc->chip_class == CAYMAN)
4245b8e80941Smrg		cm_bytecode_add_cf_end(ctx.bc);
4246b8e80941Smrg	else {
4247b8e80941Smrg		const struct cf_op_info *last = NULL;
4248848b8605Smrg
4249b8e80941Smrg		if (ctx.bc->cf_last)
4250b8e80941Smrg			last = r600_isa_cf(ctx.bc->cf_last->op);
4251848b8605Smrg
4252b8e80941Smrg		/* alu clause instructions don't have EOP bit, so add NOP */
4253b8e80941Smrg		if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_POP)
4254b8e80941Smrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
4255848b8605Smrg
4256b8e80941Smrg		ctx.bc->cf_last->end_of_program = 1;
4257848b8605Smrg	}
4258848b8605Smrg
4259848b8605Smrg	/* check GPR limit - we have 124 = 128 - 4
4260848b8605Smrg	 * (4 are reserved as alu clause temporary registers) */
4261848b8605Smrg	if (ctx.bc->ngpr > 124) {
4262848b8605Smrg		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
4263848b8605Smrg		r = -ENOMEM;
4264848b8605Smrg		goto out_err;
4265848b8605Smrg	}
4266848b8605Smrg
4267b8e80941Smrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
4268848b8605Smrg		if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
4269848b8605Smrg			return r;
4270848b8605Smrg	}
4271848b8605Smrg
4272b8e80941Smrg	free(ctx.spilled_arrays);
4273b8e80941Smrg	free(ctx.array_infos);
4274848b8605Smrg	free(ctx.literals);
4275848b8605Smrg	tgsi_parse_free(&ctx.parse);
4276848b8605Smrg	return 0;
4277848b8605Smrgout_err:
4278b8e80941Smrg	free(ctx.spilled_arrays);
4279b8e80941Smrg	free(ctx.array_infos);
4280848b8605Smrg	free(ctx.literals);
4281848b8605Smrg	tgsi_parse_free(&ctx.parse);
4282848b8605Smrg	return r;
4283848b8605Smrg}
4284848b8605Smrg
4285848b8605Smrgstatic int tgsi_unsupported(struct r600_shader_ctx *ctx)
4286848b8605Smrg{
4287b8e80941Smrg	const unsigned tgsi_opcode =
4288b8e80941Smrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode;
4289848b8605Smrg	R600_ERR("%s tgsi opcode unsupported\n",
4290b8e80941Smrg		 tgsi_get_opcode_name(tgsi_opcode));
4291848b8605Smrg	return -EINVAL;
4292848b8605Smrg}
4293848b8605Smrg
4294b8e80941Smrgstatic int tgsi_end(struct r600_shader_ctx *ctx UNUSED)
4295848b8605Smrg{
4296848b8605Smrg	return 0;
4297848b8605Smrg}
4298848b8605Smrg
4299848b8605Smrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
4300848b8605Smrg			const struct r600_shader_src *shader_src,
4301848b8605Smrg			unsigned chan)
4302848b8605Smrg{
4303848b8605Smrg	bc_src->sel = shader_src->sel;
4304848b8605Smrg	bc_src->chan = shader_src->swizzle[chan];
4305848b8605Smrg	bc_src->neg = shader_src->neg;
4306848b8605Smrg	bc_src->abs = shader_src->abs;
4307848b8605Smrg	bc_src->rel = shader_src->rel;
4308848b8605Smrg	bc_src->value = shader_src->value[bc_src->chan];
4309848b8605Smrg	bc_src->kc_bank = shader_src->kc_bank;
4310b8e80941Smrg	bc_src->kc_rel = shader_src->kc_rel;
4311848b8605Smrg}
4312848b8605Smrg
4313848b8605Smrgstatic void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
4314848b8605Smrg{
4315848b8605Smrg	bc_src->abs = 1;
4316848b8605Smrg	bc_src->neg = 0;
4317848b8605Smrg}
4318848b8605Smrg
4319848b8605Smrgstatic void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
4320848b8605Smrg{
4321848b8605Smrg	bc_src->neg = !bc_src->neg;
4322848b8605Smrg}
4323848b8605Smrg
/*
 * Fill in the ALU destination for one channel of a TGSI destination register.
 *
 * ctx      - shader translation context
 * tgsi_dst - TGSI destination register being written
 * swizzle  - destination channel, stored into r600_dst->chan
 * r600_dst - ALU destination to fill in
 *
 * For TGSI_FILE_TEMPORARY the index is translated with
 * map_tgsi_reg_index_to_r600_gpr(). If that reports the register as spilled,
 * the ALU result is directed to a GPR and a CF_OP_MEM_SCRATCH output is queued
 * with r600_bytecode_add_pending_output(), unless the most recently queued
 * pending output is judged to target the same location. For all other files
 * the destination is Register.Index plus ctx->file_offset[File].
 *
 * The function returns void, so a failure of
 * r600_bytecode_add_pending_output() is not reported to the caller.
 */
4324848b8605Smrgstatic void tgsi_dst(struct r600_shader_ctx *ctx,
4325848b8605Smrg		     const struct tgsi_full_dst_register *tgsi_dst,
4326848b8605Smrg		     unsigned swizzle,
4327848b8605Smrg		     struct r600_bytecode_alu_dst *r600_dst)
4328848b8605Smrg{
4329848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4330848b8605Smrg
4331b8e80941Smrg	if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) {
4332b8e80941Smrg		bool spilled;
4333b8e80941Smrg		unsigned idx;
4334b8e80941Smrg
4335b8e80941Smrg		idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_dst->Register.Index, &spilled);
4336b8e80941Smrg
4337b8e80941Smrg		if (spilled) {
4338b8e80941Smrg			struct r600_bytecode_output cf;
4339b8e80941Smrg			int reg = 0;
4340b8e80941Smrg			int r;
4341b8e80941Smrg			bool add_pending_output = true;
4342b8e80941Smrg
4343b8e80941Smrg			memset(&cf, 0, sizeof(struct r600_bytecode_output));
4344b8e80941Smrg			get_spilled_array_base_and_size(ctx, tgsi_dst->Register.Index,
4345b8e80941Smrg				&cf.array_base, &cf.array_size);
4346b8e80941Smrg
4347b8e80941Smrg			/* If no component has spilled, reserve a register and add the spill code
4348b8e80941Smrg			 *  ctx->bc->n_pending_outputs is cleared after each instruction group */
4349b8e80941Smrg			if (ctx->bc->n_pending_outputs == 0) {
4350b8e80941Smrg				reg = r600_get_temp(ctx);
4351b8e80941Smrg			} else {
4352b8e80941Smrg				/* If we are already spilling and the output address is the same like
4353b8e80941Smrg				* before then just reuse the same slot */
4354b8e80941Smrg				struct r600_bytecode_output *tmpl = &ctx->bc->pending_outputs[ctx->bc->n_pending_outputs-1];
4355b8e80941Smrg				if ((cf.array_base + idx == tmpl->array_base) ||
4356b8e80941Smrg				    (cf.array_base == tmpl->array_base &&
4357b8e80941Smrg				     tmpl->index_gpr == ctx->bc->ar_reg &&
4358b8e80941Smrg				     tgsi_dst->Register.Indirect)) {
					/* NOTE(review): the match above is tested against the last
					 * pending output (tmpl), but the GPR reused here is taken
					 * from pending_outputs[0] - confirm this is intended. */
4359b8e80941Smrg					reg = ctx->bc->pending_outputs[0].gpr;
4360b8e80941Smrg					add_pending_output = false;
4361b8e80941Smrg				} else {
4362b8e80941Smrg					reg = r600_get_temp(ctx);
4363b8e80941Smrg				}
4364b8e80941Smrg			}
4365b8e80941Smrg
			/* The ALU instruction writes the chosen GPR; the queued
			 * scratch output (if any) stores that GPR afterwards. */
4366b8e80941Smrg			r600_dst->sel = reg;
4367b8e80941Smrg			r600_dst->chan = swizzle;
4368b8e80941Smrg			r600_dst->write = 1;
4369b8e80941Smrg			if (inst->Instruction.Saturate) {
4370b8e80941Smrg				r600_dst->clamp = 1;
4371b8e80941Smrg			}
4372b8e80941Smrg
4373b8e80941Smrg			/* Add new outputs as pending */
4374b8e80941Smrg			if (add_pending_output) {
4375b8e80941Smrg				cf.op = CF_OP_MEM_SCRATCH;
4376b8e80941Smrg				cf.elem_size = 3;
4377b8e80941Smrg				cf.gpr = reg;
4378b8e80941Smrg				cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
4379b8e80941Smrg				cf.mark = 1;
4380b8e80941Smrg				cf.comp_mask = inst->Dst[0].Register.WriteMask;
4381b8e80941Smrg				cf.swizzle_x = 0;
4382b8e80941Smrg				cf.swizzle_y = 1;
4383b8e80941Smrg				cf.swizzle_z = 2;
4384b8e80941Smrg				cf.swizzle_w = 3;
4385b8e80941Smrg				cf.burst_count = 1;
4386b8e80941Smrg
				/* Indirect writes index the array through ar_reg; direct
				 * writes fold the element index into array_base instead. */
4387b8e80941Smrg				if (tgsi_dst->Register.Indirect) {
4388b8e80941Smrg					if (ctx->bc->chip_class < R700)
4389b8e80941Smrg						cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
4390b8e80941Smrg					else
4391b8e80941Smrg						cf.type = 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
4392b8e80941Smrg					cf.index_gpr = ctx->bc->ar_reg;
			/* Despite the indentation, this brace closes the
			 * "if (tgsi_dst->Register.Indirect)" above and the else
			 * below is its direct-addressing branch. */
4393b8e80941Smrg			}
4394b8e80941Smrg			else {
4395b8e80941Smrg				cf.array_base += idx;
4396b8e80941Smrg				cf.array_size = 0;
4397b8e80941Smrg			}
4398b8e80941Smrg
4399b8e80941Smrg			r = r600_bytecode_add_pending_output(ctx->bc, &cf);
			/* NOTE(review): the error cannot be propagated from a void
			 * function; the destination fields were already set above. */
4400b8e80941Smrg			if (r)
4401b8e80941Smrg				return;
4402b8e80941Smrg
4403b8e80941Smrg			if (ctx->bc->chip_class >= R700)
4404b8e80941Smrg				r600_bytecode_need_wait_ack(ctx->bc, true);
			/* Closes "if (add_pending_output)". */
4405b8e80941Smrg			}
			/* Spilled path ends here: 'rel' is never set for it. */
4406b8e80941Smrg			return;
4407b8e80941Smrg		}
4408b8e80941Smrg		else {
4409b8e80941Smrg			r600_dst->sel = idx;
4410b8e80941Smrg		}
4411b8e80941Smrg	}
4412b8e80941Smrg	else {
4413b8e80941Smrg		r600_dst->sel = tgsi_dst->Register.Index;
4414b8e80941Smrg		r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
4415b8e80941Smrg	}
4416848b8605Smrg	r600_dst->chan = swizzle;
4417848b8605Smrg	r600_dst->write = 1;
4418848b8605Smrg	if (inst->Instruction.Saturate) {
4419848b8605Smrg		r600_dst->clamp = 1;
4420848b8605Smrg	}
	/* Tessellation-control outputs return before relative addressing
	 * is applied. */
4421b8e80941Smrg	if (ctx->type == PIPE_SHADER_TESS_CTRL) {
4422b8e80941Smrg		if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
4423b8e80941Smrg			return;
4424848b8605Smrg		}
4425848b8605Smrg	}
4426b8e80941Smrg	if (tgsi_dst->Register.Indirect)
4427b8e80941Smrg		r600_dst->rel = V_SQ_REL_RELATIVE;
4428b8e80941Smrg
4429848b8605Smrg}
4430848b8605Smrg
/*
 * Emit the ALU instructions for a 64-bit operation.
 *
 * ctx         - shader translation context
 * singledest  - when true, a one-component write mask is widened to a channel
 *               pair (0x3 or 0xc) selected from the write mask and
 *               Src[0].SwizzleX, and the write enable is cleared on
 *               channels 1 and 3
 * swap        - when true, ctx->src[0] and ctx->src[1] are exchanged
 * dest_temp   - in single-destination mode, a non-zero value names the GPR
 *               that receives the result instead of Dst[0]
 * op_override - non-zero value replaces ctx->inst_info->op as the ALU opcode
 *
 * When the widened pair does not line up with the requested destination
 * channel, the operation is written to ctx->temp_reg and a trailing MOV copies
 * the result to the real destination.
 *
 * Returns 0 on success, or the error from r600_bytecode_add_alu().
 */
4431b8e80941Smrgstatic int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap, int dest_temp, int op_override)
4432848b8605Smrg{
4433848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4434848b8605Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
4435b8e80941Smrg	struct r600_bytecode_alu alu;
	/* lasti is recomputed below once write_mask has been adjusted. */
4436848b8605Smrg	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	/* 0 = write the destination directly; otherwise 1 + the channel of
	 * ctx->temp_reg that the final MOV reads from. */
4437b8e80941Smrg	int use_tmp = 0;
4438b8e80941Smrg	int swizzle_x = inst->Src[0].Register.SwizzleX;
4439b8e80941Smrg
4440b8e80941Smrg	if (singledest) {
		/* Only single-bit masks are remapped; any other mask is
		 * left untouched. */
4441b8e80941Smrg		switch (write_mask) {
4442b8e80941Smrg		case 0x1:
4443b8e80941Smrg			if (swizzle_x == 2) {
4444b8e80941Smrg				write_mask = 0xc;
4445b8e80941Smrg				use_tmp = 3;
4446b8e80941Smrg			} else
4447b8e80941Smrg				write_mask = 0x3;
4448b8e80941Smrg			break;
4449b8e80941Smrg		case 0x2:
4450b8e80941Smrg			if (swizzle_x == 2) {
4451b8e80941Smrg				write_mask = 0xc;
4452b8e80941Smrg				use_tmp = 3;
4453b8e80941Smrg			} else {
4454b8e80941Smrg				write_mask = 0x3;
4455b8e80941Smrg				use_tmp = 1;
4456b8e80941Smrg			}
4457b8e80941Smrg			break;
4458b8e80941Smrg		case 0x4:
4459b8e80941Smrg			if (swizzle_x == 0) {
4460b8e80941Smrg				write_mask = 0x3;
4461b8e80941Smrg				use_tmp = 1;
4462b8e80941Smrg			} else
4463b8e80941Smrg				write_mask = 0xc;
4464b8e80941Smrg			break;
4465b8e80941Smrg		case 0x8:
4466b8e80941Smrg			if (swizzle_x == 0) {
4467b8e80941Smrg				write_mask = 0x3;
4468b8e80941Smrg				use_tmp = 1;
4469b8e80941Smrg			} else {
4470b8e80941Smrg				write_mask = 0xc;
4471b8e80941Smrg				use_tmp = 3;
4472b8e80941Smrg			}
4473b8e80941Smrg			break;
4474b8e80941Smrg		}
4475b8e80941Smrg	}
4476848b8605Smrg
4477b8e80941Smrg	lasti = tgsi_last_instruction(write_mask);
4478848b8605Smrg	for (i = 0; i <= lasti; i++) {
4479b8e80941Smrg
4480848b8605Smrg		if (!(write_mask & (1 << i)))
4481848b8605Smrg			continue;
4482848b8605Smrg
4483848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4484b8e80941Smrg
4485b8e80941Smrg		if (singledest) {
4486b8e80941Smrg			if (use_tmp || dest_temp) {
4487b8e80941Smrg				alu.dst.sel = use_tmp ? ctx->temp_reg : dest_temp;
4488b8e80941Smrg				alu.dst.chan = i;
4489b8e80941Smrg				alu.dst.write = 1;
4490b8e80941Smrg			} else {
4491b8e80941Smrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4492b8e80941Smrg			}
			/* In single-destination mode only channels 0 and 2 keep
			 * their write enable. */
4493b8e80941Smrg			if (i == 1 || i == 3)
4494b8e80941Smrg				alu.dst.write = 0;
4495848b8605Smrg		} else
4496848b8605Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4497848b8605Smrg
4498b8e80941Smrg		alu.op = op_override ? op_override : ctx->inst_info->op;
		/* DABS reads source channel i directly; every other opcode
		 * reads the channel returned by fp64_switch(i). */
4499b8e80941Smrg		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
4500b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4501b8e80941Smrg		} else if (!swap) {
4502848b8605Smrg			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
4503b8e80941Smrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
4504848b8605Smrg			}
4505848b8605Smrg		} else {
4506b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
4507b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
4508848b8605Smrg		}
4509b8e80941Smrg
4510848b8605Smrg		/* handle some special cases */
		/* For DABS the abs modifier is applied on channels 1 and 3 only
		 * (presumably the dword holding the sign bit - TODO confirm). */
4511b8e80941Smrg		if (i == 1 || i == 3) {
4512b8e80941Smrg			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
4513b8e80941Smrg			case TGSI_OPCODE_DABS:
4514b8e80941Smrg				r600_bytecode_src_set_abs(&alu.src[0]);
4515b8e80941Smrg				break;
4516b8e80941Smrg			default:
4517b8e80941Smrg				break;
4518b8e80941Smrg			}
4519848b8605Smrg		}
4520b8e80941Smrg		if (i == lasti) {
4521848b8605Smrg			alu.last = 1;
4522848b8605Smrg		}
4523848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4524848b8605Smrg		if (r)
4525848b8605Smrg			return r;
4526848b8605Smrg	}
4527848b8605Smrg
4528848b8605Smrg	if (use_tmp) {
		/* Switch back to the write mask the instruction asked for. */
4529b8e80941Smrg		write_mask = inst->Dst[0].Register.WriteMask;
4530b8e80941Smrg
4531b8e80941Smrg		lasti = tgsi_last_instruction(write_mask);
4532848b8605Smrg		/* move result from temp to dst */
4533848b8605Smrg		for (i = 0; i <= lasti; i++) {
4534848b8605Smrg			if (!(write_mask & (1 << i)))
4535848b8605Smrg				continue;
4536848b8605Smrg
4537848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4538848b8605Smrg			alu.op = ALU_OP1_MOV;
4539b8e80941Smrg
4540b8e80941Smrg			if (dest_temp) {
4541b8e80941Smrg				alu.dst.sel = dest_temp;
4542b8e80941Smrg				alu.dst.chan = i;
4543b8e80941Smrg				alu.dst.write = 1;
4544b8e80941Smrg			} else
4545b8e80941Smrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4546848b8605Smrg			alu.src[0].sel = ctx->temp_reg;
4547b8e80941Smrg			alu.src[0].chan = use_tmp - 1;
4548848b8605Smrg			alu.last = (i == lasti);
4549848b8605Smrg
4550848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4551848b8605Smrg			if (r)
4552848b8605Smrg				return r;
4553848b8605Smrg		}
4554848b8605Smrg	}
4555848b8605Smrg	return 0;
4556848b8605Smrg}
4557848b8605Smrg
4558b8e80941Smrgstatic int tgsi_op2_64(struct r600_shader_ctx *ctx)
4559848b8605Smrg{
4560b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4561b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
4562b8e80941Smrg	/* confirm writemasking */
4563b8e80941Smrg	if ((write_mask & 0x3) != 0x3 &&
4564b8e80941Smrg	    (write_mask & 0xc) != 0xc) {
4565b8e80941Smrg		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
4566b8e80941Smrg		return -1;
4567b8e80941Smrg	}
4568b8e80941Smrg	return tgsi_op2_64_params(ctx, false, false, 0, 0);
4569b8e80941Smrg}
4570b8e80941Smrg
/*
 * 64-bit operation in single-destination mode, sources in their natural
 * order, no destination or opcode override.
 */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	const bool singledest = true;
	const bool swap = false;

	return tgsi_op2_64_params(ctx, singledest, swap, 0, 0);
}
4575848b8605Smrg
/*
 * 64-bit operation in single-destination mode with the two sources
 * swapped, no destination or opcode override.
 */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	const bool singledest = true;
	const bool swap = true;

	return tgsi_op2_64_params(ctx, singledest, swap, 0, 0);
}
4580848b8605Smrg
4581b8e80941Smrgstatic int tgsi_op3_64(struct r600_shader_ctx *ctx)
4582848b8605Smrg{
4583848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4584848b8605Smrg	struct r600_bytecode_alu alu;
4585b8e80941Smrg	int i, j, r;
4586b8e80941Smrg	int lasti = 3;
4587b8e80941Smrg	int tmp = r600_get_temp(ctx);
4588848b8605Smrg
4589848b8605Smrg	for (i = 0; i < lasti + 1; i++) {
4590848b8605Smrg
4591848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4592848b8605Smrg		alu.op = ctx->inst_info->op;
4593b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
4594b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1);
4595b8e80941Smrg		}
4596848b8605Smrg
4597b8e80941Smrg		if (inst->Dst[0].Register.WriteMask & (1 << i))
4598b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4599b8e80941Smrg		else
4600b8e80941Smrg			alu.dst.sel = tmp;
4601848b8605Smrg
4602b8e80941Smrg		alu.dst.chan = i;
4603b8e80941Smrg		alu.is_op3 = 1;
4604848b8605Smrg		if (i == lasti) {
4605848b8605Smrg			alu.last = 1;
4606848b8605Smrg		}
4607848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4608848b8605Smrg		if (r)
4609848b8605Smrg			return r;
4610848b8605Smrg	}
4611848b8605Smrg	return 0;
4612848b8605Smrg}
4613848b8605Smrg
4614b8e80941Smrgstatic int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
4615848b8605Smrg{
4616848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4617848b8605Smrg	struct r600_bytecode_alu alu;
4618b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
4619b8e80941Smrg	int i, j, r, lasti = tgsi_last_instruction(write_mask);
4620b8e80941Smrg	/* use temp register if trans_only and more than one dst component */
4621b8e80941Smrg	int use_tmp = trans_only && (write_mask ^ (1 << lasti));
4622b8e80941Smrg	unsigned op = ctx->inst_info->op;
4623b8e80941Smrg
4624b8e80941Smrg	if (op == ALU_OP2_MUL_IEEE &&
4625b8e80941Smrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
4626b8e80941Smrg		op = ALU_OP2_MUL;
4627b8e80941Smrg
4628b8e80941Smrg	for (i = 0; i <= lasti; i++) {
4629b8e80941Smrg		if (!(write_mask & (1 << i)))
4630b8e80941Smrg			continue;
4631b8e80941Smrg
4632848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4633b8e80941Smrg		if (use_tmp) {
4634b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
4635b8e80941Smrg			alu.dst.chan = i;
4636b8e80941Smrg			alu.dst.write = 1;
4637b8e80941Smrg		} else
4638b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4639848b8605Smrg
4640b8e80941Smrg		alu.op = op;
4641b8e80941Smrg		if (!swap) {
4642b8e80941Smrg			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
4643b8e80941Smrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
4644848b8605Smrg			}
4645b8e80941Smrg		} else {
4646b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4647b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4648848b8605Smrg		}
4649b8e80941Smrg		if (i == lasti || trans_only) {
4650848b8605Smrg			alu.last = 1;
4651b8e80941Smrg		}
4652848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4653848b8605Smrg		if (r)
4654848b8605Smrg			return r;
4655848b8605Smrg	}
4656848b8605Smrg
4657b8e80941Smrg	if (use_tmp) {
4658b8e80941Smrg		/* move result from temp to dst */
4659b8e80941Smrg		for (i = 0; i <= lasti; i++) {
4660b8e80941Smrg			if (!(write_mask & (1 << i)))
4661b8e80941Smrg				continue;
4662848b8605Smrg
4663848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4664b8e80941Smrg			alu.op = ALU_OP1_MOV;
4665b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4666b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
4667b8e80941Smrg			alu.src[0].chan = i;
4668b8e80941Smrg			alu.last = (i == lasti);
4669b8e80941Smrg
4670848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4671848b8605Smrg			if (r)
4672848b8605Smrg				return r;
4673848b8605Smrg		}
4674848b8605Smrg	}
4675848b8605Smrg	return 0;
4676848b8605Smrg}
4677848b8605Smrg
/* Per-channel operation: sources in natural order, not trans-only. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
4682848b8605Smrg
/* Per-channel operation with the two sources exchanged, not trans-only. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
4687848b8605Smrg
/* Per-channel operation in trans-only mode, sources in natural order. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
4692848b8605Smrg
4693b8e80941Smrgstatic int tgsi_ineg(struct r600_shader_ctx *ctx)
4694848b8605Smrg{
4695848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4696848b8605Smrg	struct r600_bytecode_alu alu;
4697848b8605Smrg	int i, r;
4698b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4699848b8605Smrg
4700b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
4701848b8605Smrg
4702b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4703b8e80941Smrg			continue;
4704848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4705848b8605Smrg		alu.op = ctx->inst_info->op;
4706b8e80941Smrg
4707b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
4708b8e80941Smrg
4709b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4710848b8605Smrg
4711848b8605Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4712848b8605Smrg
4713b8e80941Smrg		if (i == lasti) {
4714848b8605Smrg			alu.last = 1;
4715b8e80941Smrg		}
4716848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4717848b8605Smrg		if (r)
4718848b8605Smrg			return r;
4719848b8605Smrg	}
4720848b8605Smrg	return 0;
4721b8e80941Smrg
4722848b8605Smrg}
4723848b8605Smrg
4724b8e80941Smrgstatic int tgsi_dneg(struct r600_shader_ctx *ctx)
4725848b8605Smrg{
4726848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4727848b8605Smrg	struct r600_bytecode_alu alu;
4728848b8605Smrg	int i, r;
4729848b8605Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4730848b8605Smrg
4731848b8605Smrg	for (i = 0; i < lasti + 1; i++) {
4732b8e80941Smrg
4733848b8605Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4734848b8605Smrg			continue;
4735848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4736848b8605Smrg		alu.op = ALU_OP1_MOV;
4737848b8605Smrg
4738b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4739b8e80941Smrg
4740b8e80941Smrg		if (i == 1 || i == 3)
4741b8e80941Smrg			r600_bytecode_src_toggle_neg(&alu.src[0]);
4742848b8605Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4743b8e80941Smrg
4744b8e80941Smrg		if (i == lasti) {
4745848b8605Smrg			alu.last = 1;
4746b8e80941Smrg		}
4747848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4748848b8605Smrg		if (r)
4749848b8605Smrg			return r;
4750848b8605Smrg	}
4751848b8605Smrg	return 0;
4752b8e80941Smrg
4753848b8605Smrg}
4754848b8605Smrg
4755b8e80941Smrgstatic int tgsi_dfracexp(struct r600_shader_ctx *ctx)
4756848b8605Smrg{
4757848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4758848b8605Smrg	struct r600_bytecode_alu alu;
4759b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
4760b8e80941Smrg	int i, j, r;
4761848b8605Smrg
4762b8e80941Smrg	for (i = 0; i <= 3; i++) {
4763b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4764b8e80941Smrg		alu.op = ctx->inst_info->op;
4765848b8605Smrg
4766b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
4767b8e80941Smrg		alu.dst.chan = i;
4768b8e80941Smrg		alu.dst.write = 1;
4769b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
4770b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
4771b8e80941Smrg		}
4772848b8605Smrg
4773b8e80941Smrg		if (i == 3)
4774b8e80941Smrg			alu.last = 1;
4775b8e80941Smrg
4776b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4777b8e80941Smrg		if (r)
4778b8e80941Smrg			return r;
4779b8e80941Smrg	}
4780b8e80941Smrg
4781b8e80941Smrg	/* Replicate significand result across channels. */
4782b8e80941Smrg	for (i = 0; i <= 3; i++) {
4783b8e80941Smrg		if (!(write_mask & (1 << i)))
4784b8e80941Smrg			continue;
4785b8e80941Smrg
4786b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4787b8e80941Smrg		alu.op = ALU_OP1_MOV;
4788b8e80941Smrg		alu.src[0].chan = (i & 1) + 2;
4789b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
4790b8e80941Smrg
4791b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4792b8e80941Smrg		alu.dst.write = 1;
4793b8e80941Smrg		alu.last = 1;
4794b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4795b8e80941Smrg		if (r)
4796b8e80941Smrg			return r;
4797b8e80941Smrg	}
4798848b8605Smrg
4799b8e80941Smrg	for (i = 0; i <= 3; i++) {
4800b8e80941Smrg		if (inst->Dst[1].Register.WriteMask & (1 << i)) {
4801b8e80941Smrg			/* MOV third channels to writemask dst1 */
4802b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4803b8e80941Smrg			alu.op = ALU_OP1_MOV;
4804b8e80941Smrg			alu.src[0].chan = 1;
4805848b8605Smrg			alu.src[0].sel = ctx->temp_reg;
4806b8e80941Smrg
4807b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
4808848b8605Smrg			alu.last = 1;
4809848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4810848b8605Smrg			if (r)
4811848b8605Smrg				return r;
4812b8e80941Smrg			break;
4813848b8605Smrg		}
4814848b8605Smrg	}
4815b8e80941Smrg	return 0;
4816b8e80941Smrg}
4817848b8605Smrg
4818b8e80941Smrg
/*
 * Emit code converting 32-bit integers to doubles; the opcode is asserted to
 * be TGSI_OPCODE_I2D or TGSI_OPCODE_U2D.
 *
 * For each destination channel pair (c = 0 for xy, c = 1 for zw) that has
 * any bit set in the write mask:
 *   1. source component c is split with AND_INT into (value & 0xffffff00)
 *      and (value & 0xff), stored in a private temporary;
 *   2. the first part is converted with ctx->inst_info->op and the second
 *      with ALU_OP1_UINT_TO_FLT, in place;
 *   3. both floats are widened with ALU_OP1_FLT32_TO_FLT64 into
 *      ctx->temp_reg and summed with ALU_OP2_ADD_64 into Dst[0].
 *
 * Returns 0 on success, or the error from r600_bytecode_add_alu().
 */
4819b8e80941Smrgstatic int egcm_int_to_double(struct r600_shader_ctx *ctx)
4820b8e80941Smrg{
4821b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4822b8e80941Smrg	struct r600_bytecode_alu alu;
4823b8e80941Smrg	int i, c, r;
4824b8e80941Smrg	int write_mask = inst->Dst[0].Register.WriteMask;
	/* Private temporary for the split halves; distinct from the
	 * ctx->temp_reg used in the widening stage below. */
4825b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
4826b8e80941Smrg
4827b8e80941Smrg	assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
4828b8e80941Smrg		inst->Instruction.Opcode == TGSI_OPCODE_U2D);
4829b8e80941Smrg
4830b8e80941Smrg	for (c = 0; c < 2; c++) {
4831b8e80941Smrg		int dchan = c * 2;
4832b8e80941Smrg		if (write_mask & (0x3 << dchan)) {
4833b8e80941Smrg			/* split into 24-bit int and 8-bit int */
4834848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4835b8e80941Smrg			alu.op = ALU_OP2_AND_INT;
4836b8e80941Smrg			alu.dst.sel = temp_reg;
4837b8e80941Smrg			alu.dst.chan = dchan;
4838b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
4839b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4840b8e80941Smrg			alu.src[1].value = 0xffffff00;
4841b8e80941Smrg			alu.dst.write = 1;
4842b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4843b8e80941Smrg			if (r)
4844b8e80941Smrg				return r;
4845848b8605Smrg
4846b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4847b8e80941Smrg			alu.op = ALU_OP2_AND_INT;
4848b8e80941Smrg			alu.dst.sel = temp_reg;
4849b8e80941Smrg			alu.dst.chan = dchan + 1;
4850b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
4851b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4852b8e80941Smrg			alu.src[1].value = 0xff;
4853b8e80941Smrg			alu.dst.write = 1;
4854848b8605Smrg			alu.last = 1;
4855848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4856848b8605Smrg			if (r)
4857848b8605Smrg				return r;
4858848b8605Smrg		}
4859848b8605Smrg	}
4860848b8605Smrg
	/* Convert both halves to float in place: the upper part with the
	 * instruction's own opcode, the low byte always as unsigned. */
4861b8e80941Smrg	for (c = 0; c < 2; c++) {
4862b8e80941Smrg		int dchan = c * 2;
4863b8e80941Smrg		if (write_mask & (0x3 << dchan)) {
4864b8e80941Smrg			for (i = dchan; i <= dchan + 1; i++) {
4865b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4866b8e80941Smrg				alu.op = i == dchan ? ctx->inst_info->op : ALU_OP1_UINT_TO_FLT;
4867848b8605Smrg
4868b8e80941Smrg				alu.src[0].sel = temp_reg;
4869b8e80941Smrg				alu.src[0].chan = i;
4870b8e80941Smrg				alu.dst.sel = temp_reg;
4871b8e80941Smrg				alu.dst.chan = i;
4872b8e80941Smrg				alu.dst.write = 1;
4873b8e80941Smrg				if (ctx->bc->chip_class == CAYMAN)
4874b8e80941Smrg					alu.last = i == dchan + 1;
4875b8e80941Smrg				else
4876b8e80941Smrg					alu.last = 1; /* trans only ops on evergreen */
4877b8e80941Smrg
4878b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
4879b8e80941Smrg				if (r)
4880b8e80941Smrg					return r;
4881b8e80941Smrg			}
4882b8e80941Smrg		}
4883b8e80941Smrg	}
4884848b8605Smrg
4885b8e80941Smrg	for (c = 0; c < 2; c++) {
4886b8e80941Smrg		int dchan = c * 2;
4887b8e80941Smrg		if (write_mask & (0x3 << dchan)) {
			/* Widen both floats to doubles in ctx->temp_reg: slots 0 and 2
			 * read the converted halves, slots 1 and 3 read a literal 0. */
4888b8e80941Smrg			for (i = 0; i < 4; i++) {
4889b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4890b8e80941Smrg				alu.op = ALU_OP1_FLT32_TO_FLT64;
4891b8e80941Smrg
4892b8e80941Smrg				alu.src[0].chan = dchan + (i / 2);
4893b8e80941Smrg				if (i == 0 || i == 2)
4894b8e80941Smrg					alu.src[0].sel = temp_reg;
4895b8e80941Smrg				else {
4896b8e80941Smrg					alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
4897b8e80941Smrg					alu.src[0].value = 0x0;
4898b8e80941Smrg				}
4899b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
4900b8e80941Smrg				alu.dst.chan = i;
4901b8e80941Smrg				alu.last = i == 3;
4902b8e80941Smrg				alu.dst.write = 1;
4903848b8605Smrg
4904b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
4905b8e80941Smrg				if (r)
4906b8e80941Smrg					return r;
4907b8e80941Smrg			}
4908848b8605Smrg
			/* Add the two doubles held in ctx->temp_reg and write the
			 * sum to the destination pair. */
4909b8e80941Smrg			for (i = 0; i <= 1; i++) {
4910b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4911b8e80941Smrg				alu.op = ALU_OP2_ADD_64;
4912848b8605Smrg
4913b8e80941Smrg				alu.src[0].chan = fp64_switch(i);
4914b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
4915b8e80941Smrg
4916b8e80941Smrg				alu.src[1].chan = fp64_switch(i + 2);
4917b8e80941Smrg				alu.src[1].sel = ctx->temp_reg;
4918b8e80941Smrg				tgsi_dst(ctx, &inst->Dst[0], dchan + i, &alu.dst);
4919b8e80941Smrg				alu.last = i == 1;
4920b8e80941Smrg
4921b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
4922b8e80941Smrg				if (r)
4923b8e80941Smrg					return r;
4924b8e80941Smrg			}
4925b8e80941Smrg		}
4926848b8605Smrg	}
4927848b8605Smrg
4928b8e80941Smrg	return 0;
4929b8e80941Smrg}
4930848b8605Smrg
4931b8e80941Smrgstatic int egcm_double_to_int(struct r600_shader_ctx *ctx)
4932b8e80941Smrg{
4933b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4934b8e80941Smrg	struct r600_bytecode_alu alu;
4935b8e80941Smrg	int i, r;
4936b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4937b8e80941Smrg	int treg = r600_get_temp(ctx);
4938b8e80941Smrg	assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
4939b8e80941Smrg		inst->Instruction.Opcode == TGSI_OPCODE_D2U);
4940848b8605Smrg
4941b8e80941Smrg	/* do a 64->32 into a temp register */
4942b8e80941Smrg	r = tgsi_op2_64_params(ctx, true, false, treg, ALU_OP1_FLT64_TO_FLT32);
4943b8e80941Smrg	if (r)
4944b8e80941Smrg		return r;
4945848b8605Smrg
4946b8e80941Smrg	for (i = 0; i <= lasti; i++) {
4947b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4948b8e80941Smrg			continue;
4949b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4950b8e80941Smrg		alu.op = ctx->inst_info->op;
4951848b8605Smrg
4952b8e80941Smrg		alu.src[0].chan = i;
4953b8e80941Smrg		alu.src[0].sel = treg;
4954b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4955b8e80941Smrg		alu.last = (i == lasti);
4956848b8605Smrg
4957848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4958848b8605Smrg		if (r)
4959848b8605Smrg			return r;
4960848b8605Smrg	}
4961848b8605Smrg
4962848b8605Smrg	return 0;
4963848b8605Smrg}
4964848b8605Smrg
4965b8e80941Smrgstatic int cayman_emit_unary_double_raw(struct r600_bytecode *bc,
4966b8e80941Smrg					unsigned op,
4967b8e80941Smrg					int dst_reg,
4968b8e80941Smrg					struct r600_shader_src *src,
4969b8e80941Smrg					bool abs)
4970848b8605Smrg{
4971848b8605Smrg	struct r600_bytecode_alu alu;
4972b8e80941Smrg	const int last_slot = 3;
4973b8e80941Smrg	int r;
4974848b8605Smrg
4975b8e80941Smrg	/* these have to write the result to X/Y by the looks of it */
4976b8e80941Smrg	for (int i = 0 ; i < last_slot; i++) {
4977848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4978b8e80941Smrg		alu.op = op;
4979848b8605Smrg
4980b8e80941Smrg		r600_bytecode_src(&alu.src[0], src, 1);
4981b8e80941Smrg		r600_bytecode_src(&alu.src[1], src, 0);
4982848b8605Smrg
4983b8e80941Smrg		if (abs)
4984b8e80941Smrg			r600_bytecode_src_set_abs(&alu.src[1]);
4985848b8605Smrg
4986b8e80941Smrg		alu.dst.sel = dst_reg;
4987b8e80941Smrg		alu.dst.chan = i;
4988b8e80941Smrg		alu.dst.write = (i == 0 || i == 1);
4989b8e80941Smrg
4990b8e80941Smrg		if (bc->chip_class != CAYMAN || i == last_slot - 1)
4991848b8605Smrg			alu.last = 1;
4992b8e80941Smrg		r = r600_bytecode_add_alu(bc, &alu);
4993848b8605Smrg		if (r)
4994848b8605Smrg			return r;
4995848b8605Smrg	}
4996848b8605Smrg
4997848b8605Smrg	return 0;
4998848b8605Smrg}
4999848b8605Smrg
5000b8e80941Smrgstatic int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
5001848b8605Smrg{
5002848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5003b8e80941Smrg	int i, r;
5004848b8605Smrg	struct r600_bytecode_alu alu;
5005b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5006b8e80941Smrg	int t1 = ctx->temp_reg;
5007848b8605Smrg
5008b8e80941Smrg	/* should only be one src regs */
5009b8e80941Smrg	assert(inst->Instruction.NumSrcRegs == 1);
5010848b8605Smrg
5011b8e80941Smrg	/* only support one double at a time */
5012b8e80941Smrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
5013b8e80941Smrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
5014848b8605Smrg
5015b8e80941Smrg	r = cayman_emit_unary_double_raw(
5016b8e80941Smrg		ctx->bc, ctx->inst_info->op, t1,
5017b8e80941Smrg		&ctx->src[0],
5018b8e80941Smrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
5019b8e80941Smrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT);
5020848b8605Smrg	if (r)
5021848b8605Smrg		return r;
5022848b8605Smrg
5023b8e80941Smrg	for (i = 0 ; i <= lasti; i++) {
5024b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5025b8e80941Smrg			continue;
5026848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5027b8e80941Smrg		alu.op = ALU_OP1_MOV;
5028b8e80941Smrg		alu.src[0].sel = t1;
5029b8e80941Smrg		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
5030b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5031848b8605Smrg		alu.dst.write = 1;
5032b8e80941Smrg		if (i == lasti)
5033b8e80941Smrg			alu.last = 1;
5034848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5035848b8605Smrg		if (r)
5036848b8605Smrg			return r;
5037b8e80941Smrg	}
5038b8e80941Smrg	return 0;
5039b8e80941Smrg}
5040848b8605Smrg
5041b8e80941Smrgstatic int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
5042b8e80941Smrg{
5043b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5044b8e80941Smrg	int i, j, r;
5045b8e80941Smrg	struct r600_bytecode_alu alu;
5046b8e80941Smrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
5047848b8605Smrg
5048b8e80941Smrg	for (i = 0 ; i < last_slot; i++) {
5049b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5050b8e80941Smrg		alu.op = ctx->inst_info->op;
5051b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
5052b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
5053848b8605Smrg
5054b8e80941Smrg			/* RSQ should take the absolute value of src */
5055b8e80941Smrg			if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) {
5056b8e80941Smrg				r600_bytecode_src_set_abs(&alu.src[j]);
5057b8e80941Smrg			}
5058b8e80941Smrg		}
5059b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5060b8e80941Smrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
5061848b8605Smrg
5062b8e80941Smrg		if (i == last_slot - 1)
5063b8e80941Smrg			alu.last = 1;
5064b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5065b8e80941Smrg		if (r)
5066b8e80941Smrg			return r;
5067b8e80941Smrg	}
5068848b8605Smrg	return 0;
5069848b8605Smrg}
5070848b8605Smrg
5071b8e80941Smrgstatic int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
5072848b8605Smrg{
5073848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5074b8e80941Smrg	int i, j, k, r;
5075848b8605Smrg	struct r600_bytecode_alu alu;
5076b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5077b8e80941Smrg	int t1 = ctx->temp_reg;
5078848b8605Smrg
5079b8e80941Smrg	for (k = 0; k <= lasti; k++) {
5080b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
5081b8e80941Smrg			continue;
5082848b8605Smrg
5083b8e80941Smrg		for (i = 0 ; i < 4; i++) {
5084b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5085b8e80941Smrg			alu.op = ctx->inst_info->op;
5086b8e80941Smrg			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
5087b8e80941Smrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
5088b8e80941Smrg			}
5089b8e80941Smrg			alu.dst.sel = t1;
5090b8e80941Smrg			alu.dst.chan = i;
5091b8e80941Smrg			alu.dst.write = (i == k);
5092b8e80941Smrg			if (i == 3)
5093b8e80941Smrg				alu.last = 1;
5094b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
5095b8e80941Smrg			if (r)
5096b8e80941Smrg				return r;
5097b8e80941Smrg		}
5098b8e80941Smrg	}
5099848b8605Smrg
5100b8e80941Smrg	for (i = 0 ; i <= lasti; i++) {
5101b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5102b8e80941Smrg			continue;
5103b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5104b8e80941Smrg		alu.op = ALU_OP1_MOV;
5105b8e80941Smrg		alu.src[0].sel = t1;
5106b8e80941Smrg		alu.src[0].chan = i;
5107b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5108b8e80941Smrg		alu.dst.write = 1;
5109b8e80941Smrg		if (i == lasti)
5110b8e80941Smrg			alu.last = 1;
5111b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5112b8e80941Smrg		if (r)
5113b8e80941Smrg			return r;
5114848b8605Smrg	}
5115b8e80941Smrg
5116b8e80941Smrg	return 0;
5117848b8605Smrg}
5118848b8605Smrg
5119b8e80941Smrg
5120b8e80941Smrgstatic int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
5121848b8605Smrg{
5122848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5123b8e80941Smrg	int i, j, k, r;
5124848b8605Smrg	struct r600_bytecode_alu alu;
5125b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5126b8e80941Smrg	int t1 = ctx->temp_reg;
5127b8e80941Smrg
5128b8e80941Smrg	/* t1 would get overwritten below if we actually tried to
5129b8e80941Smrg	 * multiply two pairs of doubles at a time. */
5130b8e80941Smrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
5131b8e80941Smrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
5132b8e80941Smrg
5133b8e80941Smrg	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
5134848b8605Smrg
5135848b8605Smrg	for (i = 0; i < 4; i++) {
5136848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5137b8e80941Smrg		alu.op = ctx->inst_info->op;
5138b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
5139b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
5140b8e80941Smrg		}
5141b8e80941Smrg		alu.dst.sel = t1;
5142848b8605Smrg		alu.dst.chan = i;
5143b8e80941Smrg		alu.dst.write = 1;
5144848b8605Smrg		if (i == 3)
5145848b8605Smrg			alu.last = 1;
5146848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5147848b8605Smrg		if (r)
5148848b8605Smrg			return r;
5149848b8605Smrg	}
5150b8e80941Smrg
5151b8e80941Smrg	for (i = 0; i <= lasti; i++) {
5152b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5153b8e80941Smrg			continue;
5154b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5155b8e80941Smrg		alu.op = ALU_OP1_MOV;
5156b8e80941Smrg		alu.src[0].sel = t1;
5157b8e80941Smrg		alu.src[0].chan = i;
5158b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5159b8e80941Smrg		alu.dst.write = 1;
5160b8e80941Smrg		if (i == lasti)
5161b8e80941Smrg			alu.last = 1;
5162b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5163b8e80941Smrg		if (r)
5164b8e80941Smrg			return r;
5165b8e80941Smrg	}
5166b8e80941Smrg
5167848b8605Smrg	return 0;
5168848b8605Smrg}
5169848b8605Smrg
5170b8e80941Smrg/*
5171b8e80941Smrg * Emit RECIP_64 + MUL_64 to implement division.
5172b8e80941Smrg */
5173b8e80941Smrgstatic int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
5174848b8605Smrg{
5175848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5176b8e80941Smrg	int r;
5177848b8605Smrg	struct r600_bytecode_alu alu;
5178b8e80941Smrg	int t1 = ctx->temp_reg;
5179b8e80941Smrg	int k;
5180848b8605Smrg
5181b8e80941Smrg	/* Only support one double at a time. This is the same constraint as
5182b8e80941Smrg	 * in DMUL lowering. */
5183b8e80941Smrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
5184b8e80941Smrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
5185b8e80941Smrg
5186b8e80941Smrg	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
5187b8e80941Smrg
5188b8e80941Smrg	r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
5189848b8605Smrg	if (r)
5190848b8605Smrg		return r;
5191848b8605Smrg
5192b8e80941Smrg	for (int i = 0; i < 4; i++) {
5193848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5194b8e80941Smrg		alu.op = ALU_OP2_MUL_64;
5195b8e80941Smrg
5196b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
5197b8e80941Smrg
5198b8e80941Smrg		alu.src[1].sel = t1;
5199b8e80941Smrg		alu.src[1].chan = (i == 3) ? 0 : 1;
5200b8e80941Smrg
5201b8e80941Smrg		alu.dst.sel = t1;
5202848b8605Smrg		alu.dst.chan = i;
5203848b8605Smrg		alu.dst.write = 1;
5204b8e80941Smrg		if (i == 3)
5205848b8605Smrg			alu.last = 1;
5206848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5207848b8605Smrg		if (r)
5208848b8605Smrg			return r;
5209848b8605Smrg	}
5210848b8605Smrg
5211b8e80941Smrg	for (int i = 0; i < 2; i++) {
5212848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5213b8e80941Smrg		alu.op = ALU_OP1_MOV;
5214b8e80941Smrg		alu.src[0].sel = t1;
5215b8e80941Smrg		alu.src[0].chan = i;
5216b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
5217b8e80941Smrg		alu.dst.write = 1;
5218b8e80941Smrg		if (i == 1)
5219848b8605Smrg			alu.last = 1;
5220848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5221848b8605Smrg		if (r)
5222848b8605Smrg			return r;
5223848b8605Smrg	}
5224848b8605Smrg	return 0;
5225848b8605Smrg}
5226848b8605Smrg
5227b8e80941Smrg/*
5228b8e80941Smrg * r600 - trunc to -PI..PI range
5229b8e80941Smrg * r700 - normalize by dividing by 2PI
5230b8e80941Smrg * see fdo bug 27901
5231b8e80941Smrg */
5232b8e80941Smrgstatic int tgsi_setup_trig(struct r600_shader_ctx *ctx)
5233848b8605Smrg{
5234848b8605Smrg	int r;
5235b8e80941Smrg	struct r600_bytecode_alu alu;
5236848b8605Smrg
5237848b8605Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5238b8e80941Smrg	alu.op = ALU_OP3_MULADD;
5239b8e80941Smrg	alu.is_op3 = 1;
5240b8e80941Smrg
5241b8e80941Smrg	alu.dst.chan = 0;
5242848b8605Smrg	alu.dst.sel = ctx->temp_reg;
5243848b8605Smrg	alu.dst.write = 1;
5244b8e80941Smrg
5245b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5246b8e80941Smrg
5247b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5248b8e80941Smrg	alu.src[1].chan = 0;
5249b8e80941Smrg	alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI);
5250b8e80941Smrg	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
5251b8e80941Smrg	alu.src[2].chan = 0;
5252848b8605Smrg	alu.last = 1;
5253848b8605Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5254848b8605Smrg	if (r)
5255848b8605Smrg		return r;
5256b8e80941Smrg
5257848b8605Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5258b8e80941Smrg	alu.op = ALU_OP1_FRACT;
5259b8e80941Smrg
5260b8e80941Smrg	alu.dst.chan = 0;
5261848b8605Smrg	alu.dst.sel = ctx->temp_reg;
5262848b8605Smrg	alu.dst.write = 1;
5263b8e80941Smrg
5264b8e80941Smrg	alu.src[0].sel = ctx->temp_reg;
5265b8e80941Smrg	alu.src[0].chan = 0;
5266848b8605Smrg	alu.last = 1;
5267848b8605Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5268848b8605Smrg	if (r)
5269848b8605Smrg		return r;
5270b8e80941Smrg
5271848b8605Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5272b8e80941Smrg	alu.op = ALU_OP3_MULADD;
5273b8e80941Smrg	alu.is_op3 = 1;
5274b8e80941Smrg
5275b8e80941Smrg	alu.dst.chan = 0;
5276848b8605Smrg	alu.dst.sel = ctx->temp_reg;
5277848b8605Smrg	alu.dst.write = 1;
5278b8e80941Smrg
5279b8e80941Smrg	alu.src[0].sel = ctx->temp_reg;
5280b8e80941Smrg	alu.src[0].chan = 0;
5281b8e80941Smrg
5282b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5283b8e80941Smrg	alu.src[1].chan = 0;
5284b8e80941Smrg	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
5285b8e80941Smrg	alu.src[2].chan = 0;
5286b8e80941Smrg
5287b8e80941Smrg	if (ctx->bc->chip_class == R600) {
5288b8e80941Smrg		alu.src[1].value = u_bitcast_f2u(2.0f * M_PI);
5289b8e80941Smrg		alu.src[2].value = u_bitcast_f2u(-M_PI);
5290b8e80941Smrg	} else {
5291b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
5292b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
5293b8e80941Smrg		alu.src[2].neg = 1;
5294b8e80941Smrg	}
5295b8e80941Smrg
5296848b8605Smrg	alu.last = 1;
5297848b8605Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5298848b8605Smrg	if (r)
5299848b8605Smrg		return r;
5300b8e80941Smrg	return 0;
5301848b8605Smrg}
5302848b8605Smrg
5303b8e80941Smrgstatic int cayman_trig(struct r600_shader_ctx *ctx)
5304848b8605Smrg{
5305848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5306848b8605Smrg	struct r600_bytecode_alu alu;
5307b8e80941Smrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
5308b8e80941Smrg	int i, r;
5309848b8605Smrg
5310b8e80941Smrg	r = tgsi_setup_trig(ctx);
5311b8e80941Smrg	if (r)
5312b8e80941Smrg		return r;
5313848b8605Smrg
5314848b8605Smrg
5315b8e80941Smrg	for (i = 0; i < last_slot; i++) {
5316b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5317b8e80941Smrg		alu.op = ctx->inst_info->op;
5318b8e80941Smrg		alu.dst.chan = i;
5319848b8605Smrg
5320b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5321b8e80941Smrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
5322848b8605Smrg
5323b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
5324b8e80941Smrg		alu.src[0].chan = 0;
5325b8e80941Smrg		if (i == last_slot - 1)
5326848b8605Smrg			alu.last = 1;
5327b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5328b8e80941Smrg		if (r)
5329b8e80941Smrg			return r;
5330b8e80941Smrg	}
5331b8e80941Smrg	return 0;
5332b8e80941Smrg}
5333848b8605Smrg
5334b8e80941Smrgstatic int tgsi_trig(struct r600_shader_ctx *ctx)
5335b8e80941Smrg{
5336b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5337b8e80941Smrg	struct r600_bytecode_alu alu;
5338b8e80941Smrg	int i, r;
5339b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5340848b8605Smrg
5341b8e80941Smrg	r = tgsi_setup_trig(ctx);
5342b8e80941Smrg	if (r)
5343b8e80941Smrg		return r;
5344848b8605Smrg
5345b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5346b8e80941Smrg	alu.op = ctx->inst_info->op;
5347b8e80941Smrg	alu.dst.chan = 0;
5348b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5349b8e80941Smrg	alu.dst.write = 1;
5350848b8605Smrg
5351b8e80941Smrg	alu.src[0].sel = ctx->temp_reg;
5352b8e80941Smrg	alu.src[0].chan = 0;
5353b8e80941Smrg	alu.last = 1;
5354b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5355b8e80941Smrg	if (r)
5356b8e80941Smrg		return r;
5357848b8605Smrg
5358b8e80941Smrg	/* replicate result */
5359b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
5360b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5361b8e80941Smrg			continue;
5362848b8605Smrg
5363b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5364b8e80941Smrg		alu.op = ALU_OP1_MOV;
5365848b8605Smrg
5366b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
5367b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5368b8e80941Smrg		if (i == lasti)
5369b8e80941Smrg			alu.last = 1;
5370b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5371b8e80941Smrg		if (r)
5372b8e80941Smrg			return r;
5373b8e80941Smrg	}
5374b8e80941Smrg	return 0;
5375b8e80941Smrg}
5376848b8605Smrg
5377b8e80941Smrgstatic int tgsi_kill(struct r600_shader_ctx *ctx)
5378b8e80941Smrg{
5379b8e80941Smrg	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5380b8e80941Smrg	struct r600_bytecode_alu alu;
5381b8e80941Smrg	int i, r;
5382848b8605Smrg
5383b8e80941Smrg	for (i = 0; i < 4; i++) {
5384b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5385b8e80941Smrg		alu.op = ctx->inst_info->op;
5386848b8605Smrg
5387b8e80941Smrg		alu.dst.chan = i;
5388848b8605Smrg
5389b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
5390848b8605Smrg
5391b8e80941Smrg		if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) {
5392b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_1;
5393b8e80941Smrg			alu.src[1].neg = 1;
5394b8e80941Smrg		} else {
5395848b8605Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5396b8e80941Smrg		}
5397b8e80941Smrg		if (i == 3) {
5398848b8605Smrg			alu.last = 1;
5399b8e80941Smrg		}
5400b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5401b8e80941Smrg		if (r)
5402b8e80941Smrg			return r;
5403b8e80941Smrg	}
5404848b8605Smrg
5405b8e80941Smrg	/* kill must be last in ALU */
5406b8e80941Smrg	ctx->bc->force_add_cf = 1;
5407b8e80941Smrg	ctx->shader->uses_kill = TRUE;
5408b8e80941Smrg	return 0;
5409b8e80941Smrg}
5410848b8605Smrg
5411b8e80941Smrgstatic int tgsi_lit(struct r600_shader_ctx *ctx)
5412b8e80941Smrg{
5413b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5414b8e80941Smrg	struct r600_bytecode_alu alu;
5415b8e80941Smrg	int r;
5416848b8605Smrg
5417b8e80941Smrg	/* tmp.x = max(src.y, 0.0) */
5418b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5419b8e80941Smrg	alu.op = ALU_OP2_MAX;
5420b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
5421b8e80941Smrg	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
5422b8e80941Smrg	alu.src[1].chan = 1;
5423848b8605Smrg
5424b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5425b8e80941Smrg	alu.dst.chan = 0;
5426b8e80941Smrg	alu.dst.write = 1;
5427848b8605Smrg
5428b8e80941Smrg	alu.last = 1;
5429b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5430b8e80941Smrg	if (r)
5431b8e80941Smrg		return r;
5432b8e80941Smrg
5433b8e80941Smrg	if (inst->Dst[0].Register.WriteMask & (1 << 2))
5434b8e80941Smrg	{
5435b8e80941Smrg		int chan;
5436b8e80941Smrg		int sel;
5437b8e80941Smrg		unsigned i;
5438848b8605Smrg
5439848b8605Smrg		if (ctx->bc->chip_class == CAYMAN) {
5440b8e80941Smrg			for (i = 0; i < 3; i++) {
5441b8e80941Smrg				/* tmp.z = log(tmp.x) */
5442848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5443b8e80941Smrg				alu.op = ALU_OP1_LOG_CLAMPED;
5444b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
5445848b8605Smrg				alu.src[0].chan = 0;
5446b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
5447b8e80941Smrg				alu.dst.chan = i;
5448b8e80941Smrg				if (i == 2) {
5449b8e80941Smrg					alu.dst.write = 1;
5450848b8605Smrg					alu.last = 1;
5451b8e80941Smrg				} else
5452b8e80941Smrg					alu.dst.write = 0;
5453b8e80941Smrg
5454b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
5455b8e80941Smrg				if (r)
5456848b8605Smrg					return r;
5457848b8605Smrg			}
5458b8e80941Smrg		} else {
5459b8e80941Smrg			/* tmp.z = log(tmp.x) */
5460848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5461b8e80941Smrg			alu.op = ALU_OP1_LOG_CLAMPED;
5462b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
5463848b8605Smrg			alu.src[0].chan = 0;
5464b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
5465b8e80941Smrg			alu.dst.chan = 2;
5466848b8605Smrg			alu.dst.write = 1;
5467848b8605Smrg			alu.last = 1;
5468848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
5469848b8605Smrg			if (r)
5470848b8605Smrg				return r;
5471848b8605Smrg		}
5472848b8605Smrg
5473b8e80941Smrg		chan = alu.dst.chan;
5474b8e80941Smrg		sel = alu.dst.sel;
5475848b8605Smrg
5476b8e80941Smrg		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
5477848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5478b8e80941Smrg		alu.op = ALU_OP3_MUL_LIT;
5479b8e80941Smrg		alu.src[0].sel  = sel;
5480b8e80941Smrg		alu.src[0].chan = chan;
5481b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
5482b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
5483b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
5484b8e80941Smrg		alu.dst.chan = 0;
5485848b8605Smrg		alu.dst.write = 1;
5486b8e80941Smrg		alu.is_op3 = 1;
5487848b8605Smrg		alu.last = 1;
5488b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5489b8e80941Smrg		if (r)
5490848b8605Smrg			return r;
5491848b8605Smrg
5492848b8605Smrg		if (ctx->bc->chip_class == CAYMAN) {
5493b8e80941Smrg			for (i = 0; i < 3; i++) {
5494b8e80941Smrg				/* dst.z = exp(tmp.x) */
5495848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5496b8e80941Smrg				alu.op = ALU_OP1_EXP_IEEE;
5497b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
5498848b8605Smrg				alu.src[0].chan = 0;
5499b8e80941Smrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5500b8e80941Smrg				if (i == 2) {
5501b8e80941Smrg					alu.dst.write = 1;
5502b8e80941Smrg					alu.last = 1;
5503b8e80941Smrg				} else
5504b8e80941Smrg					alu.dst.write = 0;
5505b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
5506b8e80941Smrg				if (r)
5507848b8605Smrg					return r;
5508848b8605Smrg			}
5509848b8605Smrg		} else {
5510b8e80941Smrg			/* dst.z = exp(tmp.x) */
5511848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5512b8e80941Smrg			alu.op = ALU_OP1_EXP_IEEE;
5513b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
5514848b8605Smrg			alu.src[0].chan = 0;
5515b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
5516848b8605Smrg			alu.last = 1;
5517b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
5518b8e80941Smrg			if (r)
5519848b8605Smrg				return r;
5520848b8605Smrg		}
5521b8e80941Smrg	}
5522848b8605Smrg
5523b8e80941Smrg	/* dst.x, <- 1.0  */
5524b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5525b8e80941Smrg	alu.op = ALU_OP1_MOV;
5526b8e80941Smrg	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
5527b8e80941Smrg	alu.src[0].chan = 0;
5528b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
5529b8e80941Smrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
5530b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5531b8e80941Smrg	if (r)
5532b8e80941Smrg		return r;
5533848b8605Smrg
5534b8e80941Smrg	/* dst.y = max(src.x, 0.0) */
5535b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5536b8e80941Smrg	alu.op = ALU_OP2_MAX;
5537b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5538b8e80941Smrg	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
5539b8e80941Smrg	alu.src[1].chan = 0;
5540b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
5541b8e80941Smrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
5542b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5543b8e80941Smrg	if (r)
5544b8e80941Smrg		return r;
5545848b8605Smrg
5546b8e80941Smrg	/* dst.w, <- 1.0  */
5547b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5548b8e80941Smrg	alu.op = ALU_OP1_MOV;
5549b8e80941Smrg	alu.src[0].sel  = V_SQ_ALU_SRC_1;
5550b8e80941Smrg	alu.src[0].chan = 0;
5551b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
5552b8e80941Smrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
5553b8e80941Smrg	alu.last = 1;
5554b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5555b8e80941Smrg	if (r)
5556b8e80941Smrg		return r;
5557848b8605Smrg
5558b8e80941Smrg	return 0;
5559b8e80941Smrg}
5560848b8605Smrg
5561b8e80941Smrgstatic int tgsi_rsq(struct r600_shader_ctx *ctx)
5562b8e80941Smrg{
5563b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5564b8e80941Smrg	struct r600_bytecode_alu alu;
5565b8e80941Smrg	int i, r;
5566848b8605Smrg
5567b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5568848b8605Smrg
5569b8e80941Smrg	alu.op = ALU_OP1_RECIPSQRT_IEEE;
5570848b8605Smrg
5571b8e80941Smrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
5572b8e80941Smrg		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
5573b8e80941Smrg		r600_bytecode_src_set_abs(&alu.src[i]);
5574b8e80941Smrg	}
5575b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5576b8e80941Smrg	alu.dst.write = 1;
5577b8e80941Smrg	alu.last = 1;
5578b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5579b8e80941Smrg	if (r)
5580b8e80941Smrg		return r;
5581b8e80941Smrg	/* replicate result */
5582b8e80941Smrg	return tgsi_helper_tempx_replicate(ctx);
5583b8e80941Smrg}
5584848b8605Smrg
5585b8e80941Smrgstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
5586b8e80941Smrg{
5587b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5588b8e80941Smrg	struct r600_bytecode_alu alu;
5589b8e80941Smrg	int i, r;
5590848b8605Smrg
5591b8e80941Smrg	for (i = 0; i < 4; i++) {
5592848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5593b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
5594b8e80941Smrg		alu.op = ALU_OP1_MOV;
5595b8e80941Smrg		alu.dst.chan = i;
5596b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5597b8e80941Smrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
5598b8e80941Smrg		if (i == 3)
5599b8e80941Smrg			alu.last = 1;
5600b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5601b8e80941Smrg		if (r)
5602848b8605Smrg			return r;
5603b8e80941Smrg	}
5604b8e80941Smrg	return 0;
5605b8e80941Smrg}
5606848b8605Smrg
5607b8e80941Smrgstatic int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
5608b8e80941Smrg{
5609b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5610b8e80941Smrg	struct r600_bytecode_alu alu;
5611b8e80941Smrg	int i, r;
5612848b8605Smrg
5613b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5614b8e80941Smrg	alu.op = ctx->inst_info->op;
5615b8e80941Smrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
5616b8e80941Smrg		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
5617b8e80941Smrg	}
5618b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5619b8e80941Smrg	alu.dst.write = 1;
5620b8e80941Smrg	alu.last = 1;
5621b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5622b8e80941Smrg	if (r)
5623b8e80941Smrg		return r;
5624b8e80941Smrg	/* replicate result */
5625b8e80941Smrg	return tgsi_helper_tempx_replicate(ctx);
5626b8e80941Smrg}
5627848b8605Smrg
5628b8e80941Smrgstatic int cayman_pow(struct r600_shader_ctx *ctx)
5629b8e80941Smrg{
5630b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5631b8e80941Smrg	int i, r;
5632b8e80941Smrg	struct r600_bytecode_alu alu;
5633b8e80941Smrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
5634848b8605Smrg
5635b8e80941Smrg	for (i = 0; i < 3; i++) {
5636848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5637b8e80941Smrg		alu.op = ALU_OP1_LOG_IEEE;
5638b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5639b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
5640b8e80941Smrg		alu.dst.chan = i;
5641848b8605Smrg		alu.dst.write = 1;
5642b8e80941Smrg		if (i == 2)
5643b8e80941Smrg			alu.last = 1;
5644b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5645b8e80941Smrg		if (r)
5646848b8605Smrg			return r;
5647b8e80941Smrg	}
5648848b8605Smrg
5649b8e80941Smrg	/* b * LOG2(a) */
5650b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5651b8e80941Smrg	alu.op = ALU_OP2_MUL;
5652b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
5653b8e80941Smrg	alu.src[1].sel = ctx->temp_reg;
5654b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5655b8e80941Smrg	alu.dst.write = 1;
5656b8e80941Smrg	alu.last = 1;
5657b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5658b8e80941Smrg	if (r)
5659b8e80941Smrg		return r;
5660848b8605Smrg
5661b8e80941Smrg	for (i = 0; i < last_slot; i++) {
5662b8e80941Smrg		/* POW(a,b) = EXP2(b * LOG2(a))*/
5663b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5664b8e80941Smrg		alu.op = ALU_OP1_EXP_IEEE;
5665b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
5666848b8605Smrg
5667b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5668b8e80941Smrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
5669b8e80941Smrg		if (i == last_slot - 1)
5670b8e80941Smrg			alu.last = 1;
5671b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
5672b8e80941Smrg		if (r)
5673b8e80941Smrg			return r;
5674b8e80941Smrg	}
5675b8e80941Smrg	return 0;
5676b8e80941Smrg}
5677848b8605Smrg
5678b8e80941Smrgstatic int tgsi_pow(struct r600_shader_ctx *ctx)
5679b8e80941Smrg{
5680b8e80941Smrg	struct r600_bytecode_alu alu;
5681b8e80941Smrg	int r;
5682848b8605Smrg
5683b8e80941Smrg	/* LOG2(a) */
5684b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5685b8e80941Smrg	alu.op = ALU_OP1_LOG_IEEE;
5686b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
5687b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5688b8e80941Smrg	alu.dst.write = 1;
5689b8e80941Smrg	alu.last = 1;
5690b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5691b8e80941Smrg	if (r)
5692b8e80941Smrg		return r;
5693b8e80941Smrg	/* b * LOG2(a) */
5694b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5695b8e80941Smrg	alu.op = ALU_OP2_MUL;
5696b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
5697b8e80941Smrg	alu.src[1].sel = ctx->temp_reg;
5698b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5699b8e80941Smrg	alu.dst.write = 1;
5700b8e80941Smrg	alu.last = 1;
5701b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5702b8e80941Smrg	if (r)
5703b8e80941Smrg		return r;
5704b8e80941Smrg	/* POW(a,b) = EXP2(b * LOG2(a))*/
5705b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5706b8e80941Smrg	alu.op = ALU_OP1_EXP_IEEE;
5707b8e80941Smrg	alu.src[0].sel = ctx->temp_reg;
5708b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
5709b8e80941Smrg	alu.dst.write = 1;
5710b8e80941Smrg	alu.last = 1;
5711b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
5712b8e80941Smrg	if (r)
5713b8e80941Smrg		return r;
5714b8e80941Smrg	return tgsi_helper_tempx_replicate(ctx);
5715b8e80941Smrg}
5716848b8605Smrg
5717b8e80941Smrgstatic int emit_mul_int_op(struct r600_bytecode *bc,
5718b8e80941Smrg			   struct r600_bytecode_alu *alu_src)
5719b8e80941Smrg{
5720b8e80941Smrg	struct r600_bytecode_alu alu;
5721b8e80941Smrg	int i, r;
5722b8e80941Smrg	alu = *alu_src;
5723b8e80941Smrg	if (bc->chip_class == CAYMAN) {
5724b8e80941Smrg		for (i = 0; i < 4; i++) {
5725b8e80941Smrg			alu.dst.chan = i;
5726b8e80941Smrg			alu.dst.write = (i == alu_src->dst.chan);
5727b8e80941Smrg			alu.last = (i == 3);
5728848b8605Smrg
5729b8e80941Smrg			r = r600_bytecode_add_alu(bc, &alu);
5730b8e80941Smrg			if (r)
5731848b8605Smrg				return r;
5732848b8605Smrg		}
5733b8e80941Smrg	} else {
5734b8e80941Smrg		alu.last = 1;
5735b8e80941Smrg		r = r600_bytecode_add_alu(bc, &alu);
5736b8e80941Smrg		if (r)
5737b8e80941Smrg			return r;
5738b8e80941Smrg	}
5739b8e80941Smrg	return 0;
5740b8e80941Smrg}
5741848b8605Smrg
5742b8e80941Smrgstatic int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
5743b8e80941Smrg{
5744b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5745b8e80941Smrg	struct r600_bytecode_alu alu;
5746b8e80941Smrg	int i, r, j;
5747b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
5748b8e80941Smrg	int lasti = tgsi_last_instruction(write_mask);
5749b8e80941Smrg	int tmp0 = ctx->temp_reg;
5750b8e80941Smrg	int tmp1 = r600_get_temp(ctx);
5751b8e80941Smrg	int tmp2 = r600_get_temp(ctx);
5752b8e80941Smrg	int tmp3 = r600_get_temp(ctx);
5753b8e80941Smrg	int tmp4 = 0;
5754848b8605Smrg
5755b8e80941Smrg	/* Use additional temp if dst register and src register are the same */
5756b8e80941Smrg	if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index ||
5757b8e80941Smrg	    inst->Src[1].Register.Index == inst->Dst[0].Register.Index) {
5758b8e80941Smrg		tmp4 = r600_get_temp(ctx);
5759b8e80941Smrg	}
5760848b8605Smrg
5761b8e80941Smrg	/* Unsigned path:
5762b8e80941Smrg	 *
5763b8e80941Smrg	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
5764b8e80941Smrg	 *
5765b8e80941Smrg	 * 1. tmp0.x = rcp (src2)     = 2^32/src2 + e, where e is rounding error
5766b8e80941Smrg	 * 2. tmp0.z = lo (tmp0.x * src2)
5767b8e80941Smrg	 * 3. tmp0.w = -tmp0.z
5768b8e80941Smrg	 * 4. tmp0.y = hi (tmp0.x * src2)
5769b8e80941Smrg	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src2))
5770b8e80941Smrg	 * 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error
5771b8e80941Smrg	 * 7. tmp1.x = tmp0.x - tmp0.w
5772b8e80941Smrg	 * 8. tmp1.y = tmp0.x + tmp0.w
5773b8e80941Smrg	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
5774b8e80941Smrg	 * 10. tmp0.z = hi(tmp0.x * src1)     = q
5775b8e80941Smrg	 * 11. tmp0.y = lo (tmp0.z * src2)     = src2*q = src1 - r
5776b8e80941Smrg	 *
5777b8e80941Smrg	 * 12. tmp0.w = src1 - tmp0.y       = r
5778b8e80941Smrg	 * 13. tmp1.x = tmp0.w >= src2		= r >= src2 (uint comparison)
5779b8e80941Smrg	 * 14. tmp1.y = src1 >= tmp0.y      = r >= 0 (uint comparison)
5780b8e80941Smrg	 *
5781b8e80941Smrg	 * if DIV
5782b8e80941Smrg	 *
5783b8e80941Smrg	 *   15. tmp1.z = tmp0.z + 1			= q + 1
5784b8e80941Smrg	 *   16. tmp1.w = tmp0.z - 1			= q - 1
5785b8e80941Smrg	 *
5786b8e80941Smrg	 * else MOD
5787b8e80941Smrg	 *
5788b8e80941Smrg	 *   15. tmp1.z = tmp0.w - src2			= r - src2
5789b8e80941Smrg	 *   16. tmp1.w = tmp0.w + src2			= r + src2
5790b8e80941Smrg	 *
5791b8e80941Smrg	 * endif
5792b8e80941Smrg	 *
5793b8e80941Smrg	 * 17. tmp1.x = tmp1.x & tmp1.y
5794b8e80941Smrg	 *
5795b8e80941Smrg	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
5796b8e80941Smrg	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
5797b8e80941Smrg	 *
5798b8e80941Smrg	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
5799b8e80941Smrg	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
5800b8e80941Smrg	 *
5801b8e80941Smrg	 * Signed path:
5802b8e80941Smrg	 *
5803b8e80941Smrg	 * Same as unsigned, using abs values of the operands,
5804b8e80941Smrg	 * and fixing the sign of the result in the end.
5805b8e80941Smrg	 */
5806848b8605Smrg
5807b8e80941Smrg	for (i = 0; i < 4; i++) {
5808b8e80941Smrg		if (!(write_mask & (1<<i)))
5809b8e80941Smrg			continue;
5810848b8605Smrg
5811b8e80941Smrg		if (signed_op) {
5812b8e80941Smrg
5813b8e80941Smrg			/* tmp2.x = -src0 */
5814848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5815b8e80941Smrg			alu.op = ALU_OP2_SUB_INT;
5816848b8605Smrg
5817b8e80941Smrg			alu.dst.sel = tmp2;
5818b8e80941Smrg			alu.dst.chan = 0;
5819848b8605Smrg			alu.dst.write = 1;
5820848b8605Smrg
5821b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_0;
5822b8e80941Smrg
5823b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5824848b8605Smrg
5825848b8605Smrg			alu.last = 1;
5826848b8605Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5827848b8605Smrg				return r;
5828848b8605Smrg
5829b8e80941Smrg			/* tmp2.y = -src1 */
5830b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5831b8e80941Smrg			alu.op = ALU_OP2_SUB_INT;
5832848b8605Smrg
5833b8e80941Smrg			alu.dst.sel = tmp2;
5834b8e80941Smrg			alu.dst.chan = 1;
5835b8e80941Smrg			alu.dst.write = 1;
5836848b8605Smrg
5837b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_0;
5838848b8605Smrg
5839b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5840848b8605Smrg
5841b8e80941Smrg			alu.last = 1;
5842b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5843b8e80941Smrg				return r;
5844848b8605Smrg
5845b8e80941Smrg			/* tmp2.z sign bit is set if src0 and src2 signs are different */
5846b8e80941Smrg			/* it will be a sign of the quotient */
5847b8e80941Smrg			if (!mod) {
5848848b8605Smrg
5849b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5850b8e80941Smrg				alu.op = ALU_OP2_XOR_INT;
5851848b8605Smrg
5852b8e80941Smrg				alu.dst.sel = tmp2;
5853b8e80941Smrg				alu.dst.chan = 2;
5854b8e80941Smrg				alu.dst.write = 1;
5855848b8605Smrg
5856b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
5857b8e80941Smrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5858848b8605Smrg
5859b8e80941Smrg				alu.last = 1;
5860b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5861b8e80941Smrg					return r;
5862b8e80941Smrg			}
5863848b8605Smrg
5864b8e80941Smrg			/* tmp2.x = |src0| */
5865b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5866b8e80941Smrg			alu.op = ALU_OP3_CNDGE_INT;
5867b8e80941Smrg			alu.is_op3 = 1;
5868848b8605Smrg
5869b8e80941Smrg			alu.dst.sel = tmp2;
5870b8e80941Smrg			alu.dst.chan = 0;
5871b8e80941Smrg			alu.dst.write = 1;
5872848b8605Smrg
5873b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
5874b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5875b8e80941Smrg			alu.src[2].sel = tmp2;
5876b8e80941Smrg			alu.src[2].chan = 0;
5877848b8605Smrg
5878b8e80941Smrg			alu.last = 1;
5879b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5880b8e80941Smrg				return r;
5881848b8605Smrg
5882b8e80941Smrg			/* tmp2.y = |src1| */
5883848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5884b8e80941Smrg			alu.op = ALU_OP3_CNDGE_INT;
5885b8e80941Smrg			alu.is_op3 = 1;
5886848b8605Smrg
5887b8e80941Smrg			alu.dst.sel = tmp2;
5888b8e80941Smrg			alu.dst.chan = 1;
5889848b8605Smrg			alu.dst.write = 1;
5890848b8605Smrg
5891b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
5892b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5893b8e80941Smrg			alu.src[2].sel = tmp2;
5894b8e80941Smrg			alu.src[2].chan = 1;
5895848b8605Smrg
5896848b8605Smrg			alu.last = 1;
5897848b8605Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5898848b8605Smrg				return r;
5899848b8605Smrg
5900b8e80941Smrg		}
5901b8e80941Smrg
5902b8e80941Smrg		/* 1. tmp0.x = rcp_u (src2)     = 2^32/src2 + e, where e is rounding error */
5903b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
5904b8e80941Smrg			/* tmp3.x = u2f(src2) */
5905848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5906b8e80941Smrg			alu.op = ALU_OP1_UINT_TO_FLT;
5907848b8605Smrg
5908b8e80941Smrg			alu.dst.sel = tmp3;
5909b8e80941Smrg			alu.dst.chan = 0;
5910848b8605Smrg			alu.dst.write = 1;
5911848b8605Smrg
5912848b8605Smrg			if (signed_op) {
5913b8e80941Smrg				alu.src[0].sel = tmp2;
5914b8e80941Smrg				alu.src[0].chan = 1;
5915848b8605Smrg			} else {
5916b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
5917848b8605Smrg			}
5918848b8605Smrg
5919848b8605Smrg			alu.last = 1;
5920848b8605Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5921848b8605Smrg				return r;
5922848b8605Smrg
5923b8e80941Smrg			/* tmp0.x = recip(tmp3.x) */
5924b8e80941Smrg			for (j = 0 ; j < 3; j++) {
5925b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5926b8e80941Smrg				alu.op = ALU_OP1_RECIP_IEEE;
5927848b8605Smrg
5928b8e80941Smrg				alu.dst.sel = tmp0;
5929b8e80941Smrg				alu.dst.chan = j;
5930b8e80941Smrg				alu.dst.write = (j == 0);
5931848b8605Smrg
5932b8e80941Smrg				alu.src[0].sel = tmp3;
5933b8e80941Smrg				alu.src[0].chan = 0;
5934b8e80941Smrg
5935b8e80941Smrg				if (j == 2)
5936b8e80941Smrg					alu.last = 1;
5937b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5938b8e80941Smrg					return r;
5939b8e80941Smrg			}
5940b8e80941Smrg
5941b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5942b8e80941Smrg			alu.op = ALU_OP2_MUL;
5943848b8605Smrg
5944848b8605Smrg			alu.src[0].sel = tmp0;
5945b8e80941Smrg			alu.src[0].chan = 0;
5946b8e80941Smrg
5947b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
5948b8e80941Smrg			alu.src[1].value = 0x4f800000;
5949848b8605Smrg
5950b8e80941Smrg			alu.dst.sel = tmp3;
5951b8e80941Smrg			alu.dst.write = 1;
5952848b8605Smrg			alu.last = 1;
5953b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
5954b8e80941Smrg			if (r)
5955848b8605Smrg				return r;
5956848b8605Smrg
5957848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5958b8e80941Smrg			alu.op = ALU_OP1_FLT_TO_UINT;
5959848b8605Smrg
5960b8e80941Smrg			alu.dst.sel = tmp0;
5961b8e80941Smrg			alu.dst.chan = 0;
5962848b8605Smrg			alu.dst.write = 1;
5963848b8605Smrg
5964b8e80941Smrg			alu.src[0].sel = tmp3;
5965b8e80941Smrg			alu.src[0].chan = 0;
5966848b8605Smrg
5967848b8605Smrg			alu.last = 1;
5968848b8605Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5969848b8605Smrg				return r;
5970848b8605Smrg
5971b8e80941Smrg		} else {
5972b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5973b8e80941Smrg			alu.op = ALU_OP1_RECIP_UINT;
5974b8e80941Smrg
5975b8e80941Smrg			alu.dst.sel = tmp0;
5976b8e80941Smrg			alu.dst.chan = 0;
5977b8e80941Smrg			alu.dst.write = 1;
5978b8e80941Smrg
5979b8e80941Smrg			if (signed_op) {
5980b8e80941Smrg				alu.src[0].sel = tmp2;
5981b8e80941Smrg				alu.src[0].chan = 1;
5982b8e80941Smrg			} else {
5983b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
5984b8e80941Smrg			}
5985b8e80941Smrg
5986b8e80941Smrg			alu.last = 1;
5987b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5988b8e80941Smrg				return r;
5989848b8605Smrg		}
5990848b8605Smrg
5991b8e80941Smrg		/* 2. tmp0.z = lo (tmp0.x * src2) */
5992848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5993b8e80941Smrg		alu.op = ALU_OP2_MULLO_UINT;
5994848b8605Smrg
5995b8e80941Smrg		alu.dst.sel = tmp0;
5996b8e80941Smrg		alu.dst.chan = 2;
5997848b8605Smrg		alu.dst.write = 1;
5998848b8605Smrg
5999b8e80941Smrg		alu.src[0].sel = tmp0;
6000848b8605Smrg		alu.src[0].chan = 0;
6001b8e80941Smrg		if (signed_op) {
6002b8e80941Smrg			alu.src[1].sel = tmp2;
6003b8e80941Smrg			alu.src[1].chan = 1;
6004b8e80941Smrg		} else {
6005b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6006b8e80941Smrg		}
6007848b8605Smrg
6008b8e80941Smrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6009848b8605Smrg			return r;
6010848b8605Smrg
6011b8e80941Smrg		/* 3. tmp0.w = -tmp0.z */
6012848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6013b8e80941Smrg		alu.op = ALU_OP2_SUB_INT;
6014848b8605Smrg
6015848b8605Smrg		alu.dst.sel = tmp0;
6016b8e80941Smrg		alu.dst.chan = 3;
6017848b8605Smrg		alu.dst.write = 1;
6018848b8605Smrg
6019b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
6020848b8605Smrg		alu.src[1].sel = tmp0;
6021b8e80941Smrg		alu.src[1].chan = 2;
6022848b8605Smrg
6023848b8605Smrg		alu.last = 1;
6024848b8605Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6025848b8605Smrg			return r;
6026848b8605Smrg
6027b8e80941Smrg		/* 4. tmp0.y = hi (tmp0.x * src2) */
6028848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6029b8e80941Smrg		alu.op = ALU_OP2_MULHI_UINT;
6030b8e80941Smrg
6031b8e80941Smrg		alu.dst.sel = tmp0;
6032b8e80941Smrg		alu.dst.chan = 1;
6033b8e80941Smrg		alu.dst.write = 1;
6034b8e80941Smrg
6035b8e80941Smrg		alu.src[0].sel = tmp0;
6036b8e80941Smrg		alu.src[0].chan = 0;
6037848b8605Smrg
6038848b8605Smrg		if (signed_op) {
6039b8e80941Smrg			alu.src[1].sel = tmp2;
6040b8e80941Smrg			alu.src[1].chan = 1;
6041848b8605Smrg		} else {
6042b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6043848b8605Smrg		}
6044848b8605Smrg
6045b8e80941Smrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6046b8e80941Smrg			return r;
6047b8e80941Smrg
6048b8e80941Smrg		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src)) */
6049b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6050b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
6051b8e80941Smrg		alu.is_op3 = 1;
6052b8e80941Smrg
6053b8e80941Smrg		alu.dst.sel = tmp0;
6054b8e80941Smrg		alu.dst.chan = 2;
6055b8e80941Smrg		alu.dst.write = 1;
6056b8e80941Smrg
6057b8e80941Smrg		alu.src[0].sel = tmp0;
6058848b8605Smrg		alu.src[0].chan = 1;
6059b8e80941Smrg		alu.src[1].sel = tmp0;
6060848b8605Smrg		alu.src[1].chan = 3;
6061848b8605Smrg		alu.src[2].sel = tmp0;
6062848b8605Smrg		alu.src[2].chan = 2;
6063848b8605Smrg
6064848b8605Smrg		alu.last = 1;
6065848b8605Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6066848b8605Smrg			return r;
6067848b8605Smrg
6068b8e80941Smrg		/* 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error */
6069b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6070b8e80941Smrg		alu.op = ALU_OP2_MULHI_UINT;
6071848b8605Smrg
6072b8e80941Smrg		alu.dst.sel = tmp0;
6073b8e80941Smrg		alu.dst.chan = 3;
6074b8e80941Smrg		alu.dst.write = 1;
6075848b8605Smrg
6076b8e80941Smrg		alu.src[0].sel = tmp0;
6077b8e80941Smrg		alu.src[0].chan = 2;
6078848b8605Smrg
6079b8e80941Smrg		alu.src[1].sel = tmp0;
6080b8e80941Smrg		alu.src[1].chan = 0;
6081848b8605Smrg
6082b8e80941Smrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6083b8e80941Smrg				return r;
6084848b8605Smrg
6085b8e80941Smrg		/* 7. tmp1.x = tmp0.x - tmp0.w */
6086b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6087b8e80941Smrg		alu.op = ALU_OP2_SUB_INT;
6088848b8605Smrg
6089b8e80941Smrg		alu.dst.sel = tmp1;
6090b8e80941Smrg		alu.dst.chan = 0;
6091b8e80941Smrg		alu.dst.write = 1;
6092848b8605Smrg
6093b8e80941Smrg		alu.src[0].sel = tmp0;
6094b8e80941Smrg		alu.src[0].chan = 0;
6095b8e80941Smrg		alu.src[1].sel = tmp0;
6096b8e80941Smrg		alu.src[1].chan = 3;
6097848b8605Smrg
6098b8e80941Smrg		alu.last = 1;
6099b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6100b8e80941Smrg			return r;
6101848b8605Smrg
6102b8e80941Smrg		/* 8. tmp1.y = tmp0.x + tmp0.w */
6103b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6104b8e80941Smrg		alu.op = ALU_OP2_ADD_INT;
6105848b8605Smrg
6106b8e80941Smrg		alu.dst.sel = tmp1;
6107b8e80941Smrg		alu.dst.chan = 1;
6108b8e80941Smrg		alu.dst.write = 1;
6109848b8605Smrg
6110b8e80941Smrg		alu.src[0].sel = tmp0;
6111b8e80941Smrg		alu.src[0].chan = 0;
6112b8e80941Smrg		alu.src[1].sel = tmp0;
6113b8e80941Smrg		alu.src[1].chan = 3;
6114848b8605Smrg
6115b8e80941Smrg		alu.last = 1;
6116b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6117b8e80941Smrg			return r;
6118848b8605Smrg
6119b8e80941Smrg		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
6120b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6121b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
6122b8e80941Smrg		alu.is_op3 = 1;
6123848b8605Smrg
6124b8e80941Smrg		alu.dst.sel = tmp0;
6125b8e80941Smrg		alu.dst.chan = 0;
6126b8e80941Smrg		alu.dst.write = 1;
6127848b8605Smrg
6128b8e80941Smrg		alu.src[0].sel = tmp0;
6129b8e80941Smrg		alu.src[0].chan = 1;
6130b8e80941Smrg		alu.src[1].sel = tmp1;
6131b8e80941Smrg		alu.src[1].chan = 1;
6132b8e80941Smrg		alu.src[2].sel = tmp1;
6133b8e80941Smrg		alu.src[2].chan = 0;
6134848b8605Smrg
6135b8e80941Smrg		alu.last = 1;
6136b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6137b8e80941Smrg			return r;
6138848b8605Smrg
6139b8e80941Smrg		/* 10. tmp0.z = hi(tmp0.x * src1)     = q */
6140b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6141b8e80941Smrg		alu.op = ALU_OP2_MULHI_UINT;
6142848b8605Smrg
6143b8e80941Smrg		alu.dst.sel = tmp0;
6144b8e80941Smrg		alu.dst.chan = 2;
6145b8e80941Smrg		alu.dst.write = 1;
6146848b8605Smrg
6147b8e80941Smrg		alu.src[0].sel = tmp0;
6148b8e80941Smrg		alu.src[0].chan = 0;
6149848b8605Smrg
6150b8e80941Smrg		if (signed_op) {
6151b8e80941Smrg			alu.src[1].sel = tmp2;
6152b8e80941Smrg			alu.src[1].chan = 0;
6153b8e80941Smrg		} else {
6154b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
6155b8e80941Smrg		}
6156848b8605Smrg
6157b8e80941Smrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6158b8e80941Smrg			return r;
6159848b8605Smrg
6160b8e80941Smrg		/* 11. tmp0.y = lo (src2 * tmp0.z)     = src2*q = src1 - r */
6161b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6162b8e80941Smrg		alu.op = ALU_OP2_MULLO_UINT;
6163848b8605Smrg
6164b8e80941Smrg		alu.dst.sel = tmp0;
6165b8e80941Smrg		alu.dst.chan = 1;
6166b8e80941Smrg		alu.dst.write = 1;
6167848b8605Smrg
6168b8e80941Smrg		if (signed_op) {
6169b8e80941Smrg			alu.src[0].sel = tmp2;
6170b8e80941Smrg			alu.src[0].chan = 1;
6171b8e80941Smrg		} else {
6172b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
6173b8e80941Smrg		}
6174848b8605Smrg
6175b8e80941Smrg		alu.src[1].sel = tmp0;
6176b8e80941Smrg		alu.src[1].chan = 2;
6177848b8605Smrg
6178b8e80941Smrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6179b8e80941Smrg			return r;
6180848b8605Smrg
6181b8e80941Smrg		/* 12. tmp0.w = src1 - tmp0.y       = r */
6182848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6183b8e80941Smrg		alu.op = ALU_OP2_SUB_INT;
6184848b8605Smrg
6185b8e80941Smrg		alu.dst.sel = tmp0;
6186b8e80941Smrg		alu.dst.chan = 3;
6187848b8605Smrg		alu.dst.write = 1;
6188848b8605Smrg
6189b8e80941Smrg		if (signed_op) {
6190b8e80941Smrg			alu.src[0].sel = tmp2;
6191b8e80941Smrg			alu.src[0].chan = 0;
6192b8e80941Smrg		} else {
6193b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6194b8e80941Smrg		}
6195848b8605Smrg
6196b8e80941Smrg		alu.src[1].sel = tmp0;
6197b8e80941Smrg		alu.src[1].chan = 1;
6198848b8605Smrg
6199b8e80941Smrg		alu.last = 1;
6200b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6201848b8605Smrg			return r;
6202848b8605Smrg
6203b8e80941Smrg		/* 13. tmp1.x = tmp0.w >= src2		= r >= src2 */
6204848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6205b8e80941Smrg		alu.op = ALU_OP2_SETGE_UINT;
6206848b8605Smrg
6207b8e80941Smrg		alu.dst.sel = tmp1;
6208b8e80941Smrg		alu.dst.chan = 0;
6209848b8605Smrg		alu.dst.write = 1;
6210848b8605Smrg
6211b8e80941Smrg		alu.src[0].sel = tmp0;
6212b8e80941Smrg		alu.src[0].chan = 3;
6213b8e80941Smrg		if (signed_op) {
6214b8e80941Smrg			alu.src[1].sel = tmp2;
6215b8e80941Smrg			alu.src[1].chan = 1;
6216b8e80941Smrg		} else {
6217b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6218b8e80941Smrg		}
6219848b8605Smrg
6220b8e80941Smrg		alu.last = 1;
6221b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6222848b8605Smrg			return r;
6223848b8605Smrg
6224b8e80941Smrg		/* 14. tmp1.y = src1 >= tmp0.y       = r >= 0 */
6225848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6226b8e80941Smrg		alu.op = ALU_OP2_SETGE_UINT;
6227b8e80941Smrg
6228b8e80941Smrg		alu.dst.sel = tmp1;
6229b8e80941Smrg		alu.dst.chan = 1;
6230848b8605Smrg		alu.dst.write = 1;
6231848b8605Smrg
6232b8e80941Smrg		if (signed_op) {
6233b8e80941Smrg			alu.src[0].sel = tmp2;
6234b8e80941Smrg			alu.src[0].chan = 0;
6235b8e80941Smrg		} else {
6236b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6237b8e80941Smrg		}
6238848b8605Smrg
6239b8e80941Smrg		alu.src[1].sel = tmp0;
6240b8e80941Smrg		alu.src[1].chan = 1;
6241848b8605Smrg
6242b8e80941Smrg		alu.last = 1;
6243b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6244848b8605Smrg			return r;
6245848b8605Smrg
6246b8e80941Smrg		if (mod) { /* UMOD */
6247848b8605Smrg
6248b8e80941Smrg			/* 15. tmp1.z = tmp0.w - src2			= r - src2 */
6249b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6250b8e80941Smrg			alu.op = ALU_OP2_SUB_INT;
6251848b8605Smrg
6252b8e80941Smrg			alu.dst.sel = tmp1;
6253b8e80941Smrg			alu.dst.chan = 2;
6254b8e80941Smrg			alu.dst.write = 1;
6255848b8605Smrg
6256b8e80941Smrg			alu.src[0].sel = tmp0;
6257b8e80941Smrg			alu.src[0].chan = 3;
6258848b8605Smrg
6259b8e80941Smrg			if (signed_op) {
6260b8e80941Smrg				alu.src[1].sel = tmp2;
6261b8e80941Smrg				alu.src[1].chan = 1;
6262b8e80941Smrg			} else {
6263b8e80941Smrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6264b8e80941Smrg			}
6265848b8605Smrg
6266848b8605Smrg			alu.last = 1;
6267b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6268b8e80941Smrg				return r;
6269848b8605Smrg
6270b8e80941Smrg			/* 16. tmp1.w = tmp0.w + src2			= r + src2 */
6271b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6272b8e80941Smrg			alu.op = ALU_OP2_ADD_INT;
6273848b8605Smrg
6274b8e80941Smrg			alu.dst.sel = tmp1;
6275b8e80941Smrg			alu.dst.chan = 3;
6276b8e80941Smrg			alu.dst.write = 1;
6277848b8605Smrg
6278b8e80941Smrg			alu.src[0].sel = tmp0;
6279b8e80941Smrg			alu.src[0].chan = 3;
6280b8e80941Smrg			if (signed_op) {
6281b8e80941Smrg				alu.src[1].sel = tmp2;
6282b8e80941Smrg				alu.src[1].chan = 1;
6283b8e80941Smrg			} else {
6284b8e80941Smrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6285b8e80941Smrg			}
6286848b8605Smrg
6287b8e80941Smrg			alu.last = 1;
6288b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6289b8e80941Smrg				return r;
6290848b8605Smrg
6291b8e80941Smrg		} else { /* UDIV */
6292848b8605Smrg
6293b8e80941Smrg			/* 15. tmp1.z = tmp0.z + 1       = q + 1       DIV */
6294b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6295b8e80941Smrg			alu.op = ALU_OP2_ADD_INT;
6296b8e80941Smrg
6297b8e80941Smrg			alu.dst.sel = tmp1;
6298b8e80941Smrg			alu.dst.chan = 2;
6299b8e80941Smrg			alu.dst.write = 1;
6300b8e80941Smrg
6301b8e80941Smrg			alu.src[0].sel = tmp0;
6302b8e80941Smrg			alu.src[0].chan = 2;
6303b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
6304848b8605Smrg
6305848b8605Smrg			alu.last = 1;
6306b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6307b8e80941Smrg				return r;
6308848b8605Smrg
6309b8e80941Smrg			/* 16. tmp1.w = tmp0.z - 1			= q - 1 */
6310b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6311b8e80941Smrg			alu.op = ALU_OP2_ADD_INT;
6312848b8605Smrg
6313b8e80941Smrg			alu.dst.sel = tmp1;
6314b8e80941Smrg			alu.dst.chan = 3;
6315b8e80941Smrg			alu.dst.write = 1;
6316848b8605Smrg
6317b8e80941Smrg			alu.src[0].sel = tmp0;
6318b8e80941Smrg			alu.src[0].chan = 2;
6319b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
6320848b8605Smrg
6321b8e80941Smrg			alu.last = 1;
6322b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6323b8e80941Smrg				return r;
6324b8e80941Smrg
6325b8e80941Smrg		}
6326b8e80941Smrg
6327b8e80941Smrg		/* 17. tmp1.x = tmp1.x & tmp1.y */
6328848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6329b8e80941Smrg		alu.op = ALU_OP2_AND_INT;
6330848b8605Smrg
6331b8e80941Smrg		alu.dst.sel = tmp1;
6332b8e80941Smrg		alu.dst.chan = 0;
6333b8e80941Smrg		alu.dst.write = 1;
6334848b8605Smrg
6335b8e80941Smrg		alu.src[0].sel = tmp1;
6336b8e80941Smrg		alu.src[0].chan = 0;
6337b8e80941Smrg		alu.src[1].sel = tmp1;
6338b8e80941Smrg		alu.src[1].chan = 1;
6339848b8605Smrg
6340b8e80941Smrg		alu.last = 1;
6341b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6342848b8605Smrg			return r;
6343848b8605Smrg
6344b8e80941Smrg		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z    DIV */
6345b8e80941Smrg		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z    MOD */
6346848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6347b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
6348848b8605Smrg		alu.is_op3 = 1;
6349848b8605Smrg
6350b8e80941Smrg		alu.dst.sel = tmp0;
6351b8e80941Smrg		alu.dst.chan = 2;
6352b8e80941Smrg		alu.dst.write = 1;
6353848b8605Smrg
6354b8e80941Smrg		alu.src[0].sel = tmp1;
6355b8e80941Smrg		alu.src[0].chan = 0;
6356b8e80941Smrg		alu.src[1].sel = tmp0;
6357b8e80941Smrg		alu.src[1].chan = mod ? 3 : 2;
6358b8e80941Smrg		alu.src[2].sel = tmp1;
6359b8e80941Smrg		alu.src[2].chan = 2;
6360848b8605Smrg
6361b8e80941Smrg		alu.last = 1;
6362b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6363b8e80941Smrg			return r;
6364848b8605Smrg
6365b8e80941Smrg		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
6366b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6367b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
6368b8e80941Smrg		alu.is_op3 = 1;
6369b8e80941Smrg
6370b8e80941Smrg		if (signed_op) {
6371b8e80941Smrg			alu.dst.sel = tmp0;
6372b8e80941Smrg			alu.dst.chan = 2;
6373b8e80941Smrg			alu.dst.write = 1;
6374b8e80941Smrg		} else {
6375b8e80941Smrg			if (tmp4 > 0) {
6376b8e80941Smrg				alu.dst.sel = tmp4;
6377b8e80941Smrg				alu.dst.chan = i;
6378b8e80941Smrg				alu.dst.write = 1;
6379b8e80941Smrg			} else {
6380b8e80941Smrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6381b8e80941Smrg			}
6382b8e80941Smrg		}
6383b8e80941Smrg
6384b8e80941Smrg		alu.src[0].sel = tmp1;
6385b8e80941Smrg		alu.src[0].chan = 1;
6386b8e80941Smrg		alu.src[1].sel = tmp1;
6387b8e80941Smrg		alu.src[1].chan = 3;
6388b8e80941Smrg		alu.src[2].sel = tmp0;
6389b8e80941Smrg		alu.src[2].chan = 2;
6390b8e80941Smrg
6391b8e80941Smrg		alu.last = 1;
6392b8e80941Smrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6393848b8605Smrg			return r;
6394b8e80941Smrg
6395b8e80941Smrg		if (signed_op) {
6396b8e80941Smrg
6397b8e80941Smrg			/* fix the sign of the result */
6398b8e80941Smrg
6399b8e80941Smrg			if (mod) {
6400b8e80941Smrg
6401b8e80941Smrg				/* tmp0.x = -tmp0.z */
6402b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6403b8e80941Smrg				alu.op = ALU_OP2_SUB_INT;
6404b8e80941Smrg
6405b8e80941Smrg				alu.dst.sel = tmp0;
6406b8e80941Smrg				alu.dst.chan = 0;
6407b8e80941Smrg				alu.dst.write = 1;
6408b8e80941Smrg
6409b8e80941Smrg				alu.src[0].sel = V_SQ_ALU_SRC_0;
6410b8e80941Smrg				alu.src[1].sel = tmp0;
6411b8e80941Smrg				alu.src[1].chan = 2;
6412b8e80941Smrg
6413b8e80941Smrg				alu.last = 1;
6414b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6415b8e80941Smrg					return r;
6416b8e80941Smrg
6417b8e80941Smrg				/* sign of the remainder is the same as the sign of src0 */
6418b8e80941Smrg				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
6419b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6420b8e80941Smrg				alu.op = ALU_OP3_CNDGE_INT;
6421b8e80941Smrg				alu.is_op3 = 1;
6422b8e80941Smrg
6423b8e80941Smrg				if (tmp4 > 0) {
6424b8e80941Smrg					alu.dst.sel = tmp4;
6425b8e80941Smrg					alu.dst.chan = i;
6426b8e80941Smrg					alu.dst.write = 1;
6427b8e80941Smrg				} else {
6428b8e80941Smrg					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6429b8e80941Smrg				}
6430b8e80941Smrg
6431b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6432b8e80941Smrg				alu.src[1].sel = tmp0;
6433b8e80941Smrg				alu.src[1].chan = 2;
6434b8e80941Smrg				alu.src[2].sel = tmp0;
6435b8e80941Smrg				alu.src[2].chan = 0;
6436b8e80941Smrg
6437b8e80941Smrg				alu.last = 1;
6438b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6439b8e80941Smrg					return r;
6440b8e80941Smrg
6441b8e80941Smrg			} else {
6442b8e80941Smrg
6443b8e80941Smrg				/* tmp0.x = -tmp0.z */
6444b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6445b8e80941Smrg				alu.op = ALU_OP2_SUB_INT;
6446b8e80941Smrg
6447b8e80941Smrg				alu.dst.sel = tmp0;
6448b8e80941Smrg				alu.dst.chan = 0;
6449b8e80941Smrg				alu.dst.write = 1;
6450b8e80941Smrg
6451b8e80941Smrg				alu.src[0].sel = V_SQ_ALU_SRC_0;
6452b8e80941Smrg				alu.src[1].sel = tmp0;
6453b8e80941Smrg				alu.src[1].chan = 2;
6454b8e80941Smrg
6455b8e80941Smrg				alu.last = 1;
6456b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6457b8e80941Smrg					return r;
6458b8e80941Smrg
6459b8e80941Smrg				/* fix the quotient sign (same as the sign of src0*src1) */
6460b8e80941Smrg				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
6461b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6462b8e80941Smrg				alu.op = ALU_OP3_CNDGE_INT;
6463b8e80941Smrg				alu.is_op3 = 1;
6464b8e80941Smrg
6465b8e80941Smrg				if (tmp4 > 0) {
6466b8e80941Smrg					alu.dst.sel = tmp4;
6467b8e80941Smrg					alu.dst.chan = i;
6468b8e80941Smrg					alu.dst.write = 1;
6469b8e80941Smrg				} else {
6470b8e80941Smrg					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6471b8e80941Smrg				}
6472b8e80941Smrg
6473b8e80941Smrg				alu.src[0].sel = tmp2;
6474b8e80941Smrg				alu.src[0].chan = 2;
6475b8e80941Smrg				alu.src[1].sel = tmp0;
6476b8e80941Smrg				alu.src[1].chan = 2;
6477b8e80941Smrg				alu.src[2].sel = tmp0;
6478b8e80941Smrg				alu.src[2].chan = 0;
6479b8e80941Smrg
6480b8e80941Smrg				alu.last = 1;
6481b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6482b8e80941Smrg					return r;
6483b8e80941Smrg			}
6484b8e80941Smrg		}
6485b8e80941Smrg	}
6486b8e80941Smrg
6487b8e80941Smrg	if (tmp4 > 0) {
6488b8e80941Smrg		for (i = 0; i <= lasti; ++i) {
6489b8e80941Smrg			if (!(write_mask & (1<<i)))
6490b8e80941Smrg				continue;
6491b8e80941Smrg
6492b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6493b8e80941Smrg			alu.op = ALU_OP1_MOV;
6494b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6495b8e80941Smrg			alu.src[0].sel = tmp4;
6496b8e80941Smrg			alu.src[0].chan = i;
6497b8e80941Smrg
6498b8e80941Smrg			if (i == lasti)
6499b8e80941Smrg				alu.last = 1;
6500b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6501b8e80941Smrg				return r;
6502b8e80941Smrg		}
6503848b8605Smrg	}
6504b8e80941Smrg
6505848b8605Smrg	return 0;
6506848b8605Smrg}
6507848b8605Smrg
/*
 * UDIV entry point: forwards to tgsi_divmod() asking for the quotient
 * (mod flag clear) of unsigned operands (signed flag clear).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int operands_signed = 0;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
6512848b8605Smrg
/*
 * UMOD entry point: forwards to tgsi_divmod() asking for the remainder
 * (mod flag set) of unsigned operands (signed flag clear).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int operands_signed = 0;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
6517848b8605Smrg
/*
 * IDIV entry point: forwards to tgsi_divmod() asking for the quotient
 * (mod flag clear) of signed operands (signed flag set).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;
	const int operands_signed = 1;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
6522848b8605Smrg
/*
 * Signed modulo entry point: forwards to tgsi_divmod() asking for the
 * remainder (mod flag set) of signed operands (signed flag set).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;
	const int operands_signed = 1;

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
6527848b8605Smrg
6528848b8605Smrg
6529b8e80941Smrgstatic int tgsi_f2i(struct r600_shader_ctx *ctx)
6530b8e80941Smrg{
6531b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6532b8e80941Smrg	struct r600_bytecode_alu alu;
6533b8e80941Smrg	int i, r;
6534b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6535b8e80941Smrg	int last_inst = tgsi_last_instruction(write_mask);
6536848b8605Smrg
6537848b8605Smrg	for (i = 0; i < 4; i++) {
6538848b8605Smrg		if (!(write_mask & (1<<i)))
6539848b8605Smrg			continue;
6540848b8605Smrg
6541848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6542b8e80941Smrg		alu.op = ALU_OP1_TRUNC;
6543b8e80941Smrg
6544b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
6545848b8605Smrg		alu.dst.chan = i;
6546848b8605Smrg		alu.dst.write = 1;
6547848b8605Smrg
6548b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6549b8e80941Smrg		if (i == last_inst)
6550b8e80941Smrg			alu.last = 1;
6551848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6552848b8605Smrg		if (r)
6553848b8605Smrg			return r;
6554848b8605Smrg	}
6555848b8605Smrg
6556848b8605Smrg	for (i = 0; i < 4; i++) {
6557848b8605Smrg		if (!(write_mask & (1<<i)))
6558848b8605Smrg			continue;
6559848b8605Smrg
6560848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6561b8e80941Smrg		alu.op = ctx->inst_info->op;
6562b8e80941Smrg
6563848b8605Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6564848b8605Smrg
6565b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
6566848b8605Smrg		alu.src[0].chan = i;
6567848b8605Smrg
6568b8e80941Smrg		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
6569b8e80941Smrg			alu.last = 1;
6570848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6571848b8605Smrg		if (r)
6572848b8605Smrg			return r;
6573848b8605Smrg	}
6574848b8605Smrg
6575848b8605Smrg	return 0;
6576848b8605Smrg}
6577848b8605Smrg
6578b8e80941Smrgstatic int tgsi_iabs(struct r600_shader_ctx *ctx)
6579848b8605Smrg{
6580848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6581848b8605Smrg	struct r600_bytecode_alu alu;
6582b8e80941Smrg	int i, r;
6583848b8605Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6584848b8605Smrg	int last_inst = tgsi_last_instruction(write_mask);
6585848b8605Smrg
6586b8e80941Smrg	/* tmp = -src */
6587848b8605Smrg	for (i = 0; i < 4; i++) {
6588848b8605Smrg		if (!(write_mask & (1<<i)))
6589848b8605Smrg			continue;
6590848b8605Smrg
6591848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6592b8e80941Smrg		alu.op = ALU_OP2_SUB_INT;
6593b8e80941Smrg
6594b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
6595848b8605Smrg		alu.dst.chan = i;
6596848b8605Smrg		alu.dst.write = 1;
6597848b8605Smrg
6598b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
6599b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
6600848b8605Smrg
6601b8e80941Smrg		if (i == last_inst)
6602b8e80941Smrg			alu.last = 1;
6603848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6604848b8605Smrg		if (r)
6605848b8605Smrg			return r;
6606848b8605Smrg	}
6607848b8605Smrg
6608b8e80941Smrg	/* dst = (src >= 0 ? src : tmp) */
6609848b8605Smrg	for (i = 0; i < 4; i++) {
6610848b8605Smrg		if (!(write_mask & (1<<i)))
6611848b8605Smrg			continue;
6612848b8605Smrg
6613848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6614b8e80941Smrg		alu.op = ALU_OP3_CNDGE_INT;
6615b8e80941Smrg		alu.is_op3 = 1;
6616848b8605Smrg		alu.dst.write = 1;
6617848b8605Smrg
6618b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6619848b8605Smrg
6620b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6621b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
6622b8e80941Smrg		alu.src[2].sel = ctx->temp_reg;
6623b8e80941Smrg		alu.src[2].chan = i;
6624b8e80941Smrg
6625b8e80941Smrg		if (i == last_inst)
6626b8e80941Smrg			alu.last = 1;
6627848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6628848b8605Smrg		if (r)
6629848b8605Smrg			return r;
6630848b8605Smrg	}
6631b8e80941Smrg	return 0;
6632b8e80941Smrg}
6633b8e80941Smrg
6634b8e80941Smrgstatic int tgsi_issg(struct r600_shader_ctx *ctx)
6635b8e80941Smrg{
6636b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6637b8e80941Smrg	struct r600_bytecode_alu alu;
6638b8e80941Smrg	int i, r;
6639b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6640b8e80941Smrg	int last_inst = tgsi_last_instruction(write_mask);
6641848b8605Smrg
6642b8e80941Smrg	/* tmp = (src >= 0 ? src : -1) */
6643848b8605Smrg	for (i = 0; i < 4; i++) {
6644848b8605Smrg		if (!(write_mask & (1<<i)))
6645848b8605Smrg			continue;
6646848b8605Smrg
6647848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6648848b8605Smrg		alu.op = ALU_OP3_CNDGE_INT;
6649848b8605Smrg		alu.is_op3 = 1;
6650b8e80941Smrg
6651b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
6652848b8605Smrg		alu.dst.chan = i;
6653848b8605Smrg		alu.dst.write = 1;
6654848b8605Smrg
6655b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6656b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
6657b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
6658848b8605Smrg
6659b8e80941Smrg		if (i == last_inst)
6660b8e80941Smrg			alu.last = 1;
6661848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6662848b8605Smrg		if (r)
6663848b8605Smrg			return r;
6664848b8605Smrg	}
6665848b8605Smrg
6666b8e80941Smrg	/* dst = (tmp > 0 ? 1 : tmp) */
6667848b8605Smrg	for (i = 0; i < 4; i++) {
6668b8e80941Smrg		if (!(write_mask & (1<<i)))
6669b8e80941Smrg			continue;
6670b8e80941Smrg
6671848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6672b8e80941Smrg		alu.op = ALU_OP3_CNDGT_INT;
6673b8e80941Smrg		alu.is_op3 = 1;
6674b8e80941Smrg		alu.dst.write = 1;
6675b8e80941Smrg
6676b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6677b8e80941Smrg
6678b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
6679b8e80941Smrg		alu.src[0].chan = i;
6680b8e80941Smrg
6681b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
6682b8e80941Smrg
6683b8e80941Smrg		alu.src[2].sel = ctx->temp_reg;
6684b8e80941Smrg		alu.src[2].chan = i;
6685b8e80941Smrg
6686b8e80941Smrg		if (i == last_inst)
6687848b8605Smrg			alu.last = 1;
6688848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6689848b8605Smrg		if (r)
6690848b8605Smrg			return r;
6691848b8605Smrg	}
6692848b8605Smrg	return 0;
6693848b8605Smrg}
6694848b8605Smrg
6695b8e80941Smrg
6696b8e80941Smrg
6697b8e80941Smrgstatic int tgsi_ssg(struct r600_shader_ctx *ctx)
6698848b8605Smrg{
6699848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6700b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6701b8e80941Smrg	int last_inst = tgsi_last_instruction(write_mask);
6702848b8605Smrg	struct r600_bytecode_alu alu;
6703b8e80941Smrg	int i, r;
6704848b8605Smrg
6705b8e80941Smrg	/* tmp = (src > 0 ? 1 : src) */
6706b8e80941Smrg	for (i = 0; i <= last_inst; i++) {
6707b8e80941Smrg		if (!(write_mask & (1 << i)))
6708848b8605Smrg			continue;
6709848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6710b8e80941Smrg		alu.op = ALU_OP3_CNDGT;
6711b8e80941Smrg		alu.is_op3 = 1;
6712848b8605Smrg
6713b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
6714848b8605Smrg		alu.dst.chan = i;
6715b8e80941Smrg
6716b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6717b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
6718b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
6719b8e80941Smrg
6720b8e80941Smrg		if (i == last_inst)
6721848b8605Smrg			alu.last = 1;
6722848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6723848b8605Smrg		if (r)
6724848b8605Smrg			return r;
6725848b8605Smrg	}
6726848b8605Smrg
6727b8e80941Smrg	/* dst = (-tmp > 0 ? -1 : tmp) */
6728b8e80941Smrg	for (i = 0; i <= last_inst; i++) {
6729b8e80941Smrg		if (!(write_mask & (1 << i)))
6730b8e80941Smrg			continue;
6731848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6732b8e80941Smrg		alu.op = ALU_OP3_CNDGT;
6733b8e80941Smrg		alu.is_op3 = 1;
6734848b8605Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6735b8e80941Smrg
6736b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
6737b8e80941Smrg		alu.src[0].chan = i;
6738b8e80941Smrg		alu.src[0].neg = 1;
6739b8e80941Smrg
6740b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
6741b8e80941Smrg		alu.src[1].neg = 1;
6742b8e80941Smrg
6743b8e80941Smrg		alu.src[2].sel = ctx->temp_reg;
6744b8e80941Smrg		alu.src[2].chan = i;
6745b8e80941Smrg
6746b8e80941Smrg		if (i == last_inst)
6747848b8605Smrg			alu.last = 1;
6748848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6749848b8605Smrg		if (r)
6750848b8605Smrg			return r;
6751848b8605Smrg	}
6752848b8605Smrg	return 0;
6753848b8605Smrg}
6754848b8605Smrg
6755b8e80941Smrgstatic int tgsi_bfi(struct r600_shader_ctx *ctx)
6756848b8605Smrg{
6757848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6758848b8605Smrg	struct r600_bytecode_alu alu;
6759b8e80941Smrg	int i, r, t1, t2;
6760848b8605Smrg
6761b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6762b8e80941Smrg	int last_inst = tgsi_last_instruction(write_mask);
6763848b8605Smrg
6764b8e80941Smrg	t1 = r600_get_temp(ctx);
6765848b8605Smrg
6766b8e80941Smrg	for (i = 0; i < 4; i++) {
6767b8e80941Smrg		if (!(write_mask & (1<<i)))
6768b8e80941Smrg			continue;
6769848b8605Smrg
6770b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6771b8e80941Smrg		alu.op = ALU_OP2_SETGE_INT;
6772b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
6773b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
6774b8e80941Smrg		alu.src[1].value = 32;
6775b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
6776b8e80941Smrg		alu.dst.chan = i;
6777b8e80941Smrg		alu.dst.write = 1;
6778b8e80941Smrg		alu.last = i == last_inst;
6779b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6780b8e80941Smrg		if (r)
6781b8e80941Smrg			return r;
6782b8e80941Smrg	}
6783848b8605Smrg
6784848b8605Smrg	for (i = 0; i < 4; i++) {
6785b8e80941Smrg		if (!(write_mask & (1<<i)))
6786848b8605Smrg			continue;
6787848b8605Smrg
6788b8e80941Smrg		/* create mask tmp */
6789848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6790b8e80941Smrg		alu.op = ALU_OP2_BFM_INT;
6791b8e80941Smrg		alu.dst.sel = t1;
6792848b8605Smrg		alu.dst.chan = i;
6793848b8605Smrg		alu.dst.write = 1;
6794b8e80941Smrg		alu.last = i == last_inst;
6795848b8605Smrg
6796b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
6797b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
6798848b8605Smrg
6799848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6800848b8605Smrg		if (r)
6801848b8605Smrg			return r;
6802848b8605Smrg	}
6803848b8605Smrg
6804b8e80941Smrg	t2 = r600_get_temp(ctx);
6805b8e80941Smrg
6806b8e80941Smrg	for (i = 0; i < 4; i++) {
6807b8e80941Smrg		if (!(write_mask & (1<<i)))
6808b8e80941Smrg			continue;
6809b8e80941Smrg
6810b8e80941Smrg		/* shift insert left */
6811848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6812b8e80941Smrg		alu.op = ALU_OP2_LSHL_INT;
6813b8e80941Smrg		alu.dst.sel = t2;
6814b8e80941Smrg		alu.dst.chan = i;
6815b8e80941Smrg		alu.dst.write = 1;
6816b8e80941Smrg		alu.last = i == last_inst;
6817848b8605Smrg
6818b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
6819b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
6820b8e80941Smrg
6821b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6822b8e80941Smrg		if (r)
6823b8e80941Smrg			return r;
6824b8e80941Smrg	}
6825b8e80941Smrg
6826b8e80941Smrg	for (i = 0; i < 4; i++) {
6827b8e80941Smrg		if (!(write_mask & (1<<i)))
6828b8e80941Smrg			continue;
6829b8e80941Smrg
6830b8e80941Smrg		/* actual bitfield insert */
6831b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6832b8e80941Smrg		alu.op = ALU_OP3_BFI_INT;
6833b8e80941Smrg		alu.is_op3 = 1;
6834b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6835b8e80941Smrg		alu.dst.chan = i;
6836848b8605Smrg		alu.dst.write = 1;
6837b8e80941Smrg		alu.last = i == last_inst;
6838848b8605Smrg
6839b8e80941Smrg		alu.src[0].sel = t1;
6840b8e80941Smrg		alu.src[0].chan = i;
6841b8e80941Smrg		alu.src[1].sel = t2;
6842b8e80941Smrg		alu.src[1].chan = i;
6843b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
6844848b8605Smrg
6845b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6846b8e80941Smrg		if (r)
6847b8e80941Smrg			return r;
6848b8e80941Smrg	}
6849848b8605Smrg
6850b8e80941Smrg	for (i = 0; i < 4; i++) {
6851b8e80941Smrg		if (!(write_mask & (1<<i)))
6852b8e80941Smrg			continue;
6853b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6854b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
6855b8e80941Smrg		alu.is_op3 = 1;
6856b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
6857b8e80941Smrg		alu.src[0].chan = i;
6858b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
6859b8e80941Smrg
6860b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6861b8e80941Smrg
6862b8e80941Smrg		alu.src[1].sel = alu.dst.sel;
6863b8e80941Smrg		alu.src[1].chan = i;
6864b8e80941Smrg
6865b8e80941Smrg		alu.last = i == last_inst;
6866848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6867848b8605Smrg		if (r)
6868848b8605Smrg			return r;
6869848b8605Smrg	}
6870848b8605Smrg	return 0;
6871848b8605Smrg}
6872848b8605Smrg
6873b8e80941Smrgstatic int tgsi_msb(struct r600_shader_ctx *ctx)
6874848b8605Smrg{
6875848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6876848b8605Smrg	struct r600_bytecode_alu alu;
6877b8e80941Smrg	int i, r, t1, t2;
6878848b8605Smrg
6879b8e80941Smrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6880b8e80941Smrg	int last_inst = tgsi_last_instruction(write_mask);
6881848b8605Smrg
6882b8e80941Smrg	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
6883b8e80941Smrg		ctx->inst_info->op == ALU_OP1_FFBH_UINT);
6884b8e80941Smrg
6885b8e80941Smrg	t1 = ctx->temp_reg;
6886b8e80941Smrg
6887b8e80941Smrg	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
6888b8e80941Smrg	for (i = 0; i < 4; i++) {
6889b8e80941Smrg		if (!(write_mask & (1<<i)))
6890b8e80941Smrg			continue;
6891b8e80941Smrg
6892b8e80941Smrg		/* t1 = FFBH_INT / FFBH_UINT */
6893b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6894b8e80941Smrg		alu.op = ctx->inst_info->op;
6895b8e80941Smrg		alu.dst.sel = t1;
6896b8e80941Smrg		alu.dst.chan = i;
6897b8e80941Smrg		alu.dst.write = 1;
6898b8e80941Smrg		alu.last = i == last_inst;
6899b8e80941Smrg
6900b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
6901b8e80941Smrg
6902b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6903b8e80941Smrg		if (r)
6904b8e80941Smrg			return r;
6905848b8605Smrg	}
6906848b8605Smrg
6907b8e80941Smrg	t2 = r600_get_temp(ctx);
6908848b8605Smrg
6909b8e80941Smrg	for (i = 0; i < 4; i++) {
6910b8e80941Smrg		if (!(write_mask & (1<<i)))
6911b8e80941Smrg			continue;
6912848b8605Smrg
6913b8e80941Smrg		/* t2 = 31 - t1 */
6914b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6915b8e80941Smrg		alu.op = ALU_OP2_SUB_INT;
6916b8e80941Smrg		alu.dst.sel = t2;
6917b8e80941Smrg		alu.dst.chan = i;
6918b8e80941Smrg		alu.dst.write = 1;
6919b8e80941Smrg		alu.last = i == last_inst;
6920848b8605Smrg
6921b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
6922b8e80941Smrg		alu.src[0].value = 31;
6923b8e80941Smrg		alu.src[1].sel = t1;
6924b8e80941Smrg		alu.src[1].chan = i;
6925848b8605Smrg
6926b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6927b8e80941Smrg		if (r)
6928b8e80941Smrg			return r;
6929b8e80941Smrg	}
6930848b8605Smrg
6931b8e80941Smrg	for (i = 0; i < 4; i++) {
6932b8e80941Smrg		if (!(write_mask & (1<<i)))
6933b8e80941Smrg			continue;
6934848b8605Smrg
6935b8e80941Smrg		/* result = t1 >= 0 ? t2 : t1 */
6936b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6937b8e80941Smrg		alu.op = ALU_OP3_CNDGE_INT;
6938b8e80941Smrg		alu.is_op3 = 1;
6939b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6940b8e80941Smrg		alu.dst.chan = i;
6941b8e80941Smrg		alu.dst.write = 1;
6942b8e80941Smrg		alu.last = i == last_inst;
6943848b8605Smrg
6944b8e80941Smrg		alu.src[0].sel = t1;
6945b8e80941Smrg		alu.src[0].chan = i;
6946b8e80941Smrg		alu.src[1].sel = t2;
6947b8e80941Smrg		alu.src[1].chan = i;
6948b8e80941Smrg		alu.src[2].sel = t1;
6949b8e80941Smrg		alu.src[2].chan = i;
6950b8e80941Smrg
6951b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6952b8e80941Smrg		if (r)
6953b8e80941Smrg			return r;
6954848b8605Smrg	}
6955848b8605Smrg
6956b8e80941Smrg	return 0;
6957b8e80941Smrg}
6958848b8605Smrg
6959b8e80941Smrgstatic int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
6960b8e80941Smrg{
6961b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6962b8e80941Smrg	struct r600_bytecode_alu alu;
6963b8e80941Smrg	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
6964b8e80941Smrg	unsigned location;
6965b8e80941Smrg	const int input = inst->Src[0].Register.Index + ctx->shader->nsys_inputs;
6966848b8605Smrg
6967b8e80941Smrg	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
6968848b8605Smrg
6969b8e80941Smrg	/* Interpolators have been marked for use already by allocate_system_value_inputs */
6970b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
6971b8e80941Smrg		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
6972b8e80941Smrg		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
6973b8e80941Smrg	}
6974b8e80941Smrg	else {
6975b8e80941Smrg		location = TGSI_INTERPOLATE_LOC_CENTROID;
6976b8e80941Smrg	}
6977848b8605Smrg
6978b8e80941Smrg	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
6979b8e80941Smrg	if (k < 0)
6980b8e80941Smrg		k = 0;
6981b8e80941Smrg	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
6982b8e80941Smrg	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);
6983b8e80941Smrg
6984b8e80941Smrg	/* NOTE: currently offset is not perspective correct */
6985b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
6986b8e80941Smrg		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
6987b8e80941Smrg		int sample_gpr = -1;
6988b8e80941Smrg		int gradientsH, gradientsV;
6989b8e80941Smrg		struct r600_bytecode_tex tex;
6990b8e80941Smrg
6991b8e80941Smrg		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
6992b8e80941Smrg			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
6993b8e80941Smrg		}
6994b8e80941Smrg
6995b8e80941Smrg		gradientsH = r600_get_temp(ctx);
6996b8e80941Smrg		gradientsV = r600_get_temp(ctx);
6997b8e80941Smrg		for (i = 0; i < 2; i++) {
6998b8e80941Smrg			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
6999b8e80941Smrg			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
7000b8e80941Smrg			tex.src_gpr = interp_gpr;
7001b8e80941Smrg			tex.src_sel_x = interp_base_chan + 0;
7002b8e80941Smrg			tex.src_sel_y = interp_base_chan + 1;
7003b8e80941Smrg			tex.src_sel_z = 0;
7004b8e80941Smrg			tex.src_sel_w = 0;
7005b8e80941Smrg			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
7006b8e80941Smrg			tex.dst_sel_x = 0;
7007b8e80941Smrg			tex.dst_sel_y = 1;
7008b8e80941Smrg			tex.dst_sel_z = 7;
7009b8e80941Smrg			tex.dst_sel_w = 7;
7010b8e80941Smrg			tex.inst_mod = 1; // Use per pixel gradient calculation
7011b8e80941Smrg			tex.sampler_id = 0;
7012b8e80941Smrg			tex.resource_id = tex.sampler_id;
7013848b8605Smrg			r = r600_bytecode_add_tex(ctx->bc, &tex);
7014848b8605Smrg			if (r)
7015848b8605Smrg				return r;
7016848b8605Smrg		}
7017848b8605Smrg
7018b8e80941Smrg		for (i = 0; i < 2; i++) {
7019848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7020b8e80941Smrg			alu.op = ALU_OP3_MULADD;
7021b8e80941Smrg			alu.is_op3 = 1;
7022b8e80941Smrg			alu.src[0].sel = gradientsH;
7023b8e80941Smrg			alu.src[0].chan = i;
7024b8e80941Smrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
7025b8e80941Smrg				alu.src[1].sel = sample_gpr;
7026b8e80941Smrg				alu.src[1].chan = 2;
7027b8e80941Smrg			}
7028b8e80941Smrg			else {
7029b8e80941Smrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
7030b8e80941Smrg			}
7031b8e80941Smrg			alu.src[2].sel = interp_gpr;
7032b8e80941Smrg			alu.src[2].chan = interp_base_chan + i;
7033848b8605Smrg			alu.dst.sel = ctx->temp_reg;
7034b8e80941Smrg			alu.dst.chan = i;
7035b8e80941Smrg			alu.last = i == 1;
7036b8e80941Smrg
7037848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7038848b8605Smrg			if (r)
7039848b8605Smrg				return r;
7040848b8605Smrg		}
7041848b8605Smrg
7042b8e80941Smrg		for (i = 0; i < 2; i++) {
7043848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7044b8e80941Smrg			alu.op = ALU_OP3_MULADD;
7045b8e80941Smrg			alu.is_op3 = 1;
7046b8e80941Smrg			alu.src[0].sel = gradientsV;
7047b8e80941Smrg			alu.src[0].chan = i;
7048b8e80941Smrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
7049b8e80941Smrg				alu.src[1].sel = sample_gpr;
7050b8e80941Smrg				alu.src[1].chan = 3;
7051b8e80941Smrg			}
7052b8e80941Smrg			else {
7053b8e80941Smrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
7054b8e80941Smrg			}
7055b8e80941Smrg			alu.src[2].sel = ctx->temp_reg;
7056b8e80941Smrg			alu.src[2].chan = i;
7057848b8605Smrg			alu.dst.sel = ctx->temp_reg;
7058848b8605Smrg			alu.dst.chan = i;
7059b8e80941Smrg			alu.last = i == 1;
7060b8e80941Smrg
7061848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7062848b8605Smrg			if (r)
7063848b8605Smrg				return r;
7064848b8605Smrg		}
7065848b8605Smrg	}
7066848b8605Smrg
7067b8e80941Smrg	tmp = r600_get_temp(ctx);
7068b8e80941Smrg	for (i = 0; i < 8; i++) {
7069b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7070b8e80941Smrg		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;
7071848b8605Smrg
7072b8e80941Smrg		alu.dst.sel = tmp;
7073b8e80941Smrg		if ((i > 1 && i < 6)) {
7074848b8605Smrg			alu.dst.write = 1;
7075848b8605Smrg		}
7076b8e80941Smrg		else {
7077b8e80941Smrg			alu.dst.write = 0;
7078b8e80941Smrg		}
7079b8e80941Smrg		alu.dst.chan = i % 4;
7080848b8605Smrg
7081b8e80941Smrg		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
7082b8e80941Smrg			inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
7083848b8605Smrg			alu.src[0].sel = ctx->temp_reg;
7084b8e80941Smrg			alu.src[0].chan = 1 - (i % 2);
7085b8e80941Smrg		} else {
7086b8e80941Smrg			alu.src[0].sel = interp_gpr;
7087b8e80941Smrg			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
7088848b8605Smrg		}
7089b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
7090b8e80941Smrg		alu.src[1].chan = 0;
7091848b8605Smrg
7092b8e80941Smrg		alu.last = i % 4 == 3;
7093b8e80941Smrg		alu.bank_swizzle_force = SQ_ALU_VEC_210;
7094848b8605Smrg
7095b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7096b8e80941Smrg		if (r)
7097b8e80941Smrg			return r;
7098b8e80941Smrg	}
7099848b8605Smrg
7100b8e80941Smrg	// INTERP can't swizzle dst
7101b8e80941Smrg	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
7102b8e80941Smrg	for (i = 0; i <= lasti; i++) {
7103b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
7104b8e80941Smrg			continue;
7105848b8605Smrg
7106b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7107b8e80941Smrg		alu.op = ALU_OP1_MOV;
7108b8e80941Smrg		alu.src[0].sel = tmp;
7109b8e80941Smrg		alu.src[0].chan = ctx->src[0].swizzle[i];
7110b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
7111848b8605Smrg		alu.dst.write = 1;
7112b8e80941Smrg		alu.last = i == lasti;
7113848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7114848b8605Smrg		if (r)
7115848b8605Smrg			return r;
7116b8e80941Smrg	}
7117848b8605Smrg
7118b8e80941Smrg	return 0;
7119b8e80941Smrg}
7120848b8605Smrg
7121848b8605Smrg
7122b8e80941Smrgstatic int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
7123b8e80941Smrg{
7124b8e80941Smrg	struct r600_bytecode_alu alu;
7125b8e80941Smrg	int i, r;
7126848b8605Smrg
7127b8e80941Smrg	for (i = 0; i < 4; i++) {
7128b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7129b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
7130b8e80941Smrg			alu.op = ALU_OP0_NOP;
7131b8e80941Smrg			alu.dst.chan = i;
7132b8e80941Smrg		} else {
7133848b8605Smrg			alu.op = ALU_OP1_MOV;
7134b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
7135b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
7136b8e80941Smrg			alu.src[0].chan = i;
7137b8e80941Smrg		}
7138b8e80941Smrg		if (i == 3) {
7139848b8605Smrg			alu.last = 1;
7140848b8605Smrg		}
7141b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7142b8e80941Smrg		if (r)
7143b8e80941Smrg			return r;
7144b8e80941Smrg	}
7145b8e80941Smrg	return 0;
7146b8e80941Smrg}
7147848b8605Smrg
7148b8e80941Smrgstatic int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
7149b8e80941Smrg                                 unsigned writemask,
7150b8e80941Smrg                                 struct r600_bytecode_alu_src *bc_src,
7151b8e80941Smrg                                 const struct r600_shader_src *shader_src)
7152b8e80941Smrg{
7153b8e80941Smrg	struct r600_bytecode_alu alu;
7154b8e80941Smrg	int i, r;
7155b8e80941Smrg	int lasti = tgsi_last_instruction(writemask);
7156b8e80941Smrg	int temp_reg = 0;
7157848b8605Smrg
7158b8e80941Smrg	r600_bytecode_src(&bc_src[0], shader_src, 0);
7159b8e80941Smrg	r600_bytecode_src(&bc_src[1], shader_src, 1);
7160b8e80941Smrg	r600_bytecode_src(&bc_src[2], shader_src, 2);
7161b8e80941Smrg	r600_bytecode_src(&bc_src[3], shader_src, 3);
7162848b8605Smrg
7163b8e80941Smrg	if (bc_src->abs) {
7164b8e80941Smrg		temp_reg = r600_get_temp(ctx);
7165848b8605Smrg
7166b8e80941Smrg		for (i = 0; i < lasti + 1; i++) {
7167b8e80941Smrg			if (!(writemask & (1 << i)))
7168b8e80941Smrg				continue;
7169848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7170848b8605Smrg			alu.op = ALU_OP1_MOV;
7171b8e80941Smrg			alu.dst.sel = temp_reg;
7172b8e80941Smrg			alu.dst.chan = i;
7173848b8605Smrg			alu.dst.write = 1;
7174b8e80941Smrg			alu.src[0] = bc_src[i];
7175b8e80941Smrg			if (i == lasti) {
7176b8e80941Smrg				alu.last = 1;
7177b8e80941Smrg			}
7178848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7179848b8605Smrg			if (r)
7180848b8605Smrg				return r;
7181b8e80941Smrg			memset(&bc_src[i], 0, sizeof(*bc_src));
7182b8e80941Smrg			bc_src[i].sel = temp_reg;
7183b8e80941Smrg			bc_src[i].chan = i;
7184848b8605Smrg		}
7185848b8605Smrg	}
7186b8e80941Smrg	return 0;
7187b8e80941Smrg}
7188848b8605Smrg
7189b8e80941Smrgstatic int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst)
7190b8e80941Smrg{
7191b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7192b8e80941Smrg	struct r600_bytecode_alu alu;
7193b8e80941Smrg	struct r600_bytecode_alu_src srcs[4][4];
7194b8e80941Smrg	int i, j, r;
7195b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
7196b8e80941Smrg	unsigned op = ctx->inst_info->op;
7197b8e80941Smrg
7198b8e80941Smrg	if (op == ALU_OP3_MULADD_IEEE &&
7199b8e80941Smrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
7200b8e80941Smrg		op = ALU_OP3_MULADD;
7201b8e80941Smrg
7202b8e80941Smrg	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
7203b8e80941Smrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
7204b8e80941Smrg					  srcs[j], &ctx->src[j]);
7205b8e80941Smrg		if (r)
7206b8e80941Smrg			return r;
7207b8e80941Smrg	}
7208b8e80941Smrg
7209b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
7210b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
7211b8e80941Smrg			continue;
7212b8e80941Smrg
7213b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7214b8e80941Smrg		alu.op = op;
7215b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
7216b8e80941Smrg			alu.src[j] = srcs[j][i];
7217b8e80941Smrg		}
7218b8e80941Smrg
7219b8e80941Smrg		if (dst == -1) {
7220b8e80941Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
7221b8e80941Smrg		} else {
7222b8e80941Smrg			alu.dst.sel = dst;
7223b8e80941Smrg		}
7224b8e80941Smrg		alu.dst.chan = i;
7225b8e80941Smrg		alu.dst.write = 1;
7226b8e80941Smrg		alu.is_op3 = 1;
7227b8e80941Smrg		if (i == lasti) {
7228b8e80941Smrg			alu.last = 1;
7229b8e80941Smrg		}
7230b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7231b8e80941Smrg		if (r)
7232b8e80941Smrg			return r;
7233b8e80941Smrg	}
7234b8e80941Smrg	return 0;
7235b8e80941Smrg}
7236b8e80941Smrg
/*
 * Three-operand instruction writing to the instruction's own TGSI
 * destination. Returns whatever tgsi_op3_dst() returns.
 */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	/* -1 makes tgsi_op3_dst() resolve the destination through tgsi_dst() */
	const int use_tgsi_dst = -1;

	return tgsi_op3_dst(ctx, use_tgsi_dst);
}
7241b8e80941Smrg
7242b8e80941Smrgstatic int tgsi_dp(struct r600_shader_ctx *ctx)
7243b8e80941Smrg{
7244b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7245b8e80941Smrg	struct r600_bytecode_alu alu;
7246b8e80941Smrg	int i, j, r;
7247b8e80941Smrg	unsigned op = ctx->inst_info->op;
7248b8e80941Smrg	if (op == ALU_OP2_DOT4_IEEE &&
7249b8e80941Smrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
7250b8e80941Smrg		op = ALU_OP2_DOT4;
7251b8e80941Smrg
7252b8e80941Smrg	for (i = 0; i < 4; i++) {
7253b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7254b8e80941Smrg		alu.op = op;
7255b8e80941Smrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
7256b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
7257b8e80941Smrg		}
7258b8e80941Smrg
7259b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
7260b8e80941Smrg		alu.dst.chan = i;
7261b8e80941Smrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
7262b8e80941Smrg		/* handle some special cases */
7263b8e80941Smrg		switch (inst->Instruction.Opcode) {
7264b8e80941Smrg		case TGSI_OPCODE_DP2:
7265b8e80941Smrg			if (i > 1) {
7266b8e80941Smrg				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
7267b8e80941Smrg				alu.src[0].chan = alu.src[1].chan = 0;
7268b8e80941Smrg			}
7269b8e80941Smrg			break;
7270b8e80941Smrg		case TGSI_OPCODE_DP3:
7271b8e80941Smrg			if (i > 2) {
7272b8e80941Smrg				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
7273b8e80941Smrg				alu.src[0].chan = alu.src[1].chan = 0;
7274b8e80941Smrg			}
7275b8e80941Smrg			break;
7276b8e80941Smrg		default:
7277b8e80941Smrg			break;
7278b8e80941Smrg		}
7279b8e80941Smrg		if (i == 3) {
7280b8e80941Smrg			alu.last = 1;
7281b8e80941Smrg		}
7282b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7283b8e80941Smrg		if (r)
7284b8e80941Smrg			return r;
7285b8e80941Smrg	}
7286b8e80941Smrg	return 0;
7287b8e80941Smrg}
7288b8e80941Smrg
7289b8e80941Smrgstatic inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
7290b8e80941Smrg						    unsigned index)
7291b8e80941Smrg{
7292b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7293b8e80941Smrg	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
7294b8e80941Smrg		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
7295b8e80941Smrg		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
7296b8e80941Smrg		ctx->src[index].neg || ctx->src[index].abs ||
7297b8e80941Smrg		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY);
7298b8e80941Smrg}
7299b8e80941Smrg
7300b8e80941Smrgstatic inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
7301b8e80941Smrg					unsigned index)
7302b8e80941Smrg{
7303b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7304b8e80941Smrg	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
7305b8e80941Smrg}
7306b8e80941Smrg
7307b8e80941Smrgstatic int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
7308b8e80941Smrg{
7309b8e80941Smrg	struct r600_bytecode_vtx vtx;
7310b8e80941Smrg	struct r600_bytecode_alu alu;
7311b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7312b8e80941Smrg	int src_gpr, r, i;
7313b8e80941Smrg	int id = tgsi_tex_get_src_gpr(ctx, 1);
7314b8e80941Smrg	int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7315b8e80941Smrg
7316b8e80941Smrg	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
7317b8e80941Smrg	if (src_requires_loading) {
7318b8e80941Smrg		for (i = 0; i < 4; i++) {
7319848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7320848b8605Smrg			alu.op = ALU_OP1_MOV;
7321848b8605Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
7322848b8605Smrg			alu.dst.sel = ctx->temp_reg;
7323848b8605Smrg			alu.dst.chan = i;
7324848b8605Smrg			if (i == 3)
7325848b8605Smrg				alu.last = 1;
7326848b8605Smrg			alu.dst.write = 1;
7327848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7328848b8605Smrg			if (r)
7329848b8605Smrg				return r;
7330848b8605Smrg		}
7331848b8605Smrg		src_gpr = ctx->temp_reg;
7332848b8605Smrg	}
7333848b8605Smrg
7334b8e80941Smrg	memset(&vtx, 0, sizeof(vtx));
7335b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
7336b8e80941Smrg	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
7337b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
7338b8e80941Smrg	vtx.src_gpr = src_gpr;
7339b8e80941Smrg	vtx.mega_fetch_count = 16;
7340b8e80941Smrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
7341b8e80941Smrg	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
7342b8e80941Smrg	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
7343b8e80941Smrg	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
7344b8e80941Smrg	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
7345b8e80941Smrg	vtx.use_const_fields = 1;
7346b8e80941Smrg	vtx.buffer_index_mode = sampler_index_mode;
7347848b8605Smrg
7348b8e80941Smrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
7349b8e80941Smrg		return r;
7350848b8605Smrg
7351b8e80941Smrg	if (ctx->bc->chip_class >= EVERGREEN)
7352b8e80941Smrg		return 0;
7353848b8605Smrg
7354b8e80941Smrg	for (i = 0; i < 4; i++) {
7355b8e80941Smrg		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
7356b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
7357b8e80941Smrg			continue;
7358848b8605Smrg
7359b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7360b8e80941Smrg		alu.op = ALU_OP2_AND_INT;
7361848b8605Smrg
7362b8e80941Smrg		alu.dst.chan = i;
7363b8e80941Smrg		alu.dst.sel = vtx.dst_gpr;
7364b8e80941Smrg		alu.dst.write = 1;
7365848b8605Smrg
7366b8e80941Smrg		alu.src[0].sel = vtx.dst_gpr;
7367b8e80941Smrg		alu.src[0].chan = i;
7368848b8605Smrg
7369b8e80941Smrg		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
7370b8e80941Smrg		alu.src[1].sel += (id * 2);
7371b8e80941Smrg		alu.src[1].chan = i % 4;
7372b8e80941Smrg		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
7373b8e80941Smrg
7374b8e80941Smrg		if (i == lasti)
7375b8e80941Smrg			alu.last = 1;
7376b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7377b8e80941Smrg		if (r)
7378b8e80941Smrg			return r;
7379b8e80941Smrg	}
7380b8e80941Smrg
7381b8e80941Smrg	if (inst->Dst[0].Register.WriteMask & 3) {
7382b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7383b8e80941Smrg		alu.op = ALU_OP2_OR_INT;
7384b8e80941Smrg
7385b8e80941Smrg		alu.dst.chan = 3;
7386b8e80941Smrg		alu.dst.sel = vtx.dst_gpr;
7387b8e80941Smrg		alu.dst.write = 1;
7388b8e80941Smrg
7389b8e80941Smrg		alu.src[0].sel = vtx.dst_gpr;
7390b8e80941Smrg		alu.src[0].chan = 3;
7391b8e80941Smrg
7392b8e80941Smrg		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
7393b8e80941Smrg		alu.src[1].chan = 0;
7394b8e80941Smrg		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
7395b8e80941Smrg
7396b8e80941Smrg		alu.last = 1;
7397b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7398b8e80941Smrg		if (r)
7399b8e80941Smrg			return r;
7400b8e80941Smrg	}
7401b8e80941Smrg	return 0;
7402b8e80941Smrg}
7403b8e80941Smrg
7404b8e80941Smrgstatic int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset, int eg_buffer_base)
7405b8e80941Smrg{
7406b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7407b8e80941Smrg	int r;
7408b8e80941Smrg	int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
7409b8e80941Smrg	int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7410b8e80941Smrg
7411b8e80941Smrg	if (ctx->bc->chip_class < EVERGREEN) {
7412b8e80941Smrg		struct r600_bytecode_alu alu;
7413b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7414b8e80941Smrg		alu.op = ALU_OP1_MOV;
7415b8e80941Smrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
7416b8e80941Smrg		/* r600 we have them at channel 2 of the second dword */
7417b8e80941Smrg		alu.src[0].sel += (id * 2) + 1;
7418b8e80941Smrg		alu.src[0].chan = 1;
7419b8e80941Smrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
7420b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
7421b8e80941Smrg		alu.last = 1;
7422b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7423b8e80941Smrg		if (r)
7424b8e80941Smrg			return r;
7425b8e80941Smrg		return 0;
7426b8e80941Smrg	} else {
7427b8e80941Smrg		struct r600_bytecode_vtx vtx;
7428b8e80941Smrg		memset(&vtx, 0, sizeof(vtx));
7429b8e80941Smrg		vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
7430b8e80941Smrg		vtx.buffer_id = id + eg_buffer_base;
7431b8e80941Smrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
7432b8e80941Smrg		vtx.src_gpr = 0;
7433b8e80941Smrg		vtx.mega_fetch_count = 16; /* no idea here really... */
7434b8e80941Smrg		vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
7435b8e80941Smrg		vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
7436b8e80941Smrg		vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7;		/* SEL_Y */
7437b8e80941Smrg		vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7;		/* SEL_Z */
7438b8e80941Smrg		vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7;		/* SEL_W */
7439b8e80941Smrg		vtx.data_format = FMT_32_32_32_32;
7440b8e80941Smrg		vtx.buffer_index_mode = sampler_index_mode;
7441b8e80941Smrg
7442b8e80941Smrg		if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
7443b8e80941Smrg			return r;
7444b8e80941Smrg		return 0;
7445b8e80941Smrg	}
7446b8e80941Smrg}
7447b8e80941Smrg
7448b8e80941Smrg
7449b8e80941Smrgstatic int tgsi_tex(struct r600_shader_ctx *ctx)
7450b8e80941Smrg{
7451b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
7452b8e80941Smrg	struct r600_bytecode_tex tex;
7453b8e80941Smrg	struct r600_bytecode_tex grad_offs[3];
7454b8e80941Smrg	struct r600_bytecode_alu alu;
7455b8e80941Smrg	unsigned src_gpr;
7456b8e80941Smrg	int r, i, j, n_grad_offs = 0;
7457b8e80941Smrg	int opcode;
7458b8e80941Smrg	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
7459b8e80941Smrg				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
7460b8e80941Smrg				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
7461b8e80941Smrg				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
7462b8e80941Smrg
7463b8e80941Smrg	bool txf_add_offsets = inst->Texture.NumOffsets &&
7464b8e80941Smrg			     inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
7465b8e80941Smrg			     inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
7466b8e80941Smrg
7467b8e80941Smrg	/* Texture fetch instructions can only use gprs as source.
7468b8e80941Smrg	 * Also they cannot negate the source or take the absolute value */
7469b8e80941Smrg	const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
7470b8e80941Smrg                                              tgsi_tex_src_requires_loading(ctx, 0)) ||
7471b8e80941Smrg					     read_compressed_msaa || txf_add_offsets;
7472b8e80941Smrg
7473b8e80941Smrg	boolean src_loaded = FALSE;
7474b8e80941Smrg	unsigned sampler_src_reg = 1;
7475b8e80941Smrg	int8_t offset_x = 0, offset_y = 0, offset_z = 0;
7476b8e80941Smrg	boolean has_txq_cube_array_z = false;
7477b8e80941Smrg	unsigned sampler_index_mode;
7478b8e80941Smrg	int array_index_offset_channel = -1;
7479b8e80941Smrg
7480b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
7481b8e80941Smrg	    ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
7482b8e80941Smrg	      inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
7483b8e80941Smrg		if (inst->Dst[0].Register.WriteMask & 4) {
7484b8e80941Smrg			ctx->shader->has_txq_cube_array_z_comp = true;
7485b8e80941Smrg			has_txq_cube_array_z = true;
7486b8e80941Smrg		}
7487b8e80941Smrg
7488b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
7489b8e80941Smrg	    inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
7490b8e80941Smrg	    inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
7491b8e80941Smrg	    inst->Instruction.Opcode == TGSI_OPCODE_TG4)
7492b8e80941Smrg		sampler_src_reg = 2;
7493b8e80941Smrg
7494b8e80941Smrg	/* TGSI moves the sampler to src reg 3 for TXD */
7495b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
7496b8e80941Smrg		sampler_src_reg = 3;
7497b8e80941Smrg
7498b8e80941Smrg	sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
7499b8e80941Smrg
7500b8e80941Smrg	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
7501b8e80941Smrg
7502b8e80941Smrg	if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
7503b8e80941Smrg		if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
7504b8e80941Smrg			if (ctx->bc->chip_class < EVERGREEN)
7505b8e80941Smrg				ctx->shader->uses_tex_buffers = true;
7506b8e80941Smrg			return r600_do_buffer_txq(ctx, 1, 0, R600_MAX_CONST_BUFFERS);
7507b8e80941Smrg		}
7508b8e80941Smrg		else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
7509b8e80941Smrg			if (ctx->bc->chip_class < EVERGREEN)
7510b8e80941Smrg				ctx->shader->uses_tex_buffers = true;
7511b8e80941Smrg			return do_vtx_fetch_inst(ctx, src_requires_loading);
7512b8e80941Smrg		}
7513b8e80941Smrg	}
7514b8e80941Smrg
7515b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
7516b8e80941Smrg		int out_chan;
7517b8e80941Smrg		/* Add perspective divide */
7518b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
7519b8e80941Smrg			out_chan = 2;
7520b8e80941Smrg			for (i = 0; i < 3; i++) {
7521848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7522b8e80941Smrg				alu.op = ALU_OP1_RECIP_IEEE;
7523b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7524b8e80941Smrg
7525b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
7526848b8605Smrg				alu.dst.chan = i;
7527b8e80941Smrg				if (i == 2)
7528848b8605Smrg					alu.last = 1;
7529b8e80941Smrg				if (out_chan == i)
7530b8e80941Smrg					alu.dst.write = 1;
7531848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7532848b8605Smrg				if (r)
7533848b8605Smrg					return r;
7534848b8605Smrg			}
7535b8e80941Smrg
7536848b8605Smrg		} else {
7537b8e80941Smrg			out_chan = 3;
7538848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7539b8e80941Smrg			alu.op = ALU_OP1_RECIP_IEEE;
7540b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7541b8e80941Smrg
7542b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7543b8e80941Smrg			alu.dst.chan = out_chan;
7544b8e80941Smrg			alu.last = 1;
7545b8e80941Smrg			alu.dst.write = 1;
7546b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7547b8e80941Smrg			if (r)
7548b8e80941Smrg				return r;
7549b8e80941Smrg		}
7550b8e80941Smrg
7551b8e80941Smrg		for (i = 0; i < 3; i++) {
7552b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7553b8e80941Smrg			alu.op = ALU_OP2_MUL;
7554b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
7555b8e80941Smrg			alu.src[0].chan = out_chan;
7556b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
7557b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7558b8e80941Smrg			alu.dst.chan = i;
7559b8e80941Smrg			alu.dst.write = 1;
7560b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7561b8e80941Smrg			if (r)
7562b8e80941Smrg				return r;
7563b8e80941Smrg		}
7564b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7565b8e80941Smrg		alu.op = ALU_OP1_MOV;
7566b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
7567b8e80941Smrg		alu.src[0].chan = 0;
7568b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
7569b8e80941Smrg		alu.dst.chan = 3;
7570b8e80941Smrg		alu.last = 1;
7571b8e80941Smrg		alu.dst.write = 1;
7572b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7573b8e80941Smrg		if (r)
7574b8e80941Smrg			return r;
7575b8e80941Smrg		src_loaded = TRUE;
7576b8e80941Smrg		src_gpr = ctx->temp_reg;
7577b8e80941Smrg	}
7578b8e80941Smrg
7579b8e80941Smrg
7580b8e80941Smrg	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
7581b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
7582b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
7583b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
7584b8e80941Smrg	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
7585b8e80941Smrg
7586b8e80941Smrg		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
7587b8e80941Smrg		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
7588b8e80941Smrg
7589b8e80941Smrg		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
7590b8e80941Smrg		for (i = 0; i < 4; i++) {
7591b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7592b8e80941Smrg			alu.op = ALU_OP2_CUBE;
7593b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
7594b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
7595b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7596b8e80941Smrg			alu.dst.chan = i;
7597b8e80941Smrg			if (i == 3)
7598b8e80941Smrg				alu.last = 1;
7599b8e80941Smrg			alu.dst.write = 1;
7600b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7601b8e80941Smrg			if (r)
7602b8e80941Smrg				return r;
7603b8e80941Smrg		}
7604b8e80941Smrg
7605b8e80941Smrg		/* tmp1.z = RCP_e(|tmp1.z|) */
7606b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
7607b8e80941Smrg			for (i = 0; i < 3; i++) {
7608b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7609b8e80941Smrg				alu.op = ALU_OP1_RECIP_IEEE;
7610b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
7611b8e80941Smrg				alu.src[0].chan = 2;
7612b8e80941Smrg				alu.src[0].abs = 1;
7613b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
7614b8e80941Smrg				alu.dst.chan = i;
7615b8e80941Smrg				if (i == 2)
7616b8e80941Smrg					alu.dst.write = 1;
7617b8e80941Smrg				if (i == 2)
7618b8e80941Smrg					alu.last = 1;
7619b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7620b8e80941Smrg				if (r)
7621b8e80941Smrg					return r;
7622b8e80941Smrg			}
7623b8e80941Smrg		} else {
7624b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7625b8e80941Smrg			alu.op = ALU_OP1_RECIP_IEEE;
7626b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
7627b8e80941Smrg			alu.src[0].chan = 2;
7628b8e80941Smrg			alu.src[0].abs = 1;
7629b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7630b8e80941Smrg			alu.dst.chan = 2;
7631848b8605Smrg			alu.dst.write = 1;
7632848b8605Smrg			alu.last = 1;
7633848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7634848b8605Smrg			if (r)
7635848b8605Smrg				return r;
7636848b8605Smrg		}
7637848b8605Smrg
7638b8e80941Smrg		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
7639b8e80941Smrg		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
7640b8e80941Smrg		 * muladd has no writemask, have to use another temp
7641b8e80941Smrg		 */
7642b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7643b8e80941Smrg		alu.op = ALU_OP3_MULADD;
7644b8e80941Smrg		alu.is_op3 = 1;
7645b8e80941Smrg
7646b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
7647b8e80941Smrg		alu.src[0].chan = 0;
7648b8e80941Smrg		alu.src[1].sel = ctx->temp_reg;
7649b8e80941Smrg		alu.src[1].chan = 2;
7650b8e80941Smrg
7651b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
7652b8e80941Smrg		alu.src[2].chan = 0;
7653b8e80941Smrg		alu.src[2].value = u_bitcast_f2u(1.5f);
7654b8e80941Smrg
7655b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
7656b8e80941Smrg		alu.dst.chan = 0;
7657b8e80941Smrg		alu.dst.write = 1;
7658b8e80941Smrg
7659b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7660b8e80941Smrg		if (r)
7661b8e80941Smrg			return r;
7662b8e80941Smrg
7663b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7664b8e80941Smrg		alu.op = ALU_OP3_MULADD;
7665b8e80941Smrg		alu.is_op3 = 1;
7666b8e80941Smrg
7667b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
7668b8e80941Smrg		alu.src[0].chan = 1;
7669b8e80941Smrg		alu.src[1].sel = ctx->temp_reg;
7670b8e80941Smrg		alu.src[1].chan = 2;
7671b8e80941Smrg
7672b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
7673b8e80941Smrg		alu.src[2].chan = 0;
7674b8e80941Smrg		alu.src[2].value = u_bitcast_f2u(1.5f);
7675b8e80941Smrg
7676b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
7677b8e80941Smrg		alu.dst.chan = 1;
7678b8e80941Smrg		alu.dst.write = 1;
7679b8e80941Smrg
7680b8e80941Smrg		alu.last = 1;
7681b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
7682b8e80941Smrg		if (r)
7683b8e80941Smrg			return r;
7684b8e80941Smrg		/* write initial compare value into Z component
7685b8e80941Smrg		  - W src 0 for shadow cube
7686b8e80941Smrg		  - X src 1 for shadow cube array */
7687b8e80941Smrg		if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
7688b8e80941Smrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
7689b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7690b8e80941Smrg			alu.op = ALU_OP1_MOV;
7691b8e80941Smrg			if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
7692b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
7693b8e80941Smrg			else
7694b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7695b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7696b8e80941Smrg			alu.dst.chan = 2;
7697b8e80941Smrg			alu.dst.write = 1;
7698b8e80941Smrg			alu.last = 1;
7699b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7700b8e80941Smrg			if (r)
7701b8e80941Smrg				return r;
7702b8e80941Smrg		}
7703b8e80941Smrg
7704b8e80941Smrg		if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
7705b8e80941Smrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
7706b8e80941Smrg			if (ctx->bc->chip_class >= EVERGREEN) {
7707b8e80941Smrg				int mytmp = r600_get_temp(ctx);
7708b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7709b8e80941Smrg				alu.op = ALU_OP1_MOV;
7710b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
7711b8e80941Smrg				alu.src[0].chan = 3;
7712b8e80941Smrg				alu.dst.sel = mytmp;
7713b8e80941Smrg				alu.dst.chan = 0;
7714b8e80941Smrg				alu.dst.write = 1;
7715b8e80941Smrg				alu.last = 1;
7716b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7717b8e80941Smrg				if (r)
7718b8e80941Smrg					return r;
7719b8e80941Smrg
7720b8e80941Smrg				/* Evaluate the array index according to floor(idx + 0.5). This
7721b8e80941Smrg				 * needs to be done before merging the face select value, because
7722b8e80941Smrg				 * otherwise the fractional part of the array index will interfere
7723b8e80941Smrg				 * with the face select value */
7724b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7725b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7726b8e80941Smrg				alu.op = ALU_OP1_RNDNE;
7727b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
7728b8e80941Smrg				alu.dst.chan = 3;
7729b8e80941Smrg				alu.dst.write = 1;
7730b8e80941Smrg				alu.last = 1;
7731b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7732b8e80941Smrg				if (r)
7733b8e80941Smrg					return r;
7734b8e80941Smrg
7735b8e80941Smrg				/* Because the array slice index and the cube face index are merged
7736b8e80941Smrg				 * into one value we have to make sure the array slice index is >= 0,
7737b8e80941Smrg				 * otherwise the face selection will fail */
7738b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7739b8e80941Smrg				alu.op = ALU_OP2_MAX;
7740b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
7741b8e80941Smrg				alu.src[0].chan = 3;
7742b8e80941Smrg				alu.src[1].sel = V_SQ_ALU_SRC_0;
7743b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
7744b8e80941Smrg				alu.dst.chan = 3;
7745b8e80941Smrg				alu.dst.write = 1;
7746b8e80941Smrg				alu.last = 1;
7747b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7748b8e80941Smrg				if (r)
7749b8e80941Smrg					return r;
7750b8e80941Smrg
7751b8e80941Smrg				/* have to multiply original layer by 8 and add to face id (temp.w) in Z */
7752b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7753b8e80941Smrg				alu.op = ALU_OP3_MULADD;
7754b8e80941Smrg				alu.is_op3 = 1;
7755b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
7756b8e80941Smrg				alu.src[0].chan = 3;
7757b8e80941Smrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
7758b8e80941Smrg				alu.src[1].chan = 0;
7759b8e80941Smrg				alu.src[1].value = u_bitcast_f2u(8.0f);
7760b8e80941Smrg				alu.src[2].sel = mytmp;
7761b8e80941Smrg				alu.src[2].chan = 0;
7762b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
7763b8e80941Smrg				alu.dst.chan = 3;
7764b8e80941Smrg				alu.dst.write = 1;
7765b8e80941Smrg				alu.last = 1;
7766b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7767b8e80941Smrg				if (r)
7768b8e80941Smrg					return r;
7769b8e80941Smrg			} else if (ctx->bc->chip_class < EVERGREEN) {
7770b8e80941Smrg				memset(&tex, 0, sizeof(struct r600_bytecode_tex));
7771b8e80941Smrg				tex.op = FETCH_OP_SET_CUBEMAP_INDEX;
7772b8e80941Smrg				tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
7773b8e80941Smrg				tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
7774b8e80941Smrg				tex.src_gpr = r600_get_temp(ctx);
7775b8e80941Smrg				tex.src_sel_x = 0;
7776b8e80941Smrg				tex.src_sel_y = 0;
7777b8e80941Smrg				tex.src_sel_z = 0;
7778b8e80941Smrg				tex.src_sel_w = 0;
7779b8e80941Smrg				tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
7780b8e80941Smrg				tex.coord_type_x = 1;
7781b8e80941Smrg				tex.coord_type_y = 1;
7782b8e80941Smrg				tex.coord_type_z = 1;
7783b8e80941Smrg				tex.coord_type_w = 1;
7784b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7785b8e80941Smrg				alu.op = ALU_OP1_MOV;
7786b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7787b8e80941Smrg				alu.dst.sel = tex.src_gpr;
7788b8e80941Smrg				alu.dst.chan = 0;
7789b8e80941Smrg				alu.last = 1;
7790b8e80941Smrg				alu.dst.write = 1;
7791b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7792b8e80941Smrg				if (r)
7793b8e80941Smrg					return r;
7794b8e80941Smrg
7795b8e80941Smrg				r = r600_bytecode_add_tex(ctx->bc, &tex);
7796b8e80941Smrg				if (r)
7797b8e80941Smrg					return r;
7798b8e80941Smrg			}
7799b8e80941Smrg
7800b8e80941Smrg		}
7801b8e80941Smrg
7802b8e80941Smrg		/* for cube forms of lod and bias we need to route things */
7803b8e80941Smrg		if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
7804b8e80941Smrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
7805b8e80941Smrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
7806b8e80941Smrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
7807b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7808b8e80941Smrg			alu.op = ALU_OP1_MOV;
7809b8e80941Smrg			if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
7810b8e80941Smrg			    inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
7811b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
7812b8e80941Smrg			else
7813b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
7814b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
7815b8e80941Smrg			alu.dst.chan = 2;
7816b8e80941Smrg			alu.last = 1;
7817b8e80941Smrg			alu.dst.write = 1;
7818b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7819b8e80941Smrg			if (r)
7820b8e80941Smrg				return r;
7821b8e80941Smrg		}
7822b8e80941Smrg
7823b8e80941Smrg		src_loaded = TRUE;
7824b8e80941Smrg		src_gpr = ctx->temp_reg;
7825b8e80941Smrg	}
7826b8e80941Smrg
7827b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
7828b8e80941Smrg		int temp_h = 0, temp_v = 0;
7829b8e80941Smrg		int start_val = 0;
7830b8e80941Smrg
7831b8e80941Smrg		/* if we've already loaded the src (i.e. CUBE don't reload it). */
7832b8e80941Smrg		if (src_loaded == TRUE)
7833b8e80941Smrg			start_val = 1;
7834b8e80941Smrg		else
7835b8e80941Smrg			src_loaded = TRUE;
7836b8e80941Smrg		for (i = start_val; i < 3; i++) {
7837b8e80941Smrg			int treg = r600_get_temp(ctx);
7838b8e80941Smrg
7839b8e80941Smrg			if (i == 0)
7840b8e80941Smrg				src_gpr = treg;
7841b8e80941Smrg			else if (i == 1)
7842b8e80941Smrg				temp_h = treg;
7843b8e80941Smrg			else
7844b8e80941Smrg				temp_v = treg;
7845b8e80941Smrg
7846b8e80941Smrg			for (j = 0; j < 4; j++) {
7847b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7848b8e80941Smrg				alu.op = ALU_OP1_MOV;
7849b8e80941Smrg                                r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
7850b8e80941Smrg                                alu.dst.sel = treg;
7851b8e80941Smrg                                alu.dst.chan = j;
7852b8e80941Smrg                                if (j == 3)
7853b8e80941Smrg                                   alu.last = 1;
7854b8e80941Smrg                                alu.dst.write = 1;
7855b8e80941Smrg                                r = r600_bytecode_add_alu(ctx->bc, &alu);
7856b8e80941Smrg                                if (r)
7857b8e80941Smrg                                    return r;
7858b8e80941Smrg			}
7859b8e80941Smrg		}
7860b8e80941Smrg		for (i = 1; i < 3; i++) {
7861b8e80941Smrg			/* set gradients h/v */
7862b8e80941Smrg			struct r600_bytecode_tex *t = &grad_offs[n_grad_offs++];
7863b8e80941Smrg			memset(t, 0, sizeof(struct r600_bytecode_tex));
7864b8e80941Smrg			t->op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
7865b8e80941Smrg				FETCH_OP_SET_GRADIENTS_V;
7866b8e80941Smrg			t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
7867b8e80941Smrg			t->sampler_index_mode = sampler_index_mode;
7868b8e80941Smrg			t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS;
7869b8e80941Smrg			t->resource_index_mode = sampler_index_mode;
7870b8e80941Smrg
7871b8e80941Smrg			t->src_gpr = (i == 1) ? temp_h : temp_v;
7872b8e80941Smrg			t->src_sel_x = 0;
7873b8e80941Smrg			t->src_sel_y = 1;
7874b8e80941Smrg			t->src_sel_z = 2;
7875b8e80941Smrg			t->src_sel_w = 3;
7876b8e80941Smrg
7877b8e80941Smrg			t->dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */
7878b8e80941Smrg			t->dst_sel_x = t->dst_sel_y = t->dst_sel_z = t->dst_sel_w = 7;
7879b8e80941Smrg			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
7880b8e80941Smrg				t->coord_type_x = 1;
7881b8e80941Smrg				t->coord_type_y = 1;
7882b8e80941Smrg				t->coord_type_z = 1;
7883b8e80941Smrg				t->coord_type_w = 1;
7884b8e80941Smrg			}
7885b8e80941Smrg		}
7886b8e80941Smrg	}
7887b8e80941Smrg
7888b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
7889b8e80941Smrg		/* Gather4 should follow the same rules as bilinear filtering, but the hardware
7890b8e80941Smrg		 * incorrectly forces nearest filtering if the texture format is integer.
7891b8e80941Smrg		 * The only effect it has on Gather4, which always returns 4 texels for
7892b8e80941Smrg		 * bilinear filtering, is that the final coordinates are off by 0.5 of
7893b8e80941Smrg		 * the texel size.
7894b8e80941Smrg		 *
7895b8e80941Smrg		 * The workaround is to subtract 0.5 from the unnormalized coordinates,
7896b8e80941Smrg		 * or (0.5 / size) from the normalized coordinates.
7897b8e80941Smrg		 */
7898b8e80941Smrg		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
7899b8e80941Smrg		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
7900b8e80941Smrg			int treg = r600_get_temp(ctx);
7901b8e80941Smrg
7902b8e80941Smrg			/* mov array and comparison oordinate to temp_reg if needed */
7903b8e80941Smrg			if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
7904b8e80941Smrg			     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
7905b8e80941Smrg			     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) && !src_loaded) {
7906b8e80941Smrg				int end = inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ? 3 : 2;
7907b8e80941Smrg				for (i = 2; i <= end; i++) {
7908b8e80941Smrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7909b8e80941Smrg					alu.op = ALU_OP1_MOV;
7910b8e80941Smrg					alu.dst.sel = ctx->temp_reg;
7911b8e80941Smrg					alu.dst.chan = i;
7912b8e80941Smrg					alu.dst.write = 1;
7913b8e80941Smrg					alu.last = (i == end);
7914b8e80941Smrg					r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
7915b8e80941Smrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
7916b8e80941Smrg					if (r)
7917b8e80941Smrg						return r;
7918b8e80941Smrg				}
7919b8e80941Smrg			}
7920b8e80941Smrg
7921b8e80941Smrg			if (inst->Texture.Texture == TGSI_TEXTURE_RECT ||
7922b8e80941Smrg			    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
7923b8e80941Smrg				for (i = 0; i < 2; i++) {
7924b8e80941Smrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7925b8e80941Smrg					alu.op = ALU_OP2_ADD;
7926b8e80941Smrg					alu.dst.sel = ctx->temp_reg;
7927b8e80941Smrg					alu.dst.chan = i;
7928b8e80941Smrg					alu.dst.write = 1;
7929b8e80941Smrg					alu.last = i == 1;
7930b8e80941Smrg					if (src_loaded) {
7931b8e80941Smrg						alu.src[0].sel = ctx->temp_reg;
7932b8e80941Smrg						alu.src[0].chan = i;
7933b8e80941Smrg					} else
7934b8e80941Smrg						r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
7935b8e80941Smrg					alu.src[1].sel = V_SQ_ALU_SRC_0_5;
7936b8e80941Smrg					alu.src[1].neg = 1;
7937b8e80941Smrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
7938b8e80941Smrg					if (r)
7939b8e80941Smrg						return r;
7940b8e80941Smrg				}
7941b8e80941Smrg			} else {
7942b8e80941Smrg				/* execute a TXQ */
7943b8e80941Smrg				memset(&tex, 0, sizeof(struct r600_bytecode_tex));
7944b8e80941Smrg				tex.op = FETCH_OP_GET_TEXTURE_RESINFO;
7945b8e80941Smrg				tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
7946b8e80941Smrg				tex.sampler_index_mode = sampler_index_mode;
7947b8e80941Smrg				tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
7948b8e80941Smrg				tex.resource_index_mode = sampler_index_mode;
7949b8e80941Smrg				tex.dst_gpr = treg;
7950b8e80941Smrg				tex.src_sel_x = 4;
7951b8e80941Smrg				tex.src_sel_y = 4;
7952b8e80941Smrg				tex.src_sel_z = 4;
7953b8e80941Smrg				tex.src_sel_w = 4;
7954b8e80941Smrg				tex.dst_sel_x = 0;
7955b8e80941Smrg				tex.dst_sel_y = 1;
7956b8e80941Smrg				tex.dst_sel_z = 7;
7957b8e80941Smrg				tex.dst_sel_w = 7;
7958b8e80941Smrg				r = r600_bytecode_add_tex(ctx->bc, &tex);
7959b8e80941Smrg				if (r)
7960b8e80941Smrg					return r;
7961b8e80941Smrg
7962b8e80941Smrg				/* coord.xy = -0.5 * (1.0/int_to_flt(size)) + coord.xy */
7963b8e80941Smrg				if (ctx->bc->chip_class == CAYMAN) {
7964b8e80941Smrg					/* */
7965b8e80941Smrg					for (i = 0; i < 2; i++) {
7966b8e80941Smrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7967b8e80941Smrg						alu.op = ALU_OP1_INT_TO_FLT;
7968b8e80941Smrg						alu.dst.sel = treg;
7969b8e80941Smrg						alu.dst.chan = i;
7970b8e80941Smrg						alu.dst.write = 1;
7971b8e80941Smrg						alu.src[0].sel = treg;
7972b8e80941Smrg						alu.src[0].chan = i;
7973b8e80941Smrg						alu.last = (i == 1) ? 1 : 0;
7974b8e80941Smrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
7975b8e80941Smrg						if (r)
7976b8e80941Smrg							return r;
7977b8e80941Smrg					}
7978b8e80941Smrg					for (j = 0; j < 2; j++) {
7979b8e80941Smrg						for (i = 0; i < 3; i++) {
7980b8e80941Smrg							memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7981b8e80941Smrg							alu.op = ALU_OP1_RECIP_IEEE;
7982b8e80941Smrg							alu.src[0].sel = treg;
7983b8e80941Smrg							alu.src[0].chan = j;
7984b8e80941Smrg							alu.dst.sel = treg;
7985b8e80941Smrg							alu.dst.chan = i;
7986b8e80941Smrg							if (i == 2)
7987b8e80941Smrg								alu.last = 1;
7988b8e80941Smrg							if (i == j)
7989b8e80941Smrg								alu.dst.write = 1;
7990b8e80941Smrg							r = r600_bytecode_add_alu(ctx->bc, &alu);
7991b8e80941Smrg							if (r)
7992b8e80941Smrg								return r;
7993b8e80941Smrg						}
7994b8e80941Smrg					}
7995b8e80941Smrg				} else {
7996b8e80941Smrg					for (i = 0; i < 2; i++) {
7997b8e80941Smrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7998b8e80941Smrg						alu.op = ALU_OP1_INT_TO_FLT;
7999b8e80941Smrg						alu.dst.sel = treg;
8000b8e80941Smrg						alu.dst.chan = i;
8001b8e80941Smrg						alu.dst.write = 1;
8002b8e80941Smrg						alu.src[0].sel = treg;
8003b8e80941Smrg						alu.src[0].chan = i;
8004b8e80941Smrg						alu.last = 1;
8005b8e80941Smrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
8006b8e80941Smrg						if (r)
8007b8e80941Smrg							return r;
8008b8e80941Smrg					}
8009b8e80941Smrg					for (i = 0; i < 2; i++) {
8010b8e80941Smrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8011b8e80941Smrg						alu.op = ALU_OP1_RECIP_IEEE;
8012b8e80941Smrg						alu.src[0].sel = treg;
8013b8e80941Smrg						alu.src[0].chan = i;
8014b8e80941Smrg						alu.dst.sel = treg;
8015b8e80941Smrg						alu.dst.chan = i;
8016b8e80941Smrg						alu.last = 1;
8017b8e80941Smrg						alu.dst.write = 1;
8018b8e80941Smrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
8019b8e80941Smrg						if (r)
8020b8e80941Smrg							return r;
8021b8e80941Smrg					}
8022b8e80941Smrg				}
8023b8e80941Smrg				for (i = 0; i < 2; i++) {
8024b8e80941Smrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8025b8e80941Smrg					alu.op = ALU_OP3_MULADD;
8026b8e80941Smrg					alu.is_op3 = 1;
8027b8e80941Smrg					alu.dst.sel = ctx->temp_reg;
8028b8e80941Smrg					alu.dst.chan = i;
8029b8e80941Smrg					alu.dst.write = 1;
8030b8e80941Smrg					alu.last = i == 1;
8031b8e80941Smrg					alu.src[0].sel = treg;
8032b8e80941Smrg					alu.src[0].chan = i;
8033b8e80941Smrg					alu.src[1].sel = V_SQ_ALU_SRC_0_5;
8034b8e80941Smrg					alu.src[1].neg = 1;
8035b8e80941Smrg					if (src_loaded) {
8036b8e80941Smrg						alu.src[2].sel = ctx->temp_reg;
8037b8e80941Smrg						alu.src[2].chan = i;
8038b8e80941Smrg					} else
8039b8e80941Smrg						r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
8040b8e80941Smrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
8041b8e80941Smrg					if (r)
8042b8e80941Smrg						return r;
8043b8e80941Smrg				}
8044b8e80941Smrg			}
8045b8e80941Smrg			src_loaded = TRUE;
8046b8e80941Smrg			src_gpr = ctx->temp_reg;
8047b8e80941Smrg		}
8048b8e80941Smrg	}
8049b8e80941Smrg
8050b8e80941Smrg	if (src_requires_loading && !src_loaded) {
8051b8e80941Smrg		for (i = 0; i < 4; i++) {
8052b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8053b8e80941Smrg			alu.op = ALU_OP1_MOV;
8054b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
8055b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
8056b8e80941Smrg			alu.dst.chan = i;
8057b8e80941Smrg			if (i == 3)
8058b8e80941Smrg				alu.last = 1;
8059b8e80941Smrg			alu.dst.write = 1;
8060b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
8061b8e80941Smrg			if (r)
8062b8e80941Smrg				return r;
8063b8e80941Smrg		}
8064b8e80941Smrg		src_loaded = TRUE;
8065b8e80941Smrg		src_gpr = ctx->temp_reg;
8066b8e80941Smrg	}
8067b8e80941Smrg
8068b8e80941Smrg	/* get offset values */
8069b8e80941Smrg	if (inst->Texture.NumOffsets) {
8070b8e80941Smrg		assert(inst->Texture.NumOffsets == 1);
8071b8e80941Smrg
8072b8e80941Smrg		/* The texture offset feature doesn't work with the TXF instruction
8073b8e80941Smrg		 * and must be emulated by adding the offset to the texture coordinates. */
8074b8e80941Smrg		if (txf_add_offsets) {
8075b8e80941Smrg			const struct tgsi_texture_offset *off = inst->TexOffsets;
8076b8e80941Smrg
8077b8e80941Smrg			switch (inst->Texture.Texture) {
8078b8e80941Smrg			case TGSI_TEXTURE_3D:
8079b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8080b8e80941Smrg				alu.op = ALU_OP2_ADD_INT;
8081b8e80941Smrg				alu.src[0].sel = src_gpr;
8082b8e80941Smrg				alu.src[0].chan = 2;
8083b8e80941Smrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8084b8e80941Smrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
8085b8e80941Smrg				alu.dst.sel = src_gpr;
8086b8e80941Smrg				alu.dst.chan = 2;
8087b8e80941Smrg				alu.dst.write = 1;
8088b8e80941Smrg				alu.last = 1;
8089b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
8090b8e80941Smrg				if (r)
8091b8e80941Smrg					return r;
8092b8e80941Smrg				/* fall through */
8093b8e80941Smrg
8094b8e80941Smrg			case TGSI_TEXTURE_2D:
8095b8e80941Smrg			case TGSI_TEXTURE_SHADOW2D:
8096b8e80941Smrg			case TGSI_TEXTURE_RECT:
8097b8e80941Smrg			case TGSI_TEXTURE_SHADOWRECT:
8098b8e80941Smrg			case TGSI_TEXTURE_2D_ARRAY:
8099b8e80941Smrg			case TGSI_TEXTURE_SHADOW2D_ARRAY:
8100b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8101b8e80941Smrg				alu.op = ALU_OP2_ADD_INT;
8102b8e80941Smrg				alu.src[0].sel = src_gpr;
8103b8e80941Smrg				alu.src[0].chan = 1;
8104b8e80941Smrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8105b8e80941Smrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
8106b8e80941Smrg				alu.dst.sel = src_gpr;
8107b8e80941Smrg				alu.dst.chan = 1;
8108b8e80941Smrg				alu.dst.write = 1;
8109b8e80941Smrg				alu.last = 1;
8110b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
8111b8e80941Smrg				if (r)
8112b8e80941Smrg					return r;
8113b8e80941Smrg				/* fall through */
8114b8e80941Smrg
8115b8e80941Smrg			case TGSI_TEXTURE_1D:
8116b8e80941Smrg			case TGSI_TEXTURE_SHADOW1D:
8117b8e80941Smrg			case TGSI_TEXTURE_1D_ARRAY:
8118b8e80941Smrg			case TGSI_TEXTURE_SHADOW1D_ARRAY:
8119b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8120b8e80941Smrg				alu.op = ALU_OP2_ADD_INT;
8121b8e80941Smrg				alu.src[0].sel = src_gpr;
8122b8e80941Smrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8123b8e80941Smrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
8124b8e80941Smrg				alu.dst.sel = src_gpr;
8125b8e80941Smrg				alu.dst.write = 1;
8126b8e80941Smrg				alu.last = 1;
8127b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
8128b8e80941Smrg				if (r)
8129b8e80941Smrg					return r;
8130b8e80941Smrg				break;
8131b8e80941Smrg				/* texture offsets do not apply to other texture targets */
8132b8e80941Smrg			}
8133b8e80941Smrg		} else {
8134b8e80941Smrg			switch (inst->Texture.Texture) {
8135b8e80941Smrg			case TGSI_TEXTURE_3D:
8136b8e80941Smrg				offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
8137b8e80941Smrg				/* fallthrough */
8138b8e80941Smrg			case TGSI_TEXTURE_2D:
8139b8e80941Smrg			case TGSI_TEXTURE_SHADOW2D:
8140b8e80941Smrg			case TGSI_TEXTURE_RECT:
8141b8e80941Smrg			case TGSI_TEXTURE_SHADOWRECT:
8142b8e80941Smrg			case TGSI_TEXTURE_2D_ARRAY:
8143b8e80941Smrg			case TGSI_TEXTURE_SHADOW2D_ARRAY:
8144b8e80941Smrg				offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
8145b8e80941Smrg				/* fallthrough */
8146b8e80941Smrg			case TGSI_TEXTURE_1D:
8147b8e80941Smrg			case TGSI_TEXTURE_SHADOW1D:
8148b8e80941Smrg			case TGSI_TEXTURE_1D_ARRAY:
8149b8e80941Smrg			case TGSI_TEXTURE_SHADOW1D_ARRAY:
8150b8e80941Smrg				offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
8151b8e80941Smrg			}
8152b8e80941Smrg		}
8153b8e80941Smrg	}
8154b8e80941Smrg
8155b8e80941Smrg	/* Obtain the sample index for reading a compressed MSAA color texture.
8156b8e80941Smrg	 * To read the FMASK, we use the ldfptr instruction, which tells us
8157b8e80941Smrg	 * where the samples are stored.
8158b8e80941Smrg	 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
8159b8e80941Smrg	 * which is the identity mapping. Each nibble says which physical sample
8160b8e80941Smrg	 * should be fetched to get that sample.
8161b8e80941Smrg	 *
8162b8e80941Smrg	 * Assume src.z contains the sample index. It should be modified like this:
8163b8e80941Smrg	 *   src.z = (ldfptr() >> (src.z * 4)) & 0xF;
8164b8e80941Smrg	 * Then fetch the texel with src.
8165b8e80941Smrg	 */
8166b8e80941Smrg	if (read_compressed_msaa) {
8167b8e80941Smrg		unsigned sample_chan = 3;
8168b8e80941Smrg		unsigned temp = r600_get_temp(ctx);
8169b8e80941Smrg		assert(src_loaded);
8170b8e80941Smrg
8171b8e80941Smrg		/* temp.w = ldfptr() */
8172b8e80941Smrg		memset(&tex, 0, sizeof(struct r600_bytecode_tex));
8173b8e80941Smrg		tex.op = FETCH_OP_LD;
8174b8e80941Smrg		tex.inst_mod = 1; /* to indicate this is ldfptr */
8175b8e80941Smrg		tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
8176b8e80941Smrg		tex.sampler_index_mode = sampler_index_mode;
8177b8e80941Smrg		tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
8178b8e80941Smrg		tex.resource_index_mode = sampler_index_mode;
8179b8e80941Smrg		tex.src_gpr = src_gpr;
8180b8e80941Smrg		tex.dst_gpr = temp;
8181b8e80941Smrg		tex.dst_sel_x = 7; /* mask out these components */
8182b8e80941Smrg		tex.dst_sel_y = 7;
8183b8e80941Smrg		tex.dst_sel_z = 7;
8184b8e80941Smrg		tex.dst_sel_w = 0; /* store X */
8185b8e80941Smrg		tex.src_sel_x = 0;
8186b8e80941Smrg		tex.src_sel_y = 1;
8187b8e80941Smrg		tex.src_sel_z = 2;
8188b8e80941Smrg		tex.src_sel_w = 3;
8189b8e80941Smrg		tex.offset_x = offset_x;
8190b8e80941Smrg		tex.offset_y = offset_y;
8191b8e80941Smrg		tex.offset_z = offset_z;
8192b8e80941Smrg		r = r600_bytecode_add_tex(ctx->bc, &tex);
8193b8e80941Smrg		if (r)
8194b8e80941Smrg			return r;
8195b8e80941Smrg
8196b8e80941Smrg		/* temp.x = sample_index*4 */
8197b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8198b8e80941Smrg		alu.op = ALU_OP2_MULLO_INT;
8199b8e80941Smrg		alu.src[0].sel = src_gpr;
8200b8e80941Smrg		alu.src[0].chan = sample_chan;
8201b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8202b8e80941Smrg		alu.src[1].value = 4;
8203b8e80941Smrg		alu.dst.sel = temp;
8204b8e80941Smrg		alu.dst.chan = 0;
8205b8e80941Smrg		alu.dst.write = 1;
8206b8e80941Smrg		r = emit_mul_int_op(ctx->bc, &alu);
8207b8e80941Smrg		if (r)
8208b8e80941Smrg			return r;
8209b8e80941Smrg
8210b8e80941Smrg		/* sample_index = temp.w >> temp.x */
8211b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8212b8e80941Smrg		alu.op = ALU_OP2_LSHR_INT;
8213b8e80941Smrg		alu.src[0].sel = temp;
8214b8e80941Smrg		alu.src[0].chan = 3;
8215b8e80941Smrg		alu.src[1].sel = temp;
8216b8e80941Smrg		alu.src[1].chan = 0;
8217b8e80941Smrg		alu.dst.sel = src_gpr;
8218b8e80941Smrg		alu.dst.chan = sample_chan;
8219b8e80941Smrg		alu.dst.write = 1;
8220b8e80941Smrg		alu.last = 1;
8221b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8222b8e80941Smrg		if (r)
8223b8e80941Smrg			return r;
8224b8e80941Smrg
8225b8e80941Smrg		/* sample_index & 0xF */
8226b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8227b8e80941Smrg		alu.op = ALU_OP2_AND_INT;
8228b8e80941Smrg		alu.src[0].sel = src_gpr;
8229b8e80941Smrg		alu.src[0].chan = sample_chan;
8230b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8231b8e80941Smrg		alu.src[1].value = 0xF;
8232b8e80941Smrg		alu.dst.sel = src_gpr;
8233b8e80941Smrg		alu.dst.chan = sample_chan;
8234b8e80941Smrg		alu.dst.write = 1;
8235b8e80941Smrg		alu.last = 1;
8236b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8237b8e80941Smrg		if (r)
8238b8e80941Smrg			return r;
8239b8e80941Smrg#if 0
8240b8e80941Smrg		/* visualize the FMASK */
8241b8e80941Smrg		for (i = 0; i < 4; i++) {
8242b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8243b8e80941Smrg			alu.op = ALU_OP1_INT_TO_FLT;
8244b8e80941Smrg			alu.src[0].sel = src_gpr;
8245b8e80941Smrg			alu.src[0].chan = sample_chan;
8246b8e80941Smrg			alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
8247b8e80941Smrg			alu.dst.chan = i;
8248b8e80941Smrg			alu.dst.write = 1;
8249b8e80941Smrg			alu.last = 1;
8250b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
8251b8e80941Smrg			if (r)
8252b8e80941Smrg				return r;
8253b8e80941Smrg		}
8254b8e80941Smrg		return 0;
8255b8e80941Smrg#endif
8256b8e80941Smrg	}
8257b8e80941Smrg
8258b8e80941Smrg	/* does this shader want a num layers from TXQ for a cube array? */
8259b8e80941Smrg	if (has_txq_cube_array_z) {
8260b8e80941Smrg		int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
8261b8e80941Smrg
8262b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8263b8e80941Smrg		alu.op = ALU_OP1_MOV;
8264b8e80941Smrg
8265b8e80941Smrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
8266b8e80941Smrg		if (ctx->bc->chip_class >= EVERGREEN) {
8267b8e80941Smrg			/* with eg each dword is number of cubes */
8268b8e80941Smrg			alu.src[0].sel += id / 4;
8269b8e80941Smrg			alu.src[0].chan = id % 4;
8270b8e80941Smrg		} else {
8271b8e80941Smrg			/* r600 we have them at channel 2 of the second dword */
8272b8e80941Smrg			alu.src[0].sel += (id * 2) + 1;
8273b8e80941Smrg			alu.src[0].chan = 2;
8274b8e80941Smrg		}
8275b8e80941Smrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
8276b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
8277b8e80941Smrg		alu.last = 1;
8278b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8279b8e80941Smrg		if (r)
8280b8e80941Smrg			return r;
8281b8e80941Smrg		/* disable writemask from texture instruction */
8282b8e80941Smrg		inst->Dst[0].Register.WriteMask &= ~4;
8283b8e80941Smrg	}
8284b8e80941Smrg
8285b8e80941Smrg	opcode = ctx->inst_info->op;
8286b8e80941Smrg	if (opcode == FETCH_OP_GATHER4 &&
8287b8e80941Smrg		inst->TexOffsets[0].File != TGSI_FILE_NULL &&
8288b8e80941Smrg		inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
8289b8e80941Smrg		struct r600_bytecode_tex *t;
8290b8e80941Smrg		opcode = FETCH_OP_GATHER4_O;
8291b8e80941Smrg
8292b8e80941Smrg		/* GATHER4_O/GATHER4_C_O use offset values loaded by
8293b8e80941Smrg		   SET_TEXTURE_OFFSETS instruction. The immediate offset values
8294b8e80941Smrg		   encoded in the instruction are ignored. */
8295b8e80941Smrg		t = &grad_offs[n_grad_offs++];
8296b8e80941Smrg		memset(t, 0, sizeof(struct r600_bytecode_tex));
8297b8e80941Smrg		t->op = FETCH_OP_SET_TEXTURE_OFFSETS;
8298b8e80941Smrg		t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
8299b8e80941Smrg		t->sampler_index_mode = sampler_index_mode;
8300b8e80941Smrg		t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS;
8301b8e80941Smrg		t->resource_index_mode = sampler_index_mode;
8302b8e80941Smrg
8303b8e80941Smrg		t->src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
8304b8e80941Smrg		t->src_sel_x = inst->TexOffsets[0].SwizzleX;
8305b8e80941Smrg		t->src_sel_y = inst->TexOffsets[0].SwizzleY;
8306b8e80941Smrg		if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
8307b8e80941Smrg			 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
8308b8e80941Smrg			/* make sure array index selector is 0, this is just a safety
8309b8e80941Smrg			 * precausion because TGSI seems to emit something strange here */
8310b8e80941Smrg			t->src_sel_z = 4;
8311b8e80941Smrg		else
8312b8e80941Smrg			t->src_sel_z = inst->TexOffsets[0].SwizzleZ;
8313b8e80941Smrg
8314b8e80941Smrg		t->src_sel_w = 4;
8315b8e80941Smrg
8316b8e80941Smrg		t->dst_sel_x = 7;
8317b8e80941Smrg		t->dst_sel_y = 7;
8318b8e80941Smrg		t->dst_sel_z = 7;
8319b8e80941Smrg		t->dst_sel_w = 7;
8320b8e80941Smrg	}
8321b8e80941Smrg
8322b8e80941Smrg	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
8323b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
8324b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
8325b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
8326b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
8327b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
8328b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8329b8e80941Smrg		switch (opcode) {
8330b8e80941Smrg		case FETCH_OP_SAMPLE:
8331b8e80941Smrg			opcode = FETCH_OP_SAMPLE_C;
8332b8e80941Smrg			break;
8333b8e80941Smrg		case FETCH_OP_SAMPLE_L:
8334b8e80941Smrg			opcode = FETCH_OP_SAMPLE_C_L;
8335b8e80941Smrg			break;
8336b8e80941Smrg		case FETCH_OP_SAMPLE_LB:
8337b8e80941Smrg			opcode = FETCH_OP_SAMPLE_C_LB;
8338b8e80941Smrg			break;
8339b8e80941Smrg		case FETCH_OP_SAMPLE_G:
8340b8e80941Smrg			opcode = FETCH_OP_SAMPLE_C_G;
8341b8e80941Smrg			break;
8342b8e80941Smrg		/* Texture gather variants */
8343b8e80941Smrg		case FETCH_OP_GATHER4:
8344b8e80941Smrg			opcode = FETCH_OP_GATHER4_C;
8345b8e80941Smrg			break;
8346b8e80941Smrg		case FETCH_OP_GATHER4_O:
8347b8e80941Smrg			opcode = FETCH_OP_GATHER4_C_O;
8348b8e80941Smrg			break;
8349b8e80941Smrg		}
8350b8e80941Smrg	}
8351b8e80941Smrg
8352b8e80941Smrg	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
8353b8e80941Smrg	tex.op = opcode;
8354b8e80941Smrg
8355b8e80941Smrg	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
8356b8e80941Smrg	tex.sampler_index_mode = sampler_index_mode;
8357b8e80941Smrg	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
8358b8e80941Smrg	tex.resource_index_mode = sampler_index_mode;
8359b8e80941Smrg	tex.src_gpr = src_gpr;
8360b8e80941Smrg	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
8361b8e80941Smrg
8362b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
8363b8e80941Smrg		inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
8364b8e80941Smrg		tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */
8365b8e80941Smrg	}
8366b8e80941Smrg
8367b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
8368b8e80941Smrg		int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
8369b8e80941Smrg		tex.inst_mod = texture_component_select;
8370b8e80941Smrg
8371b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
8372b8e80941Smrg			tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
8373b8e80941Smrg			tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
8374b8e80941Smrg			tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
8375b8e80941Smrg			tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
8376b8e80941Smrg		} else {
8377b8e80941Smrg			/* GATHER4 result order is different from TGSI TG4 */
8378b8e80941Smrg			tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 1 : 7;
8379b8e80941Smrg			tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 2 : 7;
8380b8e80941Smrg			tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 0 : 7;
8381b8e80941Smrg			tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
8382b8e80941Smrg		}
8383b8e80941Smrg	}
8384b8e80941Smrg	else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
8385b8e80941Smrg		tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
8386b8e80941Smrg		tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
8387b8e80941Smrg		tex.dst_sel_z = 7;
8388b8e80941Smrg		tex.dst_sel_w = 7;
8389b8e80941Smrg	}
8390b8e80941Smrg	else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
8391b8e80941Smrg		tex.dst_sel_x = 3;
8392b8e80941Smrg		tex.dst_sel_y = 7;
8393b8e80941Smrg		tex.dst_sel_z = 7;
8394b8e80941Smrg		tex.dst_sel_w = 7;
8395b8e80941Smrg	}
8396b8e80941Smrg	else {
8397b8e80941Smrg		tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
8398b8e80941Smrg		tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
8399b8e80941Smrg		tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
8400b8e80941Smrg		tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
8401b8e80941Smrg	}
8402b8e80941Smrg
8403b8e80941Smrg
8404b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
8405b8e80941Smrg		tex.src_sel_x = 4;
8406b8e80941Smrg		tex.src_sel_y = 4;
8407b8e80941Smrg		tex.src_sel_z = 4;
8408b8e80941Smrg		tex.src_sel_w = 4;
8409b8e80941Smrg	} else if (src_loaded) {
8410b8e80941Smrg		tex.src_sel_x = 0;
8411b8e80941Smrg		tex.src_sel_y = 1;
8412b8e80941Smrg		tex.src_sel_z = 2;
8413b8e80941Smrg		tex.src_sel_w = 3;
8414b8e80941Smrg	} else {
8415b8e80941Smrg		tex.src_sel_x = ctx->src[0].swizzle[0];
8416b8e80941Smrg		tex.src_sel_y = ctx->src[0].swizzle[1];
8417b8e80941Smrg		tex.src_sel_z = ctx->src[0].swizzle[2];
8418b8e80941Smrg		tex.src_sel_w = ctx->src[0].swizzle[3];
8419b8e80941Smrg		tex.src_rel = ctx->src[0].rel;
8420b8e80941Smrg	}
8421b8e80941Smrg
8422b8e80941Smrg	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
8423b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
8424b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
8425b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8426b8e80941Smrg		tex.src_sel_x = 1;
8427b8e80941Smrg		tex.src_sel_y = 0;
8428b8e80941Smrg		tex.src_sel_z = 3;
8429b8e80941Smrg		tex.src_sel_w = 2; /* route Z compare or Lod value into W */
8430b8e80941Smrg	}
8431b8e80941Smrg
8432b8e80941Smrg	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
8433b8e80941Smrg	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
8434b8e80941Smrg		tex.coord_type_x = 1;
8435b8e80941Smrg		tex.coord_type_y = 1;
8436b8e80941Smrg	}
8437b8e80941Smrg	tex.coord_type_z = 1;
8438b8e80941Smrg	tex.coord_type_w = 1;
8439b8e80941Smrg
8440b8e80941Smrg	tex.offset_x = offset_x;
8441b8e80941Smrg	tex.offset_y = offset_y;
8442b8e80941Smrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
8443b8e80941Smrg		(inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
8444b8e80941Smrg		 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
8445b8e80941Smrg		tex.offset_z = 0;
8446b8e80941Smrg	}
8447b8e80941Smrg	else {
8448b8e80941Smrg		tex.offset_z = offset_z;
8449b8e80941Smrg	}
8450b8e80941Smrg
8451b8e80941Smrg	/* Put the depth for comparison in W.
8452b8e80941Smrg	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
8453b8e80941Smrg	 * Some instructions expect the depth in Z. */
8454b8e80941Smrg	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
8455b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
8456b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
8457b8e80941Smrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
8458b8e80941Smrg	    opcode != FETCH_OP_SAMPLE_C_L &&
8459b8e80941Smrg	    opcode != FETCH_OP_SAMPLE_C_LB) {
8460b8e80941Smrg		tex.src_sel_w = tex.src_sel_z;
8461b8e80941Smrg	}
8462b8e80941Smrg
8463b8e80941Smrg	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
8464b8e80941Smrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
8465b8e80941Smrg		if (opcode == FETCH_OP_SAMPLE_C_L ||
8466b8e80941Smrg		    opcode == FETCH_OP_SAMPLE_C_LB) {
8467b8e80941Smrg			/* the array index is read from Y */
8468b8e80941Smrg			tex.coord_type_y = 0;
8469b8e80941Smrg			array_index_offset_channel = tex.src_sel_y;
8470b8e80941Smrg		} else {
8471b8e80941Smrg			/* the array index is read from Z */
8472b8e80941Smrg			tex.coord_type_z = 0;
8473b8e80941Smrg			tex.src_sel_z = tex.src_sel_y;
8474b8e80941Smrg			array_index_offset_channel = tex.src_sel_z;
8475b8e80941Smrg		}
8476b8e80941Smrg	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
8477b8e80941Smrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
8478b8e80941Smrg		tex.coord_type_z = 0;
8479b8e80941Smrg		array_index_offset_channel = tex.src_sel_z;
8480b8e80941Smrg	} else if  ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
8481b8e80941Smrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
8482b8e80941Smrg		    (ctx->bc->chip_class >= EVERGREEN))
8483b8e80941Smrg		/* the array index is read from Z, coordinate will be corrected elsewhere  */
8484b8e80941Smrg		tex.coord_type_z = 0;
8485b8e80941Smrg
8486b8e80941Smrg	/* We have array access to 1D or 2D ARRAY, the coordinates are not int ->
8487b8e80941Smrg	 * evaluate the array index  */
8488b8e80941Smrg	if (array_index_offset_channel >= 0 &&
8489b8e80941Smrg		 opcode != FETCH_OP_LD &&
8490b8e80941Smrg		 opcode != FETCH_OP_GET_TEXTURE_RESINFO) {
8491b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8492b8e80941Smrg		alu.src[0].sel =  tex.src_gpr;
8493b8e80941Smrg		alu.src[0].chan =  array_index_offset_channel;
8494b8e80941Smrg		alu.src[0].rel = tex.src_rel;
8495b8e80941Smrg		alu.op = ALU_OP1_RNDNE;
8496b8e80941Smrg		alu.dst.sel = tex.src_gpr;
8497b8e80941Smrg		alu.dst.chan = array_index_offset_channel;
8498b8e80941Smrg		alu.dst.rel = tex.src_rel;
8499b8e80941Smrg		alu.dst.write = 1;
8500b8e80941Smrg		alu.last = 1;
8501b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8502b8e80941Smrg		if (r)
8503b8e80941Smrg			return r;
8504b8e80941Smrg	}
8505b8e80941Smrg
8506b8e80941Smrg	/* mask unused source components */
8507b8e80941Smrg	if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
8508b8e80941Smrg		switch (inst->Texture.Texture) {
8509b8e80941Smrg		case TGSI_TEXTURE_2D:
8510b8e80941Smrg		case TGSI_TEXTURE_RECT:
8511b8e80941Smrg			tex.src_sel_z = 7;
8512b8e80941Smrg			tex.src_sel_w = 7;
8513b8e80941Smrg			break;
8514b8e80941Smrg		case TGSI_TEXTURE_1D_ARRAY:
8515b8e80941Smrg			tex.src_sel_y = 7;
8516b8e80941Smrg			tex.src_sel_w = 7;
8517b8e80941Smrg			break;
8518b8e80941Smrg		case TGSI_TEXTURE_1D:
8519b8e80941Smrg			tex.src_sel_y = 7;
8520b8e80941Smrg			tex.src_sel_z = 7;
8521b8e80941Smrg			tex.src_sel_w = 7;
8522b8e80941Smrg			break;
8523b8e80941Smrg		}
8524b8e80941Smrg	}
8525b8e80941Smrg
8526b8e80941Smrg	/* Emit set gradient and offset instructions. */
8527b8e80941Smrg	for (i = 0; i < n_grad_offs; ++i) {
8528b8e80941Smrg		r = r600_bytecode_add_tex(ctx->bc, &grad_offs[i]);
8529b8e80941Smrg		if (r)
8530b8e80941Smrg			return r;
8531b8e80941Smrg	}
8532b8e80941Smrg
8533b8e80941Smrg	r = r600_bytecode_add_tex(ctx->bc, &tex);
8534b8e80941Smrg	if (r)
8535b8e80941Smrg		return r;
8536b8e80941Smrg
8537b8e80941Smrg	/* add shadow ambient support  - gallium doesn't do it yet */
8538b8e80941Smrg	return 0;
8539b8e80941Smrg}
8540b8e80941Smrg
8541b8e80941Smrgstatic int find_hw_atomic_counter(struct r600_shader_ctx *ctx,
8542b8e80941Smrg				  struct tgsi_full_src_register *src)
8543b8e80941Smrg{
8544b8e80941Smrg	unsigned i;
8545b8e80941Smrg
8546b8e80941Smrg	if (src->Register.Indirect) {
8547b8e80941Smrg		for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
8548b8e80941Smrg			if (src->Indirect.ArrayID == ctx->shader->atomics[i].array_id)
8549b8e80941Smrg				return ctx->shader->atomics[i].hw_idx;
8550b8e80941Smrg		}
8551b8e80941Smrg	} else {
8552b8e80941Smrg		uint32_t index = src->Register.Index;
8553b8e80941Smrg		for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
8554b8e80941Smrg			if (ctx->shader->atomics[i].buffer_id != (unsigned)src->Dimension.Index)
8555b8e80941Smrg				continue;
8556b8e80941Smrg			if (index > ctx->shader->atomics[i].end)
8557b8e80941Smrg				continue;
8558b8e80941Smrg			if (index < ctx->shader->atomics[i].start)
8559b8e80941Smrg				continue;
8560b8e80941Smrg			uint32_t offset = (index - ctx->shader->atomics[i].start);
8561b8e80941Smrg			return ctx->shader->atomics[i].hw_idx + offset;
8562b8e80941Smrg		}
8563b8e80941Smrg	}
8564b8e80941Smrg	assert(0);
8565b8e80941Smrg	return -1;
8566b8e80941Smrg}
8567b8e80941Smrg
8568b8e80941Smrgstatic int tgsi_set_gds_temp(struct r600_shader_ctx *ctx,
8569b8e80941Smrg			     int *uav_id_p, int *uav_index_mode_p)
8570b8e80941Smrg{
8571b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8572b8e80941Smrg	int uav_id, uav_index_mode = 0;
8573b8e80941Smrg	int r;
8574b8e80941Smrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
8575b8e80941Smrg
8576b8e80941Smrg	uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]);
8577b8e80941Smrg
8578b8e80941Smrg	if (inst->Src[0].Register.Indirect) {
8579b8e80941Smrg		if (is_cm) {
8580b8e80941Smrg			struct r600_bytecode_alu alu;
8581b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8582b8e80941Smrg			alu.op = ALU_OP2_LSHL_INT;
8583b8e80941Smrg			alu.src[0].sel = get_address_file_reg(ctx, inst->Src[0].Indirect.Index);
8584b8e80941Smrg			alu.src[0].chan = 0;
8585b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8586b8e80941Smrg			alu.src[1].value = 2;
8587b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
8588b8e80941Smrg			alu.dst.chan = 0;
8589b8e80941Smrg			alu.dst.write = 1;
8590b8e80941Smrg			alu.last = 1;
8591b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
8592b8e80941Smrg			if (r)
8593b8e80941Smrg				return r;
8594b8e80941Smrg
8595b8e80941Smrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
8596b8e80941Smrg					   ctx->temp_reg, 0,
8597b8e80941Smrg					   ctx->temp_reg, 0,
8598b8e80941Smrg					   V_SQ_ALU_SRC_LITERAL, uav_id * 4);
8599b8e80941Smrg			if (r)
8600b8e80941Smrg				return r;
8601b8e80941Smrg		} else
8602b8e80941Smrg			uav_index_mode = 2;
8603b8e80941Smrg	} else if (is_cm) {
8604b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
8605b8e80941Smrg				   ctx->temp_reg, 0,
8606b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, uav_id * 4,
8607b8e80941Smrg				   0, 0);
8608b8e80941Smrg		if (r)
8609b8e80941Smrg			return r;
8610b8e80941Smrg	}
8611b8e80941Smrg	*uav_id_p = uav_id;
8612b8e80941Smrg	*uav_index_mode_p = uav_index_mode;
8613b8e80941Smrg	return 0;
8614b8e80941Smrg}
8615b8e80941Smrg
8616b8e80941Smrgstatic int tgsi_load_gds(struct r600_shader_ctx *ctx)
8617b8e80941Smrg{
8618b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8619b8e80941Smrg	int r;
8620b8e80941Smrg	struct r600_bytecode_gds gds;
8621b8e80941Smrg	int uav_id = 0;
8622b8e80941Smrg	int uav_index_mode = 0;
8623b8e80941Smrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
8624b8e80941Smrg
8625b8e80941Smrg	r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
8626b8e80941Smrg	if (r)
8627b8e80941Smrg		return r;
8628b8e80941Smrg
8629b8e80941Smrg	memset(&gds, 0, sizeof(struct r600_bytecode_gds));
8630b8e80941Smrg	gds.op = FETCH_OP_GDS_READ_RET;
8631b8e80941Smrg	gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
8632b8e80941Smrg	gds.uav_id = is_cm ? 0 : uav_id;
8633b8e80941Smrg	gds.uav_index_mode = is_cm ? 0 : uav_index_mode;
8634b8e80941Smrg	gds.src_gpr = ctx->temp_reg;
8635b8e80941Smrg	gds.src_sel_x = (is_cm) ? 0 : 4;
8636b8e80941Smrg	gds.src_sel_y = 4;
8637b8e80941Smrg	gds.src_sel_z = 4;
8638b8e80941Smrg	gds.dst_sel_x = 0;
8639b8e80941Smrg	gds.dst_sel_y = 7;
8640b8e80941Smrg	gds.dst_sel_z = 7;
8641b8e80941Smrg	gds.dst_sel_w = 7;
8642b8e80941Smrg	gds.src_gpr2 = 0;
8643b8e80941Smrg	gds.alloc_consume = !is_cm;
8644b8e80941Smrg	r = r600_bytecode_add_gds(ctx->bc, &gds);
8645b8e80941Smrg	if (r)
8646b8e80941Smrg		return r;
8647b8e80941Smrg
8648b8e80941Smrg	ctx->bc->cf_last->vpm = 1;
8649b8e80941Smrg	return 0;
8650b8e80941Smrg}
8651b8e80941Smrg
8652b8e80941Smrg/* this fixes up 1D arrays properly */
8653b8e80941Smrgstatic int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
8654b8e80941Smrg{
8655b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8656b8e80941Smrg	int r, i;
8657b8e80941Smrg	struct r600_bytecode_alu alu;
8658b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
8659b8e80941Smrg
8660b8e80941Smrg	for (i = 0; i < 4; i++) {
8661b8e80941Smrg		bool def_val = true, write_zero = false;
8662b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8663b8e80941Smrg		alu.op = ALU_OP1_MOV;
8664b8e80941Smrg		alu.dst.sel = temp_reg;
8665b8e80941Smrg		alu.dst.chan = i;
8666b8e80941Smrg
8667b8e80941Smrg		switch (inst->Memory.Texture) {
8668b8e80941Smrg		case TGSI_TEXTURE_BUFFER:
8669b8e80941Smrg		case TGSI_TEXTURE_1D:
8670b8e80941Smrg			if (i == 1 || i == 2 || i == 3) {
8671b8e80941Smrg				write_zero = true;
8672b8e80941Smrg			}
8673b8e80941Smrg			break;
8674b8e80941Smrg		case TGSI_TEXTURE_1D_ARRAY:
8675b8e80941Smrg			if (i == 1 || i == 3)
8676b8e80941Smrg				write_zero = true;
8677b8e80941Smrg			else if (i == 2) {
8678b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
8679b8e80941Smrg				def_val = false;
8680b8e80941Smrg			}
8681b8e80941Smrg			break;
8682b8e80941Smrg		case TGSI_TEXTURE_2D:
8683b8e80941Smrg			if (i == 2 || i == 3)
8684b8e80941Smrg				write_zero = true;
8685b8e80941Smrg			break;
8686b8e80941Smrg		default:
8687b8e80941Smrg			if (i == 3)
8688b8e80941Smrg				write_zero = true;
8689b8e80941Smrg			break;
8690b8e80941Smrg		}
8691b8e80941Smrg
8692b8e80941Smrg		if (write_zero) {
8693b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
8694b8e80941Smrg			alu.src[0].value = 0;
8695b8e80941Smrg		} else if (def_val) {
8696b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
8697b8e80941Smrg		}
8698b8e80941Smrg
8699b8e80941Smrg		if (i == 3)
8700b8e80941Smrg			alu.last = 1;
8701b8e80941Smrg		alu.dst.write = 1;
8702b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8703b8e80941Smrg		if (r)
8704b8e80941Smrg			return r;
8705b8e80941Smrg	}
8706b8e80941Smrg	*idx_gpr = temp_reg;
8707b8e80941Smrg	return 0;
8708b8e80941Smrg}
8709b8e80941Smrg
8710b8e80941Smrgstatic int load_buffer_coord(struct r600_shader_ctx *ctx, int src_idx,
8711b8e80941Smrg			     int temp_reg)
8712b8e80941Smrg{
8713b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8714b8e80941Smrg	int r;
8715b8e80941Smrg	if (inst->Src[src_idx].Register.File == TGSI_FILE_IMMEDIATE) {
8716b8e80941Smrg		int value = (ctx->literals[4 * inst->Src[src_idx].Register.Index + inst->Src[src_idx].Register.SwizzleX]);
8717b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
8718b8e80941Smrg				   temp_reg, 0,
8719b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, value >> 2,
8720b8e80941Smrg				   0, 0);
8721b8e80941Smrg		if (r)
8722b8e80941Smrg			return r;
8723b8e80941Smrg	} else {
8724b8e80941Smrg		struct r600_bytecode_alu alu;
8725b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8726b8e80941Smrg		alu.op = ALU_OP2_LSHR_INT;
8727b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[src_idx], 0);
8728b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
8729b8e80941Smrg		alu.src[1].value = 2;
8730b8e80941Smrg		alu.dst.sel = temp_reg;
8731b8e80941Smrg		alu.dst.write = 1;
8732b8e80941Smrg		alu.last = 1;
8733b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8734b8e80941Smrg		if (r)
8735b8e80941Smrg			return r;
8736b8e80941Smrg	}
8737b8e80941Smrg	return 0;
8738b8e80941Smrg}
8739b8e80941Smrg
8740b8e80941Smrgstatic int tgsi_load_buffer(struct r600_shader_ctx *ctx)
8741b8e80941Smrg{
8742b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8743b8e80941Smrg	/* have to work out the offset into the RAT immediate return buffer */
8744b8e80941Smrg	struct r600_bytecode_vtx vtx;
8745b8e80941Smrg	struct r600_bytecode_cf *cf;
8746b8e80941Smrg	int r;
8747b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
8748b8e80941Smrg	unsigned rat_index_mode;
8749b8e80941Smrg	unsigned base;
8750b8e80941Smrg
8751b8e80941Smrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8752b8e80941Smrg	base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE];
8753b8e80941Smrg
8754b8e80941Smrg	r = load_buffer_coord(ctx, 1, temp_reg);
8755b8e80941Smrg	if (r)
8756b8e80941Smrg		return r;
8757b8e80941Smrg	ctx->bc->cf_last->barrier = 1;
8758b8e80941Smrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
8759b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
8760b8e80941Smrg	vtx.buffer_id = inst->Src[0].Register.Index + base;
8761b8e80941Smrg	vtx.buffer_index_mode = rat_index_mode;
8762b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
8763b8e80941Smrg	vtx.src_gpr = temp_reg;
8764b8e80941Smrg	vtx.src_sel_x = 0;
8765b8e80941Smrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
8766b8e80941Smrg	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
8767b8e80941Smrg	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
8768b8e80941Smrg	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
8769b8e80941Smrg	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
8770b8e80941Smrg	vtx.num_format_all = 1;
8771b8e80941Smrg	vtx.format_comp_all = 1;
8772b8e80941Smrg	vtx.srf_mode_all = 0;
8773b8e80941Smrg
8774b8e80941Smrg	if (inst->Dst[0].Register.WriteMask & 8) {
8775b8e80941Smrg		vtx.data_format = FMT_32_32_32_32;
8776b8e80941Smrg		vtx.use_const_fields = 0;
8777b8e80941Smrg	} else if (inst->Dst[0].Register.WriteMask & 4) {
8778b8e80941Smrg		vtx.data_format = FMT_32_32_32;
8779b8e80941Smrg		vtx.use_const_fields = 0;
8780b8e80941Smrg	} else if (inst->Dst[0].Register.WriteMask & 2) {
8781b8e80941Smrg		vtx.data_format = FMT_32_32;
8782b8e80941Smrg		vtx.use_const_fields = 0;
8783b8e80941Smrg	} else {
8784b8e80941Smrg		vtx.data_format = FMT_32;
8785b8e80941Smrg		vtx.use_const_fields = 0;
8786b8e80941Smrg	}
8787b8e80941Smrg
8788b8e80941Smrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
8789b8e80941Smrg	if (r)
8790b8e80941Smrg		return r;
8791b8e80941Smrg	cf = ctx->bc->cf_last;
8792b8e80941Smrg	cf->barrier = 1;
8793b8e80941Smrg	return 0;
8794b8e80941Smrg}
8795b8e80941Smrg
8796b8e80941Smrgstatic int tgsi_load_rat(struct r600_shader_ctx *ctx)
8797b8e80941Smrg{
8798b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8799b8e80941Smrg	/* have to work out the offset into the RAT immediate return buffer */
8800b8e80941Smrg	struct r600_bytecode_vtx vtx;
8801b8e80941Smrg	struct r600_bytecode_cf *cf;
8802b8e80941Smrg	int r;
8803b8e80941Smrg	int idx_gpr;
8804b8e80941Smrg	unsigned format, num_format, format_comp, endian;
8805b8e80941Smrg	const struct util_format_description *desc;
8806b8e80941Smrg	unsigned rat_index_mode;
8807b8e80941Smrg	unsigned immed_base;
8808b8e80941Smrg
8809b8e80941Smrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8810b8e80941Smrg
8811b8e80941Smrg	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
8812b8e80941Smrg	r = load_index_src(ctx, 1, &idx_gpr);
8813b8e80941Smrg	if (r)
8814b8e80941Smrg		return r;
8815b8e80941Smrg
8816b8e80941Smrg	if (rat_index_mode)
8817b8e80941Smrg		egcm_load_index_reg(ctx->bc, 1, false);
8818b8e80941Smrg
8819b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
8820b8e80941Smrg	cf = ctx->bc->cf_last;
8821b8e80941Smrg
8822b8e80941Smrg	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
8823b8e80941Smrg	cf->rat.inst = V_RAT_INST_NOP_RTN;
8824b8e80941Smrg	cf->rat.index_mode = rat_index_mode;
8825b8e80941Smrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
8826b8e80941Smrg	cf->output.gpr = ctx->thread_id_gpr;
8827b8e80941Smrg	cf->output.index_gpr = idx_gpr;
8828b8e80941Smrg	cf->output.comp_mask = 0xf;
8829b8e80941Smrg	cf->output.burst_count = 1;
8830b8e80941Smrg	cf->vpm = 1;
8831b8e80941Smrg	cf->barrier = 1;
8832b8e80941Smrg	cf->mark = 1;
8833b8e80941Smrg	cf->output.elem_size = 0;
8834b8e80941Smrg
8835b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
8836b8e80941Smrg	cf = ctx->bc->cf_last;
8837b8e80941Smrg	cf->barrier = 1;
8838b8e80941Smrg
8839b8e80941Smrg	desc = util_format_description(inst->Memory.Format);
8840b8e80941Smrg	r600_vertex_data_type(inst->Memory.Format,
8841b8e80941Smrg			      &format, &num_format, &format_comp, &endian);
8842b8e80941Smrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
8843b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
8844b8e80941Smrg	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
8845b8e80941Smrg	vtx.buffer_index_mode = rat_index_mode;
8846b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
8847b8e80941Smrg	vtx.src_gpr = ctx->thread_id_gpr;
8848b8e80941Smrg	vtx.src_sel_x = 1;
8849b8e80941Smrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
8850b8e80941Smrg	vtx.dst_sel_x = desc->swizzle[0];
8851b8e80941Smrg	vtx.dst_sel_y = desc->swizzle[1];
8852b8e80941Smrg	vtx.dst_sel_z = desc->swizzle[2];
8853b8e80941Smrg	vtx.dst_sel_w = desc->swizzle[3];
8854b8e80941Smrg	vtx.srf_mode_all = 1;
8855b8e80941Smrg	vtx.data_format = format;
8856b8e80941Smrg	vtx.num_format_all = num_format;
8857b8e80941Smrg	vtx.format_comp_all = format_comp;
8858b8e80941Smrg	vtx.endian = endian;
8859b8e80941Smrg	vtx.offset = 0;
8860b8e80941Smrg	vtx.mega_fetch_count = 3;
8861b8e80941Smrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
8862b8e80941Smrg	if (r)
8863b8e80941Smrg		return r;
8864b8e80941Smrg	cf = ctx->bc->cf_last;
8865b8e80941Smrg	cf->barrier = 1;
8866b8e80941Smrg	return 0;
8867b8e80941Smrg}
8868b8e80941Smrg
8869b8e80941Smrgstatic int tgsi_load_lds(struct r600_shader_ctx *ctx)
8870b8e80941Smrg{
8871b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8872b8e80941Smrg	struct r600_bytecode_alu alu;
8873b8e80941Smrg	int r;
8874b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
8875b8e80941Smrg
8876b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8877b8e80941Smrg	alu.op = ALU_OP1_MOV;
8878b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
8879b8e80941Smrg	alu.dst.sel = temp_reg;
8880b8e80941Smrg	alu.dst.write = 1;
8881b8e80941Smrg	alu.last = 1;
8882b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
8883b8e80941Smrg	if (r)
8884b8e80941Smrg		return r;
8885b8e80941Smrg
8886b8e80941Smrg	r = do_lds_fetch_values(ctx, temp_reg,
8887b8e80941Smrg				ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index, inst->Dst[0].Register.WriteMask);
8888b8e80941Smrg	if (r)
8889b8e80941Smrg		return r;
8890b8e80941Smrg	return 0;
8891b8e80941Smrg}
8892b8e80941Smrg
8893b8e80941Smrgstatic int tgsi_load(struct r600_shader_ctx *ctx)
8894b8e80941Smrg{
8895b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8896b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
8897b8e80941Smrg		return tgsi_load_rat(ctx);
8898b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
8899b8e80941Smrg		return tgsi_load_gds(ctx);
8900b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
8901b8e80941Smrg		return tgsi_load_buffer(ctx);
8902b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
8903b8e80941Smrg		return tgsi_load_lds(ctx);
8904b8e80941Smrg	return 0;
8905b8e80941Smrg}
8906b8e80941Smrg
8907b8e80941Smrgstatic int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx)
8908b8e80941Smrg{
8909b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8910b8e80941Smrg	struct r600_bytecode_cf *cf;
8911b8e80941Smrg	int r, i;
8912b8e80941Smrg	unsigned rat_index_mode;
8913b8e80941Smrg	int lasti;
8914b8e80941Smrg	int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx);
8915b8e80941Smrg
8916b8e80941Smrg	r = load_buffer_coord(ctx, 0, treg2);
8917b8e80941Smrg	if (r)
8918b8e80941Smrg		return r;
8919b8e80941Smrg
8920b8e80941Smrg	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8921b8e80941Smrg	if (rat_index_mode)
8922b8e80941Smrg		egcm_load_index_reg(ctx->bc, 1, false);
8923b8e80941Smrg
8924b8e80941Smrg	for (i = 0; i <= 3; i++) {
8925b8e80941Smrg		struct r600_bytecode_alu alu;
8926b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8927b8e80941Smrg		alu.op = ALU_OP1_MOV;
8928b8e80941Smrg		alu.dst.sel = temp_reg;
8929b8e80941Smrg		alu.dst.chan = i;
8930b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
8931b8e80941Smrg		alu.last = (i == 3);
8932b8e80941Smrg		alu.dst.write = 1;
8933b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8934b8e80941Smrg		if (r)
8935b8e80941Smrg			return r;
8936b8e80941Smrg	}
8937b8e80941Smrg
8938b8e80941Smrg	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
8939b8e80941Smrg	for (i = 0; i <= lasti; i++) {
8940b8e80941Smrg		struct r600_bytecode_alu alu;
8941b8e80941Smrg		if (!((1 << i) & inst->Dst[0].Register.WriteMask))
8942b8e80941Smrg			continue;
8943b8e80941Smrg
8944b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
8945b8e80941Smrg				   temp_reg, 0,
8946b8e80941Smrg				   treg2, 0,
8947b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, i);
8948b8e80941Smrg		if (r)
8949b8e80941Smrg			return r;
8950b8e80941Smrg
8951b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
8952b8e80941Smrg		alu.op = ALU_OP1_MOV;
8953b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
8954b8e80941Smrg		alu.dst.chan = 0;
8955b8e80941Smrg
8956b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
8957b8e80941Smrg		alu.last = 1;
8958b8e80941Smrg		alu.dst.write = 1;
8959b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
8960b8e80941Smrg		if (r)
8961b8e80941Smrg			return r;
8962b8e80941Smrg
8963b8e80941Smrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
8964b8e80941Smrg		cf = ctx->bc->cf_last;
8965b8e80941Smrg
8966b8e80941Smrg		cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE];
8967b8e80941Smrg		cf->rat.inst = V_RAT_INST_STORE_TYPED;
8968b8e80941Smrg		cf->rat.index_mode = rat_index_mode;
8969b8e80941Smrg		cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
8970b8e80941Smrg		cf->output.gpr = ctx->temp_reg;
8971b8e80941Smrg		cf->output.index_gpr = temp_reg;
8972b8e80941Smrg		cf->output.comp_mask = 1;
8973b8e80941Smrg		cf->output.burst_count = 1;
8974b8e80941Smrg		cf->vpm = 1;
8975b8e80941Smrg		cf->barrier = 1;
8976b8e80941Smrg		cf->output.elem_size = 0;
8977b8e80941Smrg	}
8978b8e80941Smrg	return 0;
8979b8e80941Smrg}
8980b8e80941Smrg
8981b8e80941Smrgstatic int tgsi_store_rat(struct r600_shader_ctx *ctx)
8982b8e80941Smrg{
8983b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
8984b8e80941Smrg	struct r600_bytecode_cf *cf;
8985b8e80941Smrg	bool src_requires_loading = false;
8986b8e80941Smrg	int val_gpr, idx_gpr;
8987b8e80941Smrg	int r, i;
8988b8e80941Smrg	unsigned rat_index_mode;
8989b8e80941Smrg
8990b8e80941Smrg	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
8991b8e80941Smrg
8992b8e80941Smrg	r = load_index_src(ctx, 0, &idx_gpr);
8993b8e80941Smrg	if (r)
8994b8e80941Smrg		return r;
8995b8e80941Smrg
8996b8e80941Smrg	if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY)
8997b8e80941Smrg		src_requires_loading = true;
8998b8e80941Smrg
8999b8e80941Smrg	if (src_requires_loading) {
9000b8e80941Smrg		struct r600_bytecode_alu alu;
9001b8e80941Smrg		for (i = 0; i < 4; i++) {
9002b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9003b8e80941Smrg			alu.op = ALU_OP1_MOV;
9004b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9005b8e80941Smrg			alu.dst.chan = i;
9006b8e80941Smrg
9007b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
9008b8e80941Smrg			if (i == 3)
9009b8e80941Smrg				alu.last = 1;
9010b8e80941Smrg			alu.dst.write = 1;
9011b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9012b8e80941Smrg			if (r)
9013b8e80941Smrg				return r;
9014b8e80941Smrg		}
9015b8e80941Smrg		val_gpr = ctx->temp_reg;
9016b8e80941Smrg	} else
9017b8e80941Smrg		val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
9018b8e80941Smrg	if (rat_index_mode)
9019b8e80941Smrg		egcm_load_index_reg(ctx->bc, 1, false);
9020b8e80941Smrg
9021b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
9022b8e80941Smrg	cf = ctx->bc->cf_last;
9023b8e80941Smrg
9024b8e80941Smrg	cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
9025b8e80941Smrg	cf->rat.inst = V_RAT_INST_STORE_TYPED;
9026b8e80941Smrg	cf->rat.index_mode = rat_index_mode;
9027b8e80941Smrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
9028b8e80941Smrg	cf->output.gpr = val_gpr;
9029b8e80941Smrg	cf->output.index_gpr = idx_gpr;
9030b8e80941Smrg	cf->output.comp_mask = 0xf;
9031b8e80941Smrg	cf->output.burst_count = 1;
9032b8e80941Smrg	cf->vpm = 1;
9033b8e80941Smrg	cf->barrier = 1;
9034b8e80941Smrg	cf->output.elem_size = 0;
9035b8e80941Smrg	return 0;
9036b8e80941Smrg}
9037b8e80941Smrg
9038b8e80941Smrgstatic int tgsi_store_lds(struct r600_shader_ctx *ctx)
9039b8e80941Smrg{
9040b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9041b8e80941Smrg	struct r600_bytecode_alu alu;
9042b8e80941Smrg	int r, i, lasti;
9043b8e80941Smrg	int write_mask = inst->Dst[0].Register.WriteMask;
9044b8e80941Smrg	int temp_reg = r600_get_temp(ctx);
9045b8e80941Smrg
9046b8e80941Smrg	/* LDS write */
9047b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9048b8e80941Smrg	alu.op = ALU_OP1_MOV;
9049b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9050b8e80941Smrg	alu.dst.sel = temp_reg;
9051b8e80941Smrg	alu.dst.write = 1;
9052b8e80941Smrg	alu.last = 1;
9053b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
9054b8e80941Smrg	if (r)
9055b8e80941Smrg		return r;
9056b8e80941Smrg
9057b8e80941Smrg	lasti = tgsi_last_instruction(write_mask);
9058b8e80941Smrg	for (i = 1; i <= lasti; i++) {
9059b8e80941Smrg		if (!(write_mask & (1 << i)))
9060b8e80941Smrg			continue;
9061b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
9062b8e80941Smrg				   temp_reg, i,
9063b8e80941Smrg				   temp_reg, 0,
9064b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
9065b8e80941Smrg		if (r)
9066b8e80941Smrg			return r;
9067b8e80941Smrg	}
9068b8e80941Smrg	for (i = 0; i <= lasti; i++) {
9069b8e80941Smrg		if (!(write_mask & (1 << i)))
9070b8e80941Smrg			continue;
9071b8e80941Smrg
9072b8e80941Smrg		if ((i == 0 && ((write_mask & 3) == 3)) ||
9073b8e80941Smrg		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
9074b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9075b8e80941Smrg			alu.op = LDS_OP3_LDS_WRITE_REL;
9076b8e80941Smrg
9077b8e80941Smrg			alu.src[0].sel = temp_reg;
9078b8e80941Smrg			alu.src[0].chan = i;
9079b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
9080b8e80941Smrg			r600_bytecode_src(&alu.src[2], &ctx->src[1], i + 1);
9081b8e80941Smrg			alu.last = 1;
9082b8e80941Smrg			alu.is_lds_idx_op = true;
9083b8e80941Smrg			alu.lds_idx = 1;
9084b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9085b8e80941Smrg			if (r)
9086b8e80941Smrg				return r;
9087b8e80941Smrg			i += 1;
9088b8e80941Smrg			continue;
9089b8e80941Smrg		}
9090b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9091b8e80941Smrg		alu.op = LDS_OP2_LDS_WRITE;
9092b8e80941Smrg
9093b8e80941Smrg		alu.src[0].sel = temp_reg;
9094b8e80941Smrg		alu.src[0].chan = i;
9095b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
9096b8e80941Smrg
9097b8e80941Smrg		alu.last = 1;
9098b8e80941Smrg		alu.is_lds_idx_op = true;
9099b8e80941Smrg
9100b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9101b8e80941Smrg		if (r)
9102b8e80941Smrg			return r;
9103b8e80941Smrg	}
9104b8e80941Smrg	return 0;
9105b8e80941Smrg}
9106b8e80941Smrg
9107b8e80941Smrgstatic int tgsi_store(struct r600_shader_ctx *ctx)
9108b8e80941Smrg{
9109b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9110b8e80941Smrg	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
9111b8e80941Smrg		return tgsi_store_buffer_rat(ctx);
9112b8e80941Smrg	else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
9113b8e80941Smrg		return tgsi_store_lds(ctx);
9114b8e80941Smrg	else
9115b8e80941Smrg		return tgsi_store_rat(ctx);
9116b8e80941Smrg}
9117b8e80941Smrg
9118b8e80941Smrgstatic int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
9119b8e80941Smrg{
9120b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9121b8e80941Smrg	/* have to work out the offset into the RAT immediate return buffer */
9122b8e80941Smrg	struct r600_bytecode_alu alu;
9123b8e80941Smrg	struct r600_bytecode_vtx vtx;
9124b8e80941Smrg	struct r600_bytecode_cf *cf;
9125b8e80941Smrg	int r;
9126b8e80941Smrg	int idx_gpr;
9127b8e80941Smrg	unsigned format, num_format, format_comp, endian;
9128b8e80941Smrg	const struct util_format_description *desc;
9129b8e80941Smrg	unsigned rat_index_mode;
9130b8e80941Smrg	unsigned immed_base;
9131b8e80941Smrg	unsigned rat_base;
9132b8e80941Smrg
9133b8e80941Smrg	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
9134b8e80941Smrg	rat_base = ctx->shader->rat_base;
9135b8e80941Smrg
9136b8e80941Smrg        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
9137b8e80941Smrg		immed_base += ctx->info.file_count[TGSI_FILE_IMAGE];
9138b8e80941Smrg		rat_base += ctx->info.file_count[TGSI_FILE_IMAGE];
9139b8e80941Smrg
9140b8e80941Smrg		r = load_buffer_coord(ctx, 1, ctx->temp_reg);
9141b8e80941Smrg		if (r)
9142b8e80941Smrg			return r;
9143b8e80941Smrg		idx_gpr = ctx->temp_reg;
9144b8e80941Smrg	} else {
9145b8e80941Smrg		r = load_index_src(ctx, 1, &idx_gpr);
9146b8e80941Smrg		if (r)
9147b8e80941Smrg			return r;
9148b8e80941Smrg	}
9149b8e80941Smrg
9150b8e80941Smrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
9151b8e80941Smrg
9152b8e80941Smrg	if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
9153848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9154b8e80941Smrg		alu.op = ALU_OP1_MOV;
9155b8e80941Smrg		alu.dst.sel = ctx->thread_id_gpr;
9156b8e80941Smrg		alu.dst.chan = 0;
9157848b8605Smrg		alu.dst.write = 1;
9158b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
9159848b8605Smrg		alu.last = 1;
9160848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9161848b8605Smrg		if (r)
9162848b8605Smrg			return r;
9163848b8605Smrg
9164848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9165b8e80941Smrg		alu.op = ALU_OP1_MOV;
9166b8e80941Smrg		alu.dst.sel = ctx->thread_id_gpr;
9167b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN)
9168b8e80941Smrg			alu.dst.chan = 2;
9169b8e80941Smrg		else
9170b8e80941Smrg			alu.dst.chan = 3;
9171848b8605Smrg		alu.dst.write = 1;
9172b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
9173848b8605Smrg		alu.last = 1;
9174848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9175848b8605Smrg		if (r)
9176848b8605Smrg			return r;
9177b8e80941Smrg	} else {
9178b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9179b8e80941Smrg		alu.op = ALU_OP1_MOV;
9180b8e80941Smrg		alu.dst.sel = ctx->thread_id_gpr;
9181b8e80941Smrg		alu.dst.chan = 0;
9182b8e80941Smrg		alu.dst.write = 1;
9183b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
9184b8e80941Smrg		alu.last = 1;
9185b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9186b8e80941Smrg		if (r)
9187b8e80941Smrg			return r;
9188b8e80941Smrg	}
9189b8e80941Smrg
9190b8e80941Smrg	if (rat_index_mode)
9191b8e80941Smrg		egcm_load_index_reg(ctx->bc, 1, false);
9192b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
9193b8e80941Smrg	cf = ctx->bc->cf_last;
9194b8e80941Smrg
9195b8e80941Smrg	cf->rat.id = rat_base + inst->Src[0].Register.Index;
9196b8e80941Smrg	cf->rat.inst = ctx->inst_info->op;
9197b8e80941Smrg	cf->rat.index_mode = rat_index_mode;
9198b8e80941Smrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
9199b8e80941Smrg	cf->output.gpr = ctx->thread_id_gpr;
9200b8e80941Smrg	cf->output.index_gpr = idx_gpr;
9201b8e80941Smrg	cf->output.comp_mask = 0xf;
9202b8e80941Smrg	cf->output.burst_count = 1;
9203b8e80941Smrg	cf->vpm = 1;
9204b8e80941Smrg	cf->barrier = 1;
9205b8e80941Smrg	cf->mark = 1;
9206b8e80941Smrg	cf->output.elem_size = 0;
9207b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
9208b8e80941Smrg	cf = ctx->bc->cf_last;
9209b8e80941Smrg	cf->barrier = 1;
9210b8e80941Smrg	cf->cf_addr = 1;
9211b8e80941Smrg
9212b8e80941Smrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
9213b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
9214b8e80941Smrg		desc = util_format_description(inst->Memory.Format);
9215b8e80941Smrg		r600_vertex_data_type(inst->Memory.Format,
9216b8e80941Smrg				      &format, &num_format, &format_comp, &endian);
9217b8e80941Smrg		vtx.dst_sel_x = desc->swizzle[0];
9218b8e80941Smrg	} else {
9219b8e80941Smrg		format = FMT_32;
9220b8e80941Smrg		num_format = 1;
9221b8e80941Smrg		format_comp = 0;
9222b8e80941Smrg		endian = 0;
9223b8e80941Smrg		vtx.dst_sel_x = 0;
9224b8e80941Smrg	}
9225b8e80941Smrg	vtx.op = FETCH_OP_VFETCH;
9226b8e80941Smrg	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
9227b8e80941Smrg	vtx.buffer_index_mode = rat_index_mode;
9228b8e80941Smrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
9229b8e80941Smrg	vtx.src_gpr = ctx->thread_id_gpr;
9230b8e80941Smrg	vtx.src_sel_x = 1;
9231b8e80941Smrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
9232b8e80941Smrg	vtx.dst_sel_y = 7;
9233b8e80941Smrg	vtx.dst_sel_z = 7;
9234b8e80941Smrg	vtx.dst_sel_w = 7;
9235b8e80941Smrg	vtx.use_const_fields = 0;
9236b8e80941Smrg	vtx.srf_mode_all = 1;
9237b8e80941Smrg	vtx.data_format = format;
9238b8e80941Smrg	vtx.num_format_all = num_format;
9239b8e80941Smrg	vtx.format_comp_all = format_comp;
9240b8e80941Smrg	vtx.endian = endian;
9241b8e80941Smrg	vtx.offset = 0;
9242b8e80941Smrg	vtx.mega_fetch_count = 0xf;
9243b8e80941Smrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
9244b8e80941Smrg	if (r)
9245b8e80941Smrg		return r;
9246b8e80941Smrg	cf = ctx->bc->cf_last;
9247b8e80941Smrg	cf->vpm = 1;
9248b8e80941Smrg	cf->barrier = 1;
9249b8e80941Smrg	return 0;
9250b8e80941Smrg}
9251b8e80941Smrg
9252b8e80941Smrgstatic int get_gds_op(int opcode)
9253b8e80941Smrg{
9254b8e80941Smrg	switch (opcode) {
9255b8e80941Smrg	case TGSI_OPCODE_ATOMUADD:
9256b8e80941Smrg		return FETCH_OP_GDS_ADD_RET;
9257b8e80941Smrg	case TGSI_OPCODE_ATOMAND:
9258b8e80941Smrg		return FETCH_OP_GDS_AND_RET;
9259b8e80941Smrg	case TGSI_OPCODE_ATOMOR:
9260b8e80941Smrg		return FETCH_OP_GDS_OR_RET;
9261b8e80941Smrg	case TGSI_OPCODE_ATOMXOR:
9262b8e80941Smrg		return FETCH_OP_GDS_XOR_RET;
9263b8e80941Smrg	case TGSI_OPCODE_ATOMUMIN:
9264b8e80941Smrg		return FETCH_OP_GDS_MIN_UINT_RET;
9265b8e80941Smrg	case TGSI_OPCODE_ATOMUMAX:
9266b8e80941Smrg		return FETCH_OP_GDS_MAX_UINT_RET;
9267b8e80941Smrg	case TGSI_OPCODE_ATOMXCHG:
9268b8e80941Smrg		return FETCH_OP_GDS_XCHG_RET;
9269b8e80941Smrg	case TGSI_OPCODE_ATOMCAS:
9270b8e80941Smrg		return FETCH_OP_GDS_CMP_XCHG_RET;
9271b8e80941Smrg	default:
9272b8e80941Smrg		return -1;
9273b8e80941Smrg	}
9274b8e80941Smrg}
9275b8e80941Smrg
9276b8e80941Smrgstatic int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
9277b8e80941Smrg{
9278b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9279b8e80941Smrg	struct r600_bytecode_gds gds;
9280b8e80941Smrg	struct r600_bytecode_alu alu;
9281b8e80941Smrg	int gds_op = get_gds_op(inst->Instruction.Opcode);
9282b8e80941Smrg	int r;
9283b8e80941Smrg	int uav_id = 0;
9284b8e80941Smrg	int uav_index_mode = 0;
9285b8e80941Smrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
9286b8e80941Smrg
9287b8e80941Smrg	if (gds_op == -1) {
9288b8e80941Smrg		fprintf(stderr, "unknown GDS op for opcode %d\n", inst->Instruction.Opcode);
9289b8e80941Smrg		return -1;
9290b8e80941Smrg	}
9291b8e80941Smrg
9292b8e80941Smrg	r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
9293b8e80941Smrg	if (r)
9294b8e80941Smrg		return r;
9295b8e80941Smrg
9296b8e80941Smrg	if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) {
9297b8e80941Smrg		if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) {
9298b8e80941Smrg			int value = (ctx->literals[4 * inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]);
9299848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9300b8e80941Smrg			alu.op = ALU_OP1_MOV;
9301b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9302b8e80941Smrg			alu.dst.chan = is_cm ? 2 : 1;
9303b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
9304b8e80941Smrg			alu.src[0].value = value;
9305b8e80941Smrg			alu.last = 1;
9306848b8605Smrg			alu.dst.write = 1;
9307b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9308b8e80941Smrg			if (r)
9309b8e80941Smrg				return r;
9310b8e80941Smrg		} else {
9311b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9312b8e80941Smrg			alu.op = ALU_OP1_MOV;
9313b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9314b8e80941Smrg			alu.dst.chan = is_cm ? 2 : 1;
9315b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
9316848b8605Smrg			alu.last = 1;
9317b8e80941Smrg			alu.dst.write = 1;
9318848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9319848b8605Smrg			if (r)
9320848b8605Smrg				return r;
9321848b8605Smrg		}
9322848b8605Smrg	}
9323b8e80941Smrg	if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) {
9324b8e80941Smrg		int value = (ctx->literals[4 * inst->Src[2].Register.Index + inst->Src[2].Register.SwizzleX]);
9325b8e80941Smrg		int abs_value = abs(value);
9326b8e80941Smrg		if (abs_value != value && gds_op == FETCH_OP_GDS_ADD_RET)
9327b8e80941Smrg			gds_op = FETCH_OP_GDS_SUB_RET;
9328848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9329848b8605Smrg		alu.op = ALU_OP1_MOV;
9330b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
9331b8e80941Smrg		alu.dst.chan = is_cm ? 1 : 0;
9332b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
9333b8e80941Smrg		alu.src[0].value = abs_value;
9334848b8605Smrg		alu.last = 1;
9335b8e80941Smrg		alu.dst.write = 1;
9336b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9337b8e80941Smrg		if (r)
9338b8e80941Smrg			return r;
9339b8e80941Smrg	} else {
9340b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9341b8e80941Smrg		alu.op = ALU_OP1_MOV;
9342b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
9343b8e80941Smrg		alu.dst.chan = is_cm ? 1 : 0;
9344b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
9345b8e80941Smrg		alu.last = 1;
9346b8e80941Smrg		alu.dst.write = 1;
9347848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9348848b8605Smrg		if (r)
9349848b8605Smrg			return r;
9350848b8605Smrg	}
9351848b8605Smrg
9352848b8605Smrg
9353b8e80941Smrg	memset(&gds, 0, sizeof(struct r600_bytecode_gds));
9354b8e80941Smrg	gds.op = gds_op;
9355b8e80941Smrg	gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
9356b8e80941Smrg	gds.uav_id = is_cm ? 0 : uav_id;
9357b8e80941Smrg	gds.uav_index_mode = is_cm ? 0 : uav_index_mode;
9358b8e80941Smrg	gds.src_gpr = ctx->temp_reg;
9359b8e80941Smrg	gds.src_gpr2 = 0;
9360b8e80941Smrg	gds.src_sel_x = is_cm ? 0 : 4;
9361b8e80941Smrg	gds.src_sel_y = is_cm ? 1 : 0;
9362b8e80941Smrg	if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET)
9363b8e80941Smrg		gds.src_sel_z = is_cm ? 2 : 1;
9364b8e80941Smrg	else
9365b8e80941Smrg		gds.src_sel_z = 7;
9366b8e80941Smrg	gds.dst_sel_x = 0;
9367b8e80941Smrg	gds.dst_sel_y = 7;
9368b8e80941Smrg	gds.dst_sel_z = 7;
9369b8e80941Smrg	gds.dst_sel_w = 7;
9370b8e80941Smrg	gds.alloc_consume = !is_cm;
9371b8e80941Smrg
9372b8e80941Smrg	r = r600_bytecode_add_gds(ctx->bc, &gds);
9373b8e80941Smrg	if (r)
9374b8e80941Smrg		return r;
9375b8e80941Smrg	ctx->bc->cf_last->vpm = 1;
9376b8e80941Smrg	return 0;
9377b8e80941Smrg}
9378848b8605Smrg
9379b8e80941Smrgstatic int get_lds_op(int opcode)
9380b8e80941Smrg{
9381b8e80941Smrg	switch (opcode) {
9382b8e80941Smrg	case TGSI_OPCODE_ATOMUADD:
9383b8e80941Smrg		return LDS_OP2_LDS_ADD_RET;
9384b8e80941Smrg	case TGSI_OPCODE_ATOMAND:
9385b8e80941Smrg		return LDS_OP2_LDS_AND_RET;
9386b8e80941Smrg	case TGSI_OPCODE_ATOMOR:
9387b8e80941Smrg		return LDS_OP2_LDS_OR_RET;
9388b8e80941Smrg	case TGSI_OPCODE_ATOMXOR:
9389b8e80941Smrg		return LDS_OP2_LDS_XOR_RET;
9390b8e80941Smrg	case TGSI_OPCODE_ATOMUMIN:
9391b8e80941Smrg		return LDS_OP2_LDS_MIN_UINT_RET;
9392b8e80941Smrg	case TGSI_OPCODE_ATOMUMAX:
9393b8e80941Smrg		return LDS_OP2_LDS_MAX_UINT_RET;
9394b8e80941Smrg	case TGSI_OPCODE_ATOMIMIN:
9395b8e80941Smrg		return LDS_OP2_LDS_MIN_INT_RET;
9396b8e80941Smrg	case TGSI_OPCODE_ATOMIMAX:
9397b8e80941Smrg		return LDS_OP2_LDS_MAX_INT_RET;
9398b8e80941Smrg	case TGSI_OPCODE_ATOMXCHG:
9399b8e80941Smrg		return LDS_OP2_LDS_XCHG_RET;
9400b8e80941Smrg	case TGSI_OPCODE_ATOMCAS:
9401b8e80941Smrg		return LDS_OP3_LDS_CMP_XCHG_RET;
9402b8e80941Smrg	default:
9403b8e80941Smrg		return -1;
9404848b8605Smrg	}
9405b8e80941Smrg}
9406848b8605Smrg
9407b8e80941Smrgstatic int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
9408b8e80941Smrg{
9409b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9410b8e80941Smrg	int lds_op = get_lds_op(inst->Instruction.Opcode);
9411b8e80941Smrg	int r;
9412848b8605Smrg
9413b8e80941Smrg	struct r600_bytecode_alu alu;
9414b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9415b8e80941Smrg	alu.op = lds_op;
9416b8e80941Smrg	alu.is_lds_idx_op = true;
9417b8e80941Smrg	alu.last = 1;
9418b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
9419b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[2], 0);
9420b8e80941Smrg	if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET)
9421b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[3], 0);
9422b8e80941Smrg	else
9423b8e80941Smrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
9424b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
9425b8e80941Smrg	if (r)
9426b8e80941Smrg		return r;
9427848b8605Smrg
9428b8e80941Smrg	/* then read from LDS_OQ_A_POP */
9429b8e80941Smrg	memset(&alu, 0, sizeof(alu));
9430848b8605Smrg
9431b8e80941Smrg	alu.op = ALU_OP1_MOV;
9432b8e80941Smrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
9433b8e80941Smrg	alu.src[0].chan = 0;
9434b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
9435b8e80941Smrg	alu.dst.write = 1;
9436b8e80941Smrg	alu.last = 1;
9437b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
9438b8e80941Smrg	if (r)
9439b8e80941Smrg		return r;
9440848b8605Smrg
9441b8e80941Smrg	return 0;
9442b8e80941Smrg}
9443848b8605Smrg
9444b8e80941Smrgstatic int tgsi_atomic_op(struct r600_shader_ctx *ctx)
9445b8e80941Smrg{
9446b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9447b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
9448b8e80941Smrg		return tgsi_atomic_op_rat(ctx);
9449b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
9450b8e80941Smrg		return tgsi_atomic_op_gds(ctx);
9451b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
9452b8e80941Smrg		return tgsi_atomic_op_rat(ctx);
9453b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
9454b8e80941Smrg		return tgsi_atomic_op_lds(ctx);
9455b8e80941Smrg	return 0;
9456b8e80941Smrg}
9457848b8605Smrg
9458b8e80941Smrgstatic int tgsi_resq(struct r600_shader_ctx *ctx)
9459b8e80941Smrg{
9460b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9461b8e80941Smrg	unsigned sampler_index_mode;
9462b8e80941Smrg	struct r600_bytecode_tex tex;
9463b8e80941Smrg	int r;
9464b8e80941Smrg	boolean has_txq_cube_array_z = false;
9465848b8605Smrg
9466b8e80941Smrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
9467b8e80941Smrg	    (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
9468b8e80941Smrg		if (ctx->bc->chip_class < EVERGREEN)
9469b8e80941Smrg			ctx->shader->uses_tex_buffers = true;
9470b8e80941Smrg		unsigned eg_buffer_base = 0;
9471b8e80941Smrg		eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET;
9472b8e80941Smrg		if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
9473b8e80941Smrg			eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE];
9474b8e80941Smrg		return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset, eg_buffer_base);
9475848b8605Smrg	}
9476848b8605Smrg
9477b8e80941Smrg	if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY &&
9478b8e80941Smrg	    inst->Dst[0].Register.WriteMask & 4) {
9479b8e80941Smrg		ctx->shader->has_txq_cube_array_z_comp = true;
9480b8e80941Smrg		has_txq_cube_array_z = true;
9481848b8605Smrg	}
9482848b8605Smrg
9483b8e80941Smrg	sampler_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
9484b8e80941Smrg	if (sampler_index_mode)
9485b8e80941Smrg		egcm_load_index_reg(ctx->bc, 1, false);
9486848b8605Smrg
9487848b8605Smrg
9488b8e80941Smrg	/* does this shader want a num layers from TXQ for a cube array? */
9489b8e80941Smrg	if (has_txq_cube_array_z) {
9490b8e80941Smrg		int id = tgsi_tex_get_src_gpr(ctx, 0) + ctx->shader->image_size_const_offset;
9491b8e80941Smrg		struct r600_bytecode_alu alu;
9492848b8605Smrg
9493b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9494b8e80941Smrg		alu.op = ALU_OP1_MOV;
9495848b8605Smrg
9496b8e80941Smrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
9497b8e80941Smrg		/* with eg each dword is either number of cubes */
9498b8e80941Smrg		alu.src[0].sel += id / 4;
9499b8e80941Smrg		alu.src[0].chan = id % 4;
9500b8e80941Smrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
9501b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
9502b8e80941Smrg		alu.last = 1;
9503b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9504b8e80941Smrg		if (r)
9505b8e80941Smrg			return r;
9506b8e80941Smrg		/* disable writemask from texture instruction */
9507b8e80941Smrg		inst->Dst[0].Register.WriteMask &= ~4;
9508848b8605Smrg	}
9509b8e80941Smrg	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
9510b8e80941Smrg	tex.op = ctx->inst_info->op;
9511b8e80941Smrg	tex.sampler_id = R600_IMAGE_REAL_RESOURCE_OFFSET + inst->Src[0].Register.Index;
9512b8e80941Smrg	tex.sampler_index_mode = sampler_index_mode;
9513b8e80941Smrg	tex.resource_id = tex.sampler_id;
9514b8e80941Smrg	tex.resource_index_mode = sampler_index_mode;
9515b8e80941Smrg	tex.src_sel_x = 4;
9516b8e80941Smrg	tex.src_sel_y = 4;
9517b8e80941Smrg	tex.src_sel_z = 4;
9518b8e80941Smrg	tex.src_sel_w = 4;
9519b8e80941Smrg	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
9520b8e80941Smrg	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
9521b8e80941Smrg	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
9522b8e80941Smrg	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
9523b8e80941Smrg	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
9524848b8605Smrg	r = r600_bytecode_add_tex(ctx->bc, &tex);
9525848b8605Smrg	if (r)
9526848b8605Smrg		return r;
9527848b8605Smrg
9528848b8605Smrg	return 0;
9529848b8605Smrg}
9530848b8605Smrg
9531848b8605Smrgstatic int tgsi_lrp(struct r600_shader_ctx *ctx)
9532848b8605Smrg{
9533848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9534848b8605Smrg	struct r600_bytecode_alu alu;
9535b8e80941Smrg	unsigned lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
9536b8e80941Smrg	struct r600_bytecode_alu_src srcs[2][4];
9537848b8605Smrg	unsigned i;
9538848b8605Smrg	int r;
9539848b8605Smrg
9540848b8605Smrg	/* optimize if it's just an equal balance */
9541848b8605Smrg	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
9542848b8605Smrg		for (i = 0; i < lasti + 1; i++) {
9543848b8605Smrg			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9544848b8605Smrg				continue;
9545848b8605Smrg
9546848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9547848b8605Smrg			alu.op = ALU_OP2_ADD;
9548848b8605Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
9549848b8605Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
9550848b8605Smrg			alu.omod = 3;
9551848b8605Smrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
9552848b8605Smrg			alu.dst.chan = i;
9553848b8605Smrg			if (i == lasti) {
9554848b8605Smrg				alu.last = 1;
9555848b8605Smrg			}
9556848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9557848b8605Smrg			if (r)
9558848b8605Smrg				return r;
9559848b8605Smrg		}
9560848b8605Smrg		return 0;
9561848b8605Smrg	}
9562848b8605Smrg
9563848b8605Smrg	/* 1 - src0 */
9564848b8605Smrg	for (i = 0; i < lasti + 1; i++) {
9565848b8605Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9566848b8605Smrg			continue;
9567848b8605Smrg
9568848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9569848b8605Smrg		alu.op = ALU_OP2_ADD;
9570848b8605Smrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
9571848b8605Smrg		alu.src[0].chan = 0;
9572848b8605Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
9573848b8605Smrg		r600_bytecode_src_toggle_neg(&alu.src[1]);
9574848b8605Smrg		alu.dst.sel = ctx->temp_reg;
9575848b8605Smrg		alu.dst.chan = i;
9576848b8605Smrg		if (i == lasti) {
9577848b8605Smrg			alu.last = 1;
9578848b8605Smrg		}
9579848b8605Smrg		alu.dst.write = 1;
9580848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9581848b8605Smrg		if (r)
9582848b8605Smrg			return r;
9583848b8605Smrg	}
9584848b8605Smrg
9585848b8605Smrg	/* (1 - src0) * src2 */
9586848b8605Smrg	for (i = 0; i < lasti + 1; i++) {
9587848b8605Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9588848b8605Smrg			continue;
9589848b8605Smrg
9590848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9591848b8605Smrg		alu.op = ALU_OP2_MUL;
9592848b8605Smrg		alu.src[0].sel = ctx->temp_reg;
9593848b8605Smrg		alu.src[0].chan = i;
9594848b8605Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
9595848b8605Smrg		alu.dst.sel = ctx->temp_reg;
9596848b8605Smrg		alu.dst.chan = i;
9597848b8605Smrg		if (i == lasti) {
9598848b8605Smrg			alu.last = 1;
9599848b8605Smrg		}
9600848b8605Smrg		alu.dst.write = 1;
9601848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9602848b8605Smrg		if (r)
9603848b8605Smrg			return r;
9604848b8605Smrg	}
9605848b8605Smrg
9606848b8605Smrg	/* src0 * src1 + (1 - src0) * src2 */
9607b8e80941Smrg
9608b8e80941Smrg	for (i = 0; i < 2; i++) {
9609b8e80941Smrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
9610b8e80941Smrg					  srcs[i], &ctx->src[i]);
9611b8e80941Smrg		if (r)
9612b8e80941Smrg			return r;
9613b8e80941Smrg	}
9614b8e80941Smrg
9615b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
9616b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9617b8e80941Smrg			continue;
9618b8e80941Smrg
9619b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9620b8e80941Smrg		alu.op = ALU_OP3_MULADD;
9621b8e80941Smrg		alu.is_op3 = 1;
9622b8e80941Smrg		alu.src[0] = srcs[0][i];
9623b8e80941Smrg		alu.src[1] = srcs[1][i];
9624b8e80941Smrg		alu.src[2].sel = ctx->temp_reg;
9625b8e80941Smrg		alu.src[2].chan = i;
9626b8e80941Smrg
9627b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
9628b8e80941Smrg		alu.dst.chan = i;
9629b8e80941Smrg		if (i == lasti) {
9630b8e80941Smrg			alu.last = 1;
9631b8e80941Smrg		}
9632b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9633b8e80941Smrg		if (r)
9634b8e80941Smrg			return r;
9635b8e80941Smrg	}
9636b8e80941Smrg	return 0;
9637b8e80941Smrg}
9638b8e80941Smrg
9639b8e80941Smrgstatic int tgsi_cmp(struct r600_shader_ctx *ctx)
9640b8e80941Smrg{
9641b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9642b8e80941Smrg	struct r600_bytecode_alu alu;
9643b8e80941Smrg	int i, r, j;
9644b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
9645b8e80941Smrg	struct r600_bytecode_alu_src srcs[3][4];
9646b8e80941Smrg
9647b8e80941Smrg	unsigned op;
9648b8e80941Smrg
9649b8e80941Smrg	if (ctx->src[0].abs && ctx->src[0].neg) {
9650b8e80941Smrg		op = ALU_OP3_CNDE;
9651b8e80941Smrg		ctx->src[0].abs = 0;
9652b8e80941Smrg		ctx->src[0].neg = 0;
9653b8e80941Smrg	} else {
9654b8e80941Smrg		op = ALU_OP3_CNDGE;
9655b8e80941Smrg	}
9656b8e80941Smrg
9657b8e80941Smrg	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
9658b8e80941Smrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
9659b8e80941Smrg					  srcs[j], &ctx->src[j]);
9660b8e80941Smrg		if (r)
9661b8e80941Smrg			return r;
9662b8e80941Smrg	}
9663b8e80941Smrg
9664b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
9665b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9666b8e80941Smrg			continue;
9667b8e80941Smrg
9668b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9669b8e80941Smrg		alu.op = op;
9670b8e80941Smrg		alu.src[0] = srcs[0][i];
9671b8e80941Smrg		alu.src[1] = srcs[2][i];
9672b8e80941Smrg		alu.src[2] = srcs[1][i];
9673b8e80941Smrg
9674b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
9675b8e80941Smrg		alu.dst.chan = i;
9676b8e80941Smrg		alu.dst.write = 1;
9677b8e80941Smrg		alu.is_op3 = 1;
9678b8e80941Smrg		if (i == lasti)
9679b8e80941Smrg			alu.last = 1;
9680b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9681b8e80941Smrg		if (r)
9682b8e80941Smrg			return r;
9683b8e80941Smrg	}
9684b8e80941Smrg	return 0;
9685b8e80941Smrg}
9686b8e80941Smrg
9687b8e80941Smrgstatic int tgsi_ucmp(struct r600_shader_ctx *ctx)
9688b8e80941Smrg{
9689b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9690b8e80941Smrg	struct r600_bytecode_alu alu;
9691b8e80941Smrg	int i, r;
9692b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
9693b8e80941Smrg
9694848b8605Smrg	for (i = 0; i < lasti + 1; i++) {
9695848b8605Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
9696848b8605Smrg			continue;
9697848b8605Smrg
9698b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9699b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
9700b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
9701b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
9702b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
9703b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
9704b8e80941Smrg		alu.dst.chan = i;
9705b8e80941Smrg		alu.dst.write = 1;
9706b8e80941Smrg		alu.is_op3 = 1;
9707b8e80941Smrg		if (i == lasti)
9708b8e80941Smrg			alu.last = 1;
9709b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9710b8e80941Smrg		if (r)
9711b8e80941Smrg			return r;
9712b8e80941Smrg	}
9713b8e80941Smrg	return 0;
9714b8e80941Smrg}
9715b8e80941Smrg
9716b8e80941Smrgstatic int tgsi_exp(struct r600_shader_ctx *ctx)
9717b8e80941Smrg{
9718b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9719b8e80941Smrg	struct r600_bytecode_alu alu;
9720b8e80941Smrg	int r;
9721b8e80941Smrg	unsigned i;
9722b8e80941Smrg
9723b8e80941Smrg	/* result.x = 2^floor(src); */
9724b8e80941Smrg	if (inst->Dst[0].Register.WriteMask & 1) {
9725b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9726b8e80941Smrg
9727b8e80941Smrg		alu.op = ALU_OP1_FLOOR;
9728b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9729b8e80941Smrg
9730b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
9731b8e80941Smrg		alu.dst.chan = 0;
9732b8e80941Smrg		alu.dst.write = 1;
9733b8e80941Smrg		alu.last = 1;
9734b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9735b8e80941Smrg		if (r)
9736b8e80941Smrg			return r;
9737b8e80941Smrg
9738b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
9739b8e80941Smrg			for (i = 0; i < 3; i++) {
9740b8e80941Smrg				alu.op = ALU_OP1_EXP_IEEE;
9741b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
9742b8e80941Smrg				alu.src[0].chan = 0;
9743b8e80941Smrg
9744b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
9745b8e80941Smrg				alu.dst.chan = i;
9746b8e80941Smrg				alu.dst.write = i == 0;
9747b8e80941Smrg				alu.last = i == 2;
9748b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
9749b8e80941Smrg				if (r)
9750b8e80941Smrg					return r;
9751b8e80941Smrg			}
9752b8e80941Smrg		} else {
9753b8e80941Smrg			alu.op = ALU_OP1_EXP_IEEE;
9754b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
9755b8e80941Smrg			alu.src[0].chan = 0;
9756b8e80941Smrg
9757b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9758b8e80941Smrg			alu.dst.chan = 0;
9759b8e80941Smrg			alu.dst.write = 1;
9760b8e80941Smrg			alu.last = 1;
9761b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9762b8e80941Smrg			if (r)
9763b8e80941Smrg				return r;
9764b8e80941Smrg		}
9765b8e80941Smrg	}
9766b8e80941Smrg
9767b8e80941Smrg	/* result.y = tmp - floor(tmp); */
9768b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
9769b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9770b8e80941Smrg
9771b8e80941Smrg		alu.op = ALU_OP1_FRACT;
9772b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9773b8e80941Smrg
9774b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
9775b8e80941Smrg#if 0
9776b8e80941Smrg		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
9777b8e80941Smrg		if (r)
9778b8e80941Smrg			return r;
9779b8e80941Smrg#endif
9780b8e80941Smrg		alu.dst.write = 1;
9781b8e80941Smrg		alu.dst.chan = 1;
9782b8e80941Smrg
9783b8e80941Smrg		alu.last = 1;
9784848b8605Smrg
9785848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9786848b8605Smrg		if (r)
9787848b8605Smrg			return r;
9788848b8605Smrg	}
9789848b8605Smrg
9790b8e80941Smrg	/* result.z = RoughApprox2ToX(tmp);*/
9791b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
9792b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
9793b8e80941Smrg			for (i = 0; i < 3; i++) {
9794b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9795b8e80941Smrg				alu.op = ALU_OP1_EXP_IEEE;
9796b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9797848b8605Smrg
9798b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
9799b8e80941Smrg				alu.dst.chan = i;
9800b8e80941Smrg				if (i == 2) {
9801b8e80941Smrg					alu.dst.write = 1;
9802b8e80941Smrg					alu.last = 1;
9803b8e80941Smrg				}
9804b8e80941Smrg
9805b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
9806b8e80941Smrg				if (r)
9807b8e80941Smrg					return r;
9808b8e80941Smrg			}
9809b8e80941Smrg		} else {
9810b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9811b8e80941Smrg			alu.op = ALU_OP1_EXP_IEEE;
9812b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9813b8e80941Smrg
9814b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9815b8e80941Smrg			alu.dst.write = 1;
9816b8e80941Smrg			alu.dst.chan = 2;
9817848b8605Smrg
9818848b8605Smrg			alu.last = 1;
9819b8e80941Smrg
9820b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9821b8e80941Smrg			if (r)
9822b8e80941Smrg				return r;
9823b8e80941Smrg		}
9824848b8605Smrg	}
9825848b8605Smrg
9826b8e80941Smrg	/* result.w = 1.0;*/
9827b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
9828b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9829848b8605Smrg
9830b8e80941Smrg		alu.op = ALU_OP1_MOV;
9831b8e80941Smrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
9832b8e80941Smrg		alu.src[0].chan = 0;
9833848b8605Smrg
9834b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
9835b8e80941Smrg		alu.dst.chan = 3;
9836848b8605Smrg		alu.dst.write = 1;
9837b8e80941Smrg		alu.last = 1;
9838848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9839848b8605Smrg		if (r)
9840848b8605Smrg			return r;
9841848b8605Smrg	}
9842b8e80941Smrg	return tgsi_helper_copy(ctx, inst);
9843848b8605Smrg}
9844848b8605Smrg
9845b8e80941Smrgstatic int tgsi_log(struct r600_shader_ctx *ctx)
9846848b8605Smrg{
9847848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9848848b8605Smrg	struct r600_bytecode_alu alu;
9849b8e80941Smrg	int r;
9850b8e80941Smrg	unsigned i;
9851848b8605Smrg
9852b8e80941Smrg	/* result.x = floor(log2(|src|)); */
9853b8e80941Smrg	if (inst->Dst[0].Register.WriteMask & 1) {
9854b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
9855b8e80941Smrg			for (i = 0; i < 3; i++) {
9856b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9857b8e80941Smrg
9858b8e80941Smrg				alu.op = ALU_OP1_LOG_IEEE;
9859b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9860b8e80941Smrg				r600_bytecode_src_set_abs(&alu.src[0]);
9861b8e80941Smrg
9862b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
9863b8e80941Smrg				alu.dst.chan = i;
9864b8e80941Smrg				if (i == 0)
9865b8e80941Smrg					alu.dst.write = 1;
9866b8e80941Smrg				if (i == 2)
9867b8e80941Smrg					alu.last = 1;
9868b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
9869b8e80941Smrg				if (r)
9870b8e80941Smrg					return r;
9871b8e80941Smrg			}
9872848b8605Smrg
9873848b8605Smrg		} else {
9874b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9875b8e80941Smrg
9876b8e80941Smrg			alu.op = ALU_OP1_LOG_IEEE;
9877b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9878b8e80941Smrg			r600_bytecode_src_set_abs(&alu.src[0]);
9879b8e80941Smrg
9880b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9881b8e80941Smrg			alu.dst.chan = 0;
9882b8e80941Smrg			alu.dst.write = 1;
9883b8e80941Smrg			alu.last = 1;
9884b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9885b8e80941Smrg			if (r)
9886b8e80941Smrg				return r;
9887848b8605Smrg		}
9888848b8605Smrg
9889b8e80941Smrg		alu.op = ALU_OP1_FLOOR;
9890b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
9891b8e80941Smrg		alu.src[0].chan = 0;
9892b8e80941Smrg
9893848b8605Smrg		alu.dst.sel = ctx->temp_reg;
9894b8e80941Smrg		alu.dst.chan = 0;
9895848b8605Smrg		alu.dst.write = 1;
9896b8e80941Smrg		alu.last = 1;
9897848b8605Smrg
9898848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9899848b8605Smrg		if (r)
9900848b8605Smrg			return r;
9901848b8605Smrg	}
9902848b8605Smrg
9903b8e80941Smrg	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
9904b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
9905848b8605Smrg
9906b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
9907b8e80941Smrg			for (i = 0; i < 3; i++) {
9908b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9909b8e80941Smrg
9910b8e80941Smrg				alu.op = ALU_OP1_LOG_IEEE;
9911b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9912b8e80941Smrg				r600_bytecode_src_set_abs(&alu.src[0]);
9913b8e80941Smrg
9914b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
9915b8e80941Smrg				alu.dst.chan = i;
9916b8e80941Smrg				if (i == 1)
9917b8e80941Smrg					alu.dst.write = 1;
9918b8e80941Smrg				if (i == 2)
9919b8e80941Smrg					alu.last = 1;
9920b8e80941Smrg
9921b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
9922b8e80941Smrg				if (r)
9923b8e80941Smrg					return r;
9924b8e80941Smrg			}
9925848b8605Smrg		} else {
9926b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9927848b8605Smrg
9928b8e80941Smrg			alu.op = ALU_OP1_LOG_IEEE;
9929b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9930b8e80941Smrg			r600_bytecode_src_set_abs(&alu.src[0]);
9931848b8605Smrg
9932848b8605Smrg			alu.dst.sel = ctx->temp_reg;
9933b8e80941Smrg			alu.dst.chan = 1;
9934b8e80941Smrg			alu.dst.write = 1;
9935848b8605Smrg			alu.last = 1;
9936848b8605Smrg
9937b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9938b8e80941Smrg			if (r)
9939b8e80941Smrg				return r;
9940b8e80941Smrg		}
9941848b8605Smrg
9942848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9943848b8605Smrg
9944848b8605Smrg		alu.op = ALU_OP1_FLOOR;
9945b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
9946b8e80941Smrg		alu.src[0].chan = 1;
9947848b8605Smrg
9948848b8605Smrg		alu.dst.sel = ctx->temp_reg;
9949b8e80941Smrg		alu.dst.chan = 1;
9950848b8605Smrg		alu.dst.write = 1;
9951848b8605Smrg		alu.last = 1;
9952b8e80941Smrg
9953848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9954848b8605Smrg		if (r)
9955848b8605Smrg			return r;
9956848b8605Smrg
9957848b8605Smrg		if (ctx->bc->chip_class == CAYMAN) {
9958848b8605Smrg			for (i = 0; i < 3; i++) {
9959b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9960b8e80941Smrg				alu.op = ALU_OP1_EXP_IEEE;
9961b8e80941Smrg				alu.src[0].sel = ctx->temp_reg;
9962b8e80941Smrg				alu.src[0].chan = 1;
9963b8e80941Smrg
9964b8e80941Smrg				alu.dst.sel = ctx->temp_reg;
9965b8e80941Smrg				alu.dst.chan = i;
9966b8e80941Smrg				if (i == 1)
9967b8e80941Smrg					alu.dst.write = 1;
9968b8e80941Smrg				if (i == 2)
9969b8e80941Smrg					alu.last = 1;
9970b8e80941Smrg
9971b8e80941Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
9972b8e80941Smrg				if (r)
9973b8e80941Smrg					return r;
9974b8e80941Smrg			}
9975b8e80941Smrg		} else {
9976b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9977b8e80941Smrg			alu.op = ALU_OP1_EXP_IEEE;
9978b8e80941Smrg			alu.src[0].sel = ctx->temp_reg;
9979b8e80941Smrg			alu.src[0].chan = 1;
9980b8e80941Smrg
9981b8e80941Smrg			alu.dst.sel = ctx->temp_reg;
9982b8e80941Smrg			alu.dst.chan = 1;
9983b8e80941Smrg			alu.dst.write = 1;
9984b8e80941Smrg			alu.last = 1;
9985b8e80941Smrg
9986b8e80941Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9987b8e80941Smrg			if (r)
9988b8e80941Smrg				return r;
9989b8e80941Smrg		}
9990b8e80941Smrg
9991b8e80941Smrg		if (ctx->bc->chip_class == CAYMAN) {
9992b8e80941Smrg			for (i = 0; i < 3; i++) {
9993b8e80941Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9994b8e80941Smrg				alu.op = ALU_OP1_RECIP_IEEE;
9995848b8605Smrg				alu.src[0].sel = ctx->temp_reg;
9996b8e80941Smrg				alu.src[0].chan = 1;
9997848b8605Smrg
9998848b8605Smrg				alu.dst.sel = ctx->temp_reg;
9999848b8605Smrg				alu.dst.chan = i;
10000b8e80941Smrg				if (i == 1)
10001b8e80941Smrg					alu.dst.write = 1;
10002b8e80941Smrg				if (i == 2)
10003b8e80941Smrg					alu.last = 1;
10004b8e80941Smrg
10005848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
10006848b8605Smrg				if (r)
10007848b8605Smrg					return r;
10008848b8605Smrg			}
10009848b8605Smrg		} else {
10010b8e80941Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10011b8e80941Smrg			alu.op = ALU_OP1_RECIP_IEEE;
10012848b8605Smrg			alu.src[0].sel = ctx->temp_reg;
10013b8e80941Smrg			alu.src[0].chan = 1;
10014848b8605Smrg
10015848b8605Smrg			alu.dst.sel = ctx->temp_reg;
10016b8e80941Smrg			alu.dst.chan = 1;
10017848b8605Smrg			alu.dst.write = 1;
10018848b8605Smrg			alu.last = 1;
10019b8e80941Smrg
10020848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
10021848b8605Smrg			if (r)
10022848b8605Smrg				return r;
10023848b8605Smrg		}
10024848b8605Smrg
10025848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10026848b8605Smrg
10027b8e80941Smrg		alu.op = ALU_OP2_MUL;
10028b8e80941Smrg
10029848b8605Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10030b8e80941Smrg		r600_bytecode_src_set_abs(&alu.src[0]);
10031b8e80941Smrg
10032b8e80941Smrg		alu.src[1].sel = ctx->temp_reg;
10033b8e80941Smrg		alu.src[1].chan = 1;
10034848b8605Smrg
10035848b8605Smrg		alu.dst.sel = ctx->temp_reg;
10036848b8605Smrg		alu.dst.chan = 1;
10037b8e80941Smrg		alu.dst.write = 1;
10038848b8605Smrg		alu.last = 1;
10039848b8605Smrg
10040848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10041848b8605Smrg		if (r)
10042848b8605Smrg			return r;
10043848b8605Smrg	}
10044848b8605Smrg
10045b8e80941Smrg	/* result.z = log2(|src|);*/
10046b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
10047848b8605Smrg		if (ctx->bc->chip_class == CAYMAN) {
10048848b8605Smrg			for (i = 0; i < 3; i++) {
10049848b8605Smrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10050b8e80941Smrg
10051b8e80941Smrg				alu.op = ALU_OP1_LOG_IEEE;
10052848b8605Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10053b8e80941Smrg				r600_bytecode_src_set_abs(&alu.src[0]);
10054848b8605Smrg
10055848b8605Smrg				alu.dst.sel = ctx->temp_reg;
10056b8e80941Smrg				if (i == 2)
10057848b8605Smrg					alu.dst.write = 1;
10058b8e80941Smrg				alu.dst.chan = i;
10059b8e80941Smrg				if (i == 2)
10060848b8605Smrg					alu.last = 1;
10061848b8605Smrg
10062848b8605Smrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
10063848b8605Smrg				if (r)
10064848b8605Smrg					return r;
10065848b8605Smrg			}
10066848b8605Smrg		} else {
10067848b8605Smrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10068b8e80941Smrg
10069b8e80941Smrg			alu.op = ALU_OP1_LOG_IEEE;
10070848b8605Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10071b8e80941Smrg			r600_bytecode_src_set_abs(&alu.src[0]);
10072848b8605Smrg
10073848b8605Smrg			alu.dst.sel = ctx->temp_reg;
10074848b8605Smrg			alu.dst.write = 1;
10075848b8605Smrg			alu.dst.chan = 2;
10076848b8605Smrg			alu.last = 1;
10077848b8605Smrg
10078848b8605Smrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
10079848b8605Smrg			if (r)
10080848b8605Smrg				return r;
10081848b8605Smrg		}
10082848b8605Smrg	}
10083848b8605Smrg
10084b8e80941Smrg	/* result.w = 1.0; */
10085b8e80941Smrg	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
10086848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10087848b8605Smrg
10088848b8605Smrg		alu.op = ALU_OP1_MOV;
10089848b8605Smrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
10090848b8605Smrg		alu.src[0].chan = 0;
10091848b8605Smrg
10092848b8605Smrg		alu.dst.sel = ctx->temp_reg;
10093848b8605Smrg		alu.dst.chan = 3;
10094848b8605Smrg		alu.dst.write = 1;
10095848b8605Smrg		alu.last = 1;
10096b8e80941Smrg
10097848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10098848b8605Smrg		if (r)
10099848b8605Smrg			return r;
10100848b8605Smrg	}
10101b8e80941Smrg
10102848b8605Smrg	return tgsi_helper_copy(ctx, inst);
10103848b8605Smrg}
10104848b8605Smrg
10105b8e80941Smrgstatic int tgsi_eg_arl(struct r600_shader_ctx *ctx)
10106848b8605Smrg{
10107848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10108848b8605Smrg	struct r600_bytecode_alu alu;
10109848b8605Smrg	int r;
10110b8e80941Smrg	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10111b8e80941Smrg	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
10112848b8605Smrg
10113b8e80941Smrg	assert(inst->Dst[0].Register.Index < 3);
10114b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10115848b8605Smrg
10116b8e80941Smrg	switch (inst->Instruction.Opcode) {
10117b8e80941Smrg	case TGSI_OPCODE_ARL:
10118b8e80941Smrg		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
10119b8e80941Smrg		break;
10120b8e80941Smrg	case TGSI_OPCODE_ARR:
10121b8e80941Smrg		alu.op = ALU_OP1_FLT_TO_INT;
10122b8e80941Smrg		break;
10123b8e80941Smrg	case TGSI_OPCODE_UARL:
10124b8e80941Smrg		alu.op = ALU_OP1_MOV;
10125b8e80941Smrg		break;
10126b8e80941Smrg	default:
10127b8e80941Smrg		assert(0);
10128b8e80941Smrg		return -1;
10129b8e80941Smrg	}
10130b8e80941Smrg
10131b8e80941Smrg	for (i = 0; i <= lasti; ++i) {
10132b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
10133b8e80941Smrg			continue;
10134b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
10135b8e80941Smrg		alu.last = i == lasti;
10136b8e80941Smrg		alu.dst.sel = reg;
10137b8e80941Smrg	        alu.dst.chan = i;
10138b8e80941Smrg		alu.dst.write = 1;
10139b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10140b8e80941Smrg		if (r)
10141b8e80941Smrg			return r;
10142b8e80941Smrg	}
10143b8e80941Smrg
10144b8e80941Smrg	if (inst->Dst[0].Register.Index > 0)
10145b8e80941Smrg		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
10146b8e80941Smrg	else
10147b8e80941Smrg		ctx->bc->ar_loaded = 0;
10148b8e80941Smrg
10149b8e80941Smrg	return 0;
10150b8e80941Smrg}
10151b8e80941Smrgstatic int tgsi_r600_arl(struct r600_shader_ctx *ctx)
10152b8e80941Smrg{
10153b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10154b8e80941Smrg	struct r600_bytecode_alu alu;
10155b8e80941Smrg	int r;
10156b8e80941Smrg	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10157b8e80941Smrg
10158b8e80941Smrg	switch (inst->Instruction.Opcode) {
10159b8e80941Smrg	case TGSI_OPCODE_ARL:
10160b8e80941Smrg		memset(&alu, 0, sizeof(alu));
10161b8e80941Smrg		alu.op = ALU_OP1_FLOOR;
10162b8e80941Smrg		alu.dst.sel = ctx->bc->ar_reg;
10163b8e80941Smrg		alu.dst.write = 1;
10164b8e80941Smrg		for (i = 0; i <= lasti; ++i) {
10165b8e80941Smrg			if (inst->Dst[0].Register.WriteMask & (1 << i))  {
10166848b8605Smrg				alu.dst.chan = i;
10167b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
10168b8e80941Smrg				alu.last = i == lasti;
10169b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
10170b8e80941Smrg					return r;
10171b8e80941Smrg			}
10172b8e80941Smrg		}
10173b8e80941Smrg
10174b8e80941Smrg		memset(&alu, 0, sizeof(alu));
10175b8e80941Smrg		alu.op = ALU_OP1_FLT_TO_INT;
10176b8e80941Smrg		alu.src[0].sel = ctx->bc->ar_reg;
10177b8e80941Smrg		alu.dst.sel = ctx->bc->ar_reg;
10178b8e80941Smrg		alu.dst.write = 1;
10179b8e80941Smrg		/* FLT_TO_INT is trans-only on r600/r700 */
10180b8e80941Smrg		alu.last = TRUE;
10181b8e80941Smrg		for (i = 0; i <= lasti; ++i) {
10182b8e80941Smrg			alu.dst.chan = i;
10183b8e80941Smrg			alu.src[0].chan = i;
10184b8e80941Smrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
10185b8e80941Smrg				return r;
10186b8e80941Smrg		}
10187b8e80941Smrg		break;
10188b8e80941Smrg	case TGSI_OPCODE_ARR:
10189b8e80941Smrg		memset(&alu, 0, sizeof(alu));
10190b8e80941Smrg		alu.op = ALU_OP1_FLT_TO_INT;
10191b8e80941Smrg		alu.dst.sel = ctx->bc->ar_reg;
10192b8e80941Smrg		alu.dst.write = 1;
10193b8e80941Smrg		/* FLT_TO_INT is trans-only on r600/r700 */
10194b8e80941Smrg		alu.last = TRUE;
10195b8e80941Smrg		for (i = 0; i <= lasti; ++i) {
10196b8e80941Smrg			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
10197b8e80941Smrg				alu.dst.chan = i;
10198b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
10199b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
10200848b8605Smrg					return r;
10201848b8605Smrg			}
10202b8e80941Smrg		}
10203b8e80941Smrg		break;
10204b8e80941Smrg	case TGSI_OPCODE_UARL:
10205b8e80941Smrg		memset(&alu, 0, sizeof(alu));
10206b8e80941Smrg		alu.op = ALU_OP1_MOV;
10207b8e80941Smrg		alu.dst.sel = ctx->bc->ar_reg;
10208b8e80941Smrg		alu.dst.write = 1;
10209b8e80941Smrg		for (i = 0; i <= lasti; ++i) {
10210b8e80941Smrg			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
10211b8e80941Smrg				alu.dst.chan = i;
10212b8e80941Smrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
10213b8e80941Smrg				alu.last = i == lasti;
10214b8e80941Smrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
10215b8e80941Smrg					return r;
10216b8e80941Smrg			}
10217b8e80941Smrg		}
10218b8e80941Smrg		break;
10219b8e80941Smrg	default:
10220b8e80941Smrg		assert(0);
10221b8e80941Smrg		return -1;
10222b8e80941Smrg	}
10223b8e80941Smrg
10224b8e80941Smrg	ctx->bc->ar_loaded = 0;
10225b8e80941Smrg	return 0;
10226b8e80941Smrg}
10227b8e80941Smrg
10228b8e80941Smrgstatic int tgsi_opdst(struct r600_shader_ctx *ctx)
10229b8e80941Smrg{
10230b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10231b8e80941Smrg	struct r600_bytecode_alu alu;
10232b8e80941Smrg	int i, r = 0;
10233b8e80941Smrg
10234b8e80941Smrg	for (i = 0; i < 4; i++) {
10235b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10236b8e80941Smrg
10237b8e80941Smrg		alu.op = ALU_OP2_MUL;
10238b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
10239b8e80941Smrg
10240b8e80941Smrg		if (i == 0 || i == 3) {
10241b8e80941Smrg			alu.src[0].sel = V_SQ_ALU_SRC_1;
10242b8e80941Smrg		} else {
10243b8e80941Smrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
10244b8e80941Smrg		}
10245b8e80941Smrg
10246b8e80941Smrg		if (i == 0 || i == 2) {
10247b8e80941Smrg			alu.src[1].sel = V_SQ_ALU_SRC_1;
10248b8e80941Smrg		} else {
10249b8e80941Smrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
10250b8e80941Smrg		}
10251b8e80941Smrg		if (i == 3)
10252b8e80941Smrg			alu.last = 1;
10253b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10254b8e80941Smrg		if (r)
10255b8e80941Smrg			return r;
10256b8e80941Smrg	}
10257b8e80941Smrg	return 0;
10258b8e80941Smrg}
10259b8e80941Smrg
10260b8e80941Smrgstatic int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type,
10261b8e80941Smrg			   struct r600_bytecode_alu_src *src)
10262b8e80941Smrg{
10263b8e80941Smrg	struct r600_bytecode_alu alu;
10264b8e80941Smrg	int r;
10265b8e80941Smrg
10266b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10267b8e80941Smrg	alu.op = opcode;
10268b8e80941Smrg	alu.execute_mask = 1;
10269b8e80941Smrg	alu.update_pred = 1;
10270b8e80941Smrg
10271b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
10272b8e80941Smrg	alu.dst.write = 1;
10273b8e80941Smrg	alu.dst.chan = 0;
10274b8e80941Smrg
10275b8e80941Smrg	alu.src[0] = *src;
10276b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_0;
10277b8e80941Smrg	alu.src[1].chan = 0;
10278b8e80941Smrg
10279b8e80941Smrg	alu.last = 1;
10280b8e80941Smrg
10281b8e80941Smrg	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
10282b8e80941Smrg	if (r)
10283b8e80941Smrg		return r;
10284b8e80941Smrg	return 0;
10285b8e80941Smrg}
10286b8e80941Smrg
10287b8e80941Smrgstatic int pops(struct r600_shader_ctx *ctx, int pops)
10288b8e80941Smrg{
10289b8e80941Smrg	unsigned force_pop = ctx->bc->force_add_cf;
10290848b8605Smrg
10291b8e80941Smrg	if (!force_pop) {
10292b8e80941Smrg		int alu_pop = 3;
10293b8e80941Smrg		if (ctx->bc->cf_last) {
10294b8e80941Smrg			if (ctx->bc->cf_last->op == CF_OP_ALU)
10295b8e80941Smrg				alu_pop = 0;
10296b8e80941Smrg			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
10297b8e80941Smrg				alu_pop = 1;
10298b8e80941Smrg		}
10299b8e80941Smrg		alu_pop += pops;
10300b8e80941Smrg		if (alu_pop == 1) {
10301b8e80941Smrg			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
10302b8e80941Smrg			ctx->bc->force_add_cf = 1;
10303b8e80941Smrg		} else if (alu_pop == 2) {
10304b8e80941Smrg			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
10305b8e80941Smrg			ctx->bc->force_add_cf = 1;
10306848b8605Smrg		} else {
10307b8e80941Smrg			force_pop = 1;
10308848b8605Smrg		}
10309b8e80941Smrg	}
10310848b8605Smrg
10311b8e80941Smrg	if (force_pop) {
10312b8e80941Smrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
10313b8e80941Smrg		ctx->bc->cf_last->pop_count = pops;
10314b8e80941Smrg		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
10315848b8605Smrg	}
10316848b8605Smrg
10317b8e80941Smrg	return 0;
10318b8e80941Smrg}
10319848b8605Smrg
10320b8e80941Smrgstatic inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
10321b8e80941Smrg                                              unsigned reason)
10322b8e80941Smrg{
10323b8e80941Smrg	struct r600_stack_info *stack = &ctx->bc->stack;
10324b8e80941Smrg	unsigned elements;
10325b8e80941Smrg	int entries;
10326848b8605Smrg
10327b8e80941Smrg	unsigned entry_size = stack->entry_size;
10328848b8605Smrg
10329b8e80941Smrg	elements = (stack->loop + stack->push_wqm ) * entry_size;
10330b8e80941Smrg	elements += stack->push;
10331848b8605Smrg
10332b8e80941Smrg	switch (ctx->bc->chip_class) {
10333b8e80941Smrg	case R600:
10334b8e80941Smrg	case R700:
10335b8e80941Smrg		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
10336b8e80941Smrg		 * the stack must be reserved to hold the current active/continue
10337b8e80941Smrg		 * masks */
10338b8e80941Smrg		if (reason == FC_PUSH_VPM || stack->push > 0) {
10339b8e80941Smrg			elements += 2;
10340b8e80941Smrg		}
10341b8e80941Smrg		break;
10342848b8605Smrg
10343b8e80941Smrg	case CAYMAN:
10344b8e80941Smrg		/* r9xx: any stack operation on empty stack consumes 2 additional
10345b8e80941Smrg		 * elements */
10346b8e80941Smrg		elements += 2;
10347848b8605Smrg
10348b8e80941Smrg		/* fallthrough */
10349b8e80941Smrg		/* FIXME: do the two elements added above cover the cases for the
10350b8e80941Smrg		 * r8xx+ below? */
10351b8e80941Smrg
10352b8e80941Smrg	case EVERGREEN:
10353b8e80941Smrg		/* r8xx+: 2 extra elements are not always required, but one extra
10354b8e80941Smrg		 * element must be added for each of the following cases:
10355b8e80941Smrg		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
10356b8e80941Smrg		 *    stack usage.
10357b8e80941Smrg		 *    (Currently we don't use ALU_ELSE_AFTER.)
10358b8e80941Smrg		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
10359b8e80941Smrg		 *    PUSH instruction executed.
10360b8e80941Smrg		 *
10361b8e80941Smrg		 *    NOTE: it seems we also need to reserve additional element in some
10362b8e80941Smrg		 *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
10363b8e80941Smrg		 *    then STACK_SIZE should be 2 instead of 1 */
10364b8e80941Smrg		if (reason == FC_PUSH_VPM || stack->push > 0) {
10365b8e80941Smrg			elements += 1;
10366848b8605Smrg		}
10367b8e80941Smrg		break;
10368848b8605Smrg
10369b8e80941Smrg	default:
10370b8e80941Smrg		assert(0);
10371b8e80941Smrg		break;
10372b8e80941Smrg	}
10373848b8605Smrg
10374b8e80941Smrg	/* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
10375b8e80941Smrg	 * for all chips, so we use 4 in the final formula, not the real entry_size
10376b8e80941Smrg	 * for the chip */
10377b8e80941Smrg	entry_size = 4;
10378848b8605Smrg
10379b8e80941Smrg	entries = (elements + (entry_size - 1)) / entry_size;
10380848b8605Smrg
10381b8e80941Smrg	if (entries > stack->max_entries)
10382b8e80941Smrg		stack->max_entries = entries;
10383b8e80941Smrg	return elements;
10384b8e80941Smrg}
10385848b8605Smrg
10386b8e80941Smrgstatic inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
10387b8e80941Smrg{
10388b8e80941Smrg	switch(reason) {
10389b8e80941Smrg	case FC_PUSH_VPM:
10390b8e80941Smrg		--ctx->bc->stack.push;
10391b8e80941Smrg		assert(ctx->bc->stack.push >= 0);
10392b8e80941Smrg		break;
10393b8e80941Smrg	case FC_PUSH_WQM:
10394b8e80941Smrg		--ctx->bc->stack.push_wqm;
10395b8e80941Smrg		assert(ctx->bc->stack.push_wqm >= 0);
10396b8e80941Smrg		break;
10397b8e80941Smrg	case FC_LOOP:
10398b8e80941Smrg		--ctx->bc->stack.loop;
10399b8e80941Smrg		assert(ctx->bc->stack.loop >= 0);
10400b8e80941Smrg		break;
10401b8e80941Smrg	default:
10402b8e80941Smrg		assert(0);
10403b8e80941Smrg		break;
10404b8e80941Smrg	}
10405b8e80941Smrg}
10406848b8605Smrg
10407b8e80941Smrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
10408b8e80941Smrg{
10409b8e80941Smrg	switch (reason) {
10410b8e80941Smrg	case FC_PUSH_VPM:
10411b8e80941Smrg		++ctx->bc->stack.push;
10412b8e80941Smrg		break;
10413b8e80941Smrg	case FC_PUSH_WQM:
10414b8e80941Smrg		++ctx->bc->stack.push_wqm;
10415b8e80941Smrg		break;
10416b8e80941Smrg	case FC_LOOP:
10417b8e80941Smrg		++ctx->bc->stack.loop;
10418b8e80941Smrg		break;
10419b8e80941Smrg	default:
10420b8e80941Smrg		assert(0);
10421b8e80941Smrg	}
10422848b8605Smrg
10423b8e80941Smrg	return callstack_update_max_depth(ctx, reason);
10424b8e80941Smrg}
10425848b8605Smrg
10426b8e80941Smrgstatic void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
10427b8e80941Smrg{
10428b8e80941Smrg	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
10429848b8605Smrg
10430b8e80941Smrg	sp->mid = realloc((void *)sp->mid,
10431b8e80941Smrg						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
10432b8e80941Smrg	sp->mid[sp->num_mid] = ctx->bc->cf_last;
10433b8e80941Smrg	sp->num_mid++;
10434b8e80941Smrg}
10435848b8605Smrg
10436b8e80941Smrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
10437b8e80941Smrg{
10438b8e80941Smrg	assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
10439b8e80941Smrg	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
10440b8e80941Smrg	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
10441b8e80941Smrg	ctx->bc->fc_sp++;
10442b8e80941Smrg}
10443848b8605Smrg
10444b8e80941Smrgstatic void fc_poplevel(struct r600_shader_ctx *ctx)
10445b8e80941Smrg{
10446b8e80941Smrg	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
10447b8e80941Smrg	free(sp->mid);
10448b8e80941Smrg	sp->mid = NULL;
10449b8e80941Smrg	sp->num_mid = 0;
10450b8e80941Smrg	sp->start = NULL;
10451b8e80941Smrg	sp->type = 0;
10452b8e80941Smrg	ctx->bc->fc_sp--;
10453b8e80941Smrg}
10454848b8605Smrg
#if 0
/*
 * Disabled sketches of subroutine return / return-from-loop support. They are
 * compiled out; several helpers are still empty stubs. The unbalanced
 * parentheses that used to follow the r600_bytecode_add_cfinst() calls have
 * been removed so the block is at least syntactically valid if re-enabled.
 */

/* Emit a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN);
	return 0;
}

/* Emit a CF JUMP popping `pops` levels; the target offset is not wired up. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: currently emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: currently emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Test the flag, then pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag and emit the current instruction's CF op as a loop exit
 * recorded on flow-control level fc_sp. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
10502848b8605Smrg
10503b8e80941Smrgstatic int emit_if(struct r600_shader_ctx *ctx, int opcode,
10504b8e80941Smrg		   struct r600_bytecode_alu_src *src)
10505b8e80941Smrg{
10506b8e80941Smrg	int alu_type = CF_OP_ALU_PUSH_BEFORE;
10507b8e80941Smrg	bool needs_workaround = false;
10508b8e80941Smrg	int elems = callstack_push(ctx, FC_PUSH_VPM);
10509848b8605Smrg
10510b8e80941Smrg	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
10511b8e80941Smrg		needs_workaround = true;
10512848b8605Smrg
10513b8e80941Smrg	if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
10514b8e80941Smrg		unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
10515b8e80941Smrg		unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
10516848b8605Smrg
10517b8e80941Smrg		if (elems && (!dmod1 || !dmod2))
10518b8e80941Smrg			needs_workaround = true;
10519b8e80941Smrg	}
10520848b8605Smrg
10521b8e80941Smrg	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
10522b8e80941Smrg	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
10523b8e80941Smrg	 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
10524b8e80941Smrg	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
10525b8e80941Smrg	if (needs_workaround) {
10526b8e80941Smrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
10527b8e80941Smrg		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
10528b8e80941Smrg		alu_type = CF_OP_ALU;
10529848b8605Smrg	}
10530848b8605Smrg
10531b8e80941Smrg	emit_logic_pred(ctx, opcode, alu_type, src);
10532848b8605Smrg
10533b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
10534848b8605Smrg
10535b8e80941Smrg	fc_pushlevel(ctx, FC_IF);
10536848b8605Smrg
10537b8e80941Smrg	return 0;
10538b8e80941Smrg}
10539848b8605Smrg
10540b8e80941Smrgstatic int tgsi_if(struct r600_shader_ctx *ctx)
10541b8e80941Smrg{
10542b8e80941Smrg	struct r600_bytecode_alu_src alu_src;
10543b8e80941Smrg	r600_bytecode_src(&alu_src, &ctx->src[0], 0);
10544b8e80941Smrg
10545b8e80941Smrg	return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src);
10546848b8605Smrg}
10547848b8605Smrg
10548b8e80941Smrgstatic int tgsi_uif(struct r600_shader_ctx *ctx)
10549848b8605Smrg{
10550b8e80941Smrg	struct r600_bytecode_alu_src alu_src;
10551b8e80941Smrg	r600_bytecode_src(&alu_src, &ctx->src[0], 0);
10552b8e80941Smrg	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
10553b8e80941Smrg}
10554848b8605Smrg
10555b8e80941Smrgstatic int tgsi_else(struct r600_shader_ctx *ctx)
10556b8e80941Smrg{
10557b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
10558b8e80941Smrg	ctx->bc->cf_last->pop_count = 1;
10559848b8605Smrg
10560b8e80941Smrg	fc_set_mid(ctx, ctx->bc->fc_sp - 1);
10561b8e80941Smrg	ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id;
10562b8e80941Smrg	return 0;
10563b8e80941Smrg}
10564b8e80941Smrg
10565b8e80941Smrgstatic int tgsi_endif(struct r600_shader_ctx *ctx)
10566b8e80941Smrg{
10567b8e80941Smrg	int offset = 2;
10568b8e80941Smrg	pops(ctx, 1);
10569b8e80941Smrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) {
10570b8e80941Smrg		R600_ERR("if/endif unbalanced in shader\n");
10571848b8605Smrg		return -1;
10572848b8605Smrg	}
10573848b8605Smrg
10574b8e80941Smrg	/* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */
10575b8e80941Smrg	if (ctx->bc->cf_last->eg_alu_extended)
10576b8e80941Smrg			offset += 2;
10577b8e80941Smrg
10578b8e80941Smrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) {
10579b8e80941Smrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + offset;
10580b8e80941Smrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1;
10581b8e80941Smrg	} else {
10582b8e80941Smrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset;
10583848b8605Smrg	}
10584b8e80941Smrg	fc_poplevel(ctx);
10585848b8605Smrg
10586b8e80941Smrg	callstack_pop(ctx, FC_PUSH_VPM);
10587848b8605Smrg	return 0;
10588848b8605Smrg}
10589b8e80941Smrg
10590b8e80941Smrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx)
10591848b8605Smrg{
10592b8e80941Smrg	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
10593b8e80941Smrg	 * limited to 4096 iterations, like the other LOOP_* instructions. */
10594b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
10595848b8605Smrg
10596b8e80941Smrg	fc_pushlevel(ctx, FC_LOOP);
10597848b8605Smrg
10598b8e80941Smrg	/* check stack depth */
10599b8e80941Smrg	callstack_push(ctx, FC_LOOP);
10600b8e80941Smrg	return 0;
10601b8e80941Smrg}
10602b8e80941Smrg
10603b8e80941Smrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx)
10604b8e80941Smrg{
10605b8e80941Smrg	int i;
10606b8e80941Smrg
10607b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
10608b8e80941Smrg
10609b8e80941Smrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) {
10610b8e80941Smrg		R600_ERR("loop/endloop in shader code are not paired.\n");
10611b8e80941Smrg		return -EINVAL;
10612b8e80941Smrg	}
10613b8e80941Smrg
10614b8e80941Smrg	/* fixup loop pointers - from r600isa
10615b8e80941Smrg	   LOOP END points to CF after LOOP START,
10616b8e80941Smrg	   LOOP START point to CF after LOOP END
10617b8e80941Smrg	   BRK/CONT point to LOOP END CF
10618b8e80941Smrg	*/
10619b8e80941Smrg	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
10620b8e80941Smrg
10621b8e80941Smrg	ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
10622b8e80941Smrg
10623b8e80941Smrg	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) {
10624b8e80941Smrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id;
10625b8e80941Smrg	}
10626b8e80941Smrg	/* XXX add LOOPRET support */
10627b8e80941Smrg	fc_poplevel(ctx);
10628b8e80941Smrg	callstack_pop(ctx, FC_LOOP);
10629b8e80941Smrg	return 0;
10630b8e80941Smrg}
10631b8e80941Smrg
10632b8e80941Smrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
10633b8e80941Smrg{
10634b8e80941Smrg	unsigned int fscp;
10635b8e80941Smrg
10636b8e80941Smrg	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
10637b8e80941Smrg	{
10638b8e80941Smrg		if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
10639b8e80941Smrg			break;
10640848b8605Smrg	}
10641848b8605Smrg
10642b8e80941Smrg	if (fscp == 0) {
10643b8e80941Smrg		R600_ERR("Break not inside loop/endloop pair\n");
10644b8e80941Smrg		return -EINVAL;
10645b8e80941Smrg	}
10646b8e80941Smrg
10647b8e80941Smrg	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
10648b8e80941Smrg
10649b8e80941Smrg	fc_set_mid(ctx, fscp - 1);
10650b8e80941Smrg
10651848b8605Smrg	return 0;
10652848b8605Smrg}
10653848b8605Smrg
10654b8e80941Smrgstatic int tgsi_gs_emit(struct r600_shader_ctx *ctx)
10655b8e80941Smrg{
10656b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10657b8e80941Smrg	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
10658b8e80941Smrg	int r;
10659b8e80941Smrg
10660b8e80941Smrg	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
10661b8e80941Smrg		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
10662b8e80941Smrg
10663b8e80941Smrg	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
10664b8e80941Smrg	if (!r) {
10665b8e80941Smrg		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
10666b8e80941Smrg		if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
10667b8e80941Smrg			return emit_inc_ring_offset(ctx, stream, TRUE);
10668b8e80941Smrg	}
10669b8e80941Smrg	return r;
10670b8e80941Smrg}
10671b8e80941Smrg
10672b8e80941Smrgstatic int tgsi_umad(struct r600_shader_ctx *ctx)
10673848b8605Smrg{
10674848b8605Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10675848b8605Smrg	struct r600_bytecode_alu alu;
10676b8e80941Smrg	int i, j, r;
10677b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10678b8e80941Smrg
10679b8e80941Smrg	/* src0 * src1 */
10680b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10681b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
10682b8e80941Smrg			continue;
10683848b8605Smrg
10684848b8605Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10685848b8605Smrg
10686b8e80941Smrg		alu.dst.chan = i;
10687b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
10688b8e80941Smrg		alu.dst.write = 1;
10689848b8605Smrg
10690b8e80941Smrg		alu.op = ALU_OP2_MULLO_UINT;
10691b8e80941Smrg		for (j = 0; j < 2; j++) {
10692b8e80941Smrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
10693848b8605Smrg		}
10694848b8605Smrg
10695b8e80941Smrg		alu.last = 1;
10696b8e80941Smrg		r = emit_mul_int_op(ctx->bc, &alu);
10697b8e80941Smrg		if (r)
10698b8e80941Smrg			return r;
10699b8e80941Smrg	}
10700b8e80941Smrg
10701b8e80941Smrg
10702b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10703b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
10704b8e80941Smrg			continue;
10705b8e80941Smrg
10706b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10707b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
10708b8e80941Smrg
10709b8e80941Smrg		alu.op = ALU_OP2_ADD_INT;
10710b8e80941Smrg
10711b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
10712b8e80941Smrg		alu.src[0].chan = i;
10713b8e80941Smrg
10714b8e80941Smrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
10715b8e80941Smrg		if (i == lasti) {
10716848b8605Smrg			alu.last = 1;
10717b8e80941Smrg		}
10718848b8605Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10719848b8605Smrg		if (r)
10720848b8605Smrg			return r;
10721848b8605Smrg	}
10722848b8605Smrg	return 0;
10723848b8605Smrg}
10724848b8605Smrg
10725b8e80941Smrgstatic int tgsi_pk2h(struct r600_shader_ctx *ctx)
10726848b8605Smrg{
10727b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10728848b8605Smrg	struct r600_bytecode_alu alu;
10729b8e80941Smrg	int r, i;
10730b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10731848b8605Smrg
10732b8e80941Smrg	/* temp.xy = f32_to_f16(src) */
10733848b8605Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10734b8e80941Smrg	alu.op = ALU_OP1_FLT32_TO_FLT16;
10735b8e80941Smrg	alu.dst.chan = 0;
10736848b8605Smrg	alu.dst.sel = ctx->temp_reg;
10737848b8605Smrg	alu.dst.write = 1;
10738848b8605Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10739b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10740b8e80941Smrg	if (r)
10741b8e80941Smrg		return r;
10742b8e80941Smrg	alu.dst.chan = 1;
10743b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
10744848b8605Smrg	alu.last = 1;
10745b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10746848b8605Smrg	if (r)
10747848b8605Smrg		return r;
10748848b8605Smrg
10749b8e80941Smrg	/* dst.x = temp.y * 0x10000 + temp.x */
10750b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10751b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
10752b8e80941Smrg			continue;
10753848b8605Smrg
10754b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10755b8e80941Smrg		alu.op = ALU_OP3_MULADD_UINT24;
10756b8e80941Smrg		alu.is_op3 = 1;
10757b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
10758b8e80941Smrg		alu.last = i == lasti;
10759b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
10760b8e80941Smrg		alu.src[0].chan = 1;
10761b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
10762b8e80941Smrg		alu.src[1].value = 0x10000;
10763b8e80941Smrg		alu.src[2].sel = ctx->temp_reg;
10764b8e80941Smrg		alu.src[2].chan = 0;
10765b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10766b8e80941Smrg		if (r)
10767b8e80941Smrg			return r;
10768848b8605Smrg	}
10769848b8605Smrg
10770848b8605Smrg	return 0;
10771848b8605Smrg}
10772848b8605Smrg
10773b8e80941Smrgstatic int tgsi_up2h(struct r600_shader_ctx *ctx)
10774848b8605Smrg{
10775b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10776b8e80941Smrg	struct r600_bytecode_alu alu;
10777b8e80941Smrg	int r, i;
10778b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10779848b8605Smrg
10780b8e80941Smrg	/* temp.x = src.x */
10781b8e80941Smrg	/* note: no need to mask out the high bits */
10782b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10783b8e80941Smrg	alu.op = ALU_OP1_MOV;
10784b8e80941Smrg	alu.dst.chan = 0;
10785b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
10786b8e80941Smrg	alu.dst.write = 1;
10787b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10788b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10789b8e80941Smrg	if (r)
10790b8e80941Smrg		return r;
10791848b8605Smrg
10792b8e80941Smrg	/* temp.y = src.x >> 16 */
10793b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10794b8e80941Smrg	alu.op = ALU_OP2_LSHR_INT;
10795b8e80941Smrg	alu.dst.chan = 1;
10796b8e80941Smrg	alu.dst.sel = ctx->temp_reg;
10797b8e80941Smrg	alu.dst.write = 1;
10798b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10799b8e80941Smrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
10800b8e80941Smrg	alu.src[1].value = 16;
10801b8e80941Smrg	alu.last = 1;
10802b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10803b8e80941Smrg	if (r)
10804b8e80941Smrg		return r;
10805848b8605Smrg
10806b8e80941Smrg	/* dst.wz = dst.xy = f16_to_f32(temp.xy) */
10807b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10808b8e80941Smrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
10809b8e80941Smrg			continue;
10810b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10811b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
10812b8e80941Smrg		alu.op = ALU_OP1_FLT16_TO_FLT32;
10813b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
10814b8e80941Smrg		alu.src[0].chan = i % 2;
10815b8e80941Smrg		alu.last = i == lasti;
10816b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10817b8e80941Smrg		if (r)
10818b8e80941Smrg			return r;
10819848b8605Smrg	}
10820848b8605Smrg
10821b8e80941Smrg	return 0;
10822848b8605Smrg}
10823848b8605Smrg
10824b8e80941Smrgstatic int tgsi_bfe(struct r600_shader_ctx *ctx)
10825848b8605Smrg{
10826b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10827b8e80941Smrg	struct r600_bytecode_alu alu;
10828b8e80941Smrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10829b8e80941Smrg	int r, i;
10830b8e80941Smrg	int dst = -1;
10831848b8605Smrg
10832b8e80941Smrg	if ((inst->Src[0].Register.File == inst->Dst[0].Register.File &&
10833b8e80941Smrg	     inst->Src[0].Register.Index == inst->Dst[0].Register.Index) ||
10834b8e80941Smrg	    (inst->Src[2].Register.File == inst->Dst[0].Register.File &&
10835b8e80941Smrg	     inst->Src[2].Register.Index == inst->Dst[0].Register.Index))
10836b8e80941Smrg		dst = r600_get_temp(ctx);
10837848b8605Smrg
10838b8e80941Smrg	r = tgsi_op3_dst(ctx, dst);
10839b8e80941Smrg	if (r)
10840b8e80941Smrg		return r;
10841848b8605Smrg
10842b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10843b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10844b8e80941Smrg		alu.op = ALU_OP2_SETGE_INT;
10845b8e80941Smrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], i);
10846b8e80941Smrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
10847b8e80941Smrg		alu.src[1].value = 32;
10848b8e80941Smrg		alu.dst.sel = ctx->temp_reg;
10849b8e80941Smrg		alu.dst.chan = i;
10850b8e80941Smrg		alu.dst.write = 1;
10851b8e80941Smrg		if (i == lasti)
10852b8e80941Smrg			alu.last = 1;
10853b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10854b8e80941Smrg		if (r)
10855b8e80941Smrg			return r;
10856b8e80941Smrg	}
10857848b8605Smrg
10858b8e80941Smrg	for (i = 0; i < lasti + 1; i++) {
10859b8e80941Smrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10860b8e80941Smrg		alu.op = ALU_OP3_CNDE_INT;
10861b8e80941Smrg		alu.is_op3 = 1;
10862b8e80941Smrg		alu.src[0].sel = ctx->temp_reg;
10863b8e80941Smrg		alu.src[0].chan = i;
10864848b8605Smrg
10865b8e80941Smrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
10866b8e80941Smrg		if (dst != -1)
10867b8e80941Smrg			alu.src[1].sel = dst;
10868b8e80941Smrg		else
10869b8e80941Smrg			alu.src[1].sel = alu.dst.sel;
10870b8e80941Smrg		alu.src[1].chan = i;
10871b8e80941Smrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
10872b8e80941Smrg		alu.dst.write = 1;
10873b8e80941Smrg		if (i == lasti)
10874b8e80941Smrg			alu.last = 1;
10875b8e80941Smrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
10876b8e80941Smrg		if (r)
10877b8e80941Smrg			return r;
10878b8e80941Smrg	}
10879848b8605Smrg
10880848b8605Smrg	return 0;
10881848b8605Smrg}
10882848b8605Smrg
10883b8e80941Smrgstatic int tgsi_clock(struct r600_shader_ctx *ctx)
10884848b8605Smrg{
10885b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10886b8e80941Smrg	struct r600_bytecode_alu alu;
10887b8e80941Smrg	int r;
10888b8e80941Smrg
10889b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10890b8e80941Smrg	alu.op = ALU_OP1_MOV;
10891b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
10892b8e80941Smrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO;
10893b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10894b8e80941Smrg	if (r)
10895b8e80941Smrg		return r;
10896b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10897b8e80941Smrg	alu.op = ALU_OP1_MOV;
10898b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
10899b8e80941Smrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI;
10900b8e80941Smrg	alu.last = 1;
10901b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10902b8e80941Smrg	if (r)
10903b8e80941Smrg		return r;
10904848b8605Smrg	return 0;
10905848b8605Smrg}
10906848b8605Smrg
10907b8e80941Smrgstatic int emit_u64add(struct r600_shader_ctx *ctx, int op,
10908b8e80941Smrg		       int treg,
10909b8e80941Smrg		       int src0_sel, int src0_chan,
10910b8e80941Smrg		       int src1_sel, int src1_chan)
10911848b8605Smrg{
10912b8e80941Smrg	struct r600_bytecode_alu alu;
10913b8e80941Smrg	int r;
10914b8e80941Smrg	int opc;
10915848b8605Smrg
10916b8e80941Smrg	if (op == ALU_OP2_ADD_INT)
10917b8e80941Smrg		opc = ALU_OP2_ADDC_UINT;
10918b8e80941Smrg	else
10919b8e80941Smrg		opc = ALU_OP2_SUBB_UINT;
10920848b8605Smrg
10921b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10922b8e80941Smrg	alu.op = op;            ;
10923b8e80941Smrg	alu.dst.sel = treg;
10924b8e80941Smrg	alu.dst.chan = 0;
10925b8e80941Smrg	alu.dst.write = 1;
10926b8e80941Smrg	alu.src[0].sel = src0_sel;
10927b8e80941Smrg	alu.src[0].chan = src0_chan + 0;
10928b8e80941Smrg	alu.src[1].sel = src1_sel;
10929b8e80941Smrg	alu.src[1].chan = src1_chan + 0;
10930b8e80941Smrg	alu.src[1].neg = 0;
10931b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10932b8e80941Smrg	if (r)
10933b8e80941Smrg		return r;
10934848b8605Smrg
10935b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10936b8e80941Smrg	alu.op = op;
10937b8e80941Smrg	alu.dst.sel = treg;
10938b8e80941Smrg	alu.dst.chan = 1;
10939b8e80941Smrg	alu.dst.write = 1;
10940b8e80941Smrg	alu.src[0].sel = src0_sel;
10941b8e80941Smrg	alu.src[0].chan = src0_chan + 1;
10942b8e80941Smrg	alu.src[1].sel = src1_sel;
10943b8e80941Smrg	alu.src[1].chan = src1_chan + 1;
10944b8e80941Smrg	alu.src[1].neg = 0;
10945b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10946b8e80941Smrg	if (r)
10947b8e80941Smrg		return r;
10948848b8605Smrg
10949b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10950b8e80941Smrg	alu.op = opc;
10951b8e80941Smrg	alu.dst.sel = treg;
10952b8e80941Smrg	alu.dst.chan = 2;
10953b8e80941Smrg	alu.dst.write = 1;
10954b8e80941Smrg	alu.last = 1;
10955b8e80941Smrg	alu.src[0].sel = src0_sel;
10956b8e80941Smrg	alu.src[0].chan = src0_chan + 0;
10957b8e80941Smrg	alu.src[1].sel = src1_sel;
10958b8e80941Smrg	alu.src[1].chan = src1_chan + 0;
10959b8e80941Smrg	alu.src[1].neg = 0;
10960b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10961b8e80941Smrg	if (r)
10962b8e80941Smrg		return r;
10963848b8605Smrg
10964b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10965b8e80941Smrg	alu.op = op;
10966b8e80941Smrg	alu.dst.sel = treg;
10967b8e80941Smrg	alu.dst.chan = 1;
10968b8e80941Smrg	alu.dst.write = 1;
10969b8e80941Smrg	alu.src[0].sel = treg;
10970b8e80941Smrg	alu.src[0].chan = 1;
10971b8e80941Smrg	alu.src[1].sel = treg;
10972b8e80941Smrg	alu.src[1].chan = 2;
10973b8e80941Smrg	alu.last = 1;
10974b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
10975b8e80941Smrg	if (r)
10976b8e80941Smrg		return r;
10977b8e80941Smrg	return 0;
10978848b8605Smrg}
10979848b8605Smrg
10980b8e80941Smrgstatic int egcm_u64add(struct r600_shader_ctx *ctx)
10981848b8605Smrg{
10982b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10983b8e80941Smrg	struct r600_bytecode_alu alu;
10984b8e80941Smrg	int r;
10985b8e80941Smrg	int treg = ctx->temp_reg;
10986b8e80941Smrg	int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT;
10987848b8605Smrg
10988b8e80941Smrg	if (ctx->src[1].neg) {
10989b8e80941Smrg		op = ALU_OP2_SUB_INT;
10990b8e80941Smrg		opc = ALU_OP2_SUBB_UINT;
10991848b8605Smrg	}
10992b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
10993b8e80941Smrg	alu.op = op;            ;
10994b8e80941Smrg	alu.dst.sel = treg;
10995b8e80941Smrg	alu.dst.chan = 0;
10996b8e80941Smrg	alu.dst.write = 1;
10997b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
10998b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
10999b8e80941Smrg	alu.src[1].neg = 0;
11000b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11001b8e80941Smrg	if (r)
11002b8e80941Smrg		return r;
11003848b8605Smrg
11004b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11005b8e80941Smrg	alu.op = op;
11006b8e80941Smrg	alu.dst.sel = treg;
11007b8e80941Smrg	alu.dst.chan = 1;
11008b8e80941Smrg	alu.dst.write = 1;
11009b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
11010b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
11011b8e80941Smrg	alu.src[1].neg = 0;
11012b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11013b8e80941Smrg	if (r)
11014b8e80941Smrg		return r;
11015848b8605Smrg
11016b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11017b8e80941Smrg	alu.op = opc              ;
11018b8e80941Smrg	alu.dst.sel = treg;
11019b8e80941Smrg	alu.dst.chan = 2;
11020b8e80941Smrg	alu.dst.write = 1;
11021b8e80941Smrg	alu.last = 1;
11022b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
11023b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
11024b8e80941Smrg	alu.src[1].neg = 0;
11025b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11026b8e80941Smrg	if (r)
11027b8e80941Smrg		return r;
11028848b8605Smrg
11029b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11030b8e80941Smrg	alu.op = op;
11031b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
11032b8e80941Smrg	alu.src[0].sel = treg;
11033b8e80941Smrg	alu.src[0].chan = 1;
11034b8e80941Smrg	alu.src[1].sel = treg;
11035b8e80941Smrg	alu.src[1].chan = 2;
11036b8e80941Smrg	alu.last = 1;
11037b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11038b8e80941Smrg	if (r)
11039b8e80941Smrg		return r;
11040b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11041b8e80941Smrg	alu.op = ALU_OP1_MOV;
11042b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
11043b8e80941Smrg	alu.src[0].sel = treg;
11044b8e80941Smrg	alu.src[0].chan = 0;
11045b8e80941Smrg	alu.last = 1;
11046b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11047b8e80941Smrg	if (r)
11048b8e80941Smrg		return r;
11049848b8605Smrg	return 0;
11050848b8605Smrg}
11051848b8605Smrg
11052b8e80941Smrg/* result.y = mul_high a, b
11053b8e80941Smrg   result.x = mul a,b
11054b8e80941Smrg   result.y += a.x * b.y + a.y * b.x;
11055b8e80941Smrg*/
11056b8e80941Smrgstatic int egcm_u64mul(struct r600_shader_ctx *ctx)
11057848b8605Smrg{
11058b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
11059b8e80941Smrg	struct r600_bytecode_alu alu;
11060b8e80941Smrg	int r;
11061b8e80941Smrg	int treg = ctx->temp_reg;
11062848b8605Smrg
11063b8e80941Smrg	/* temp.x = mul_lo a.x, b.x */
11064b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11065b8e80941Smrg	alu.op = ALU_OP2_MULLO_UINT;
11066b8e80941Smrg	alu.dst.sel = treg;
11067b8e80941Smrg	alu.dst.chan = 0;
11068b8e80941Smrg	alu.dst.write = 1;
11069b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
11070b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
11071b8e80941Smrg	r = emit_mul_int_op(ctx->bc, &alu);
11072b8e80941Smrg	if (r)
11073b8e80941Smrg		return r;
11074848b8605Smrg
11075b8e80941Smrg	/* temp.y = mul_hi a.x, b.x */
11076b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11077b8e80941Smrg	alu.op = ALU_OP2_MULHI_UINT;
11078b8e80941Smrg	alu.dst.sel = treg;
11079b8e80941Smrg	alu.dst.chan = 1;
11080b8e80941Smrg	alu.dst.write = 1;
11081b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
11082b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
11083b8e80941Smrg	r = emit_mul_int_op(ctx->bc, &alu);
11084b8e80941Smrg	if (r)
11085b8e80941Smrg		return r;
11086848b8605Smrg
11087b8e80941Smrg	/* temp.z = mul a.x, b.y */
11088b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11089b8e80941Smrg	alu.op = ALU_OP2_MULLO_UINT;
11090b8e80941Smrg	alu.dst.sel = treg;
11091b8e80941Smrg	alu.dst.chan = 2;
11092b8e80941Smrg	alu.dst.write = 1;
11093b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
11094b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
11095b8e80941Smrg	r = emit_mul_int_op(ctx->bc, &alu);
11096b8e80941Smrg	if (r)
11097b8e80941Smrg		return r;
11098848b8605Smrg
11099b8e80941Smrg	/* temp.w = mul a.y, b.x */
11100b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11101b8e80941Smrg	alu.op = ALU_OP2_MULLO_UINT;
11102b8e80941Smrg	alu.dst.sel = treg;
11103b8e80941Smrg	alu.dst.chan = 3;
11104b8e80941Smrg	alu.dst.write = 1;
11105b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
11106b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
11107b8e80941Smrg	r = emit_mul_int_op(ctx->bc, &alu);
11108b8e80941Smrg	if (r)
11109b8e80941Smrg		return r;
11110848b8605Smrg
11111b8e80941Smrg	/* temp.z = temp.z + temp.w */
11112b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11113b8e80941Smrg	alu.op = ALU_OP2_ADD_INT;
11114b8e80941Smrg	alu.dst.sel = treg;
11115b8e80941Smrg	alu.dst.chan = 2;
11116b8e80941Smrg	alu.dst.write = 1;
11117b8e80941Smrg	alu.src[0].sel = treg;
11118b8e80941Smrg	alu.src[0].chan = 2;
11119b8e80941Smrg	alu.src[1].sel = treg;
11120b8e80941Smrg	alu.src[1].chan = 3;
11121b8e80941Smrg	alu.last = 1;
11122b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11123b8e80941Smrg	if (r)
11124b8e80941Smrg		return r;
11125b8e80941Smrg
11126b8e80941Smrg	/* temp.y = temp.y + temp.z */
11127b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11128b8e80941Smrg	alu.op = ALU_OP2_ADD_INT;
11129b8e80941Smrg	alu.dst.sel = treg;
11130b8e80941Smrg	alu.dst.chan = 1;
11131b8e80941Smrg	alu.dst.write = 1;
11132b8e80941Smrg	alu.src[0].sel = treg;
11133b8e80941Smrg	alu.src[0].chan = 1;
11134b8e80941Smrg	alu.src[1].sel = treg;
11135b8e80941Smrg	alu.src[1].chan = 2;
11136b8e80941Smrg	alu.last = 1;
11137b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11138b8e80941Smrg	if (r)
11139b8e80941Smrg		return r;
11140b8e80941Smrg
11141b8e80941Smrg	/* dst.x = temp.x */
11142b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11143b8e80941Smrg	alu.op = ALU_OP1_MOV;
11144b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
11145b8e80941Smrg	alu.src[0].sel = treg;
11146b8e80941Smrg	alu.src[0].chan = 0;
11147b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11148b8e80941Smrg	if (r)
11149b8e80941Smrg		return r;
11150b8e80941Smrg
11151b8e80941Smrg	/* dst.y = temp.y */
11152b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11153b8e80941Smrg	alu.op = ALU_OP1_MOV;
11154b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
11155b8e80941Smrg	alu.src[0].sel = treg;
11156b8e80941Smrg	alu.src[0].chan = 1;
11157b8e80941Smrg	alu.last = 1;
11158b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11159b8e80941Smrg	if (r)
11160b8e80941Smrg		return r;
11161848b8605Smrg
11162848b8605Smrg	return 0;
11163848b8605Smrg}
11164848b8605Smrg
11165b8e80941Smrgstatic int emit_u64sge(struct r600_shader_ctx *ctx,
11166b8e80941Smrg		       int treg,
11167b8e80941Smrg		       int src0_sel, int src0_base_chan,
11168b8e80941Smrg		       int src1_sel, int src1_base_chan)
11169848b8605Smrg{
11170b8e80941Smrg	int r;
11171b8e80941Smrg	/* for 64-bit sge */
11172b8e80941Smrg	/* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */
11173b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT,
11174b8e80941Smrg			   treg, 1,
11175b8e80941Smrg			   src0_sel, src0_base_chan + 1,
11176b8e80941Smrg			   src1_sel, src1_base_chan + 1);
11177b8e80941Smrg	if (r)
11178b8e80941Smrg		return r;
11179848b8605Smrg
11180b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
11181b8e80941Smrg			   treg, 0,
11182b8e80941Smrg			   src0_sel, src0_base_chan,
11183b8e80941Smrg			   src1_sel, src1_base_chan);
11184b8e80941Smrg	if (r)
11185b8e80941Smrg		return r;
11186848b8605Smrg
11187b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_SETE_INT,
11188b8e80941Smrg			   treg, 2,
11189b8e80941Smrg			   src0_sel, src0_base_chan + 1,
11190b8e80941Smrg			   src1_sel, src1_base_chan + 1);
11191b8e80941Smrg	if (r)
11192b8e80941Smrg		return r;
11193b8e80941Smrg
11194b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_AND_INT,
11195b8e80941Smrg			   treg, 0,
11196b8e80941Smrg			   treg, 0,
11197b8e80941Smrg			   treg, 2);
11198b8e80941Smrg	if (r)
11199b8e80941Smrg		return r;
11200b8e80941Smrg
11201b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
11202b8e80941Smrg			   treg, 0,
11203b8e80941Smrg			   treg, 0,
11204b8e80941Smrg			   treg, 1);
11205b8e80941Smrg	if (r)
11206b8e80941Smrg		return r;
11207848b8605Smrg	return 0;
11208848b8605Smrg}
11209848b8605Smrg
11210b8e80941Smrg/* this isn't a complete div it's just enough for qbo shader to work */
11211b8e80941Smrgstatic int egcm_u64div(struct r600_shader_ctx *ctx)
11212848b8605Smrg{
11213b8e80941Smrg	struct r600_bytecode_alu alu;
11214b8e80941Smrg	struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src;
11215b8e80941Smrg	int r, i;
11216b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
11217848b8605Smrg
11218b8e80941Smrg	/* make sure we are dividing my a const with 0 in the high bits */
11219b8e80941Smrg	if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL)
11220b8e80941Smrg		return -1;
11221b8e80941Smrg	if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0)
11222b8e80941Smrg		return -1;
11223b8e80941Smrg	/* make sure we are doing one division */
11224b8e80941Smrg	if (inst->Dst[0].Register.WriteMask != 0x3)
11225b8e80941Smrg		return -1;
11226848b8605Smrg
11227b8e80941Smrg	/* emit_if uses ctx->temp_reg so we can't */
11228b8e80941Smrg	int treg = r600_get_temp(ctx);
11229b8e80941Smrg	int tmp_num = r600_get_temp(ctx);
11230b8e80941Smrg	int sub_tmp = r600_get_temp(ctx);
11231b8e80941Smrg
11232b8e80941Smrg	/* tmp quot are tmp_num.zw */
11233b8e80941Smrg	r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0);
11234b8e80941Smrg	r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1);
11235b8e80941Smrg	r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0);
11236b8e80941Smrg	r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1);
11237b8e80941Smrg
11238b8e80941Smrg	/* MOV tmp_num.xy, numerator */
11239b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11240b8e80941Smrg			   tmp_num, 0,
11241b8e80941Smrg			   alu_num_lo.sel, alu_num_lo.chan,
11242b8e80941Smrg			   0, 0);
11243b8e80941Smrg	if (r)
11244b8e80941Smrg		return r;
11245b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11246b8e80941Smrg			   tmp_num, 1,
11247b8e80941Smrg			   alu_num_hi.sel, alu_num_hi.chan,
11248b8e80941Smrg			   0, 0);
11249b8e80941Smrg	if (r)
11250b8e80941Smrg		return r;
11251848b8605Smrg
11252b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11253b8e80941Smrg			   tmp_num, 2,
11254b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 0,
11255b8e80941Smrg			   0, 0);
11256b8e80941Smrg	if (r)
11257b8e80941Smrg		return r;
11258848b8605Smrg
11259b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11260b8e80941Smrg			   tmp_num, 3,
11261b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 0,
11262b8e80941Smrg			   0, 0);
11263b8e80941Smrg	if (r)
11264b8e80941Smrg		return r;
11265848b8605Smrg
11266b8e80941Smrg	/* treg 0 is log2_denom */
11267b8e80941Smrg	/* normally this gets the MSB for the denom high value
11268b8e80941Smrg	   - however we know this will always be 0 here. */
11269b8e80941Smrg	r = single_alu_op2(ctx,
11270b8e80941Smrg			   ALU_OP1_MOV,
11271b8e80941Smrg			   treg, 0,
11272b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 32,
11273b8e80941Smrg			   0, 0);
11274b8e80941Smrg	if (r)
11275b8e80941Smrg		return r;
11276848b8605Smrg
11277b8e80941Smrg	/* normally check demon hi for 0, but we know it is already */
11278b8e80941Smrg	/* t0.z = num_hi >= denom_lo */
11279b8e80941Smrg	r = single_alu_op2(ctx,
11280b8e80941Smrg			   ALU_OP2_SETGE_UINT,
11281b8e80941Smrg			   treg, 1,
11282b8e80941Smrg			   alu_num_hi.sel, alu_num_hi.chan,
11283b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
11284b8e80941Smrg	if (r)
11285b8e80941Smrg		return r;
11286848b8605Smrg
11287b8e80941Smrg	memset(&alu_src, 0, sizeof(alu_src));
11288b8e80941Smrg	alu_src.sel = treg;
11289b8e80941Smrg	alu_src.chan = 1;
11290b8e80941Smrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
11291b8e80941Smrg	if (r)
11292b8e80941Smrg		return r;
11293b8e80941Smrg
11294b8e80941Smrg	/* for loops in here */
11295b8e80941Smrg	/* get msb t0.x = msb(src[1].x) first */
11296b8e80941Smrg	int msb_lo = util_last_bit(alu_denom_lo.value);
11297b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11298b8e80941Smrg			   treg, 0,
11299b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, msb_lo,
11300b8e80941Smrg			   0, 0);
11301b8e80941Smrg	if (r)
11302b8e80941Smrg		return r;
11303848b8605Smrg
11304b8e80941Smrg	/* unroll the asm here */
11305b8e80941Smrg	for (i = 0; i < 31; i++) {
11306b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
11307b8e80941Smrg				   treg, 2,
11308b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, i,
11309b8e80941Smrg				   treg, 0);
11310848b8605Smrg		if (r)
11311848b8605Smrg			return r;
11312848b8605Smrg
11313b8e80941Smrg		/* we can do this on the CPU */
11314b8e80941Smrg		uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i);
11315b8e80941Smrg		/* t0.z = tmp_num.y >= t0.z */
11316b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
11317b8e80941Smrg				   treg, 1,
11318b8e80941Smrg				   tmp_num, 1,
11319b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
11320848b8605Smrg		if (r)
11321848b8605Smrg			return r;
11322848b8605Smrg
11323b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_AND_INT,
11324b8e80941Smrg				   treg, 1,
11325b8e80941Smrg				   treg, 1,
11326b8e80941Smrg				   treg, 2);
11327848b8605Smrg		if (r)
11328848b8605Smrg			return r;
11329848b8605Smrg
11330b8e80941Smrg		memset(&alu_src, 0, sizeof(alu_src));
11331b8e80941Smrg		alu_src.sel = treg;
11332b8e80941Smrg		alu_src.chan = 1;
11333b8e80941Smrg		r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
11334b8e80941Smrg		if (r)
11335b8e80941Smrg			return r;
11336848b8605Smrg
11337b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
11338b8e80941Smrg				   tmp_num, 1,
11339b8e80941Smrg				   tmp_num, 1,
11340b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
11341b8e80941Smrg		if (r)
11342b8e80941Smrg			return r;
11343848b8605Smrg
11344b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_OR_INT,
11345b8e80941Smrg				   tmp_num, 3,
11346b8e80941Smrg				   tmp_num, 3,
11347b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
11348b8e80941Smrg		if (r)
11349b8e80941Smrg			return r;
11350848b8605Smrg
11351b8e80941Smrg		r = tgsi_endif(ctx);
11352b8e80941Smrg		if (r)
11353b8e80941Smrg			return r;
11354848b8605Smrg	}
11355848b8605Smrg
11356b8e80941Smrg	/* log2_denom is always <= 31, so manually peel the last loop
11357b8e80941Smrg	 * iteration.
11358b8e80941Smrg	 */
11359b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
11360b8e80941Smrg			   treg, 1,
11361b8e80941Smrg			   tmp_num, 1,
11362b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
11363b8e80941Smrg	if (r)
11364b8e80941Smrg		return r;
11365848b8605Smrg
11366b8e80941Smrg	memset(&alu_src, 0, sizeof(alu_src));
11367b8e80941Smrg	alu_src.sel = treg;
11368b8e80941Smrg	alu_src.chan = 1;
11369b8e80941Smrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
11370b8e80941Smrg	if (r)
11371b8e80941Smrg		return r;
11372848b8605Smrg
11373b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
11374b8e80941Smrg			   tmp_num, 1,
11375b8e80941Smrg			   tmp_num, 1,
11376b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
11377b8e80941Smrg	if (r)
11378b8e80941Smrg		return r;
11379848b8605Smrg
11380b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
11381b8e80941Smrg			   tmp_num, 3,
11382b8e80941Smrg			   tmp_num, 3,
11383b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 1U);
11384b8e80941Smrg	if (r)
11385b8e80941Smrg		return r;
11386b8e80941Smrg	r = tgsi_endif(ctx);
11387b8e80941Smrg	if (r)
11388b8e80941Smrg		return r;
11389848b8605Smrg
11390b8e80941Smrg	r = tgsi_endif(ctx);
11391b8e80941Smrg	if (r)
11392b8e80941Smrg		return r;
11393848b8605Smrg
11394b8e80941Smrg	/* onto the second loop to unroll */
11395b8e80941Smrg	for (i = 0; i < 31; i++) {
11396b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
11397b8e80941Smrg				   treg, 1,
11398b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)),
11399b8e80941Smrg				   treg, 0);
11400b8e80941Smrg		if (r)
11401b8e80941Smrg			return r;
11402848b8605Smrg
11403b8e80941Smrg		uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i);
11404b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
11405b8e80941Smrg				   treg, 2,
11406b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
11407b8e80941Smrg				   0, 0);
11408b8e80941Smrg		if (r)
11409b8e80941Smrg			return r;
11410848b8605Smrg
11411b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
11412b8e80941Smrg				   treg, 3,
11413b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
11414b8e80941Smrg				   0, 0);
11415b8e80941Smrg		if (r)
11416b8e80941Smrg			return r;
11417848b8605Smrg
11418b8e80941Smrg		r = emit_u64sge(ctx, sub_tmp,
11419b8e80941Smrg				tmp_num, 0,
11420b8e80941Smrg				treg, 2);
11421b8e80941Smrg		if (r)
11422b8e80941Smrg			return r;
11423848b8605Smrg
11424b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_AND_INT,
11425b8e80941Smrg				   treg, 1,
11426b8e80941Smrg				   treg, 1,
11427b8e80941Smrg				   sub_tmp, 0);
11428b8e80941Smrg		if (r)
11429b8e80941Smrg			return r;
11430848b8605Smrg
11431b8e80941Smrg		memset(&alu_src, 0, sizeof(alu_src));
11432b8e80941Smrg		alu_src.sel = treg;
11433b8e80941Smrg		alu_src.chan = 1;
11434b8e80941Smrg		r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
11435b8e80941Smrg		if (r)
11436b8e80941Smrg			return r;
11437848b8605Smrg
11438848b8605Smrg
11439b8e80941Smrg		r = emit_u64add(ctx, ALU_OP2_SUB_INT,
11440b8e80941Smrg				sub_tmp,
11441b8e80941Smrg				tmp_num, 0,
11442b8e80941Smrg				treg, 2);
11443b8e80941Smrg		if (r)
11444b8e80941Smrg			return r;
11445848b8605Smrg
11446b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
11447b8e80941Smrg				   tmp_num, 0,
11448b8e80941Smrg				   sub_tmp, 0,
11449b8e80941Smrg				   0, 0);
11450b8e80941Smrg		if (r)
11451b8e80941Smrg			return r;
11452848b8605Smrg
11453b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
11454b8e80941Smrg				   tmp_num, 1,
11455b8e80941Smrg				   sub_tmp, 1,
11456b8e80941Smrg				   0, 0);
11457b8e80941Smrg		if (r)
11458b8e80941Smrg			return r;
11459848b8605Smrg
11460b8e80941Smrg		r = single_alu_op2(ctx, ALU_OP2_OR_INT,
11461b8e80941Smrg				   tmp_num, 2,
11462b8e80941Smrg				   tmp_num, 2,
11463b8e80941Smrg				   V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
11464b8e80941Smrg		if (r)
11465b8e80941Smrg			return r;
11466848b8605Smrg
11467b8e80941Smrg		r = tgsi_endif(ctx);
11468848b8605Smrg		if (r)
11469848b8605Smrg			return r;
11470848b8605Smrg	}
11471b8e80941Smrg
11472b8e80941Smrg	/* log2_denom is always <= 63, so manually peel the last loop
11473b8e80941Smrg	 * iteration.
11474b8e80941Smrg	 */
11475b8e80941Smrg	uint64_t denom_shl = (uint64_t)alu_denom_lo.value;
11476b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11477b8e80941Smrg			   treg, 2,
11478b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
11479b8e80941Smrg			   0, 0);
11480b8e80941Smrg	if (r)
11481b8e80941Smrg		return r;
11482b8e80941Smrg
11483b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
11484b8e80941Smrg			   treg, 3,
11485b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
11486b8e80941Smrg			   0, 0);
11487b8e80941Smrg	if (r)
11488b8e80941Smrg		return r;
11489b8e80941Smrg
11490b8e80941Smrg	r = emit_u64sge(ctx, sub_tmp,
11491b8e80941Smrg			tmp_num, 0,
11492b8e80941Smrg			treg, 2);
11493b8e80941Smrg	if (r)
11494b8e80941Smrg		return r;
11495b8e80941Smrg
11496b8e80941Smrg	memset(&alu_src, 0, sizeof(alu_src));
11497b8e80941Smrg	alu_src.sel = sub_tmp;
11498b8e80941Smrg	alu_src.chan = 0;
11499b8e80941Smrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
11500b8e80941Smrg	if (r)
11501b8e80941Smrg		return r;
11502b8e80941Smrg
11503b8e80941Smrg	r = emit_u64add(ctx, ALU_OP2_SUB_INT,
11504b8e80941Smrg			sub_tmp,
11505b8e80941Smrg			tmp_num, 0,
11506b8e80941Smrg			treg, 2);
11507b8e80941Smrg	if (r)
11508b8e80941Smrg		return r;
11509b8e80941Smrg
11510b8e80941Smrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
11511b8e80941Smrg			   tmp_num, 2,
11512b8e80941Smrg			   tmp_num, 2,
11513b8e80941Smrg			   V_SQ_ALU_SRC_LITERAL, 1U);
11514b8e80941Smrg	if (r)
11515b8e80941Smrg		return r;
11516b8e80941Smrg	r = tgsi_endif(ctx);
11517b8e80941Smrg	if (r)
11518b8e80941Smrg		return r;
11519b8e80941Smrg
11520b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11521b8e80941Smrg	alu.op = ALU_OP1_MOV;
11522b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
11523b8e80941Smrg	alu.src[0].sel = tmp_num;
11524b8e80941Smrg	alu.src[0].chan = 2;
11525b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11526b8e80941Smrg	if (r)
11527b8e80941Smrg		return r;
11528b8e80941Smrg
11529b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11530b8e80941Smrg	alu.op = ALU_OP1_MOV;
11531b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
11532b8e80941Smrg	alu.src[0].sel = tmp_num;
11533b8e80941Smrg	alu.src[0].chan = 3;
11534b8e80941Smrg	alu.last = 1;
11535b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11536b8e80941Smrg	if (r)
11537b8e80941Smrg		return r;
11538848b8605Smrg	return 0;
11539848b8605Smrg}
11540848b8605Smrg
11541b8e80941Smrgstatic int egcm_u64sne(struct r600_shader_ctx *ctx)
11542b8e80941Smrg{
11543b8e80941Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
11544b8e80941Smrg	struct r600_bytecode_alu alu;
11545b8e80941Smrg	int r;
11546b8e80941Smrg	int treg = ctx->temp_reg;
11547b8e80941Smrg
11548b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11549b8e80941Smrg	alu.op = ALU_OP2_SETNE_INT;
11550b8e80941Smrg	alu.dst.sel = treg;
11551b8e80941Smrg	alu.dst.chan = 0;
11552b8e80941Smrg	alu.dst.write = 1;
11553b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
11554b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
11555b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11556b8e80941Smrg	if (r)
11557b8e80941Smrg		return r;
11558848b8605Smrg
11559b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11560b8e80941Smrg	alu.op = ALU_OP2_SETNE_INT;
11561b8e80941Smrg	alu.dst.sel = treg;
11562b8e80941Smrg	alu.dst.chan = 1;
11563b8e80941Smrg	alu.dst.write = 1;
11564b8e80941Smrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
11565b8e80941Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
11566b8e80941Smrg	alu.last = 1;
11567b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11568b8e80941Smrg	if (r)
11569b8e80941Smrg		return r;
11570b8e80941Smrg
11571b8e80941Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
11572b8e80941Smrg	alu.op = ALU_OP2_OR_INT;
11573b8e80941Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
11574b8e80941Smrg	alu.src[0].sel = treg;
11575b8e80941Smrg	alu.src[0].chan = 0;
11576b8e80941Smrg	alu.src[1].sel = treg;
11577b8e80941Smrg	alu.src[1].chan = 1;
11578b8e80941Smrg	alu.last = 1;
11579b8e80941Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
11580b8e80941Smrg	if (r)
11581b8e80941Smrg		return r;
11582b8e80941Smrg	return 0;
11583b8e80941Smrg}
11584b8e80941Smrg
11585b8e80941Smrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
11586b8e80941Smrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_r600_arl},
11587b8e80941Smrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
11588b8e80941Smrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
11589b8e80941Smrg
11590b8e80941Smrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
11591b8e80941Smrg
11592b8e80941Smrg	[TGSI_OPCODE_RSQ]	= { ALU_OP0_NOP, tgsi_rsq},
11593b8e80941Smrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
11594b8e80941Smrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
11595b8e80941Smrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
11596b8e80941Smrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
11597b8e80941Smrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11598b8e80941Smrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11599b8e80941Smrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
11600b8e80941Smrg	/* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
11601b8e80941Smrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
11602b8e80941Smrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
11603b8e80941Smrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
11604b8e80941Smrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
11605b8e80941Smrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
11606b8e80941Smrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
11607b8e80941Smrg	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
11608b8e80941Smrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
11609b8e80941Smrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
11610b8e80941Smrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
11611b8e80941Smrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
11612b8e80941Smrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
11613b8e80941Smrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
11614b8e80941Smrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
11615b8e80941Smrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
11616b8e80941Smrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
11617b8e80941Smrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
11618b8e80941Smrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
11619b8e80941Smrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
11620b8e80941Smrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
11621b8e80941Smrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_unsupported},
11622b8e80941Smrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
11623b8e80941Smrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
11624b8e80941Smrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
11625b8e80941Smrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
11626b8e80941Smrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
11627b8e80941Smrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
11628b8e80941Smrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
11629b8e80941Smrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
11630b8e80941Smrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
11631b8e80941Smrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
11632b8e80941Smrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
11633b8e80941Smrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
11634b8e80941Smrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
11635b8e80941Smrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
11636b8e80941Smrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
11637b8e80941Smrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
11638b8e80941Smrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
11639b8e80941Smrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
11640b8e80941Smrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
11641b8e80941Smrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
11642b8e80941Smrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
11643b8e80941Smrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_unsupported},
11644b8e80941Smrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
11645b8e80941Smrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
11646b8e80941Smrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
11647b8e80941Smrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
11648b8e80941Smrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
11649b8e80941Smrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_r600_arl},
11650b8e80941Smrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
11651b8e80941Smrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
11652b8e80941Smrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
11653b8e80941Smrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
11654b8e80941Smrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
11655b8e80941Smrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
11656b8e80941Smrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
11657b8e80941Smrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
11658b8e80941Smrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
11659b8e80941Smrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11660b8e80941Smrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
11661b8e80941Smrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
11662b8e80941Smrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
11663b8e80941Smrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
11664b8e80941Smrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
11665b8e80941Smrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
11666b8e80941Smrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
11667b8e80941Smrg	[TGSI_OPCODE_DDX_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
11668b8e80941Smrg	[TGSI_OPCODE_DDY_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
11669b8e80941Smrg	[81]			= { ALU_OP0_NOP, tgsi_unsupported},
11670b8e80941Smrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
11671b8e80941Smrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
11672b8e80941Smrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
11673b8e80941Smrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
11674b8e80941Smrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
11675b8e80941Smrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2_trans},
11676b8e80941Smrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
11677b8e80941Smrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
11678b8e80941Smrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
11679b8e80941Smrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
11680b8e80941Smrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
11681b8e80941Smrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
11682b8e80941Smrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
11683b8e80941Smrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
11684b8e80941Smrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
11685b8e80941Smrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
11686b8e80941Smrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
11687b8e80941Smrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
11688b8e80941Smrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
11689b8e80941Smrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
11690b8e80941Smrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
11691b8e80941Smrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
11692b8e80941Smrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
11693b8e80941Smrg	[TGSI_OPCODE_RESQ]	= { ALU_OP0_NOP, tgsi_unsupported},
11694b8e80941Smrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
11695b8e80941Smrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
11696b8e80941Smrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
11697b8e80941Smrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
11698b8e80941Smrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
11699b8e80941Smrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
11700b8e80941Smrg	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
11701b8e80941Smrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
11702b8e80941Smrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
11703b8e80941Smrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
11704b8e80941Smrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
11705b8e80941Smrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
11706b8e80941Smrg	[TGSI_OPCODE_DFMA]	= { ALU_OP0_NOP, tgsi_unsupported},
11707b8e80941Smrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
11708b8e80941Smrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
11709b8e80941Smrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
11710b8e80941Smrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
11711b8e80941Smrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
11712b8e80941Smrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
11713b8e80941Smrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2_trans},
11714b8e80941Smrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
11715b8e80941Smrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
11716b8e80941Smrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
11717b8e80941Smrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
11718b8e80941Smrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
11719b8e80941Smrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
11720b8e80941Smrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
11721b8e80941Smrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
11722b8e80941Smrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
11723b8e80941Smrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
11724b8e80941Smrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
11725b8e80941Smrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
11726b8e80941Smrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2_trans},
11727b8e80941Smrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
11728b8e80941Smrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2_swap},
11729b8e80941Smrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
11730b8e80941Smrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
11731b8e80941Smrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
11732b8e80941Smrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
11733b8e80941Smrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
11734b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
11735b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
11736b8e80941Smrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
11737b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
11738b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
11739b8e80941Smrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
11740b8e80941Smrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
11741b8e80941Smrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
11742b8e80941Smrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
11743b8e80941Smrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
11744b8e80941Smrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
11745b8e80941Smrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_r600_arl},
11746b8e80941Smrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
11747b8e80941Smrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
11748b8e80941Smrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
11749b8e80941Smrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
11750b8e80941Smrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
11751b8e80941Smrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
11752b8e80941Smrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
11753b8e80941Smrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
11754b8e80941Smrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_NOP, tgsi_unsupported},
11755b8e80941Smrg	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
11756b8e80941Smrg	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
11757b8e80941Smrg	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
11758b8e80941Smrg	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
11759b8e80941Smrg	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
11760b8e80941Smrg	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
11761b8e80941Smrg	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
11762b8e80941Smrg	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
11763b8e80941Smrg	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
11764b8e80941Smrg	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
11765b8e80941Smrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
11766b8e80941Smrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
11767b8e80941Smrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
11768b8e80941Smrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
11769b8e80941Smrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
11770b8e80941Smrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_unsupported},
11771b8e80941Smrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_unsupported},
11772b8e80941Smrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_unsupported},
11773b8e80941Smrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_unsupported},
11774b8e80941Smrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_unsupported},
11775b8e80941Smrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_unsupported},
11776b8e80941Smrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_unsupported},
11777b8e80941Smrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_unsupported},
11778b8e80941Smrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_unsupported},
11779b8e80941Smrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_unsupported},
11780b8e80941Smrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_unsupported},
11781b8e80941Smrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_unsupported},
11782b8e80941Smrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_unsupported},
11783b8e80941Smrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
11784848b8605Smrg};
11785848b8605Smrg
/*
 * TGSI opcode dispatch table; presumably the one used for Evergreen-class
 * chips (inferred from the "eg_" prefix and the tgsi_eg_arl / egcm_*
 * handlers -- confirm against the code that selects the table).
 *
 * The array is indexed by TGSI opcode number through designated
 * initializers.  Each entry holds two values: an operation constant
 * (ALU_OP*, FETCH_OP*, CF_OP* or V_RAT_INST_*; a plain 0 where, presumably,
 * the handler does not consult it) and the function that handles that
 * opcode.  Opcodes that are not handled here point at tgsi_unsupported.
 *
 * Slots written as bare numbers ([21], [22], ...) presumably are opcode
 * values that have no symbolic TGSI name in this tree -- TODO confirm.
 *
 * Do not reorder entries casually: if a numeric index ever aliases a named
 * opcode, the later designated initializer silently overrides the earlier.
 */
11786b8e80941Smrgstatic const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
11787b8e80941Smrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
11788b8e80941Smrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
11789b8e80941Smrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
11790b8e80941Smrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
11791b8e80941Smrg	[TGSI_OPCODE_RSQ]	= { ALU_OP0_NOP, tgsi_rsq},
11792b8e80941Smrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
11793b8e80941Smrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
11794b8e80941Smrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
11795b8e80941Smrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
11796b8e80941Smrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11797b8e80941Smrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11798b8e80941Smrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
11799b8e80941Smrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
11800b8e80941Smrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
11801b8e80941Smrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
11802b8e80941Smrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
11803b8e80941Smrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
11804b8e80941Smrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
11805b8e80941Smrg	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
11806b8e80941Smrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
11807b8e80941Smrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
11808b8e80941Smrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
11809b8e80941Smrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
11810b8e80941Smrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
11811b8e80941Smrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
11812b8e80941Smrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
11813b8e80941Smrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
11814b8e80941Smrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
11815b8e80941Smrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
11816b8e80941Smrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
11817b8e80941Smrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
11818b8e80941Smrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
11819b8e80941Smrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
11820b8e80941Smrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
11821b8e80941Smrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
11822b8e80941Smrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
11823b8e80941Smrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
11824b8e80941Smrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
11825b8e80941Smrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
11826b8e80941Smrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
11827b8e80941Smrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
11828b8e80941Smrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
11829b8e80941Smrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
11830b8e80941Smrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
11831b8e80941Smrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
11832b8e80941Smrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
11833b8e80941Smrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
11834b8e80941Smrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
11835b8e80941Smrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
11836b8e80941Smrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
11837b8e80941Smrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
11838b8e80941Smrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
11839b8e80941Smrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
11840b8e80941Smrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
11841b8e80941Smrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
11842b8e80941Smrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
11843b8e80941Smrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
11844b8e80941Smrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
11845b8e80941Smrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
11846b8e80941Smrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
11847b8e80941Smrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
11848b8e80941Smrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
11849b8e80941Smrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
11850b8e80941Smrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
11851b8e80941Smrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
11852b8e80941Smrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
11853b8e80941Smrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
11854b8e80941Smrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
11855b8e80941Smrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
11856b8e80941Smrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
11857b8e80941Smrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
11858b8e80941Smrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
11859b8e80941Smrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
11860b8e80941Smrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
11861b8e80941Smrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
11862b8e80941Smrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
11863b8e80941Smrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
11864b8e80941Smrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
11865b8e80941Smrg	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
11866b8e80941Smrg	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
11867b8e80941Smrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
11868b8e80941Smrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
11869b8e80941Smrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
11870b8e80941Smrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
11871b8e80941Smrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
11872b8e80941Smrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
11873b8e80941Smrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
11874b8e80941Smrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
11875b8e80941Smrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
11876b8e80941Smrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
11877b8e80941Smrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
11878b8e80941Smrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
11879b8e80941Smrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
11880b8e80941Smrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
11881b8e80941Smrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
11882b8e80941Smrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
11883b8e80941Smrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
11884b8e80941Smrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
11885b8e80941Smrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
11886b8e80941Smrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
11887b8e80941Smrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	/* NOTE(review): slot 103 has no symbolic name here, yet it is routed to
	 * tgsi_tex with the same fetch op as TGSI_OPCODE_TXQ rather than to
	 * tgsi_unsupported like the other bare-number slots -- confirm which
	 * opcode this is meant to serve. */
11888b8e80941Smrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
11889b8e80941Smrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
11890b8e80941Smrg	[TGSI_OPCODE_RESQ]     	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq},
11891b8e80941Smrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
11892b8e80941Smrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
11893b8e80941Smrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
11894b8e80941Smrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
11895b8e80941Smrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
11896b8e80941Smrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
11897b8e80941Smrg	[TGSI_OPCODE_MEMBAR]    = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
11898b8e80941Smrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
11899b8e80941Smrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
11900b8e80941Smrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
11901b8e80941Smrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
11902b8e80941Smrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
11903b8e80941Smrg	/* Refer below for TGSI_OPCODE_DFMA */
11904b8e80941Smrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_f2i},
11905b8e80941Smrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
11906b8e80941Smrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
11907b8e80941Smrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
11908b8e80941Smrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
11909b8e80941Smrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
11910b8e80941Smrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
11911b8e80941Smrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
11912b8e80941Smrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
11913b8e80941Smrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
11914b8e80941Smrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
11915b8e80941Smrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
11916b8e80941Smrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
11917b8e80941Smrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
11918b8e80941Smrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
11919b8e80941Smrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
11920b8e80941Smrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
11921b8e80941Smrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
11922b8e80941Smrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
11923b8e80941Smrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
11924b8e80941Smrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
11925b8e80941Smrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
11926b8e80941Smrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
11927b8e80941Smrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
11928b8e80941Smrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
11929b8e80941Smrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
11930b8e80941Smrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
11931b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
11932b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
11933b8e80941Smrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
11934b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
11935b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
11936b8e80941Smrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
11937b8e80941Smrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
11938b8e80941Smrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
11939b8e80941Smrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
11940b8e80941Smrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
11941b8e80941Smrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
11942b8e80941Smrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
11943b8e80941Smrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
11944b8e80941Smrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
11945b8e80941Smrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
11946b8e80941Smrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
11947b8e80941Smrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
11948b8e80941Smrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
11949b8e80941Smrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
11950b8e80941Smrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
11951b8e80941Smrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
11952b8e80941Smrg	[TGSI_OPCODE_ATOMUADD]	= { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
11953b8e80941Smrg	[TGSI_OPCODE_ATOMXCHG]	= { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
11954b8e80941Smrg	[TGSI_OPCODE_ATOMCAS]	= { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
11955b8e80941Smrg	[TGSI_OPCODE_ATOMAND]	= { V_RAT_INST_AND_RTN, tgsi_atomic_op},
11956b8e80941Smrg	[TGSI_OPCODE_ATOMOR]	= { V_RAT_INST_OR_RTN, tgsi_atomic_op},
11957b8e80941Smrg	[TGSI_OPCODE_ATOMXOR]	= { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
11958b8e80941Smrg	[TGSI_OPCODE_ATOMUMIN]	= { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
11959b8e80941Smrg	[TGSI_OPCODE_ATOMUMAX]	= { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
11960b8e80941Smrg	[TGSI_OPCODE_ATOMIMIN]	= { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
11961b8e80941Smrg	[TGSI_OPCODE_ATOMIMAX]	= { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
11962b8e80941Smrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
11963b8e80941Smrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
11964b8e80941Smrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
11965b8e80941Smrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
11966b8e80941Smrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
11967b8e80941Smrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
11968b8e80941Smrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
11969b8e80941Smrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_bfe},
11970b8e80941Smrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_bfe},
11971b8e80941Smrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
11972b8e80941Smrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
11973b8e80941Smrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
11974b8e80941Smrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
11975b8e80941Smrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
11976b8e80941Smrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
11977b8e80941Smrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
11978b8e80941Smrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
11979b8e80941Smrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
11980b8e80941Smrg	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
11981b8e80941Smrg	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
11982b8e80941Smrg	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
11983b8e80941Smrg	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
11984b8e80941Smrg	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
11985b8e80941Smrg	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
11986b8e80941Smrg	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
11987b8e80941Smrg	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
11988b8e80941Smrg	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
11989b8e80941Smrg	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
11990b8e80941Smrg	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
11991b8e80941Smrg	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
11992b8e80941Smrg	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
11993b8e80941Smrg	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
11994b8e80941Smrg	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
11995b8e80941Smrg	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
11996b8e80941Smrg	[TGSI_OPCODE_DFMA]	= { ALU_OP3_FMA_64, tgsi_op3_64},
11997b8e80941Smrg	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
11998b8e80941Smrg	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
11999b8e80941Smrg	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
12000b8e80941Smrg	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
12001b8e80941Smrg	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
12002b8e80941Smrg	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
12003b8e80941Smrg	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
12004b8e80941Smrg	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
12005b8e80941Smrg	[TGSI_OPCODE_U64SNE]    = { ALU_OP0_NOP, egcm_u64sne },
12006b8e80941Smrg	[TGSI_OPCODE_U64ADD]    = { ALU_OP0_NOP, egcm_u64add },
12007b8e80941Smrg	[TGSI_OPCODE_U64MUL]    = { ALU_OP0_NOP, egcm_u64mul },
12008b8e80941Smrg	[TGSI_OPCODE_U64DIV]    = { ALU_OP0_NOP, egcm_u64div },
	/* Presumably the highest index used; because the array is declared
	 * with [], the largest designated index fixes its length. */
12009b8e80941Smrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
12010848b8605Smrg};
12011848b8605Smrg
/*
 * TGSI opcode dispatch table; presumably the one used for Cayman-class
 * chips (inferred from the "cm_" prefix and the cayman_* handlers --
 * confirm against the code that selects the table).
 *
 * The array is indexed by TGSI opcode number through designated
 * initializers.  Each entry holds two values: an operation constant
 * (ALU_OP*, FETCH_OP*, CF_OP* or V_RAT_INST_*; a plain 0 where, presumably,
 * the handler does not consult it) and the function that handles that
 * opcode.  Opcodes that are not handled here point at tgsi_unsupported.
 *
 * Compared with eg_shader_tgsi_instruction in this file, the entries that
 * differ are:
 *   - RCP, RSQ, SQRT, EX2, LG2 use cayman_emit_float_instr (RSQ also
 *     carries ALU_OP1_RECIPSQRT_IEEE instead of ALU_OP0_NOP);
 *   - POW uses cayman_pow; COS and SIN use cayman_trig;
 *   - I2F, U2F, F2I, F2U go through plain tgsi_op2;
 *   - UMUL uses ALU_OP2_MULLO_INT with cayman_mul_int_instr, and
 *     IMUL_HI / UMUL_HI also use cayman_mul_int_instr.
 *
 * Slots written as bare numbers ([21], [22], ...) presumably are opcode
 * values that have no symbolic TGSI name in this tree -- TODO confirm.
 *
 * Do not reorder entries casually: if a numeric index ever aliases a named
 * opcode, the later designated initializer silently overrides the earlier.
 */
12012b8e80941Smrgstatic const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
12013b8e80941Smrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
12014b8e80941Smrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
12015b8e80941Smrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
12016b8e80941Smrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
12017b8e80941Smrg	[TGSI_OPCODE_RSQ]	= { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
12018b8e80941Smrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
12019b8e80941Smrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
12020b8e80941Smrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
12021b8e80941Smrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
12022b8e80941Smrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
12023b8e80941Smrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
12024b8e80941Smrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
12025b8e80941Smrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
12026b8e80941Smrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
12027b8e80941Smrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
12028b8e80941Smrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
12029b8e80941Smrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
12030b8e80941Smrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
12031b8e80941Smrg	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
12032b8e80941Smrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
12033b8e80941Smrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
12034b8e80941Smrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
12035b8e80941Smrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
12036b8e80941Smrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
12037b8e80941Smrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
12038b8e80941Smrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
12039b8e80941Smrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
12040b8e80941Smrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
12041b8e80941Smrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
12042b8e80941Smrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
12043b8e80941Smrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
12044b8e80941Smrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
12045b8e80941Smrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
12046b8e80941Smrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
12047b8e80941Smrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
12048b8e80941Smrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
12049b8e80941Smrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
12050b8e80941Smrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
12051b8e80941Smrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
12052b8e80941Smrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
12053b8e80941Smrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
12054b8e80941Smrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
12055b8e80941Smrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
12056b8e80941Smrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
12057b8e80941Smrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
12058b8e80941Smrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
12059b8e80941Smrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
12060b8e80941Smrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, cayman_trig},
12061b8e80941Smrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
12062b8e80941Smrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
12063b8e80941Smrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
12064b8e80941Smrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
12065b8e80941Smrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
12066b8e80941Smrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
12067b8e80941Smrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
12068b8e80941Smrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
12069b8e80941Smrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
12070b8e80941Smrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
12071b8e80941Smrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
12072b8e80941Smrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
12073b8e80941Smrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
12074b8e80941Smrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
12075b8e80941Smrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
12076b8e80941Smrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
12077b8e80941Smrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
12078b8e80941Smrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
12079b8e80941Smrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
12080b8e80941Smrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
12081b8e80941Smrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
12082b8e80941Smrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
12083b8e80941Smrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
12084b8e80941Smrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
12085b8e80941Smrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
12086b8e80941Smrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
12087b8e80941Smrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
12088b8e80941Smrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
12089b8e80941Smrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
12090b8e80941Smrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
12091b8e80941Smrg	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
12092b8e80941Smrg	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
12093b8e80941Smrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
12094b8e80941Smrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
12095b8e80941Smrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2},
12096b8e80941Smrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
12097b8e80941Smrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
12098b8e80941Smrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
12099b8e80941Smrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
12100b8e80941Smrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
12101b8e80941Smrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
12102b8e80941Smrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
12103b8e80941Smrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
12104b8e80941Smrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
12105b8e80941Smrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
12106b8e80941Smrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
12107b8e80941Smrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
12108b8e80941Smrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
12109b8e80941Smrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
12110b8e80941Smrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
12111b8e80941Smrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
12112b8e80941Smrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
12113b8e80941Smrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	/* NOTE(review): slot 103 has no symbolic name here, yet it is routed to
	 * tgsi_tex with the same fetch op as TGSI_OPCODE_TXQ rather than to
	 * tgsi_unsupported like the other bare-number slots -- confirm which
	 * opcode this is meant to serve. */
12114b8e80941Smrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
12115b8e80941Smrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
12116b8e80941Smrg	[TGSI_OPCODE_RESQ]     	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq},
12117b8e80941Smrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
12118b8e80941Smrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
12119b8e80941Smrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
12120b8e80941Smrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
12121b8e80941Smrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
12122b8e80941Smrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
12123b8e80941Smrg	[TGSI_OPCODE_MEMBAR]    = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
12124b8e80941Smrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
12125b8e80941Smrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
12126b8e80941Smrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
12127b8e80941Smrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
12128b8e80941Smrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
12129b8e80941Smrg	/* Refer below for TGSI_OPCODE_DFMA */
12130b8e80941Smrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2},
12131b8e80941Smrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
12132b8e80941Smrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
12133b8e80941Smrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
12134b8e80941Smrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
12135b8e80941Smrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
12136b8e80941Smrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
12137b8e80941Smrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
12138b8e80941Smrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2},
12139b8e80941Smrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2},
12140b8e80941Smrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
12141b8e80941Smrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
12142b8e80941Smrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
12143b8e80941Smrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
12144b8e80941Smrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
12145b8e80941Smrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
12146b8e80941Smrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_INT, cayman_mul_int_instr},
12147b8e80941Smrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
12148b8e80941Smrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
12149b8e80941Smrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
12150b8e80941Smrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
12151b8e80941Smrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
12152b8e80941Smrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
12153b8e80941Smrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
12154b8e80941Smrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
12155b8e80941Smrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
12156b8e80941Smrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
12157b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
12158b8e80941Smrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
12159b8e80941Smrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
12160b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
12161b8e80941Smrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
12162b8e80941Smrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
12163b8e80941Smrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
12164b8e80941Smrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
12165b8e80941Smrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
12166b8e80941Smrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
12167b8e80941Smrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
12168b8e80941Smrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
12169b8e80941Smrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
12170b8e80941Smrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
12171b8e80941Smrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
12172b8e80941Smrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
12173b8e80941Smrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
12174b8e80941Smrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
12175b8e80941Smrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
12176b8e80941Smrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
12177b8e80941Smrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
12178b8e80941Smrg	[TGSI_OPCODE_ATOMUADD]	= { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
12179b8e80941Smrg	[TGSI_OPCODE_ATOMXCHG]	= { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
12180b8e80941Smrg	[TGSI_OPCODE_ATOMCAS]	= { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
12181b8e80941Smrg	[TGSI_OPCODE_ATOMAND]	= { V_RAT_INST_AND_RTN, tgsi_atomic_op},
12182b8e80941Smrg	[TGSI_OPCODE_ATOMOR]	= { V_RAT_INST_OR_RTN, tgsi_atomic_op},
12183b8e80941Smrg	[TGSI_OPCODE_ATOMXOR]	= { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
12184b8e80941Smrg	[TGSI_OPCODE_ATOMUMIN]	= { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
12185b8e80941Smrg	[TGSI_OPCODE_ATOMUMAX]	= { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
12186b8e80941Smrg	[TGSI_OPCODE_ATOMIMIN]	= { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
12187b8e80941Smrg	[TGSI_OPCODE_ATOMIMAX]	= { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
12188b8e80941Smrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
12189b8e80941Smrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
12190b8e80941Smrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
12191b8e80941Smrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, cayman_mul_int_instr},
12192b8e80941Smrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
12193b8e80941Smrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
12194b8e80941Smrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
12195b8e80941Smrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_bfe},
12196b8e80941Smrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_bfe},
12197b8e80941Smrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
12198b8e80941Smrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
12199b8e80941Smrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
12200b8e80941Smrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
12201b8e80941Smrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
12202b8e80941Smrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
12203b8e80941Smrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
12204b8e80941Smrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
12205b8e80941Smrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
12206b8e80941Smrg	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
12207b8e80941Smrg	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
12208b8e80941Smrg	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
12209b8e80941Smrg	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
12210b8e80941Smrg	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
12211b8e80941Smrg	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
12212b8e80941Smrg	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
12213b8e80941Smrg	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
12214b8e80941Smrg	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
12215b8e80941Smrg	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
12216b8e80941Smrg	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
12217b8e80941Smrg	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
12218b8e80941Smrg	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
12219b8e80941Smrg	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
12220b8e80941Smrg	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
12221b8e80941Smrg	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
12222b8e80941Smrg	[TGSI_OPCODE_DFMA]	= { ALU_OP3_FMA_64, tgsi_op3_64},
12223b8e80941Smrg	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
12224b8e80941Smrg	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
12225b8e80941Smrg	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
12226b8e80941Smrg	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
12227b8e80941Smrg	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
12228b8e80941Smrg	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
12229b8e80941Smrg	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
12230b8e80941Smrg	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
12231b8e80941Smrg	[TGSI_OPCODE_U64SNE]    = { ALU_OP0_NOP, egcm_u64sne },
12232b8e80941Smrg	[TGSI_OPCODE_U64ADD]    = { ALU_OP0_NOP, egcm_u64add },
12233b8e80941Smrg	[TGSI_OPCODE_U64MUL]    = { ALU_OP0_NOP, egcm_u64mul },
12234b8e80941Smrg	[TGSI_OPCODE_U64DIV]    = { ALU_OP0_NOP, egcm_u64div },
	/* Presumably the highest index used; because the array is declared
	 * with [], the largest designated index fixes its length. */
12235b8e80941Smrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
12236848b8605Smrg};
12237