13464ebd5Sriastradh/*
23464ebd5Sriastradh * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
33464ebd5Sriastradh *
43464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a
53464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"),
63464ebd5Sriastradh * to deal in the Software without restriction, including without limitation
73464ebd5Sriastradh * on the rights to use, copy, modify, merge, publish, distribute, sub
83464ebd5Sriastradh * license, and/or sell copies of the Software, and to permit persons to whom
93464ebd5Sriastradh * the Software is furnished to do so, subject to the following conditions:
103464ebd5Sriastradh *
113464ebd5Sriastradh * The above copyright notice and this permission notice (including the next
123464ebd5Sriastradh * paragraph) shall be included in all copies or substantial portions of the
133464ebd5Sriastradh * Software.
143464ebd5Sriastradh *
153464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
163464ebd5Sriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
173464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
183464ebd5Sriastradh * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
193464ebd5Sriastradh * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
203464ebd5Sriastradh * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
213464ebd5Sriastradh * USE OR OTHER DEALINGS IN THE SOFTWARE.
223464ebd5Sriastradh */
23af69d88dSmrg#include "r600_sq.h"
24af69d88dSmrg#include "r600_formats.h"
25af69d88dSmrg#include "r600_opcodes.h"
26af69d88dSmrg#include "r600_shader.h"
277ec681f3Smrg#include "r600_dump.h"
28af69d88dSmrg#include "r600d.h"
297ec681f3Smrg#include "sfn/sfn_nir.h"
30af69d88dSmrg
31af69d88dSmrg#include "sb/sb_public.h"
32af69d88dSmrg
333464ebd5Sriastradh#include "pipe/p_shader_tokens.h"
343464ebd5Sriastradh#include "tgsi/tgsi_info.h"
353464ebd5Sriastradh#include "tgsi/tgsi_parse.h"
363464ebd5Sriastradh#include "tgsi/tgsi_scan.h"
373464ebd5Sriastradh#include "tgsi/tgsi_dump.h"
387ec681f3Smrg#include "tgsi/tgsi_from_mesa.h"
397ec681f3Smrg#include "nir/tgsi_to_nir.h"
407ec681f3Smrg#include "nir/nir_to_tgsi_info.h"
417ec681f3Smrg#include "compiler/nir/nir.h"
4201e04c3fSmrg#include "util/u_bitcast.h"
43af69d88dSmrg#include "util/u_memory.h"
44af69d88dSmrg#include "util/u_math.h"
453464ebd5Sriastradh#include <stdio.h>
463464ebd5Sriastradh#include <errno.h>
473464ebd5Sriastradh
/* CAYMAN notes
 *
 * This explains why the CAYMAN code paths loop over so many instructions.
 *
 * - These 8xx t-slot only ops are implemented in all vector slots:
 *     MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 *
 * - These 8xx t-slot only opcodes become vector ops, with all four
 *   slots expecting the arguments on sources a and b. The result is
 *   broadcast to all channels:
 *     MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
 *
 * - These 8xx t-slot only opcodes become vector ops in the z, y, and
 *   x slots:
 *     EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
 *     RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
 *     SQRT_IEEE/_64
 *     SIN/COS
 *   The w slot may have an independent co-issued operation, or if the
 *   result is required to be in the w slot, the opcode above may be
 *   issued in the w slot as well.
 *   The compiler must issue the source argument to slots z, y, and x.
 */
683464ebd5Sriastradh
6901e04c3fSmrg/* Contents of r0 on entry to various shaders
7001e04c3fSmrg
7101e04c3fSmrg VS - .x = VertexID
7201e04c3fSmrg      .y = RelVertexID (??)
7301e04c3fSmrg      .w = InstanceID
7401e04c3fSmrg
7501e04c3fSmrg GS - r0.xyw, r1.xyz = per-vertex offsets
7601e04c3fSmrg      r0.z = PrimitiveID
7701e04c3fSmrg
7801e04c3fSmrg TCS - .x = PatchID
7901e04c3fSmrg       .y = RelPatchID (??)
8001e04c3fSmrg       .z = InvocationID
8101e04c3fSmrg       .w = tess factor base.
8201e04c3fSmrg
8301e04c3fSmrg TES - .x = TessCoord.x
8401e04c3fSmrg     - .y = TessCoord.y
8501e04c3fSmrg     - .z = RelPatchID (??)
8601e04c3fSmrg     - .w = PrimitiveID
8701e04c3fSmrg
8801e04c3fSmrg PS - face_gpr.z = SampleMask
8901e04c3fSmrg      face_gpr.w = SampleID
9001e04c3fSmrg*/
9101e04c3fSmrg#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
92af69d88dSmrgstatic int r600_shader_from_tgsi(struct r600_context *rctx,
93af69d88dSmrg				 struct r600_pipe_shader *pipeshader,
9401e04c3fSmrg				 union r600_shader_key key);
95af69d88dSmrg
96af69d88dSmrgstatic void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
97af69d88dSmrg                           int size, unsigned comp_mask) {
98af69d88dSmrg
99af69d88dSmrg	if (!size)
100af69d88dSmrg		return;
101af69d88dSmrg
102af69d88dSmrg	if (ps->num_arrays == ps->max_arrays) {
103af69d88dSmrg		ps->max_arrays += 64;
104af69d88dSmrg		ps->arrays = realloc(ps->arrays, ps->max_arrays *
105af69d88dSmrg		                     sizeof(struct r600_shader_array));
106af69d88dSmrg	}
107af69d88dSmrg
108af69d88dSmrg	int n = ps->num_arrays;
109af69d88dSmrg	++ps->num_arrays;
1103464ebd5Sriastradh
111af69d88dSmrg	ps->arrays[n].comp_mask = comp_mask;
112af69d88dSmrg	ps->arrays[n].gpr_start = start_gpr;
113af69d88dSmrg	ps->arrays[n].gpr_count = size;
114af69d88dSmrg}
115af69d88dSmrg
116af69d88dSmrgstatic void r600_dump_streamout(struct pipe_stream_output_info *so)
1173464ebd5Sriastradh{
118af69d88dSmrg	unsigned i;
1193464ebd5Sriastradh
120af69d88dSmrg	fprintf(stderr, "STREAMOUT\n");
121af69d88dSmrg	for (i = 0; i < so->num_outputs; i++) {
122af69d88dSmrg		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
123af69d88dSmrg				so->output[i].start_component;
12401e04c3fSmrg		fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
12501e04c3fSmrg			i,
12601e04c3fSmrg			so->output[i].stream,
12701e04c3fSmrg			so->output[i].output_buffer,
128af69d88dSmrg			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
129af69d88dSmrg			so->output[i].register_index,
130af69d88dSmrg			mask & 1 ? "x" : "",
131af69d88dSmrg		        mask & 2 ? "y" : "",
132af69d88dSmrg		        mask & 4 ? "z" : "",
133af69d88dSmrg		        mask & 8 ? "w" : "",
134af69d88dSmrg			so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : "");
1353464ebd5Sriastradh	}
1363464ebd5Sriastradh}
1373464ebd5Sriastradh
138af69d88dSmrgstatic int store_shader(struct pipe_context *ctx,
139af69d88dSmrg			struct r600_pipe_shader *shader)
1403464ebd5Sriastradh{
141af69d88dSmrg	struct r600_context *rctx = (struct r600_context *)ctx;
142af69d88dSmrg	uint32_t *ptr, i;
1433464ebd5Sriastradh
1443464ebd5Sriastradh	if (shader->bo == NULL) {
145af69d88dSmrg		shader->bo = (struct r600_resource*)
14601e04c3fSmrg			pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
1473464ebd5Sriastradh		if (shader->bo == NULL) {
1483464ebd5Sriastradh			return -ENOMEM;
1493464ebd5Sriastradh		}
1509f464c52Smaya		ptr = r600_buffer_map_sync_with_rings(
1519f464c52Smaya			&rctx->b, shader->bo,
1527ec681f3Smrg			PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
1533464ebd5Sriastradh		if (R600_BIG_ENDIAN) {
154af69d88dSmrg			for (i = 0; i < shader->shader.bc.ndw; ++i) {
155af69d88dSmrg				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
1563464ebd5Sriastradh			}
1573464ebd5Sriastradh		} else {
158af69d88dSmrg			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
1593464ebd5Sriastradh		}
1607ec681f3Smrg		rctx->b.ws->buffer_unmap(rctx->b.ws, shader->bo->buf);
1613464ebd5Sriastradh	}
162af69d88dSmrg
1633464ebd5Sriastradh	return 0;
1643464ebd5Sriastradh}
1653464ebd5Sriastradh
1667ec681f3Smrgextern const struct nir_shader_compiler_options r600_nir_options;
1677ec681f3Smrgstatic int nshader = 0;
168af69d88dSmrgint r600_pipe_shader_create(struct pipe_context *ctx,
169af69d88dSmrg			    struct r600_pipe_shader *shader,
17001e04c3fSmrg			    union r600_shader_key key)
1713464ebd5Sriastradh{
172af69d88dSmrg	struct r600_context *rctx = (struct r600_context *)ctx;
173af69d88dSmrg	struct r600_pipe_shader_selector *sel = shader->selector;
1743464ebd5Sriastradh	int r;
1757ec681f3Smrg	struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
1767ec681f3Smrg
1777ec681f3Smrg	int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ?
1787ec681f3Smrg		tgsi_get_processor_type(sel->tokens):
1797ec681f3Smrg		pipe_shader_type_from_mesa(sel->nir->info.stage);
1807ec681f3Smrg
1817ec681f3Smrg	bool dump = r600_can_dump_shader(&rctx->screen->b, processor);
1827ec681f3Smrg	unsigned use_sb = !(rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR)) ||
1837ec681f3Smrg                          (rctx->screen->b.debug_flags & DBG_NIR_SB);
18401e04c3fSmrg	unsigned sb_disasm;
18501e04c3fSmrg	unsigned export_shader;
1867ec681f3Smrg
187af69d88dSmrg	shader->shader.bc.isa = rctx->isa;
1887ec681f3Smrg
1897ec681f3Smrg	if (!(rscreen->b.debug_flags & DBG_NIR_PREFERRED)) {
1907ec681f3Smrg		assert(sel->ir_type == PIPE_SHADER_IR_TGSI);
1917ec681f3Smrg		r = r600_shader_from_tgsi(rctx, shader, key);
1927ec681f3Smrg		if (r) {
1937ec681f3Smrg			R600_ERR("translation from TGSI failed !\n");
1947ec681f3Smrg			goto error;
1957ec681f3Smrg		}
1967ec681f3Smrg	} else {
1977ec681f3Smrg		if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
1987ec681f3Smrg			sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true);
1997ec681f3Smrg                        const nir_shader_compiler_options *nir_options =
2007ec681f3Smrg                              (const nir_shader_compiler_options *)
2017ec681f3Smrg                              ctx->screen->get_compiler_options(ctx->screen,
2027ec681f3Smrg                                                                PIPE_SHADER_IR_NIR,
2037ec681f3Smrg                                                                shader->shader.processor_type);
2047ec681f3Smrg                        /* Lower int64 ops because we have some r600 build-in shaders that use it */
2057ec681f3Smrg			if (nir_options->lower_int64_options) {
2067ec681f3Smrg				NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
2077ec681f3Smrg				NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
2087ec681f3Smrg				NIR_PASS_V(sel->nir, nir_lower_int64);
2097ec681f3Smrg				NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL);
2107ec681f3Smrg			}
2117ec681f3Smrg			NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false);
2127ec681f3Smrg		}
2137ec681f3Smrg		nir_tgsi_scan_shader(sel->nir, &sel->info, true);
2143464ebd5Sriastradh
2157ec681f3Smrg		r = r600_shader_from_nir(rctx, shader, &key);
2167ec681f3Smrg		if (r) {
2177ec681f3Smrg			fprintf(stderr, "--Failed shader--------------------------------------------------\n");
2187ec681f3Smrg
2197ec681f3Smrg			if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
2207ec681f3Smrg				fprintf(stderr, "--TGSI--------------------------------------------------------\n");
2217ec681f3Smrg				tgsi_dump(sel->tokens, 0);
2227ec681f3Smrg			}
2237ec681f3Smrg
2247ec681f3Smrg			if (rscreen->b.debug_flags & (DBG_NIR_PREFERRED)) {
2257ec681f3Smrg				fprintf(stderr, "--NIR --------------------------------------------------------\n");
2267ec681f3Smrg				nir_print_shader(sel->nir, stderr);
2277ec681f3Smrg			}
2287ec681f3Smrg
2297ec681f3Smrg			R600_ERR("translation from NIR failed !\n");
2307ec681f3Smrg			goto error;
2317ec681f3Smrg		}
2327ec681f3Smrg	}
2337ec681f3Smrg
234af69d88dSmrg	if (dump) {
2357ec681f3Smrg		if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
2367ec681f3Smrg			fprintf(stderr, "--TGSI--------------------------------------------------------\n");
2377ec681f3Smrg			tgsi_dump(sel->tokens, 0);
2387ec681f3Smrg		}
2397ec681f3Smrg
240af69d88dSmrg		if (sel->so.num_outputs) {
241af69d88dSmrg			r600_dump_streamout(&sel->so);
242af69d88dSmrg		}
2433464ebd5Sriastradh	}
2447ec681f3Smrg
24501e04c3fSmrg	if (shader->shader.processor_type == PIPE_SHADER_VERTEX) {
24601e04c3fSmrg		/* only disable for vertex shaders in tess paths */
24701e04c3fSmrg		if (key.vs.as_ls)
24801e04c3fSmrg			use_sb = 0;
24901e04c3fSmrg	}
25001e04c3fSmrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL);
25101e04c3fSmrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL);
25201e04c3fSmrg	use_sb &= (shader->shader.processor_type != PIPE_SHADER_COMPUTE);
253af69d88dSmrg
25401e04c3fSmrg	/* disable SB for shaders using doubles */
25501e04c3fSmrg	use_sb &= !shader->shader.uses_doubles;
256af69d88dSmrg
25701e04c3fSmrg	use_sb &= !shader->shader.uses_atomics;
25801e04c3fSmrg	use_sb &= !shader->shader.uses_images;
25901e04c3fSmrg	use_sb &= !shader->shader.uses_helper_invocation;
26001e04c3fSmrg
26101e04c3fSmrg	/* Check if the bytecode has already been built. */
262af69d88dSmrg	if (!shader->shader.bc.bytecode) {
263af69d88dSmrg		r = r600_bytecode_build(&shader->shader.bc);
264af69d88dSmrg		if (r) {
265af69d88dSmrg			R600_ERR("building bytecode failed !\n");
266af69d88dSmrg			goto error;
267af69d88dSmrg		}
2683464ebd5Sriastradh	}
269af69d88dSmrg
27001e04c3fSmrg	sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
271af69d88dSmrg	if (dump && !sb_disasm) {
272af69d88dSmrg		fprintf(stderr, "--------------------------------------------------------------\n");
273af69d88dSmrg		r600_bytecode_disasm(&shader->shader.bc);
2743464ebd5Sriastradh		fprintf(stderr, "______________________________________________________________\n");
275af69d88dSmrg	} else if ((dump && sb_disasm) || use_sb) {
2767ec681f3Smrg                r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
277af69d88dSmrg		                             dump, use_sb);
278af69d88dSmrg		if (r) {
279af69d88dSmrg			R600_ERR("r600_sb_bytecode_process failed !\n");
280af69d88dSmrg			goto error;
281af69d88dSmrg		}
282af69d88dSmrg	}
283af69d88dSmrg
2847ec681f3Smrg        if (dump) {
2857ec681f3Smrg           FILE *f;
2867ec681f3Smrg           char fname[1024];
2877ec681f3Smrg           snprintf(fname, 1024, "shader_from_%s_%d.cpp",
2887ec681f3Smrg                    (sel->ir_type == PIPE_SHADER_IR_TGSI ?
2897ec681f3Smrg                        (rscreen->b.debug_flags & DBG_NIR_PREFERRED ? "tgsi-nir" : "tgsi")
2907ec681f3Smrg                      : "nir"), nshader);
2917ec681f3Smrg           f = fopen(fname, "w");
2927ec681f3Smrg           print_shader_info(f, nshader++, &shader->shader);
2937ec681f3Smrg           print_shader_info(stderr, nshader++, &shader->shader);
2947ec681f3Smrg           print_pipe_info(stderr, &sel->info);
2957ec681f3Smrg           if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
2967ec681f3Smrg              fprintf(f, "/****TGSI**********************************\n");
2977ec681f3Smrg              tgsi_dump_to_file(sel->tokens, 0, f);
2987ec681f3Smrg           }
2997ec681f3Smrg
3007ec681f3Smrg           if (rscreen->b.debug_flags & DBG_NIR_PREFERRED){
3017ec681f3Smrg              fprintf(f, "/****NIR **********************************\n");
3027ec681f3Smrg              nir_print_shader(sel->nir, f);
3037ec681f3Smrg           }
3047ec681f3Smrg           fprintf(f, "******************************************/\n");
3057ec681f3Smrg           fclose(f);
3067ec681f3Smrg        }
3077ec681f3Smrg
308af69d88dSmrg	if (shader->gs_copy_shader) {
309af69d88dSmrg		if (dump) {
310af69d88dSmrg			// dump copy shader
311af69d88dSmrg			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
312af69d88dSmrg						     &shader->gs_copy_shader->shader, dump, 0);
313af69d88dSmrg			if (r)
314af69d88dSmrg				goto error;
315af69d88dSmrg		}
316af69d88dSmrg
317af69d88dSmrg		if ((r = store_shader(ctx, shader->gs_copy_shader)))
318af69d88dSmrg			goto error;
319af69d88dSmrg	}
320af69d88dSmrg
321af69d88dSmrg	/* Store the shader in a buffer. */
322af69d88dSmrg	if ((r = store_shader(ctx, shader)))
323af69d88dSmrg		goto error;
324af69d88dSmrg
325af69d88dSmrg	/* Build state. */
326af69d88dSmrg	switch (shader->shader.processor_type) {
32701e04c3fSmrg	case PIPE_SHADER_TESS_CTRL:
32801e04c3fSmrg		evergreen_update_hs_state(ctx, shader);
32901e04c3fSmrg		break;
33001e04c3fSmrg	case PIPE_SHADER_TESS_EVAL:
33101e04c3fSmrg		if (key.tes.as_es)
33201e04c3fSmrg			evergreen_update_es_state(ctx, shader);
33301e04c3fSmrg		else
33401e04c3fSmrg			evergreen_update_vs_state(ctx, shader);
33501e04c3fSmrg		break;
33601e04c3fSmrg	case PIPE_SHADER_GEOMETRY:
337af69d88dSmrg		if (rctx->b.chip_class >= EVERGREEN) {
338af69d88dSmrg			evergreen_update_gs_state(ctx, shader);
339af69d88dSmrg			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
340af69d88dSmrg		} else {
341af69d88dSmrg			r600_update_gs_state(ctx, shader);
342af69d88dSmrg			r600_update_vs_state(ctx, shader->gs_copy_shader);
343af69d88dSmrg		}
344af69d88dSmrg		break;
34501e04c3fSmrg	case PIPE_SHADER_VERTEX:
34601e04c3fSmrg		export_shader = key.vs.as_es;
347af69d88dSmrg		if (rctx->b.chip_class >= EVERGREEN) {
34801e04c3fSmrg			if (key.vs.as_ls)
34901e04c3fSmrg				evergreen_update_ls_state(ctx, shader);
35001e04c3fSmrg			else if (key.vs.as_es)
351af69d88dSmrg				evergreen_update_es_state(ctx, shader);
352af69d88dSmrg			else
353af69d88dSmrg				evergreen_update_vs_state(ctx, shader);
354af69d88dSmrg		} else {
355af69d88dSmrg			if (export_shader)
356af69d88dSmrg				r600_update_es_state(ctx, shader);
357af69d88dSmrg			else
358af69d88dSmrg				r600_update_vs_state(ctx, shader);
359af69d88dSmrg		}
360af69d88dSmrg		break;
36101e04c3fSmrg	case PIPE_SHADER_FRAGMENT:
362af69d88dSmrg		if (rctx->b.chip_class >= EVERGREEN) {
363af69d88dSmrg			evergreen_update_ps_state(ctx, shader);
364af69d88dSmrg		} else {
365af69d88dSmrg			r600_update_ps_state(ctx, shader);
366af69d88dSmrg		}
367af69d88dSmrg		break;
36801e04c3fSmrg	case PIPE_SHADER_COMPUTE:
36901e04c3fSmrg		evergreen_update_ls_state(ctx, shader);
37001e04c3fSmrg		break;
371af69d88dSmrg	default:
372af69d88dSmrg		r = -EINVAL;
373af69d88dSmrg		goto error;
3743464ebd5Sriastradh	}
375af69d88dSmrg	return 0;
376af69d88dSmrg
377af69d88dSmrgerror:
378af69d88dSmrg	r600_pipe_shader_destroy(ctx, shader);
379af69d88dSmrg	return r;
3803464ebd5Sriastradh}
3813464ebd5Sriastradh
38201e04c3fSmrgvoid r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader)
3833464ebd5Sriastradh{
38401e04c3fSmrg	r600_resource_reference(&shader->bo, NULL);
3857ec681f3Smrg	if (list_is_linked(&shader->shader.bc.cf))
3867ec681f3Smrg		r600_bytecode_clear(&shader->shader.bc);
387af69d88dSmrg	r600_release_command_buffer(&shader->command_buffer);
3883464ebd5Sriastradh}
3893464ebd5Sriastradh
3903464ebd5Sriastradh/*
3913464ebd5Sriastradh * tgsi -> r600 shader
3923464ebd5Sriastradh */
3933464ebd5Sriastradhstruct r600_shader_tgsi_instruction;
3943464ebd5Sriastradh
3953464ebd5Sriastradhstruct r600_shader_src {
3963464ebd5Sriastradh	unsigned				sel;
3973464ebd5Sriastradh	unsigned				swizzle[4];
3983464ebd5Sriastradh	unsigned				neg;
3993464ebd5Sriastradh	unsigned				abs;
4003464ebd5Sriastradh	unsigned				rel;
401af69d88dSmrg	unsigned				kc_bank;
40201e04c3fSmrg	boolean					kc_rel; /* true if cache bank is indexed */
4033464ebd5Sriastradh	uint32_t				value[4];
4043464ebd5Sriastradh};
4053464ebd5Sriastradh
40601e04c3fSmrgstruct eg_interp {
40701e04c3fSmrg	boolean					enabled;
40801e04c3fSmrg	unsigned				ij_index;
40901e04c3fSmrg};
41001e04c3fSmrg
4113464ebd5Sriastradhstruct r600_shader_ctx {
4123464ebd5Sriastradh	struct tgsi_shader_info			info;
41301e04c3fSmrg	struct tgsi_array_info			*array_infos;
41401e04c3fSmrg	/* flag for each tgsi temp array if its been spilled or not */
41501e04c3fSmrg	bool					*spilled_arrays;
4163464ebd5Sriastradh	struct tgsi_parse_context		parse;
4173464ebd5Sriastradh	const struct tgsi_token			*tokens;
4183464ebd5Sriastradh	unsigned				type;
4193464ebd5Sriastradh	unsigned				file_offset[TGSI_FILE_COUNT];
4203464ebd5Sriastradh	unsigned				temp_reg;
42101e04c3fSmrg	const struct r600_shader_tgsi_instruction	*inst_info;
422af69d88dSmrg	struct r600_bytecode			*bc;
4233464ebd5Sriastradh	struct r600_shader			*shader;
4243464ebd5Sriastradh	struct r600_shader_src			src[4];
425af69d88dSmrg	uint32_t				*literals;
426af69d88dSmrg	uint32_t				nliterals;
427af69d88dSmrg	uint32_t				max_driver_temp_used;
4283464ebd5Sriastradh	/* needed for evergreen interpolation */
42901e04c3fSmrg	struct eg_interp		eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
430af69d88dSmrg	/* evergreen/cayman also store sample mask in face register */
431af69d88dSmrg	int					face_gpr;
43201e04c3fSmrg	/* sample id is .w component stored in fixed point position register */
43301e04c3fSmrg	int					fixed_pt_position_gpr;
434af69d88dSmrg	int					colors_used;
435af69d88dSmrg	boolean                 clip_vertex_write;
436af69d88dSmrg	unsigned                cv_output;
437af69d88dSmrg	unsigned		edgeflag_output;
43801e04c3fSmrg	int					helper_invoc_reg;
43901e04c3fSmrg	int                                     cs_block_size_reg;
44001e04c3fSmrg	int                                     cs_grid_size_reg;
44101e04c3fSmrg	bool cs_block_size_loaded, cs_grid_size_loaded;
442af69d88dSmrg	int					fragcoord_input;
443af69d88dSmrg	int					next_ring_offset;
444af69d88dSmrg	int					gs_out_ring_offset;
445af69d88dSmrg	int					gs_next_vertex;
446af69d88dSmrg	struct r600_shader	*gs_for_vs;
44701e04c3fSmrg	int					gs_export_gpr_tregs[4];
44801e04c3fSmrg	int                                     gs_rotated_input[2];
44901e04c3fSmrg	const struct pipe_stream_output_info	*gs_stream_output_info;
45001e04c3fSmrg	unsigned				enabled_stream_buffers_mask;
45101e04c3fSmrg	unsigned                                tess_input_info; /* temp with tess input offsets */
45201e04c3fSmrg	unsigned                                tess_output_info; /* temp with tess input offsets */
45301e04c3fSmrg	unsigned                                thread_id_gpr; /* temp with thread id calculated for images */
4543464ebd5Sriastradh};
4553464ebd5Sriastradh
/* Element type of the per-chip instruction tables declared below: an `op`
 * value paired with the handler invoked on the translation context. */
struct r600_shader_tgsi_instruction {
	unsigned op;
	int (*process)(struct r600_shader_ctx *ctx);
};
4603464ebd5Sriastradh
46101e04c3fSmrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
46201e04c3fSmrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
4633464ebd5Sriastradhstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
46401e04c3fSmrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
465af69d88dSmrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
466af69d88dSmrgstatic int tgsi_else(struct r600_shader_ctx *ctx);
467af69d88dSmrgstatic int tgsi_endif(struct r600_shader_ctx *ctx);
468af69d88dSmrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx);
469af69d88dSmrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx);
470af69d88dSmrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
47101e04c3fSmrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
47201e04c3fSmrg                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
47301e04c3fSmrg                                unsigned int dst_reg);
47401e04c3fSmrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
47501e04c3fSmrg			const struct r600_shader_src *shader_src,
47601e04c3fSmrg			unsigned chan);
47701e04c3fSmrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
47801e04c3fSmrg			       unsigned dst_reg, unsigned mask);
47901e04c3fSmrg
48001e04c3fSmrgstatic bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
48101e04c3fSmrg{
48201e04c3fSmrg	if (ctx->bc->family == CHIP_HEMLOCK ||
48301e04c3fSmrg	    ctx->bc->family == CHIP_CYPRESS ||
48401e04c3fSmrg	    ctx->bc->family == CHIP_JUNIPER)
48501e04c3fSmrg		return false;
48601e04c3fSmrg	return true;
48701e04c3fSmrg}
48801e04c3fSmrg
/**
 * Return the index (0..3) of the highest channel enabled in \p writemask.
 *
 * Only the low four bits are looked at.  When none of them is set the
 * result is 0, the same value as for a mask that enables channel 0 only.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* Scan from the top channel down; the first hit is the last one set. */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
5003464ebd5Sriastradh
5013464ebd5Sriastradhstatic int tgsi_is_supported(struct r600_shader_ctx *ctx)
5023464ebd5Sriastradh{
5033464ebd5Sriastradh	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
50401e04c3fSmrg	unsigned j;
5053464ebd5Sriastradh
50601e04c3fSmrg	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
5073464ebd5Sriastradh		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
5083464ebd5Sriastradh		return -EINVAL;
5093464ebd5Sriastradh	}
5103464ebd5Sriastradh#if 0
5113464ebd5Sriastradh	if (i->Instruction.Label) {
5123464ebd5Sriastradh		R600_ERR("label unsupported\n");
5133464ebd5Sriastradh		return -EINVAL;
5143464ebd5Sriastradh	}
5153464ebd5Sriastradh#endif
5163464ebd5Sriastradh	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
5173464ebd5Sriastradh		if (i->Src[j].Register.Dimension) {
5187ec681f3Smrg			switch (i->Src[j].Register.File) {
5197ec681f3Smrg			case TGSI_FILE_CONSTANT:
5207ec681f3Smrg			case TGSI_FILE_HW_ATOMIC:
5217ec681f3Smrg				break;
5227ec681f3Smrg			case TGSI_FILE_INPUT:
5237ec681f3Smrg				if (ctx->type == PIPE_SHADER_GEOMETRY ||
5247ec681f3Smrg				    ctx->type == PIPE_SHADER_TESS_CTRL ||
5257ec681f3Smrg				    ctx->type == PIPE_SHADER_TESS_EVAL)
5267ec681f3Smrg					break;
5277ec681f3Smrg				FALLTHROUGH;
5287ec681f3Smrg			case TGSI_FILE_OUTPUT:
5297ec681f3Smrg				if (ctx->type == PIPE_SHADER_TESS_CTRL)
5307ec681f3Smrg					break;
5317ec681f3Smrg				FALLTHROUGH;
5327ec681f3Smrg			default:
5337ec681f3Smrg				R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
5347ec681f3Smrg					 i->Src[j].Register.File,
5357ec681f3Smrg					 i->Src[j].Register.Dimension);
5367ec681f3Smrg				return -EINVAL;
5377ec681f3Smrg			}
5383464ebd5Sriastradh		}
5393464ebd5Sriastradh	}
5403464ebd5Sriastradh	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
5413464ebd5Sriastradh		if (i->Dst[j].Register.Dimension) {
54201e04c3fSmrg			if (ctx->type == PIPE_SHADER_TESS_CTRL)
54301e04c3fSmrg				continue;
5443464ebd5Sriastradh			R600_ERR("unsupported dst (dimension)\n");
5453464ebd5Sriastradh			return -EINVAL;
5463464ebd5Sriastradh		}
5473464ebd5Sriastradh	}
5483464ebd5Sriastradh	return 0;
5493464ebd5Sriastradh}
5503464ebd5Sriastradh
55101e04c3fSmrgint eg_get_interpolator_index(unsigned interpolate, unsigned location)
5523464ebd5Sriastradh{
55301e04c3fSmrg	if (interpolate == TGSI_INTERPOLATE_COLOR ||
55401e04c3fSmrg		interpolate == TGSI_INTERPOLATE_LINEAR ||
55501e04c3fSmrg		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
55601e04c3fSmrg	{
55701e04c3fSmrg		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
55801e04c3fSmrg		int loc;
5593464ebd5Sriastradh
56001e04c3fSmrg		switch(location) {
56101e04c3fSmrg		case TGSI_INTERPOLATE_LOC_CENTER:
56201e04c3fSmrg			loc = 1;
56301e04c3fSmrg			break;
56401e04c3fSmrg		case TGSI_INTERPOLATE_LOC_CENTROID:
56501e04c3fSmrg			loc = 2;
56601e04c3fSmrg			break;
56701e04c3fSmrg		case TGSI_INTERPOLATE_LOC_SAMPLE:
56801e04c3fSmrg		default:
56901e04c3fSmrg			loc = 0; break;
5703464ebd5Sriastradh		}
57101e04c3fSmrg
57201e04c3fSmrg		return is_linear * 3 + loc;
5733464ebd5Sriastradh	}
5743464ebd5Sriastradh
57501e04c3fSmrg	return -1;
57601e04c3fSmrg}
57701e04c3fSmrg
57801e04c3fSmrgstatic void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
57901e04c3fSmrg		int input)
58001e04c3fSmrg{
58101e04c3fSmrg	int i = eg_get_interpolator_index(
58201e04c3fSmrg		ctx->shader->input[input].interpolate,
58301e04c3fSmrg		ctx->shader->input[input].interpolate_location);
58401e04c3fSmrg	assert(i >= 0);
58501e04c3fSmrg	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
586af69d88dSmrg}
587af69d88dSmrg
588af69d88dSmrgstatic int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
589af69d88dSmrg{
590af69d88dSmrg	int i, r;
591af69d88dSmrg	struct r600_bytecode_alu alu;
592af69d88dSmrg	int gpr = 0, base_chan = 0;
593af69d88dSmrg	int ij_index = ctx->shader->input[input].ij_index;
594af69d88dSmrg
5953464ebd5Sriastradh	/* work out gpr and base_chan from index */
5963464ebd5Sriastradh	gpr = ij_index / 2;
5973464ebd5Sriastradh	base_chan = (2 * (ij_index % 2)) + 1;
5983464ebd5Sriastradh
5993464ebd5Sriastradh	for (i = 0; i < 8; i++) {
600af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6013464ebd5Sriastradh
6023464ebd5Sriastradh		if (i < 4)
603af69d88dSmrg			alu.op = ALU_OP2_INTERP_ZW;
6043464ebd5Sriastradh		else
605af69d88dSmrg			alu.op = ALU_OP2_INTERP_XY;
6063464ebd5Sriastradh
6073464ebd5Sriastradh		if ((i > 1) && (i < 6)) {
6083464ebd5Sriastradh			alu.dst.sel = ctx->shader->input[input].gpr;
6093464ebd5Sriastradh			alu.dst.write = 1;
6103464ebd5Sriastradh		}
6113464ebd5Sriastradh
6123464ebd5Sriastradh		alu.dst.chan = i % 4;
6133464ebd5Sriastradh
6143464ebd5Sriastradh		alu.src[0].sel = gpr;
6153464ebd5Sriastradh		alu.src[0].chan = (base_chan - (i % 2));
6163464ebd5Sriastradh
6173464ebd5Sriastradh		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
6183464ebd5Sriastradh
6193464ebd5Sriastradh		alu.bank_swizzle_force = SQ_ALU_VEC_210;
6203464ebd5Sriastradh		if ((i % 4) == 3)
6213464ebd5Sriastradh			alu.last = 1;
622af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
623af69d88dSmrg		if (r)
624af69d88dSmrg			return r;
625af69d88dSmrg	}
626af69d88dSmrg	return 0;
627af69d88dSmrg}
628af69d88dSmrg
629af69d88dSmrgstatic int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
630af69d88dSmrg{
631af69d88dSmrg	int i, r;
632af69d88dSmrg	struct r600_bytecode_alu alu;
633af69d88dSmrg
634af69d88dSmrg	for (i = 0; i < 4; i++) {
635af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
636af69d88dSmrg
637af69d88dSmrg		alu.op = ALU_OP1_INTERP_LOAD_P0;
638af69d88dSmrg
639af69d88dSmrg		alu.dst.sel = ctx->shader->input[input].gpr;
640af69d88dSmrg		alu.dst.write = 1;
641af69d88dSmrg
642af69d88dSmrg		alu.dst.chan = i;
643af69d88dSmrg
644af69d88dSmrg		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
645af69d88dSmrg		alu.src[0].chan = i;
646af69d88dSmrg
647af69d88dSmrg		if (i == 3)
648af69d88dSmrg			alu.last = 1;
649af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6503464ebd5Sriastradh		if (r)
6513464ebd5Sriastradh			return r;
6523464ebd5Sriastradh	}
6533464ebd5Sriastradh	return 0;
6543464ebd5Sriastradh}
6553464ebd5Sriastradh
656af69d88dSmrg/*
657af69d88dSmrg * Special export handling in shaders
658af69d88dSmrg *
659af69d88dSmrg * shader export ARRAY_BASE for EXPORT_POS:
660af69d88dSmrg * 60 is position
661af69d88dSmrg * 61 is misc vector
662af69d88dSmrg * 62, 63 are clip distance vectors
663af69d88dSmrg *
664af69d88dSmrg * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
665af69d88dSmrg * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
666af69d88dSmrg * USE_VTX_POINT_SIZE - point size in the X channel of export 61
667af69d88dSmrg * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
668af69d88dSmrg * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
669af69d88dSmrg * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive with render target index)
672af69d88dSmrg * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
673af69d88dSmrg *
674af69d88dSmrg *
675af69d88dSmrg * shader export ARRAY_BASE for EXPORT_PIXEL:
676af69d88dSmrg * 0-7 CB targets
677af69d88dSmrg * 61 computed Z vector
678af69d88dSmrg *
679af69d88dSmrg * The use of the values exported in the computed Z vector are controlled
680af69d88dSmrg * by DB_SHADER_CONTROL:
681af69d88dSmrg * Z_EXPORT_ENABLE - Z as a float in RED
682af69d88dSmrg * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
683af69d88dSmrg * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
684af69d88dSmrg * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
685af69d88dSmrg * DB_SOURCE_FORMAT - export control restrictions
686af69d88dSmrg *
687af69d88dSmrg */
688af69d88dSmrg
689af69d88dSmrg
690af69d88dSmrg/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
691af69d88dSmrgstatic int r600_spi_sid(struct r600_shader_io * io)
692af69d88dSmrg{
693af69d88dSmrg	int index, name = io->name;
694af69d88dSmrg
695af69d88dSmrg	/* These params are handled differently, they don't need
696af69d88dSmrg	 * semantic indices, so we'll use 0 for them.
697af69d88dSmrg	 */
698af69d88dSmrg	if (name == TGSI_SEMANTIC_POSITION ||
699af69d88dSmrg	    name == TGSI_SEMANTIC_PSIZE ||
700af69d88dSmrg	    name == TGSI_SEMANTIC_EDGEFLAG ||
701af69d88dSmrg	    name == TGSI_SEMANTIC_FACE ||
702af69d88dSmrg	    name == TGSI_SEMANTIC_SAMPLEMASK)
703af69d88dSmrg		index = 0;
704af69d88dSmrg	else {
705af69d88dSmrg		if (name == TGSI_SEMANTIC_GENERIC) {
706af69d88dSmrg			/* For generic params simply use sid from tgsi */
7077ec681f3Smrg			index = 9 + io->sid;
7087ec681f3Smrg		} else if (name == TGSI_SEMANTIC_TEXCOORD) {
709af69d88dSmrg			index = io->sid;
710af69d88dSmrg		} else {
711af69d88dSmrg			/* For non-generic params - pack name and sid into 8 bits */
712af69d88dSmrg			index = 0x80 | (name<<3) | (io->sid);
713af69d88dSmrg		}
714af69d88dSmrg
715af69d88dSmrg		/* Make sure that all really used indices have nonzero value, so
716af69d88dSmrg		 * we can just compare it to 0 later instead of comparing the name
717af69d88dSmrg		 * with different values to detect special cases. */
718af69d88dSmrg		index++;
719af69d88dSmrg	}
720af69d88dSmrg
721af69d88dSmrg	return index;
722af69d88dSmrg};
723af69d88dSmrg
72401e04c3fSmrg/* we need this to get a common lds index for vs/tcs/tes input/outputs */
72501e04c3fSmrgint r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
72601e04c3fSmrg{
72701e04c3fSmrg	switch (semantic_name) {
72801e04c3fSmrg	case TGSI_SEMANTIC_POSITION:
72901e04c3fSmrg		return 0;
73001e04c3fSmrg	case TGSI_SEMANTIC_PSIZE:
73101e04c3fSmrg		return 1;
73201e04c3fSmrg	case TGSI_SEMANTIC_CLIPDIST:
73301e04c3fSmrg		assert(index <= 1);
73401e04c3fSmrg		return 2 + index;
7357ec681f3Smrg	case TGSI_SEMANTIC_TEXCOORD:
7367ec681f3Smrg		return 4 + index;
73701e04c3fSmrg	case TGSI_SEMANTIC_GENERIC:
73801e04c3fSmrg		if (index <= 63-4)
7397ec681f3Smrg			return 4 + index;
74001e04c3fSmrg		else
74101e04c3fSmrg			/* same explanation as in the default statement,
74201e04c3fSmrg			 * the only user hitting this is st/nine.
74301e04c3fSmrg			 */
74401e04c3fSmrg			return 0;
74501e04c3fSmrg
74601e04c3fSmrg	/* patch indices are completely separate and thus start from 0 */
74701e04c3fSmrg	case TGSI_SEMANTIC_TESSOUTER:
74801e04c3fSmrg		return 0;
74901e04c3fSmrg	case TGSI_SEMANTIC_TESSINNER:
75001e04c3fSmrg		return 1;
75101e04c3fSmrg	case TGSI_SEMANTIC_PATCH:
75201e04c3fSmrg		return 2 + index;
75301e04c3fSmrg
75401e04c3fSmrg	default:
75501e04c3fSmrg		/* Don't fail here. The result of this function is only used
75601e04c3fSmrg		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
75701e04c3fSmrg		 * occur, but this function is called for all vertex shaders
75801e04c3fSmrg		 * before it's known whether LS will be compiled or not.
75901e04c3fSmrg		 */
76001e04c3fSmrg		return 0;
76101e04c3fSmrg	}
76201e04c3fSmrg}
76301e04c3fSmrg
764af69d88dSmrg/* turn input into interpolate on EG */
765af69d88dSmrgstatic int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
766af69d88dSmrg{
767af69d88dSmrg	int r = 0;
768af69d88dSmrg
769af69d88dSmrg	if (ctx->shader->input[index].spi_sid) {
770af69d88dSmrg		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
771af69d88dSmrg		if (ctx->shader->input[index].interpolate > 0) {
772af69d88dSmrg			evergreen_interp_assign_ij_index(ctx, index);
77301e04c3fSmrg			r = evergreen_interp_alu(ctx, index);
774af69d88dSmrg		} else {
77501e04c3fSmrg			r = evergreen_interp_flat(ctx, index);
776af69d88dSmrg		}
777af69d88dSmrg	}
778af69d88dSmrg	return r;
779af69d88dSmrg}
780af69d88dSmrg
781af69d88dSmrgstatic int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
782af69d88dSmrg{
783af69d88dSmrg	struct r600_bytecode_alu alu;
784af69d88dSmrg	int i, r;
785af69d88dSmrg	int gpr_front = ctx->shader->input[front].gpr;
786af69d88dSmrg	int gpr_back = ctx->shader->input[back].gpr;
787af69d88dSmrg
788af69d88dSmrg	for (i = 0; i < 4; i++) {
789af69d88dSmrg		memset(&alu, 0, sizeof(alu));
790af69d88dSmrg		alu.op = ALU_OP3_CNDGT;
791af69d88dSmrg		alu.is_op3 = 1;
792af69d88dSmrg		alu.dst.write = 1;
793af69d88dSmrg		alu.dst.sel = gpr_front;
794af69d88dSmrg		alu.src[0].sel = ctx->face_gpr;
795af69d88dSmrg		alu.src[1].sel = gpr_front;
796af69d88dSmrg		alu.src[2].sel = gpr_back;
797af69d88dSmrg
798af69d88dSmrg		alu.dst.chan = i;
799af69d88dSmrg		alu.src[1].chan = i;
800af69d88dSmrg		alu.src[2].chan = i;
801af69d88dSmrg		alu.last = (i==3);
802af69d88dSmrg
803af69d88dSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
804af69d88dSmrg			return r;
805af69d88dSmrg	}
806af69d88dSmrg
807af69d88dSmrg	return 0;
808af69d88dSmrg}
8093464ebd5Sriastradh
81001e04c3fSmrg/* execute a single slot ALU calculation */
81101e04c3fSmrgstatic int single_alu_op2(struct r600_shader_ctx *ctx, int op,
81201e04c3fSmrg			  int dst_sel, int dst_chan,
81301e04c3fSmrg			  int src0_sel, unsigned src0_chan_val,
81401e04c3fSmrg			  int src1_sel, unsigned src1_chan_val)
81501e04c3fSmrg{
81601e04c3fSmrg	struct r600_bytecode_alu alu;
81701e04c3fSmrg	int r, i;
81801e04c3fSmrg
81901e04c3fSmrg	if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
82001e04c3fSmrg		for (i = 0; i < 4; i++) {
82101e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
82201e04c3fSmrg			alu.op = op;
82301e04c3fSmrg			alu.src[0].sel = src0_sel;
82401e04c3fSmrg			if (src0_sel == V_SQ_ALU_SRC_LITERAL)
82501e04c3fSmrg				alu.src[0].value = src0_chan_val;
82601e04c3fSmrg			else
82701e04c3fSmrg				alu.src[0].chan = src0_chan_val;
82801e04c3fSmrg			alu.src[1].sel = src1_sel;
82901e04c3fSmrg			if (src1_sel == V_SQ_ALU_SRC_LITERAL)
83001e04c3fSmrg				alu.src[1].value = src1_chan_val;
83101e04c3fSmrg			else
83201e04c3fSmrg				alu.src[1].chan = src1_chan_val;
83301e04c3fSmrg			alu.dst.sel = dst_sel;
83401e04c3fSmrg			alu.dst.chan = i;
83501e04c3fSmrg			alu.dst.write = i == dst_chan;
83601e04c3fSmrg			alu.last = (i == 3);
83701e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
83801e04c3fSmrg			if (r)
83901e04c3fSmrg				return r;
84001e04c3fSmrg		}
84101e04c3fSmrg		return 0;
84201e04c3fSmrg	}
84301e04c3fSmrg
84401e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
84501e04c3fSmrg	alu.op = op;
84601e04c3fSmrg	alu.src[0].sel = src0_sel;
84701e04c3fSmrg	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
84801e04c3fSmrg		alu.src[0].value = src0_chan_val;
84901e04c3fSmrg	else
85001e04c3fSmrg		alu.src[0].chan = src0_chan_val;
85101e04c3fSmrg	alu.src[1].sel = src1_sel;
85201e04c3fSmrg	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
85301e04c3fSmrg		alu.src[1].value = src1_chan_val;
85401e04c3fSmrg	else
85501e04c3fSmrg		alu.src[1].chan = src1_chan_val;
85601e04c3fSmrg	alu.dst.sel = dst_sel;
85701e04c3fSmrg	alu.dst.chan = dst_chan;
85801e04c3fSmrg	alu.dst.write = 1;
85901e04c3fSmrg	alu.last = 1;
86001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
86101e04c3fSmrg	if (r)
86201e04c3fSmrg		return r;
86301e04c3fSmrg	return 0;
86401e04c3fSmrg}
86501e04c3fSmrg
86601e04c3fSmrg/* execute a single slot ALU calculation */
86701e04c3fSmrgstatic int single_alu_op3(struct r600_shader_ctx *ctx, int op,
86801e04c3fSmrg			  int dst_sel, int dst_chan,
86901e04c3fSmrg			  int src0_sel, unsigned src0_chan_val,
87001e04c3fSmrg			  int src1_sel, unsigned src1_chan_val,
87101e04c3fSmrg			  int src2_sel, unsigned src2_chan_val)
87201e04c3fSmrg{
87301e04c3fSmrg	struct r600_bytecode_alu alu;
87401e04c3fSmrg	int r;
87501e04c3fSmrg
87601e04c3fSmrg	/* validate this for other ops */
87701e04c3fSmrg	assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT);
87801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
87901e04c3fSmrg	alu.op = op;
88001e04c3fSmrg	alu.src[0].sel = src0_sel;
88101e04c3fSmrg	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
88201e04c3fSmrg		alu.src[0].value = src0_chan_val;
88301e04c3fSmrg	else
88401e04c3fSmrg		alu.src[0].chan = src0_chan_val;
88501e04c3fSmrg	alu.src[1].sel = src1_sel;
88601e04c3fSmrg	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
88701e04c3fSmrg		alu.src[1].value = src1_chan_val;
88801e04c3fSmrg	else
88901e04c3fSmrg		alu.src[1].chan = src1_chan_val;
89001e04c3fSmrg	alu.src[2].sel = src2_sel;
89101e04c3fSmrg	if (src2_sel == V_SQ_ALU_SRC_LITERAL)
89201e04c3fSmrg		alu.src[2].value = src2_chan_val;
89301e04c3fSmrg	else
89401e04c3fSmrg		alu.src[2].chan = src2_chan_val;
89501e04c3fSmrg	alu.dst.sel = dst_sel;
89601e04c3fSmrg	alu.dst.chan = dst_chan;
89701e04c3fSmrg	alu.is_op3 = 1;
89801e04c3fSmrg	alu.last = 1;
89901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
90001e04c3fSmrg	if (r)
90101e04c3fSmrg		return r;
90201e04c3fSmrg	return 0;
90301e04c3fSmrg}
90401e04c3fSmrg
90501e04c3fSmrg/* put it in temp_reg.x */
90601e04c3fSmrgstatic int get_lds_offset0(struct r600_shader_ctx *ctx,
90701e04c3fSmrg			   int rel_patch_chan,
90801e04c3fSmrg			   int temp_reg, bool is_patch_var)
90901e04c3fSmrg{
91001e04c3fSmrg	int r;
91101e04c3fSmrg
91201e04c3fSmrg	/* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
91301e04c3fSmrg	/* ADD
91401e04c3fSmrg	   Dimension - patch0_offset (input_vals.z),
91501e04c3fSmrg	   Non-dim - patch0_data_offset (input_vals.w)
91601e04c3fSmrg	*/
91701e04c3fSmrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
91801e04c3fSmrg			   temp_reg, 0,
91901e04c3fSmrg			   ctx->tess_output_info, 0,
92001e04c3fSmrg			   0, rel_patch_chan,
92101e04c3fSmrg			   ctx->tess_output_info, is_patch_var ? 3 : 2);
92201e04c3fSmrg	if (r)
92301e04c3fSmrg		return r;
92401e04c3fSmrg	return 0;
92501e04c3fSmrg}
92601e04c3fSmrg
92701e04c3fSmrgstatic inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
92801e04c3fSmrg{
92901e04c3fSmrg	return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
93001e04c3fSmrg}
93101e04c3fSmrg
93201e04c3fSmrgstatic int r600_get_temp(struct r600_shader_ctx *ctx)
93301e04c3fSmrg{
93401e04c3fSmrg	return ctx->temp_reg + ctx->max_driver_temp_used++;
93501e04c3fSmrg}
93601e04c3fSmrg
93701e04c3fSmrgstatic int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
93801e04c3fSmrg{
93901e04c3fSmrg	int i;
94001e04c3fSmrg	i = ctx->shader->noutput++;
94101e04c3fSmrg	ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
94201e04c3fSmrg	ctx->shader->output[i].sid = 0;
94301e04c3fSmrg	ctx->shader->output[i].gpr = 0;
94401e04c3fSmrg	ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
94501e04c3fSmrg	ctx->shader->output[i].write_mask = 0x4;
94601e04c3fSmrg	ctx->shader->output[i].spi_sid = prim_id_sid;
94701e04c3fSmrg
94801e04c3fSmrg	return 0;
94901e04c3fSmrg}
95001e04c3fSmrg
95101e04c3fSmrgstatic int tgsi_barrier(struct r600_shader_ctx *ctx)
95201e04c3fSmrg{
95301e04c3fSmrg	struct r600_bytecode_alu alu;
95401e04c3fSmrg	int r;
95501e04c3fSmrg
95601e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
95701e04c3fSmrg	alu.op = ctx->inst_info->op;
95801e04c3fSmrg	alu.last = 1;
95901e04c3fSmrg
96001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
96101e04c3fSmrg	if (r)
96201e04c3fSmrg		return r;
96301e04c3fSmrg	return 0;
96401e04c3fSmrg}
96501e04c3fSmrg
96601e04c3fSmrgstatic void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, unsigned *scratch_space_needed)
96701e04c3fSmrg{
96801e04c3fSmrg	// pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays
96901e04c3fSmrg	unsigned n = ctx->info.array_max[TGSI_FILE_TEMPORARY];
97001e04c3fSmrg	unsigned narrays_left = n;
97101e04c3fSmrg	bool *spilled = ctx->spilled_arrays; // assumed calloc:ed
97201e04c3fSmrg
97301e04c3fSmrg	*scratch_space_needed = 0;
97401e04c3fSmrg	while (*regno > 124 && narrays_left) {
97501e04c3fSmrg		unsigned i;
97601e04c3fSmrg		unsigned largest = 0;
97701e04c3fSmrg		unsigned largest_index = 0;
97801e04c3fSmrg
97901e04c3fSmrg		for (i = 0; i < n; i++) {
98001e04c3fSmrg			unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
98101e04c3fSmrg			if (!spilled[i] && size > largest) {
98201e04c3fSmrg				largest = size;
98301e04c3fSmrg				largest_index = i;
98401e04c3fSmrg			}
98501e04c3fSmrg		}
98601e04c3fSmrg
98701e04c3fSmrg		spilled[largest_index] = true;
98801e04c3fSmrg		*regno -= largest;
98901e04c3fSmrg		*scratch_space_needed += largest;
99001e04c3fSmrg
99101e04c3fSmrg		narrays_left --;
99201e04c3fSmrg	}
99301e04c3fSmrg
99401e04c3fSmrg	if (narrays_left == 0) {
99501e04c3fSmrg		ctx->info.indirect_files &= ~(1 << TGSI_FILE_TEMPORARY);
99601e04c3fSmrg	}
99701e04c3fSmrg}
99801e04c3fSmrg
99901e04c3fSmrg/* Take spilled temp arrays into account when translating tgsi register
100001e04c3fSmrg * indexes into r600 gprs if spilled is false, or scratch array offset if
100101e04c3fSmrg * spilled is true */
100201e04c3fSmrgstatic int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, bool *spilled)
100301e04c3fSmrg{
100401e04c3fSmrg	unsigned i;
100501e04c3fSmrg	unsigned spilled_size = 0;
100601e04c3fSmrg
100701e04c3fSmrg	for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) {
100801e04c3fSmrg		if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) {
100901e04c3fSmrg			if (ctx->spilled_arrays[i]) {
101001e04c3fSmrg				/* vec4 index into spilled scratch memory */
101101e04c3fSmrg				*spilled = true;
101201e04c3fSmrg				return tgsi_reg_index - ctx->array_infos[i].range.First + spilled_size;
101301e04c3fSmrg			}
101401e04c3fSmrg			else {
101501e04c3fSmrg				/* regular GPR array */
101601e04c3fSmrg				*spilled = false;
101701e04c3fSmrg				return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY];
101801e04c3fSmrg			}
101901e04c3fSmrg		}
102001e04c3fSmrg
102101e04c3fSmrg		if (tgsi_reg_index < ctx->array_infos[i].range.First)
102201e04c3fSmrg			break;
102301e04c3fSmrg		if (ctx->spilled_arrays[i]) {
102401e04c3fSmrg			spilled_size += ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
102501e04c3fSmrg		}
102601e04c3fSmrg	}
102701e04c3fSmrg
102801e04c3fSmrg	/* regular GPR index, minus the holes from spilled arrays */
102901e04c3fSmrg	*spilled = false;
103001e04c3fSmrg
103101e04c3fSmrg	return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY];
103201e04c3fSmrg}
103301e04c3fSmrg
103401e04c3fSmrg/* look up spill area base offset and array size for a spilled temp array */
103501e04c3fSmrgstatic void get_spilled_array_base_and_size(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index,
103601e04c3fSmrg	unsigned *array_base, unsigned *array_size)
103701e04c3fSmrg{
103801e04c3fSmrg	unsigned i;
103901e04c3fSmrg	unsigned offset = 0;
104001e04c3fSmrg
104101e04c3fSmrg	for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) {
104201e04c3fSmrg		if (ctx->spilled_arrays[i]) {
104301e04c3fSmrg			unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1;
104401e04c3fSmrg
104501e04c3fSmrg			if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) {
104601e04c3fSmrg				*array_base = offset;
104701e04c3fSmrg				*array_size = size - 1; /* hw counts from 1 */
104801e04c3fSmrg
104901e04c3fSmrg				return;
105001e04c3fSmrg			}
105101e04c3fSmrg
105201e04c3fSmrg			offset += size;
105301e04c3fSmrg		}
105401e04c3fSmrg	}
105501e04c3fSmrg}
105601e04c3fSmrg
10573464ebd5Sriastradhstatic int tgsi_declaration(struct r600_shader_ctx *ctx)
10583464ebd5Sriastradh{
10593464ebd5Sriastradh	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
1060af69d88dSmrg	int r, i, j, count = d->Range.Last - d->Range.First + 1;
10613464ebd5Sriastradh
10623464ebd5Sriastradh	switch (d->Declaration.File) {
10633464ebd5Sriastradh	case TGSI_FILE_INPUT:
106401e04c3fSmrg		for (j = 0; j < count; j++) {
106501e04c3fSmrg			i = ctx->shader->ninput + j;
106601e04c3fSmrg			assert(i < ARRAY_SIZE(ctx->shader->input));
106701e04c3fSmrg			ctx->shader->input[i].name = d->Semantic.Name;
106801e04c3fSmrg			ctx->shader->input[i].sid = d->Semantic.Index + j;
106901e04c3fSmrg			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
107001e04c3fSmrg			ctx->shader->input[i].interpolate_location = d->Interp.Location;
107101e04c3fSmrg			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
107201e04c3fSmrg			if (ctx->type == PIPE_SHADER_FRAGMENT) {
107301e04c3fSmrg				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
107401e04c3fSmrg				switch (ctx->shader->input[i].name) {
107501e04c3fSmrg				case TGSI_SEMANTIC_FACE:
107601e04c3fSmrg					if (ctx->face_gpr != -1)
107701e04c3fSmrg						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
107801e04c3fSmrg					else
107901e04c3fSmrg						ctx->face_gpr = ctx->shader->input[i].gpr;
108001e04c3fSmrg					break;
108101e04c3fSmrg				case TGSI_SEMANTIC_COLOR:
108201e04c3fSmrg					ctx->colors_used++;
108301e04c3fSmrg					break;
108401e04c3fSmrg				case TGSI_SEMANTIC_POSITION:
108501e04c3fSmrg					ctx->fragcoord_input = i;
108601e04c3fSmrg					break;
108701e04c3fSmrg				case TGSI_SEMANTIC_PRIMID:
108801e04c3fSmrg					/* set this for now */
108901e04c3fSmrg					ctx->shader->gs_prim_id_input = true;
109001e04c3fSmrg					ctx->shader->ps_prim_id_input = i;
109101e04c3fSmrg					break;
109201e04c3fSmrg				}
109301e04c3fSmrg				if (ctx->bc->chip_class >= EVERGREEN) {
109401e04c3fSmrg					if ((r = evergreen_interp_input(ctx, i)))
109501e04c3fSmrg						return r;
109601e04c3fSmrg				}
109701e04c3fSmrg			} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
109801e04c3fSmrg				/* FIXME probably skip inputs if they aren't passed in the ring */
109901e04c3fSmrg				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
110001e04c3fSmrg				ctx->next_ring_offset += 16;
110101e04c3fSmrg				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
110201e04c3fSmrg					ctx->shader->gs_prim_id_input = true;
11033464ebd5Sriastradh			}
11043464ebd5Sriastradh		}
110501e04c3fSmrg		ctx->shader->ninput += count;
11063464ebd5Sriastradh		break;
11073464ebd5Sriastradh	case TGSI_FILE_OUTPUT:
110801e04c3fSmrg		for (j = 0; j < count; j++) {
110901e04c3fSmrg			i = ctx->shader->noutput + j;
111001e04c3fSmrg			assert(i < ARRAY_SIZE(ctx->shader->output));
111101e04c3fSmrg			ctx->shader->output[i].name = d->Semantic.Name;
111201e04c3fSmrg			ctx->shader->output[i].sid = d->Semantic.Index + j;
111301e04c3fSmrg			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
111401e04c3fSmrg			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
111501e04c3fSmrg			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
111601e04c3fSmrg			if (ctx->type == PIPE_SHADER_VERTEX ||
111701e04c3fSmrg			    ctx->type == PIPE_SHADER_GEOMETRY ||
111801e04c3fSmrg			    ctx->type == PIPE_SHADER_TESS_EVAL) {
111901e04c3fSmrg				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
112001e04c3fSmrg				switch (d->Semantic.Name) {
112101e04c3fSmrg				case TGSI_SEMANTIC_CLIPDIST:
112201e04c3fSmrg					break;
112301e04c3fSmrg				case TGSI_SEMANTIC_PSIZE:
112401e04c3fSmrg					ctx->shader->vs_out_misc_write = 1;
112501e04c3fSmrg					ctx->shader->vs_out_point_size = 1;
112601e04c3fSmrg					break;
112701e04c3fSmrg				case TGSI_SEMANTIC_EDGEFLAG:
112801e04c3fSmrg					ctx->shader->vs_out_misc_write = 1;
112901e04c3fSmrg					ctx->shader->vs_out_edgeflag = 1;
113001e04c3fSmrg					ctx->edgeflag_output = i;
113101e04c3fSmrg					break;
113201e04c3fSmrg				case TGSI_SEMANTIC_VIEWPORT_INDEX:
113301e04c3fSmrg					ctx->shader->vs_out_misc_write = 1;
113401e04c3fSmrg					ctx->shader->vs_out_viewport = 1;
113501e04c3fSmrg					break;
113601e04c3fSmrg				case TGSI_SEMANTIC_LAYER:
113701e04c3fSmrg					ctx->shader->vs_out_misc_write = 1;
113801e04c3fSmrg					ctx->shader->vs_out_layer = 1;
113901e04c3fSmrg					break;
114001e04c3fSmrg				case TGSI_SEMANTIC_CLIPVERTEX:
114101e04c3fSmrg					ctx->clip_vertex_write = TRUE;
114201e04c3fSmrg					ctx->cv_output = i;
114301e04c3fSmrg					break;
114401e04c3fSmrg				}
114501e04c3fSmrg				if (ctx->type == PIPE_SHADER_GEOMETRY) {
114601e04c3fSmrg					ctx->gs_out_ring_offset += 16;
114701e04c3fSmrg				}
114801e04c3fSmrg			} else if (ctx->type == PIPE_SHADER_FRAGMENT) {
114901e04c3fSmrg				switch (d->Semantic.Name) {
115001e04c3fSmrg				case TGSI_SEMANTIC_COLOR:
115101e04c3fSmrg					ctx->shader->nr_ps_max_color_exports++;
115201e04c3fSmrg					break;
115301e04c3fSmrg				}
1154af69d88dSmrg			}
11553464ebd5Sriastradh		}
115601e04c3fSmrg		ctx->shader->noutput += count;
11573464ebd5Sriastradh		break;
11583464ebd5Sriastradh	case TGSI_FILE_TEMPORARY:
1159af69d88dSmrg		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1160af69d88dSmrg			if (d->Array.ArrayID) {
116101e04c3fSmrg				bool spilled;
116201e04c3fSmrg				unsigned idx = map_tgsi_reg_index_to_r600_gpr(ctx,
116301e04c3fSmrg					d->Range.First,
116401e04c3fSmrg					&spilled);
116501e04c3fSmrg
116601e04c3fSmrg				if (!spilled) {
116701e04c3fSmrg					r600_add_gpr_array(ctx->shader, idx,
116801e04c3fSmrg						d->Range.Last - d->Range.First + 1, 0x0F);
116901e04c3fSmrg				}
1170af69d88dSmrg			}
1171af69d88dSmrg		}
1172af69d88dSmrg		break;
1173af69d88dSmrg
1174af69d88dSmrg	case TGSI_FILE_CONSTANT:
11753464ebd5Sriastradh	case TGSI_FILE_SAMPLER:
117601e04c3fSmrg	case TGSI_FILE_SAMPLER_VIEW:
11773464ebd5Sriastradh	case TGSI_FILE_ADDRESS:
117801e04c3fSmrg	case TGSI_FILE_BUFFER:
117901e04c3fSmrg	case TGSI_FILE_IMAGE:
118001e04c3fSmrg	case TGSI_FILE_MEMORY:
118101e04c3fSmrg		break;
118201e04c3fSmrg
118301e04c3fSmrg	case TGSI_FILE_HW_ATOMIC:
118401e04c3fSmrg		i = ctx->shader->nhwatomic_ranges;
118501e04c3fSmrg		ctx->shader->atomics[i].start = d->Range.First;
118601e04c3fSmrg		ctx->shader->atomics[i].end = d->Range.Last;
118701e04c3fSmrg		ctx->shader->atomics[i].hw_idx = ctx->shader->atomic_base + ctx->shader->nhwatomic;
118801e04c3fSmrg		ctx->shader->atomics[i].array_id = d->Array.ArrayID;
118901e04c3fSmrg		ctx->shader->atomics[i].buffer_id = d->Dim.Index2D;
119001e04c3fSmrg		ctx->shader->nhwatomic_ranges++;
119101e04c3fSmrg		ctx->shader->nhwatomic += count;
11923464ebd5Sriastradh		break;
11933464ebd5Sriastradh
11943464ebd5Sriastradh	case TGSI_FILE_SYSTEM_VALUE:
119501e04c3fSmrg		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
119601e04c3fSmrg			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
119701e04c3fSmrg			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
119801e04c3fSmrg			break; /* Already handled from allocate_system_value_inputs */
119901e04c3fSmrg		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
1200af69d88dSmrg			break;
120101e04c3fSmrg		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
120201e04c3fSmrg			break;
120301e04c3fSmrg		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
120401e04c3fSmrg			break;
120501e04c3fSmrg		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ||
120601e04c3fSmrg			 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) {
120701e04c3fSmrg			int param = r600_get_lds_unique_index(d->Semantic.Name, 0);
120801e04c3fSmrg			int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2;
120901e04c3fSmrg			unsigned temp_reg = r600_get_temp(ctx);
121001e04c3fSmrg
121101e04c3fSmrg			r = get_lds_offset0(ctx, 2, temp_reg, true);
121201e04c3fSmrg			if (r)
121301e04c3fSmrg				return r;
121401e04c3fSmrg
121501e04c3fSmrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
121601e04c3fSmrg					   temp_reg, 0,
121701e04c3fSmrg					   temp_reg, 0,
121801e04c3fSmrg					   V_SQ_ALU_SRC_LITERAL, param * 16);
121901e04c3fSmrg			if (r)
122001e04c3fSmrg				return r;
122101e04c3fSmrg
122201e04c3fSmrg			do_lds_fetch_values(ctx, temp_reg, dreg, 0xf);
1223af69d88dSmrg		}
122401e04c3fSmrg		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
122501e04c3fSmrg			/* MOV r1.x, r0.x;
122601e04c3fSmrg			   MOV r1.y, r0.y;
122701e04c3fSmrg			*/
122801e04c3fSmrg			for (i = 0; i < 2; i++) {
1229af69d88dSmrg				struct r600_bytecode_alu alu;
1230af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
123101e04c3fSmrg				alu.op = ALU_OP1_MOV;
1232af69d88dSmrg				alu.src[0].sel = 0;
123301e04c3fSmrg				alu.src[0].chan = 0 + i;
123401e04c3fSmrg				alu.dst.sel = 1;
123501e04c3fSmrg				alu.dst.chan = 0 + i;
1236af69d88dSmrg				alu.dst.write = 1;
123701e04c3fSmrg				alu.last = (i == 1) ? 1 : 0;
1238af69d88dSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1239af69d88dSmrg					return r;
1240af69d88dSmrg			}
124101e04c3fSmrg			/* ADD r1.z, 1.0f, -r0.x */
124201e04c3fSmrg			struct r600_bytecode_alu alu;
124301e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
124401e04c3fSmrg			alu.op = ALU_OP2_ADD;
124501e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_1;
124601e04c3fSmrg			alu.src[1].sel = 1;
124701e04c3fSmrg			alu.src[1].chan = 0;
124801e04c3fSmrg			alu.src[1].neg = 1;
124901e04c3fSmrg			alu.dst.sel = 1;
125001e04c3fSmrg			alu.dst.chan = 2;
125101e04c3fSmrg			alu.dst.write = 1;
125201e04c3fSmrg			alu.last = 1;
125301e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
125401e04c3fSmrg				return r;
12553464ebd5Sriastradh
125601e04c3fSmrg			/* ADD r1.z, r1.z, -r1.y */
125701e04c3fSmrg			alu.op = ALU_OP2_ADD;
125801e04c3fSmrg			alu.src[0].sel = 1;
125901e04c3fSmrg			alu.src[0].chan = 2;
126001e04c3fSmrg			alu.src[1].sel = 1;
126101e04c3fSmrg			alu.src[1].chan = 1;
126201e04c3fSmrg			alu.src[1].neg = 1;
126301e04c3fSmrg			alu.dst.sel = 1;
126401e04c3fSmrg			alu.dst.chan = 2;
126501e04c3fSmrg			alu.dst.write = 1;
126601e04c3fSmrg			alu.last = 1;
126701e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
126801e04c3fSmrg				return r;
126901e04c3fSmrg			break;
127001e04c3fSmrg		}
127101e04c3fSmrg		break;
127201e04c3fSmrg	default:
127301e04c3fSmrg		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
127401e04c3fSmrg		return -EINVAL;
127501e04c3fSmrg	}
127601e04c3fSmrg	return 0;
127701e04c3fSmrg}
127801e04c3fSmrg
127901e04c3fSmrgstatic int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
128001e04c3fSmrg{
128101e04c3fSmrg	struct tgsi_parse_context parse;
128201e04c3fSmrg	struct {
128301e04c3fSmrg		boolean enabled;
128401e04c3fSmrg		int *reg;
128501e04c3fSmrg		unsigned name, alternate_name;
128601e04c3fSmrg	} inputs[2] = {
128701e04c3fSmrg		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */
128801e04c3fSmrg
128901e04c3fSmrg		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
129001e04c3fSmrg	};
129101e04c3fSmrg	int num_regs = 0;
129201e04c3fSmrg	unsigned k, i;
129301e04c3fSmrg
129401e04c3fSmrg	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
129501e04c3fSmrg		return 0;
129601e04c3fSmrg	}
129701e04c3fSmrg
129801e04c3fSmrg	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
129901e04c3fSmrg	while (!tgsi_parse_end_of_tokens(&parse)) {
130001e04c3fSmrg		tgsi_parse_token(&parse);
130101e04c3fSmrg
130201e04c3fSmrg		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
130301e04c3fSmrg			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
130401e04c3fSmrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
130501e04c3fSmrg				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
130601e04c3fSmrg				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
130701e04c3fSmrg			{
130801e04c3fSmrg				int interpolate, location, k;
130901e04c3fSmrg
131001e04c3fSmrg				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
131101e04c3fSmrg					location = TGSI_INTERPOLATE_LOC_CENTER;
131201e04c3fSmrg				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
131301e04c3fSmrg					location = TGSI_INTERPOLATE_LOC_CENTER;
131401e04c3fSmrg					/* Needs sample positions, currently those are always available */
131501e04c3fSmrg				} else {
131601e04c3fSmrg					location = TGSI_INTERPOLATE_LOC_CENTROID;
131701e04c3fSmrg				}
131801e04c3fSmrg
131901e04c3fSmrg				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
132001e04c3fSmrg				k = eg_get_interpolator_index(interpolate, location);
132101e04c3fSmrg				if (k >= 0)
132201e04c3fSmrg					ctx->eg_interpolators[k].enabled = true;
132301e04c3fSmrg			}
132401e04c3fSmrg		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
132501e04c3fSmrg			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
132601e04c3fSmrg			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
132701e04c3fSmrg				for (k = 0; k < ARRAY_SIZE(inputs); k++) {
132801e04c3fSmrg					if (d->Semantic.Name == inputs[k].name ||
132901e04c3fSmrg						d->Semantic.Name == inputs[k].alternate_name) {
133001e04c3fSmrg						inputs[k].enabled = true;
133101e04c3fSmrg					}
133201e04c3fSmrg				}
133301e04c3fSmrg			}
133401e04c3fSmrg		}
133501e04c3fSmrg	}
133601e04c3fSmrg
133701e04c3fSmrg	tgsi_parse_free(&parse);
133801e04c3fSmrg
133901e04c3fSmrg	if (ctx->info.reads_samplemask &&
134001e04c3fSmrg	    (ctx->info.uses_linear_sample || ctx->info.uses_persp_sample)) {
134101e04c3fSmrg		inputs[1].enabled = true;
134201e04c3fSmrg	}
134301e04c3fSmrg
134401e04c3fSmrg	if (ctx->bc->chip_class >= EVERGREEN) {
134501e04c3fSmrg		int num_baryc = 0;
134601e04c3fSmrg		/* assign gpr to each interpolator according to priority */
134701e04c3fSmrg		for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) {
134801e04c3fSmrg			if (ctx->eg_interpolators[i].enabled) {
134901e04c3fSmrg				ctx->eg_interpolators[i].ij_index = num_baryc;
135001e04c3fSmrg				num_baryc++;
135101e04c3fSmrg			}
135201e04c3fSmrg		}
135301e04c3fSmrg		num_baryc = (num_baryc + 1) >> 1;
135401e04c3fSmrg		gpr_offset += num_baryc;
135501e04c3fSmrg	}
135601e04c3fSmrg
135701e04c3fSmrg	for (i = 0; i < ARRAY_SIZE(inputs); i++) {
135801e04c3fSmrg		boolean enabled = inputs[i].enabled;
135901e04c3fSmrg		int *reg = inputs[i].reg;
136001e04c3fSmrg		unsigned name = inputs[i].name;
136101e04c3fSmrg
136201e04c3fSmrg		if (enabled) {
136301e04c3fSmrg			int gpr = gpr_offset + num_regs++;
136401e04c3fSmrg			ctx->shader->nsys_inputs++;
136501e04c3fSmrg
136601e04c3fSmrg			// add to inputs, allocate a gpr
136701e04c3fSmrg			k = ctx->shader->ninput++;
136801e04c3fSmrg			ctx->shader->input[k].name = name;
136901e04c3fSmrg			ctx->shader->input[k].sid = 0;
137001e04c3fSmrg			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
137101e04c3fSmrg			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
137201e04c3fSmrg			*reg = ctx->shader->input[k].gpr = gpr;
137301e04c3fSmrg		}
137401e04c3fSmrg	}
137501e04c3fSmrg
137601e04c3fSmrg	return gpr_offset + num_regs;
13773464ebd5Sriastradh}
13783464ebd5Sriastradh
13793464ebd5Sriastradh/*
13803464ebd5Sriastradh * for evergreen we need to scan the shader to find the number of GPRs we need to
138101e04c3fSmrg * reserve for interpolation and system values
13823464ebd5Sriastradh *
138301e04c3fSmrg * we need to know if we are going to emit any sample or centroid inputs
13843464ebd5Sriastradh * if perspective and linear are required
13853464ebd5Sriastradh*/
13863464ebd5Sriastradhstatic int evergreen_gpr_count(struct r600_shader_ctx *ctx)
13873464ebd5Sriastradh{
138801e04c3fSmrg	unsigned i;
13893464ebd5Sriastradh
139001e04c3fSmrg	memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
13913464ebd5Sriastradh
139201e04c3fSmrg	/*
139301e04c3fSmrg	 * Could get this information from the shader info. But right now
139401e04c3fSmrg	 * we interpolate all declared inputs, whereas the shader info will
139501e04c3fSmrg	 * only contain the bits if the inputs are actually used, so it might
139601e04c3fSmrg	 * not be safe...
139701e04c3fSmrg	 */
13983464ebd5Sriastradh	for (i = 0; i < ctx->info.num_inputs; i++) {
139901e04c3fSmrg		int k;
140001e04c3fSmrg		/* skip position/face/mask/sampleid */
14013464ebd5Sriastradh		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
1402af69d88dSmrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
140301e04c3fSmrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
140401e04c3fSmrg		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
14053464ebd5Sriastradh			continue;
140601e04c3fSmrg
140701e04c3fSmrg		k = eg_get_interpolator_index(
140801e04c3fSmrg			ctx->info.input_interpolate[i],
140901e04c3fSmrg			ctx->info.input_interpolate_loc[i]);
141001e04c3fSmrg		if (k >= 0)
141101e04c3fSmrg			ctx->eg_interpolators[k].enabled = TRUE;
141201e04c3fSmrg	}
141301e04c3fSmrg
141401e04c3fSmrg	/* XXX PULL MODEL and LINE STIPPLE */
141501e04c3fSmrg
141601e04c3fSmrg	return allocate_system_value_inputs(ctx, 0);
141701e04c3fSmrg}
141801e04c3fSmrg
141901e04c3fSmrg/* sample_id_sel == NULL means fetch for current sample */
142001e04c3fSmrgstatic int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
142101e04c3fSmrg{
142201e04c3fSmrg	struct r600_bytecode_vtx vtx;
142301e04c3fSmrg	int r, t1;
142401e04c3fSmrg
142501e04c3fSmrg	t1 = r600_get_temp(ctx);
142601e04c3fSmrg
142701e04c3fSmrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
142801e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
142901e04c3fSmrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
143001e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
143101e04c3fSmrg	if (sample_id == NULL) {
143201e04c3fSmrg		assert(ctx->fixed_pt_position_gpr != -1);
143301e04c3fSmrg
143401e04c3fSmrg		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
143501e04c3fSmrg		vtx.src_sel_x = 3;
143601e04c3fSmrg	}
143701e04c3fSmrg	else {
143801e04c3fSmrg		struct r600_bytecode_alu alu;
143901e04c3fSmrg
144001e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
144101e04c3fSmrg		alu.op = ALU_OP1_MOV;
144201e04c3fSmrg		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
144301e04c3fSmrg		alu.dst.sel = t1;
144401e04c3fSmrg		alu.dst.write = 1;
144501e04c3fSmrg		alu.last = 1;
144601e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
144701e04c3fSmrg		if (r)
144801e04c3fSmrg			return r;
144901e04c3fSmrg
145001e04c3fSmrg		vtx.src_gpr = t1;
145101e04c3fSmrg		vtx.src_sel_x = 0;
14523464ebd5Sriastradh	}
145301e04c3fSmrg	vtx.mega_fetch_count = 16;
145401e04c3fSmrg	vtx.dst_gpr = t1;
145501e04c3fSmrg	vtx.dst_sel_x = 0;
145601e04c3fSmrg	vtx.dst_sel_y = 1;
145701e04c3fSmrg	vtx.dst_sel_z = 2;
145801e04c3fSmrg	vtx.dst_sel_w = 3;
145901e04c3fSmrg	vtx.data_format = FMT_32_32_32_32_FLOAT;
146001e04c3fSmrg	vtx.num_format_all = 2;
146101e04c3fSmrg	vtx.format_comp_all = 1;
146201e04c3fSmrg	vtx.use_const_fields = 0;
146301e04c3fSmrg	vtx.offset = 0;
146401e04c3fSmrg	vtx.endian = r600_endian_swap(32);
146501e04c3fSmrg	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
146601e04c3fSmrg
146701e04c3fSmrg	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
146801e04c3fSmrg	if (r)
146901e04c3fSmrg		return r;
147001e04c3fSmrg
147101e04c3fSmrg	return t1;
147201e04c3fSmrg}
147301e04c3fSmrg
147401e04c3fSmrgstatic int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
147501e04c3fSmrg{
147601e04c3fSmrg	int r;
147701e04c3fSmrg	struct r600_bytecode_alu alu;
147801e04c3fSmrg
147901e04c3fSmrg	/* do a vtx fetch with wqm set on the vtx fetch */
148001e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
148101e04c3fSmrg	alu.op = ALU_OP1_MOV;
148201e04c3fSmrg	alu.dst.sel = ctx->helper_invoc_reg;
148301e04c3fSmrg	alu.dst.chan = 0;
148401e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
148501e04c3fSmrg	alu.src[0].value = 0xffffffff;
148601e04c3fSmrg	alu.dst.write = 1;
148701e04c3fSmrg	alu.last = 1;
148801e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
148901e04c3fSmrg	if (r)
149001e04c3fSmrg		return r;
149101e04c3fSmrg
149201e04c3fSmrg	/* do a vtx fetch in VPM mode */
149301e04c3fSmrg	struct r600_bytecode_vtx vtx;
149401e04c3fSmrg	memset(&vtx, 0, sizeof(vtx));
149501e04c3fSmrg	vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
149601e04c3fSmrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
149701e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
149801e04c3fSmrg	vtx.src_gpr = 0;
149901e04c3fSmrg	vtx.mega_fetch_count = 16; /* no idea here really... */
150001e04c3fSmrg	vtx.dst_gpr = ctx->helper_invoc_reg;
150101e04c3fSmrg	vtx.dst_sel_x = 4;
150201e04c3fSmrg	vtx.dst_sel_y = 7;		/* SEL_Y */
150301e04c3fSmrg	vtx.dst_sel_z = 7;		/* SEL_Z */
150401e04c3fSmrg	vtx.dst_sel_w = 7;		/* SEL_W */
150501e04c3fSmrg	vtx.data_format = FMT_32;
150601e04c3fSmrg	if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
150701e04c3fSmrg		return r;
150801e04c3fSmrg	ctx->bc->cf_last->vpm = 1;
150901e04c3fSmrg	return 0;
151001e04c3fSmrg}
151101e04c3fSmrg
151201e04c3fSmrgstatic int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
151301e04c3fSmrg{
151401e04c3fSmrg	int r;
151501e04c3fSmrg	struct r600_bytecode_alu alu;
151601e04c3fSmrg
151701e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
151801e04c3fSmrg	alu.op = ALU_OP1_MOV;
151901e04c3fSmrg	alu.dst.sel = ctx->helper_invoc_reg;
152001e04c3fSmrg	alu.dst.chan = 0;
152101e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
152201e04c3fSmrg	alu.src[0].value = 0xffffffff;
152301e04c3fSmrg	alu.dst.write = 1;
152401e04c3fSmrg	alu.last = 1;
152501e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
152601e04c3fSmrg	if (r)
152701e04c3fSmrg		return r;
152801e04c3fSmrg
152901e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
153001e04c3fSmrg	alu.op = ALU_OP1_MOV;
153101e04c3fSmrg	alu.dst.sel = ctx->helper_invoc_reg;
153201e04c3fSmrg	alu.dst.chan = 0;
153301e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
153401e04c3fSmrg	alu.dst.write = 1;
153501e04c3fSmrg	alu.last = 1;
153601e04c3fSmrg	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
153701e04c3fSmrg	if (r)
153801e04c3fSmrg		return r;
153901e04c3fSmrg
154001e04c3fSmrg	return ctx->helper_invoc_reg;
154101e04c3fSmrg}
154201e04c3fSmrg
154301e04c3fSmrgstatic int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
154401e04c3fSmrg{
154501e04c3fSmrg	struct r600_bytecode_vtx vtx;
154601e04c3fSmrg	int r, t1;
154701e04c3fSmrg
154801e04c3fSmrg	if (ctx->cs_block_size_loaded)
154901e04c3fSmrg		return ctx->cs_block_size_reg;
155001e04c3fSmrg	if (ctx->cs_grid_size_loaded)
155101e04c3fSmrg		return ctx->cs_grid_size_reg;
155201e04c3fSmrg
155301e04c3fSmrg	t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg;
155401e04c3fSmrg	struct r600_bytecode_alu alu;
155501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
155601e04c3fSmrg	alu.op = ALU_OP1_MOV;
155701e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
155801e04c3fSmrg	alu.dst.sel = t1;
155901e04c3fSmrg	alu.dst.write = 1;
156001e04c3fSmrg	alu.last = 1;
156101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
156201e04c3fSmrg	if (r)
156301e04c3fSmrg		return r;
156401e04c3fSmrg
156501e04c3fSmrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
156601e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
156701e04c3fSmrg	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
156801e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
156901e04c3fSmrg	vtx.src_gpr = t1;
157001e04c3fSmrg	vtx.src_sel_x = 0;
15713464ebd5Sriastradh
157201e04c3fSmrg	vtx.mega_fetch_count = 16;
157301e04c3fSmrg	vtx.dst_gpr = t1;
157401e04c3fSmrg	vtx.dst_sel_x = 0;
157501e04c3fSmrg	vtx.dst_sel_y = 1;
157601e04c3fSmrg	vtx.dst_sel_z = 2;
157701e04c3fSmrg	vtx.dst_sel_w = 7;
157801e04c3fSmrg	vtx.data_format = FMT_32_32_32_32;
157901e04c3fSmrg	vtx.num_format_all = 1;
158001e04c3fSmrg	vtx.format_comp_all = 0;
158101e04c3fSmrg	vtx.use_const_fields = 0;
158201e04c3fSmrg	vtx.offset = load_block ? 0 : 16; // first element is size of buffer
158301e04c3fSmrg	vtx.endian = r600_endian_swap(32);
158401e04c3fSmrg	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
15853464ebd5Sriastradh
158601e04c3fSmrg	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
158701e04c3fSmrg	if (r)
158801e04c3fSmrg		return r;
15893464ebd5Sriastradh
159001e04c3fSmrg	if (load_block)
159101e04c3fSmrg		ctx->cs_block_size_loaded = true;
159201e04c3fSmrg	else
159301e04c3fSmrg		ctx->cs_grid_size_loaded = true;
159401e04c3fSmrg	return t1;
15953464ebd5Sriastradh}
15963464ebd5Sriastradh
15973464ebd5Sriastradhstatic void tgsi_src(struct r600_shader_ctx *ctx,
15983464ebd5Sriastradh		     const struct tgsi_full_src_register *tgsi_src,
15993464ebd5Sriastradh		     struct r600_shader_src *r600_src)
16003464ebd5Sriastradh{
16013464ebd5Sriastradh	memset(r600_src, 0, sizeof(*r600_src));
16023464ebd5Sriastradh	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
16033464ebd5Sriastradh	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
16043464ebd5Sriastradh	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
16053464ebd5Sriastradh	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
16063464ebd5Sriastradh	r600_src->neg = tgsi_src->Register.Negate;
16073464ebd5Sriastradh	r600_src->abs = tgsi_src->Register.Absolute;
16083464ebd5Sriastradh
160901e04c3fSmrg	if (tgsi_src->Register.File == TGSI_FILE_TEMPORARY) {
161001e04c3fSmrg		bool spilled;
161101e04c3fSmrg		unsigned idx;
161201e04c3fSmrg
161301e04c3fSmrg		idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_src->Register.Index, &spilled);
161401e04c3fSmrg
161501e04c3fSmrg		if (spilled) {
161601e04c3fSmrg			int reg = r600_get_temp(ctx);
161701e04c3fSmrg			int r;
161801e04c3fSmrg
161901e04c3fSmrg			r600_src->sel = reg;
162001e04c3fSmrg
162101e04c3fSmrg			if (ctx->bc->chip_class < R700) {
162201e04c3fSmrg				struct r600_bytecode_output cf;
162301e04c3fSmrg
162401e04c3fSmrg				memset(&cf, 0, sizeof(struct r600_bytecode_output));
162501e04c3fSmrg				cf.op = CF_OP_MEM_SCRATCH;
162601e04c3fSmrg				cf.elem_size = 3;
162701e04c3fSmrg				cf.gpr = reg;
162801e04c3fSmrg				cf.comp_mask = 0xF;
162901e04c3fSmrg				cf.swizzle_x = 0;
163001e04c3fSmrg				cf.swizzle_y = 1;
163101e04c3fSmrg				cf.swizzle_z = 2;
163201e04c3fSmrg				cf.swizzle_w = 3;
163301e04c3fSmrg				cf.burst_count = 1;
163401e04c3fSmrg
163501e04c3fSmrg				get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index,
163601e04c3fSmrg					&cf.array_base, &cf.array_size);
163701e04c3fSmrg
163801e04c3fSmrg				if (tgsi_src->Register.Indirect) {
163901e04c3fSmrg					cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
164001e04c3fSmrg					cf.index_gpr = ctx->bc->ar_reg;
164101e04c3fSmrg				}
164201e04c3fSmrg				else {
164301e04c3fSmrg					cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ;
164401e04c3fSmrg					cf.array_base += idx;
164501e04c3fSmrg					cf.array_size = 0;
164601e04c3fSmrg				}
164701e04c3fSmrg
164801e04c3fSmrg				r = r600_bytecode_add_output(ctx->bc, &cf);
164901e04c3fSmrg			}
165001e04c3fSmrg			else {
165101e04c3fSmrg				struct r600_bytecode_vtx vtx;
165201e04c3fSmrg
165301e04c3fSmrg				if (r600_bytecode_get_need_wait_ack(ctx->bc)) {
165401e04c3fSmrg					r600_bytecode_need_wait_ack(ctx->bc, false);
165501e04c3fSmrg					r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
165601e04c3fSmrg				}
165701e04c3fSmrg
165801e04c3fSmrg				memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
165901e04c3fSmrg				vtx.op = FETCH_OP_READ_SCRATCH;
166001e04c3fSmrg				vtx.dst_gpr = reg;
166101e04c3fSmrg				vtx.uncached = 1; // Must bypass cache since prior spill written in same invocation
166201e04c3fSmrg				vtx.elem_size = 3;
166301e04c3fSmrg				vtx.data_format = FMT_32_32_32_32;
166401e04c3fSmrg				vtx.num_format_all = V_038010_SQ_NUM_FORMAT_INT;
166501e04c3fSmrg				vtx.dst_sel_x = tgsi_src->Register.SwizzleX;
166601e04c3fSmrg				vtx.dst_sel_y = tgsi_src->Register.SwizzleY;
166701e04c3fSmrg				vtx.dst_sel_z = tgsi_src->Register.SwizzleZ;
166801e04c3fSmrg				vtx.dst_sel_w = tgsi_src->Register.SwizzleW;
166901e04c3fSmrg
167001e04c3fSmrg				get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index,
167101e04c3fSmrg					&vtx.array_base, &vtx.array_size);
167201e04c3fSmrg
167301e04c3fSmrg				if (tgsi_src->Register.Indirect) {
167401e04c3fSmrg					vtx.indexed = 1;
167501e04c3fSmrg					vtx.src_gpr = ctx->bc->ar_reg;
167601e04c3fSmrg				}
167701e04c3fSmrg				else {
167801e04c3fSmrg					vtx.array_base += idx;
167901e04c3fSmrg					vtx.array_size = 0;
168001e04c3fSmrg				}
168101e04c3fSmrg
168201e04c3fSmrg				r = r600_bytecode_add_vtx(ctx->bc, &vtx);
168301e04c3fSmrg			}
168401e04c3fSmrg
168501e04c3fSmrg			if (r)
168601e04c3fSmrg				return;
168701e04c3fSmrg		}
168801e04c3fSmrg		else {
168901e04c3fSmrg			if (tgsi_src->Register.Indirect)
169001e04c3fSmrg				r600_src->rel = V_SQ_REL_RELATIVE;
169101e04c3fSmrg
169201e04c3fSmrg			r600_src->sel = idx;
169301e04c3fSmrg		}
169401e04c3fSmrg
169501e04c3fSmrg		return;
169601e04c3fSmrg	}
169701e04c3fSmrg
16983464ebd5Sriastradh	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
16993464ebd5Sriastradh		int index;
17003464ebd5Sriastradh		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
17013464ebd5Sriastradh			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
17023464ebd5Sriastradh			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
17033464ebd5Sriastradh
17043464ebd5Sriastradh			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
17057ec681f3Smrg			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel);
17063464ebd5Sriastradh			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
17073464ebd5Sriastradh				return;
17083464ebd5Sriastradh		}
17093464ebd5Sriastradh		index = tgsi_src->Register.Index;
17103464ebd5Sriastradh		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
17113464ebd5Sriastradh		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
17123464ebd5Sriastradh	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
1713af69d88dSmrg		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
1714af69d88dSmrg			r600_src->swizzle[0] = 2; // Z value
171501e04c3fSmrg			r600_src->swizzle[1] = 2;
171601e04c3fSmrg			r600_src->swizzle[2] = 2;
171701e04c3fSmrg			r600_src->swizzle[3] = 2;
1718af69d88dSmrg			r600_src->sel = ctx->face_gpr;
171901e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
172001e04c3fSmrg			r600_src->swizzle[0] = 3; // W value
172101e04c3fSmrg			r600_src->swizzle[1] = 3;
172201e04c3fSmrg			r600_src->swizzle[2] = 3;
172301e04c3fSmrg			r600_src->swizzle[3] = 3;
172401e04c3fSmrg			r600_src->sel = ctx->fixed_pt_position_gpr;
172501e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
172601e04c3fSmrg			r600_src->swizzle[0] = 0;
172701e04c3fSmrg			r600_src->swizzle[1] = 1;
172801e04c3fSmrg			r600_src->swizzle[2] = 4;
172901e04c3fSmrg			r600_src->swizzle[3] = 4;
173001e04c3fSmrg			r600_src->sel = load_sample_position(ctx, NULL, -1);
1731af69d88dSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
1732af69d88dSmrg			r600_src->swizzle[0] = 3;
1733af69d88dSmrg			r600_src->swizzle[1] = 3;
1734af69d88dSmrg			r600_src->swizzle[2] = 3;
1735af69d88dSmrg			r600_src->swizzle[3] = 3;
1736af69d88dSmrg			r600_src->sel = 0;
1737af69d88dSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
1738af69d88dSmrg			r600_src->swizzle[0] = 0;
1739af69d88dSmrg			r600_src->swizzle[1] = 0;
1740af69d88dSmrg			r600_src->swizzle[2] = 0;
1741af69d88dSmrg			r600_src->swizzle[3] = 0;
1742af69d88dSmrg			r600_src->sel = 0;
174301e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_THREAD_ID) {
174401e04c3fSmrg			r600_src->sel = 0;
174501e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_ID) {
174601e04c3fSmrg			r600_src->sel = 1;
174701e04c3fSmrg		} else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
174801e04c3fSmrg			r600_src->swizzle[0] = 3;
174901e04c3fSmrg			r600_src->swizzle[1] = 3;
175001e04c3fSmrg			r600_src->swizzle[2] = 3;
175101e04c3fSmrg			r600_src->swizzle[3] = 3;
175201e04c3fSmrg			r600_src->sel = 1;
175301e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
175401e04c3fSmrg			r600_src->swizzle[0] = 2;
175501e04c3fSmrg			r600_src->swizzle[1] = 2;
175601e04c3fSmrg			r600_src->swizzle[2] = 2;
175701e04c3fSmrg			r600_src->swizzle[3] = 2;
175801e04c3fSmrg			r600_src->sel = 0;
175901e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) {
176001e04c3fSmrg			r600_src->sel = 1;
176101e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) {
176201e04c3fSmrg			r600_src->sel = 3;
176301e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) {
176401e04c3fSmrg			r600_src->sel = 2;
176501e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) {
176601e04c3fSmrg			r600_src->sel = ctx->tess_input_info;
176701e04c3fSmrg			r600_src->swizzle[0] = 2;
176801e04c3fSmrg			r600_src->swizzle[1] = 2;
176901e04c3fSmrg			r600_src->swizzle[2] = 2;
177001e04c3fSmrg			r600_src->swizzle[3] = 2;
177101e04c3fSmrg		} else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
177201e04c3fSmrg			r600_src->sel = 0;
177301e04c3fSmrg			r600_src->swizzle[0] = 0;
177401e04c3fSmrg			r600_src->swizzle[1] = 0;
177501e04c3fSmrg			r600_src->swizzle[2] = 0;
177601e04c3fSmrg			r600_src->swizzle[3] = 0;
177701e04c3fSmrg		} else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
177801e04c3fSmrg			r600_src->sel = 0;
177901e04c3fSmrg			r600_src->swizzle[0] = 3;
178001e04c3fSmrg			r600_src->swizzle[1] = 3;
178101e04c3fSmrg			r600_src->swizzle[2] = 3;
178201e04c3fSmrg			r600_src->swizzle[3] = 3;
178301e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) {
178401e04c3fSmrg			r600_src->sel = load_block_grid_size(ctx, false);
178501e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
178601e04c3fSmrg			r600_src->sel = load_block_grid_size(ctx, true);
178701e04c3fSmrg		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
178801e04c3fSmrg			r600_src->sel = ctx->helper_invoc_reg;
178901e04c3fSmrg			r600_src->swizzle[0] = 0;
179001e04c3fSmrg			r600_src->swizzle[1] = 0;
179101e04c3fSmrg			r600_src->swizzle[2] = 0;
179201e04c3fSmrg			r600_src->swizzle[3] = 0;
1793af69d88dSmrg		}
17943464ebd5Sriastradh	} else {
17953464ebd5Sriastradh		if (tgsi_src->Register.Indirect)
17963464ebd5Sriastradh			r600_src->rel = V_SQ_REL_RELATIVE;
17973464ebd5Sriastradh		r600_src->sel = tgsi_src->Register.Index;
17983464ebd5Sriastradh		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
17993464ebd5Sriastradh	}
1800af69d88dSmrg	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
1801af69d88dSmrg		if (tgsi_src->Register.Dimension) {
1802af69d88dSmrg			r600_src->kc_bank = tgsi_src->Dimension.Index;
180301e04c3fSmrg			if (tgsi_src->Dimension.Indirect) {
180401e04c3fSmrg				r600_src->kc_rel = 1;
180501e04c3fSmrg			}
1806af69d88dSmrg		}
1807af69d88dSmrg	}
18083464ebd5Sriastradh}
18093464ebd5Sriastradh
1810af69d88dSmrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
181101e04c3fSmrg                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
1812af69d88dSmrg                                unsigned int dst_reg)
18133464ebd5Sriastradh{
1814af69d88dSmrg	struct r600_bytecode_vtx vtx;
18153464ebd5Sriastradh	unsigned int ar_reg;
18163464ebd5Sriastradh	int r;
18173464ebd5Sriastradh
18183464ebd5Sriastradh	if (offset) {
1819af69d88dSmrg		struct r600_bytecode_alu alu;
18203464ebd5Sriastradh
18213464ebd5Sriastradh		memset(&alu, 0, sizeof(alu));
18223464ebd5Sriastradh
1823af69d88dSmrg		alu.op = ALU_OP2_ADD_INT;
1824af69d88dSmrg		alu.src[0].sel = ctx->bc->ar_reg;
1825af69d88dSmrg		alu.src[0].chan = ar_chan;
18263464ebd5Sriastradh
18273464ebd5Sriastradh		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
18283464ebd5Sriastradh		alu.src[1].value = offset;
18293464ebd5Sriastradh
18303464ebd5Sriastradh		alu.dst.sel = dst_reg;
1831af69d88dSmrg		alu.dst.chan = ar_chan;
18323464ebd5Sriastradh		alu.dst.write = 1;
18333464ebd5Sriastradh		alu.last = 1;
18343464ebd5Sriastradh
1835af69d88dSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
18363464ebd5Sriastradh			return r;
18373464ebd5Sriastradh
18383464ebd5Sriastradh		ar_reg = dst_reg;
18393464ebd5Sriastradh	} else {
1840af69d88dSmrg		ar_reg = ctx->bc->ar_reg;
18413464ebd5Sriastradh	}
18423464ebd5Sriastradh
18433464ebd5Sriastradh	memset(&vtx, 0, sizeof(vtx));
1844af69d88dSmrg	vtx.buffer_id = cb_idx;
184501e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
18463464ebd5Sriastradh	vtx.src_gpr = ar_reg;
1847af69d88dSmrg	vtx.src_sel_x = ar_chan;
18483464ebd5Sriastradh	vtx.mega_fetch_count = 16;
18493464ebd5Sriastradh	vtx.dst_gpr = dst_reg;
18503464ebd5Sriastradh	vtx.dst_sel_x = 0;		/* SEL_X */
18513464ebd5Sriastradh	vtx.dst_sel_y = 1;		/* SEL_Y */
18523464ebd5Sriastradh	vtx.dst_sel_z = 2;		/* SEL_Z */
18533464ebd5Sriastradh	vtx.dst_sel_w = 3;		/* SEL_W */
18543464ebd5Sriastradh	vtx.data_format = FMT_32_32_32_32_FLOAT;
18553464ebd5Sriastradh	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
18563464ebd5Sriastradh	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
18573464ebd5Sriastradh	vtx.endian = r600_endian_swap(32);
185801e04c3fSmrg	vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;
18593464ebd5Sriastradh
1860af69d88dSmrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1861af69d88dSmrg		return r;
1862af69d88dSmrg
1863af69d88dSmrg	return 0;
1864af69d88dSmrg}
1865af69d88dSmrg
1866af69d88dSmrgstatic int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
1867af69d88dSmrg{
1868af69d88dSmrg	struct r600_bytecode_vtx vtx;
1869af69d88dSmrg	int r;
1870af69d88dSmrg	unsigned index = src->Register.Index;
1871af69d88dSmrg	unsigned vtx_id = src->Dimension.Index;
187201e04c3fSmrg	int offset_reg = ctx->gs_rotated_input[vtx_id / 3];
1873af69d88dSmrg	int offset_chan = vtx_id % 3;
187401e04c3fSmrg	int t2 = 0;
1875af69d88dSmrg
1876af69d88dSmrg	/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
1877af69d88dSmrg	 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
1878af69d88dSmrg
187901e04c3fSmrg	if (offset_reg == ctx->gs_rotated_input[0] && offset_chan == 2)
1880af69d88dSmrg		offset_chan = 3;
1881af69d88dSmrg
188201e04c3fSmrg	if (src->Dimension.Indirect || src->Register.Indirect)
188301e04c3fSmrg		t2 = r600_get_temp(ctx);
188401e04c3fSmrg
1885af69d88dSmrg	if (src->Dimension.Indirect) {
1886af69d88dSmrg		int treg[3];
1887af69d88dSmrg		struct r600_bytecode_alu alu;
1888af69d88dSmrg		int r, i;
188901e04c3fSmrg		unsigned addr_reg;
189001e04c3fSmrg		addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
189101e04c3fSmrg		if (src->DimIndirect.Index > 0) {
189201e04c3fSmrg			r = single_alu_op2(ctx, ALU_OP1_MOV,
189301e04c3fSmrg					   ctx->bc->ar_reg, 0,
189401e04c3fSmrg					   addr_reg, 0,
189501e04c3fSmrg					   0, 0);
189601e04c3fSmrg			if (r)
189701e04c3fSmrg				return r;
189801e04c3fSmrg		}
189901e04c3fSmrg		/*
1900af69d88dSmrg		   we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
1901af69d88dSmrg		   at least this is what fglrx seems to do. */
1902af69d88dSmrg		for (i = 0; i < 3; i++) {
1903af69d88dSmrg			treg[i] = r600_get_temp(ctx);
1904af69d88dSmrg		}
190501e04c3fSmrg		r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
190601e04c3fSmrg
1907af69d88dSmrg		for (i = 0; i < 3; i++) {
1908af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1909af69d88dSmrg			alu.op = ALU_OP1_MOV;
191001e04c3fSmrg			alu.src[0].sel = ctx->gs_rotated_input[0];
1911af69d88dSmrg			alu.src[0].chan = i == 2 ? 3 : i;
1912af69d88dSmrg			alu.dst.sel = treg[i];
1913af69d88dSmrg			alu.dst.chan = 0;
1914af69d88dSmrg			alu.dst.write = 1;
1915af69d88dSmrg			alu.last = 1;
1916af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
1917af69d88dSmrg			if (r)
1918af69d88dSmrg				return r;
1919af69d88dSmrg		}
1920af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1921af69d88dSmrg		alu.op = ALU_OP1_MOV;
1922af69d88dSmrg		alu.src[0].sel = treg[0];
1923af69d88dSmrg		alu.src[0].rel = 1;
1924af69d88dSmrg		alu.dst.sel = t2;
1925af69d88dSmrg		alu.dst.write = 1;
1926af69d88dSmrg		alu.last = 1;
1927af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1928af69d88dSmrg		if (r)
1929af69d88dSmrg			return r;
1930af69d88dSmrg		offset_reg = t2;
193101e04c3fSmrg		offset_chan = 0;
1932af69d88dSmrg	}
1933af69d88dSmrg
193401e04c3fSmrg	if (src->Register.Indirect) {
193501e04c3fSmrg		int addr_reg;
193601e04c3fSmrg		unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];
193701e04c3fSmrg
193801e04c3fSmrg		addr_reg = get_address_file_reg(ctx, src->Indirect.Index);
193901e04c3fSmrg
194001e04c3fSmrg		/* pull the value from index_reg */
194101e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
194201e04c3fSmrg				   t2, 1,
194301e04c3fSmrg				   addr_reg, 0,
194401e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, first);
194501e04c3fSmrg		if (r)
194601e04c3fSmrg			return r;
194701e04c3fSmrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
194801e04c3fSmrg				   t2, 0,
194901e04c3fSmrg				   t2, 1,
195001e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 4,
195101e04c3fSmrg				   offset_reg, offset_chan);
195201e04c3fSmrg		if (r)
195301e04c3fSmrg			return r;
195401e04c3fSmrg		offset_reg = t2;
195501e04c3fSmrg		offset_chan = 0;
195601e04c3fSmrg		index = src->Register.Index - first;
195701e04c3fSmrg	}
1958af69d88dSmrg
1959af69d88dSmrg	memset(&vtx, 0, sizeof(vtx));
1960af69d88dSmrg	vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
196101e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
1962af69d88dSmrg	vtx.src_gpr = offset_reg;
1963af69d88dSmrg	vtx.src_sel_x = offset_chan;
1964af69d88dSmrg	vtx.offset = index * 16; /*bytes*/
1965af69d88dSmrg	vtx.mega_fetch_count = 16;
1966af69d88dSmrg	vtx.dst_gpr = dst_reg;
1967af69d88dSmrg	vtx.dst_sel_x = 0;		/* SEL_X */
1968af69d88dSmrg	vtx.dst_sel_y = 1;		/* SEL_Y */
1969af69d88dSmrg	vtx.dst_sel_z = 2;		/* SEL_Z */
1970af69d88dSmrg	vtx.dst_sel_w = 3;		/* SEL_W */
1971af69d88dSmrg	if (ctx->bc->chip_class >= EVERGREEN) {
1972af69d88dSmrg		vtx.use_const_fields = 1;
1973af69d88dSmrg	} else {
1974af69d88dSmrg		vtx.data_format = FMT_32_32_32_32_FLOAT;
1975af69d88dSmrg	}
1976af69d88dSmrg
1977af69d88dSmrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
19783464ebd5Sriastradh		return r;
19793464ebd5Sriastradh
19803464ebd5Sriastradh	return 0;
19813464ebd5Sriastradh}
19823464ebd5Sriastradh
1983af69d88dSmrgstatic int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
1984af69d88dSmrg{
1985af69d88dSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
198601e04c3fSmrg	unsigned i;
1987af69d88dSmrg
1988af69d88dSmrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1989af69d88dSmrg		struct tgsi_full_src_register *src = &inst->Src[i];
1990af69d88dSmrg
1991af69d88dSmrg		if (src->Register.File == TGSI_FILE_INPUT) {
1992af69d88dSmrg			if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
1993af69d88dSmrg				/* primitive id is in R0.z */
1994af69d88dSmrg				ctx->src[i].sel = 0;
1995af69d88dSmrg				ctx->src[i].swizzle[0] = 2;
1996af69d88dSmrg			}
1997af69d88dSmrg		}
1998af69d88dSmrg		if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
1999af69d88dSmrg			int treg = r600_get_temp(ctx);
2000af69d88dSmrg
2001af69d88dSmrg			fetch_gs_input(ctx, src, treg);
2002af69d88dSmrg			ctx->src[i].sel = treg;
200301e04c3fSmrg			ctx->src[i].rel = 0;
2004af69d88dSmrg		}
2005af69d88dSmrg	}
2006af69d88dSmrg	return 0;
2007af69d88dSmrg}
2008af69d88dSmrg
20093464ebd5Sriastradh
201001e04c3fSmrg/* Tessellation shaders pass outputs to the next shader using LDS.
201101e04c3fSmrg *
201201e04c3fSmrg * LS outputs = TCS(HS) inputs
201301e04c3fSmrg * TCS(HS) outputs = TES(DS) inputs
201401e04c3fSmrg *
201501e04c3fSmrg * The LDS layout is:
201601e04c3fSmrg * - TCS inputs for patch 0
201701e04c3fSmrg * - TCS inputs for patch 1
201801e04c3fSmrg * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
201901e04c3fSmrg * - ...
202001e04c3fSmrg * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
202101e04c3fSmrg * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
202201e04c3fSmrg * - TCS outputs for patch 1
202301e04c3fSmrg * - Per-patch TCS outputs for patch 1
202401e04c3fSmrg * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
202501e04c3fSmrg * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
202601e04c3fSmrg * - ...
202701e04c3fSmrg *
202801e04c3fSmrg * All three shaders VS(LS), TCS, TES share the same LDS space.
202901e04c3fSmrg */
203001e04c3fSmrg/* this will return with the dw address in temp_reg.x */
203101e04c3fSmrgstatic int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg,
203201e04c3fSmrg				 const struct tgsi_full_dst_register *dst,
203301e04c3fSmrg				 const struct tgsi_full_src_register *src,
203401e04c3fSmrg				 int stride_bytes_reg, int stride_bytes_chan)
203501e04c3fSmrg{
203601e04c3fSmrg	struct tgsi_full_dst_register reg;
203701e04c3fSmrg	ubyte *name, *index, *array_first;
203801e04c3fSmrg	int r;
203901e04c3fSmrg	int param;
204001e04c3fSmrg	struct tgsi_shader_info *info = &ctx->info;
204101e04c3fSmrg	/* Set the register description. The address computation is the same
204201e04c3fSmrg	 * for sources and destinations. */
204301e04c3fSmrg	if (src) {
204401e04c3fSmrg		reg.Register.File = src->Register.File;
204501e04c3fSmrg		reg.Register.Index = src->Register.Index;
204601e04c3fSmrg		reg.Register.Indirect = src->Register.Indirect;
204701e04c3fSmrg		reg.Register.Dimension = src->Register.Dimension;
204801e04c3fSmrg		reg.Indirect = src->Indirect;
204901e04c3fSmrg		reg.Dimension = src->Dimension;
205001e04c3fSmrg		reg.DimIndirect = src->DimIndirect;
205101e04c3fSmrg	} else
205201e04c3fSmrg		reg = *dst;
205301e04c3fSmrg
205401e04c3fSmrg	/* If the register is 2-dimensional (e.g. an array of vertices
205501e04c3fSmrg	 * in a primitive), calculate the base address of the vertex. */
205601e04c3fSmrg	if (reg.Register.Dimension) {
205701e04c3fSmrg		int sel, chan;
205801e04c3fSmrg		if (reg.Dimension.Indirect) {
205901e04c3fSmrg			unsigned addr_reg;
206001e04c3fSmrg			assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS);
206101e04c3fSmrg
206201e04c3fSmrg			addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index);
206301e04c3fSmrg			/* pull the value from index_reg */
206401e04c3fSmrg			sel = addr_reg;
206501e04c3fSmrg			chan = 0;
206601e04c3fSmrg		} else {
206701e04c3fSmrg			sel = V_SQ_ALU_SRC_LITERAL;
206801e04c3fSmrg			chan = reg.Dimension.Index;
20693464ebd5Sriastradh		}
20703464ebd5Sriastradh
207101e04c3fSmrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
207201e04c3fSmrg				   temp_reg, 0,
207301e04c3fSmrg				   stride_bytes_reg, stride_bytes_chan,
207401e04c3fSmrg				   sel, chan,
207501e04c3fSmrg				   temp_reg, 0);
207601e04c3fSmrg		if (r)
207701e04c3fSmrg			return r;
207801e04c3fSmrg	}
20793464ebd5Sriastradh
208001e04c3fSmrg	if (reg.Register.File == TGSI_FILE_INPUT) {
208101e04c3fSmrg		name = info->input_semantic_name;
208201e04c3fSmrg		index = info->input_semantic_index;
208301e04c3fSmrg		array_first = info->input_array_first;
208401e04c3fSmrg	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
208501e04c3fSmrg		name = info->output_semantic_name;
208601e04c3fSmrg		index = info->output_semantic_index;
208701e04c3fSmrg		array_first = info->output_array_first;
208801e04c3fSmrg	} else {
208901e04c3fSmrg		assert(0);
209001e04c3fSmrg		return -1;
209101e04c3fSmrg	}
209201e04c3fSmrg	if (reg.Register.Indirect) {
209301e04c3fSmrg		int addr_reg;
209401e04c3fSmrg		int first;
209501e04c3fSmrg		/* Add the relative address of the element. */
209601e04c3fSmrg		if (reg.Indirect.ArrayID)
209701e04c3fSmrg			first = array_first[reg.Indirect.ArrayID];
209801e04c3fSmrg		else
209901e04c3fSmrg			first = reg.Register.Index;
210001e04c3fSmrg
210101e04c3fSmrg		addr_reg = get_address_file_reg(ctx, reg.Indirect.Index);
210201e04c3fSmrg
210301e04c3fSmrg		/* pull the value from index_reg */
210401e04c3fSmrg		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
210501e04c3fSmrg				   temp_reg, 0,
210601e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 16,
210701e04c3fSmrg				   addr_reg, 0,
210801e04c3fSmrg				   temp_reg, 0);
210901e04c3fSmrg		if (r)
211001e04c3fSmrg			return r;
211101e04c3fSmrg
211201e04c3fSmrg		param = r600_get_lds_unique_index(name[first],
211301e04c3fSmrg						  index[first]);
211401e04c3fSmrg
211501e04c3fSmrg	} else {
211601e04c3fSmrg		param = r600_get_lds_unique_index(name[reg.Register.Index],
211701e04c3fSmrg						  index[reg.Register.Index]);
211801e04c3fSmrg	}
211901e04c3fSmrg
212001e04c3fSmrg	/* add to base_addr - passed in temp_reg.x */
212101e04c3fSmrg	if (param) {
212201e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
212301e04c3fSmrg				   temp_reg, 0,
212401e04c3fSmrg				   temp_reg, 0,
212501e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, param * 16);
212601e04c3fSmrg		if (r)
212701e04c3fSmrg			return r;
212801e04c3fSmrg
212901e04c3fSmrg	}
213001e04c3fSmrg	return 0;
213101e04c3fSmrg}
213201e04c3fSmrg
213301e04c3fSmrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
213401e04c3fSmrg			       unsigned dst_reg, unsigned mask)
213501e04c3fSmrg{
213601e04c3fSmrg	struct r600_bytecode_alu alu;
213701e04c3fSmrg	int r, i, lasti;
213801e04c3fSmrg
213901e04c3fSmrg	if ((ctx->bc->cf_last->ndw>>1) >= 0x60)
214001e04c3fSmrg		ctx->bc->force_add_cf = 1;
214101e04c3fSmrg
214201e04c3fSmrg	lasti = tgsi_last_instruction(mask);
214301e04c3fSmrg	for (i = 1; i <= lasti; i++) {
214401e04c3fSmrg		if (!(mask & (1 << i)))
214501e04c3fSmrg			continue;
214601e04c3fSmrg
214701e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
214801e04c3fSmrg				   temp_reg, i,
214901e04c3fSmrg				   temp_reg, 0,
215001e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
215101e04c3fSmrg		if (r)
215201e04c3fSmrg			return r;
215301e04c3fSmrg	}
215401e04c3fSmrg	for (i = 0; i <= lasti; i++) {
215501e04c3fSmrg		if (!(mask & (1 << i)))
215601e04c3fSmrg			continue;
215701e04c3fSmrg
215801e04c3fSmrg		/* emit an LDS_READ_RET */
215901e04c3fSmrg		memset(&alu, 0, sizeof(alu));
216001e04c3fSmrg		alu.op = LDS_OP1_LDS_READ_RET;
216101e04c3fSmrg		alu.src[0].sel = temp_reg;
216201e04c3fSmrg		alu.src[0].chan = i;
216301e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_0;
216401e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
216501e04c3fSmrg		alu.dst.chan = 0;
216601e04c3fSmrg		alu.is_lds_idx_op = true;
216701e04c3fSmrg		alu.last = 1;
216801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
216901e04c3fSmrg		if (r)
217001e04c3fSmrg			return r;
217101e04c3fSmrg	}
217201e04c3fSmrg	for (i = 0; i <= lasti; i++) {
217301e04c3fSmrg		if (!(mask & (1 << i)))
217401e04c3fSmrg			continue;
217501e04c3fSmrg
217601e04c3fSmrg		/* then read from LDS_OQ_A_POP */
217701e04c3fSmrg		memset(&alu, 0, sizeof(alu));
217801e04c3fSmrg
217901e04c3fSmrg		alu.op = ALU_OP1_MOV;
218001e04c3fSmrg		alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
218101e04c3fSmrg		alu.src[0].chan = 0;
218201e04c3fSmrg		alu.dst.sel = dst_reg;
218301e04c3fSmrg		alu.dst.chan = i;
218401e04c3fSmrg		alu.dst.write = 1;
218501e04c3fSmrg		alu.last = 1;
218601e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
218701e04c3fSmrg		if (r)
218801e04c3fSmrg			return r;
218901e04c3fSmrg	}
219001e04c3fSmrg	return 0;
219101e04c3fSmrg}
219201e04c3fSmrg
219301e04c3fSmrgstatic int fetch_mask(struct tgsi_src_register *reg)
219401e04c3fSmrg{
219501e04c3fSmrg	int mask = 0;
219601e04c3fSmrg	mask |= 1 << reg->SwizzleX;
219701e04c3fSmrg	mask |= 1 << reg->SwizzleY;
219801e04c3fSmrg	mask |= 1 << reg->SwizzleZ;
219901e04c3fSmrg	mask |= 1 << reg->SwizzleW;
220001e04c3fSmrg	return mask;
220101e04c3fSmrg}
220201e04c3fSmrg
220301e04c3fSmrgstatic int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
220401e04c3fSmrg{
220501e04c3fSmrg	int r;
220601e04c3fSmrg	unsigned temp_reg = r600_get_temp(ctx);
220701e04c3fSmrg
220801e04c3fSmrg	r = get_lds_offset0(ctx, 2, temp_reg,
220901e04c3fSmrg			    src->Register.Dimension ? false : true);
221001e04c3fSmrg	if (r)
221101e04c3fSmrg		return r;
221201e04c3fSmrg
221301e04c3fSmrg	/* the base address is now in temp.x */
221401e04c3fSmrg	r = r600_get_byte_address(ctx, temp_reg,
221501e04c3fSmrg				  NULL, src, ctx->tess_output_info, 1);
221601e04c3fSmrg	if (r)
221701e04c3fSmrg		return r;
221801e04c3fSmrg
221901e04c3fSmrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
222001e04c3fSmrg	if (r)
222101e04c3fSmrg		return r;
222201e04c3fSmrg	return 0;
222301e04c3fSmrg}
222401e04c3fSmrg
222501e04c3fSmrgstatic int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
222601e04c3fSmrg{
222701e04c3fSmrg	int r;
222801e04c3fSmrg	unsigned temp_reg = r600_get_temp(ctx);
222901e04c3fSmrg
223001e04c3fSmrg	/* t.x = ips * r0.y */
223101e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
223201e04c3fSmrg			   temp_reg, 0,
223301e04c3fSmrg			   ctx->tess_input_info, 0,
223401e04c3fSmrg			   0, 1);
223501e04c3fSmrg
223601e04c3fSmrg	if (r)
223701e04c3fSmrg		return r;
223801e04c3fSmrg
223901e04c3fSmrg	/* the base address is now in temp.x */
224001e04c3fSmrg	r = r600_get_byte_address(ctx, temp_reg,
224101e04c3fSmrg				  NULL, src, ctx->tess_input_info, 1);
224201e04c3fSmrg	if (r)
224301e04c3fSmrg		return r;
224401e04c3fSmrg
224501e04c3fSmrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
224601e04c3fSmrg	if (r)
224701e04c3fSmrg		return r;
224801e04c3fSmrg	return 0;
224901e04c3fSmrg}
225001e04c3fSmrg
225101e04c3fSmrgstatic int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
225201e04c3fSmrg{
225301e04c3fSmrg	int r;
225401e04c3fSmrg	unsigned temp_reg = r600_get_temp(ctx);
225501e04c3fSmrg
225601e04c3fSmrg	r = get_lds_offset0(ctx, 1, temp_reg,
225701e04c3fSmrg			    src->Register.Dimension ? false : true);
225801e04c3fSmrg	if (r)
225901e04c3fSmrg		return r;
226001e04c3fSmrg	/* the base address is now in temp.x */
226101e04c3fSmrg	r = r600_get_byte_address(ctx, temp_reg,
226201e04c3fSmrg				  NULL, src,
226301e04c3fSmrg				  ctx->tess_output_info, 1);
226401e04c3fSmrg	if (r)
226501e04c3fSmrg		return r;
226601e04c3fSmrg
226701e04c3fSmrg	r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register));
226801e04c3fSmrg	if (r)
226901e04c3fSmrg		return r;
227001e04c3fSmrg	return 0;
227101e04c3fSmrg}
227201e04c3fSmrg
227301e04c3fSmrgstatic int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx)
227401e04c3fSmrg{
227501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
227601e04c3fSmrg	unsigned i;
227701e04c3fSmrg
227801e04c3fSmrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
227901e04c3fSmrg		struct tgsi_full_src_register *src = &inst->Src[i];
228001e04c3fSmrg
228101e04c3fSmrg		if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) {
228201e04c3fSmrg			int treg = r600_get_temp(ctx);
228301e04c3fSmrg			fetch_tes_input(ctx, src, treg);
228401e04c3fSmrg			ctx->src[i].sel = treg;
228501e04c3fSmrg			ctx->src[i].rel = 0;
228601e04c3fSmrg		}
228701e04c3fSmrg		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) {
228801e04c3fSmrg			int treg = r600_get_temp(ctx);
228901e04c3fSmrg			fetch_tcs_input(ctx, src, treg);
229001e04c3fSmrg			ctx->src[i].sel = treg;
229101e04c3fSmrg			ctx->src[i].rel = 0;
229201e04c3fSmrg		}
229301e04c3fSmrg		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) {
229401e04c3fSmrg			int treg = r600_get_temp(ctx);
229501e04c3fSmrg			fetch_tcs_output(ctx, src, treg);
229601e04c3fSmrg			ctx->src[i].sel = treg;
229701e04c3fSmrg			ctx->src[i].rel = 0;
229801e04c3fSmrg		}
229901e04c3fSmrg	}
230001e04c3fSmrg	return 0;
230101e04c3fSmrg}
230201e04c3fSmrg
230301e04c3fSmrgstatic int tgsi_split_constant(struct r600_shader_ctx *ctx)
230401e04c3fSmrg{
230501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
230601e04c3fSmrg	struct r600_bytecode_alu alu;
230701e04c3fSmrg	int i, j, k, nconst, r;
230801e04c3fSmrg
230901e04c3fSmrg	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
231001e04c3fSmrg		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
231101e04c3fSmrg			nconst++;
231201e04c3fSmrg		}
231301e04c3fSmrg		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
231401e04c3fSmrg	}
231501e04c3fSmrg	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
231601e04c3fSmrg		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
231701e04c3fSmrg			continue;
231801e04c3fSmrg		}
231901e04c3fSmrg
232001e04c3fSmrg		if (ctx->src[i].rel) {
232101e04c3fSmrg			int chan = inst->Src[i].Indirect.Swizzle;
232201e04c3fSmrg			int treg = r600_get_temp(ctx);
232301e04c3fSmrg			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg)))
232401e04c3fSmrg				return r;
232501e04c3fSmrg
232601e04c3fSmrg			ctx->src[i].kc_bank = 0;
232701e04c3fSmrg			ctx->src[i].kc_rel = 0;
232801e04c3fSmrg			ctx->src[i].sel = treg;
232901e04c3fSmrg			ctx->src[i].rel = 0;
23303464ebd5Sriastradh			j--;
23313464ebd5Sriastradh		} else if (j > 0) {
23323464ebd5Sriastradh			int treg = r600_get_temp(ctx);
23333464ebd5Sriastradh			for (k = 0; k < 4; k++) {
2334af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2335af69d88dSmrg				alu.op = ALU_OP1_MOV;
23363464ebd5Sriastradh				alu.src[0].sel = ctx->src[i].sel;
23373464ebd5Sriastradh				alu.src[0].chan = k;
23383464ebd5Sriastradh				alu.src[0].rel = ctx->src[i].rel;
2339af69d88dSmrg				alu.src[0].kc_bank = ctx->src[i].kc_bank;
234001e04c3fSmrg				alu.src[0].kc_rel = ctx->src[i].kc_rel;
23413464ebd5Sriastradh				alu.dst.sel = treg;
23423464ebd5Sriastradh				alu.dst.chan = k;
23433464ebd5Sriastradh				alu.dst.write = 1;
23443464ebd5Sriastradh				if (k == 3)
23453464ebd5Sriastradh					alu.last = 1;
2346af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
23473464ebd5Sriastradh				if (r)
23483464ebd5Sriastradh					return r;
23493464ebd5Sriastradh			}
23503464ebd5Sriastradh			ctx->src[i].sel = treg;
23513464ebd5Sriastradh			ctx->src[i].rel =0;
23523464ebd5Sriastradh			j--;
23533464ebd5Sriastradh		}
23543464ebd5Sriastradh	}
23553464ebd5Sriastradh	return 0;
23563464ebd5Sriastradh}
23573464ebd5Sriastradh
23583464ebd5Sriastradh/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
23593464ebd5Sriastradhstatic int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
23603464ebd5Sriastradh{
23613464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2362af69d88dSmrg	struct r600_bytecode_alu alu;
23633464ebd5Sriastradh	int i, j, k, nliteral, r;
23643464ebd5Sriastradh
23653464ebd5Sriastradh	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
23663464ebd5Sriastradh		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
23673464ebd5Sriastradh			nliteral++;
23683464ebd5Sriastradh		}
23693464ebd5Sriastradh	}
23703464ebd5Sriastradh	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
23713464ebd5Sriastradh		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
23723464ebd5Sriastradh			int treg = r600_get_temp(ctx);
23733464ebd5Sriastradh			for (k = 0; k < 4; k++) {
2374af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2375af69d88dSmrg				alu.op = ALU_OP1_MOV;
23763464ebd5Sriastradh				alu.src[0].sel = ctx->src[i].sel;
23773464ebd5Sriastradh				alu.src[0].chan = k;
23783464ebd5Sriastradh				alu.src[0].value = ctx->src[i].value[k];
23793464ebd5Sriastradh				alu.dst.sel = treg;
23803464ebd5Sriastradh				alu.dst.chan = k;
23813464ebd5Sriastradh				alu.dst.write = 1;
23823464ebd5Sriastradh				if (k == 3)
23833464ebd5Sriastradh					alu.last = 1;
2384af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
23853464ebd5Sriastradh				if (r)
23863464ebd5Sriastradh					return r;
23873464ebd5Sriastradh			}
23883464ebd5Sriastradh			ctx->src[i].sel = treg;
23893464ebd5Sriastradh			j--;
23903464ebd5Sriastradh		}
23913464ebd5Sriastradh	}
23923464ebd5Sriastradh	return 0;
23933464ebd5Sriastradh}
23943464ebd5Sriastradh
2395af69d88dSmrgstatic int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
23963464ebd5Sriastradh{
2397af69d88dSmrg	int i, r, count = ctx->shader->ninput;
23983464ebd5Sriastradh
2399af69d88dSmrg	for (i = 0; i < count; i++) {
2400af69d88dSmrg		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
2401af69d88dSmrg			r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
2402af69d88dSmrg			if (r)
2403af69d88dSmrg				return r;
2404af69d88dSmrg		}
2405af69d88dSmrg	}
2406af69d88dSmrg	return 0;
2407af69d88dSmrg}
2408af69d88dSmrg
240901e04c3fSmrgstatic int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
241001e04c3fSmrg						  int stream, unsigned *stream_item_size UNUSED)
2411af69d88dSmrg{
2412af69d88dSmrg	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
241301e04c3fSmrg	unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
241401e04c3fSmrg	int j, r;
241501e04c3fSmrg	unsigned i;
2416af69d88dSmrg
2417af69d88dSmrg	/* Sanity checking. */
241801e04c3fSmrg	if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
2419af69d88dSmrg		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
2420af69d88dSmrg		r = -EINVAL;
2421af69d88dSmrg		goto out_err;
2422af69d88dSmrg	}
2423af69d88dSmrg	for (i = 0; i < so->num_outputs; i++) {
2424af69d88dSmrg		if (so->output[i].output_buffer >= 4) {
2425af69d88dSmrg			R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
2426af69d88dSmrg				 so->output[i].output_buffer);
2427af69d88dSmrg			r = -EINVAL;
2428af69d88dSmrg			goto out_err;
2429af69d88dSmrg		}
2430af69d88dSmrg	}
2431af69d88dSmrg
2432af69d88dSmrg	/* Initialize locations where the outputs are stored. */
2433af69d88dSmrg	for (i = 0; i < so->num_outputs; i++) {
2434af69d88dSmrg
243501e04c3fSmrg		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
243601e04c3fSmrg		start_comp[i] = so->output[i].start_component;
2437af69d88dSmrg		/* Lower outputs with dst_offset < start_component.
2438af69d88dSmrg		 *
2439af69d88dSmrg		 * We can only output 4D vectors with a write mask, e.g. we can
2440af69d88dSmrg		 * only output the W component at offset 3, etc. If we want
2441af69d88dSmrg		 * to store Y, Z, or W at buffer offset 0, we need to use MOV
2442af69d88dSmrg		 * to move it to X and output X. */
2443af69d88dSmrg		if (so->output[i].dst_offset < so->output[i].start_component) {
2444af69d88dSmrg			unsigned tmp = r600_get_temp(ctx);
2445af69d88dSmrg
2446af69d88dSmrg			for (j = 0; j < so->output[i].num_components; j++) {
2447af69d88dSmrg				struct r600_bytecode_alu alu;
2448af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2449af69d88dSmrg				alu.op = ALU_OP1_MOV;
2450af69d88dSmrg				alu.src[0].sel = so_gpr[i];
2451af69d88dSmrg				alu.src[0].chan = so->output[i].start_component + j;
2452af69d88dSmrg
2453af69d88dSmrg				alu.dst.sel = tmp;
2454af69d88dSmrg				alu.dst.chan = j;
2455af69d88dSmrg				alu.dst.write = 1;
2456af69d88dSmrg				if (j == so->output[i].num_components - 1)
2457af69d88dSmrg					alu.last = 1;
2458af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
2459af69d88dSmrg				if (r)
2460af69d88dSmrg					return r;
2461af69d88dSmrg			}
246201e04c3fSmrg			start_comp[i] = 0;
2463af69d88dSmrg			so_gpr[i] = tmp;
2464af69d88dSmrg		}
2465af69d88dSmrg	}
2466af69d88dSmrg
2467af69d88dSmrg	/* Write outputs to buffers. */
2468af69d88dSmrg	for (i = 0; i < so->num_outputs; i++) {
2469af69d88dSmrg		struct r600_bytecode_output output;
2470af69d88dSmrg
247101e04c3fSmrg		if (stream != -1 && stream != so->output[i].stream)
247201e04c3fSmrg			continue;
247301e04c3fSmrg
2474af69d88dSmrg		memset(&output, 0, sizeof(struct r600_bytecode_output));
2475af69d88dSmrg		output.gpr = so_gpr[i];
247601e04c3fSmrg		output.elem_size = so->output[i].num_components - 1;
247701e04c3fSmrg		if (output.elem_size == 2)
247801e04c3fSmrg			output.elem_size = 3; // 3 not supported, write 4 with junk at end
247901e04c3fSmrg		output.array_base = so->output[i].dst_offset - start_comp[i];
2480af69d88dSmrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
2481af69d88dSmrg		output.burst_count = 1;
2482af69d88dSmrg		/* array_size is an upper limit for the burst_count
2483af69d88dSmrg		 * with MEM_STREAM instructions */
2484af69d88dSmrg		output.array_size = 0xFFF;
248501e04c3fSmrg		output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
248601e04c3fSmrg
2487af69d88dSmrg		if (ctx->bc->chip_class >= EVERGREEN) {
2488af69d88dSmrg			switch (so->output[i].output_buffer) {
2489af69d88dSmrg			case 0:
2490af69d88dSmrg				output.op = CF_OP_MEM_STREAM0_BUF0;
2491af69d88dSmrg				break;
2492af69d88dSmrg			case 1:
2493af69d88dSmrg				output.op = CF_OP_MEM_STREAM0_BUF1;
2494af69d88dSmrg				break;
2495af69d88dSmrg			case 2:
2496af69d88dSmrg				output.op = CF_OP_MEM_STREAM0_BUF2;
2497af69d88dSmrg				break;
2498af69d88dSmrg			case 3:
2499af69d88dSmrg				output.op = CF_OP_MEM_STREAM0_BUF3;
2500af69d88dSmrg				break;
2501af69d88dSmrg			}
250201e04c3fSmrg			output.op += so->output[i].stream * 4;
250301e04c3fSmrg			assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
250401e04c3fSmrg			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
2505af69d88dSmrg		} else {
2506af69d88dSmrg			switch (so->output[i].output_buffer) {
2507af69d88dSmrg			case 0:
2508af69d88dSmrg				output.op = CF_OP_MEM_STREAM0;
2509af69d88dSmrg				break;
2510af69d88dSmrg			case 1:
2511af69d88dSmrg				output.op = CF_OP_MEM_STREAM1;
2512af69d88dSmrg				break;
2513af69d88dSmrg			case 2:
2514af69d88dSmrg				output.op = CF_OP_MEM_STREAM2;
2515af69d88dSmrg				break;
2516af69d88dSmrg			case 3:
2517af69d88dSmrg				output.op = CF_OP_MEM_STREAM3;
2518af69d88dSmrg					break;
2519af69d88dSmrg			}
252001e04c3fSmrg			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
2521af69d88dSmrg		}
2522af69d88dSmrg		r = r600_bytecode_add_output(ctx->bc, &output);
2523af69d88dSmrg		if (r)
2524af69d88dSmrg			goto out_err;
2525af69d88dSmrg	}
2526af69d88dSmrg	return 0;
2527af69d88dSmrgout_err:
2528af69d88dSmrg	return r;
2529af69d88dSmrg}
2530af69d88dSmrg
2531af69d88dSmrgstatic void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
2532af69d88dSmrg{
2533af69d88dSmrg	struct r600_bytecode_alu alu;
2534af69d88dSmrg	unsigned reg;
2535af69d88dSmrg
2536af69d88dSmrg	if (!ctx->shader->vs_out_edgeflag)
2537af69d88dSmrg		return;
2538af69d88dSmrg
2539af69d88dSmrg	reg = ctx->shader->output[ctx->edgeflag_output].gpr;
2540af69d88dSmrg
2541af69d88dSmrg	/* clamp(x, 0, 1) */
2542af69d88dSmrg	memset(&alu, 0, sizeof(alu));
2543af69d88dSmrg	alu.op = ALU_OP1_MOV;
2544af69d88dSmrg	alu.src[0].sel = reg;
2545af69d88dSmrg	alu.dst.sel = reg;
2546af69d88dSmrg	alu.dst.write = 1;
2547af69d88dSmrg	alu.dst.clamp = 1;
2548af69d88dSmrg	alu.last = 1;
2549af69d88dSmrg	r600_bytecode_add_alu(ctx->bc, &alu);
2550af69d88dSmrg
2551af69d88dSmrg	memset(&alu, 0, sizeof(alu));
2552af69d88dSmrg	alu.op = ALU_OP1_FLT_TO_INT;
2553af69d88dSmrg	alu.src[0].sel = reg;
2554af69d88dSmrg	alu.dst.sel = reg;
2555af69d88dSmrg	alu.dst.write = 1;
2556af69d88dSmrg	alu.last = 1;
2557af69d88dSmrg	r600_bytecode_add_alu(ctx->bc, &alu);
2558af69d88dSmrg}
2559af69d88dSmrg
25607ec681f3Smrgint generate_gs_copy_shader(struct r600_context *rctx,
25617ec681f3Smrg                            struct r600_pipe_shader *gs,
25627ec681f3Smrg                            struct pipe_stream_output_info *so)
2563af69d88dSmrg{
2564af69d88dSmrg	struct r600_shader_ctx ctx = {};
2565af69d88dSmrg	struct r600_shader *gs_shader = &gs->shader;
2566af69d88dSmrg	struct r600_pipe_shader *cshader;
256701e04c3fSmrg	unsigned ocnt = gs_shader->noutput;
2568af69d88dSmrg	struct r600_bytecode_alu alu;
2569af69d88dSmrg	struct r600_bytecode_vtx vtx;
2570af69d88dSmrg	struct r600_bytecode_output output;
2571af69d88dSmrg	struct r600_bytecode_cf *cf_jump, *cf_pop,
2572af69d88dSmrg		*last_exp_pos = NULL, *last_exp_param = NULL;
257301e04c3fSmrg	int next_clip_pos = 61, next_param = 0;
257401e04c3fSmrg	unsigned i, j;
257501e04c3fSmrg	int ring;
257601e04c3fSmrg	bool only_ring_0 = true;
2577af69d88dSmrg	cshader = calloc(1, sizeof(struct r600_pipe_shader));
2578af69d88dSmrg	if (!cshader)
2579af69d88dSmrg		return 0;
2580af69d88dSmrg
2581af69d88dSmrg	memcpy(cshader->shader.output, gs_shader->output, ocnt *
2582af69d88dSmrg	       sizeof(struct r600_shader_io));
2583af69d88dSmrg
2584af69d88dSmrg	cshader->shader.noutput = ocnt;
2585af69d88dSmrg
2586af69d88dSmrg	ctx.shader = &cshader->shader;
2587af69d88dSmrg	ctx.bc = &ctx.shader->bc;
258801e04c3fSmrg	ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX;
2589af69d88dSmrg
2590af69d88dSmrg	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
2591af69d88dSmrg			   rctx->screen->has_compressed_msaa_texturing);
2592af69d88dSmrg
2593af69d88dSmrg	ctx.bc->isa = rctx->isa;
2594af69d88dSmrg
259501e04c3fSmrg	cf_jump = NULL;
259601e04c3fSmrg	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
259701e04c3fSmrg
2598af69d88dSmrg	/* R0.x = R0.x & 0x3fffffff */
2599af69d88dSmrg	memset(&alu, 0, sizeof(alu));
2600af69d88dSmrg	alu.op = ALU_OP2_AND_INT;
2601af69d88dSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2602af69d88dSmrg	alu.src[1].value = 0x3fffffff;
2603af69d88dSmrg	alu.dst.write = 1;
2604af69d88dSmrg	r600_bytecode_add_alu(ctx.bc, &alu);
2605af69d88dSmrg
2606af69d88dSmrg	/* R0.y = R0.x >> 30 */
2607af69d88dSmrg	memset(&alu, 0, sizeof(alu));
2608af69d88dSmrg	alu.op = ALU_OP2_LSHR_INT;
2609af69d88dSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2610af69d88dSmrg	alu.src[1].value = 0x1e;
2611af69d88dSmrg	alu.dst.chan = 1;
2612af69d88dSmrg	alu.dst.write = 1;
2613af69d88dSmrg	alu.last = 1;
2614af69d88dSmrg	r600_bytecode_add_alu(ctx.bc, &alu);
2615af69d88dSmrg
2616af69d88dSmrg	/* fetch vertex data from GSVS ring */
2617af69d88dSmrg	for (i = 0; i < ocnt; ++i) {
2618af69d88dSmrg		struct r600_shader_io *out = &ctx.shader->output[i];
261901e04c3fSmrg
2620af69d88dSmrg		out->gpr = i + 1;
2621af69d88dSmrg		out->ring_offset = i * 16;
2622af69d88dSmrg
2623af69d88dSmrg		memset(&vtx, 0, sizeof(vtx));
2624af69d88dSmrg		vtx.op = FETCH_OP_VFETCH;
2625af69d88dSmrg		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
262601e04c3fSmrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
262701e04c3fSmrg		vtx.mega_fetch_count = 16;
2628af69d88dSmrg		vtx.offset = out->ring_offset;
2629af69d88dSmrg		vtx.dst_gpr = out->gpr;
263001e04c3fSmrg		vtx.src_gpr = 0;
2631af69d88dSmrg		vtx.dst_sel_x = 0;
2632af69d88dSmrg		vtx.dst_sel_y = 1;
2633af69d88dSmrg		vtx.dst_sel_z = 2;
2634af69d88dSmrg		vtx.dst_sel_w = 3;
2635af69d88dSmrg		if (rctx->b.chip_class >= EVERGREEN) {
2636af69d88dSmrg			vtx.use_const_fields = 1;
2637af69d88dSmrg		} else {
2638af69d88dSmrg			vtx.data_format = FMT_32_32_32_32_FLOAT;
2639af69d88dSmrg		}
2640af69d88dSmrg
2641af69d88dSmrg		r600_bytecode_add_vtx(ctx.bc, &vtx);
2642af69d88dSmrg	}
264301e04c3fSmrg	ctx.temp_reg = i + 1;
264401e04c3fSmrg	for (ring = 3; ring >= 0; --ring) {
264501e04c3fSmrg		bool enabled = false;
264601e04c3fSmrg		for (i = 0; i < so->num_outputs; i++) {
264701e04c3fSmrg			if (so->output[i].stream == ring) {
264801e04c3fSmrg				enabled = true;
264901e04c3fSmrg				if (ring > 0)
265001e04c3fSmrg					only_ring_0 = false;
265101e04c3fSmrg				break;
265201e04c3fSmrg			}
265301e04c3fSmrg		}
265401e04c3fSmrg		if (ring != 0 && !enabled) {
265501e04c3fSmrg			cshader->shader.ring_item_sizes[ring] = 0;
265601e04c3fSmrg			continue;
265701e04c3fSmrg		}
265801e04c3fSmrg
265901e04c3fSmrg		if (cf_jump) {
266001e04c3fSmrg			// Patch up jump label
266101e04c3fSmrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
266201e04c3fSmrg			cf_pop = ctx.bc->cf_last;
266301e04c3fSmrg
266401e04c3fSmrg			cf_jump->cf_addr = cf_pop->id + 2;
266501e04c3fSmrg			cf_jump->pop_count = 1;
266601e04c3fSmrg			cf_pop->cf_addr = cf_pop->id + 2;
266701e04c3fSmrg			cf_pop->pop_count = 1;
266801e04c3fSmrg		}
266901e04c3fSmrg
267001e04c3fSmrg		/* PRED_SETE_INT __, R0.y, ring */
267101e04c3fSmrg		memset(&alu, 0, sizeof(alu));
267201e04c3fSmrg		alu.op = ALU_OP2_PRED_SETE_INT;
267301e04c3fSmrg		alu.src[0].chan = 1;
267401e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
267501e04c3fSmrg		alu.src[1].value = ring;
267601e04c3fSmrg		alu.execute_mask = 1;
267701e04c3fSmrg		alu.update_pred = 1;
267801e04c3fSmrg		alu.last = 1;
267901e04c3fSmrg		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
268001e04c3fSmrg
268101e04c3fSmrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
268201e04c3fSmrg		cf_jump = ctx.bc->cf_last;
268301e04c3fSmrg
268401e04c3fSmrg		if (enabled)
268501e04c3fSmrg			emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]);
268601e04c3fSmrg		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
268701e04c3fSmrg	}
268801e04c3fSmrg
268901e04c3fSmrg	/* bc adds nops - copy it */
269001e04c3fSmrg	if (ctx.bc->chip_class == R600) {
269101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
269201e04c3fSmrg		alu.op = ALU_OP0_NOP;
269301e04c3fSmrg		alu.last = 1;
269401e04c3fSmrg		r600_bytecode_add_alu(ctx.bc, &alu);
2695af69d88dSmrg
269601e04c3fSmrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
269701e04c3fSmrg	}
2698af69d88dSmrg
2699af69d88dSmrg	/* export vertex data */
2700af69d88dSmrg	/* XXX factor out common code with r600_shader_from_tgsi ? */
2701af69d88dSmrg	for (i = 0; i < ocnt; ++i) {
2702af69d88dSmrg		struct r600_shader_io *out = &ctx.shader->output[i];
270301e04c3fSmrg		bool instream0 = true;
2704af69d88dSmrg		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
2705af69d88dSmrg			continue;
2706af69d88dSmrg
270701e04c3fSmrg		for (j = 0; j < so->num_outputs; j++) {
270801e04c3fSmrg			if (so->output[j].register_index == i) {
270901e04c3fSmrg				if (so->output[j].stream == 0)
271001e04c3fSmrg					break;
271101e04c3fSmrg				if (so->output[j].stream > 0)
271201e04c3fSmrg					instream0 = false;
271301e04c3fSmrg			}
271401e04c3fSmrg		}
271501e04c3fSmrg		if (!instream0)
271601e04c3fSmrg			continue;
2717af69d88dSmrg		memset(&output, 0, sizeof(output));
2718af69d88dSmrg		output.gpr = out->gpr;
2719af69d88dSmrg		output.elem_size = 3;
2720af69d88dSmrg		output.swizzle_x = 0;
2721af69d88dSmrg		output.swizzle_y = 1;
2722af69d88dSmrg		output.swizzle_z = 2;
2723af69d88dSmrg		output.swizzle_w = 3;
2724af69d88dSmrg		output.burst_count = 1;
2725af69d88dSmrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2726af69d88dSmrg		output.op = CF_OP_EXPORT;
2727af69d88dSmrg		switch (out->name) {
2728af69d88dSmrg		case TGSI_SEMANTIC_POSITION:
2729af69d88dSmrg			output.array_base = 60;
2730af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2731af69d88dSmrg			break;
2732af69d88dSmrg
2733af69d88dSmrg		case TGSI_SEMANTIC_PSIZE:
2734af69d88dSmrg			output.array_base = 61;
2735af69d88dSmrg			if (next_clip_pos == 61)
2736af69d88dSmrg				next_clip_pos = 62;
2737af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2738af69d88dSmrg			output.swizzle_y = 7;
2739af69d88dSmrg			output.swizzle_z = 7;
2740af69d88dSmrg			output.swizzle_w = 7;
2741af69d88dSmrg			ctx.shader->vs_out_misc_write = 1;
2742af69d88dSmrg			ctx.shader->vs_out_point_size = 1;
2743af69d88dSmrg			break;
2744af69d88dSmrg		case TGSI_SEMANTIC_LAYER:
2745af69d88dSmrg			if (out->spi_sid) {
2746af69d88dSmrg				/* duplicate it as PARAM to pass to the pixel shader */
2747af69d88dSmrg				output.array_base = next_param++;
2748af69d88dSmrg				r600_bytecode_add_output(ctx.bc, &output);
2749af69d88dSmrg				last_exp_param = ctx.bc->cf_last;
2750af69d88dSmrg			}
2751af69d88dSmrg			output.array_base = 61;
2752af69d88dSmrg			if (next_clip_pos == 61)
2753af69d88dSmrg				next_clip_pos = 62;
2754af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2755af69d88dSmrg			output.swizzle_x = 7;
2756af69d88dSmrg			output.swizzle_y = 7;
2757af69d88dSmrg			output.swizzle_z = 0;
2758af69d88dSmrg			output.swizzle_w = 7;
2759af69d88dSmrg			ctx.shader->vs_out_misc_write = 1;
2760af69d88dSmrg			ctx.shader->vs_out_layer = 1;
2761af69d88dSmrg			break;
2762af69d88dSmrg		case TGSI_SEMANTIC_VIEWPORT_INDEX:
2763af69d88dSmrg			if (out->spi_sid) {
2764af69d88dSmrg				/* duplicate it as PARAM to pass to the pixel shader */
2765af69d88dSmrg				output.array_base = next_param++;
2766af69d88dSmrg				r600_bytecode_add_output(ctx.bc, &output);
2767af69d88dSmrg				last_exp_param = ctx.bc->cf_last;
2768af69d88dSmrg			}
2769af69d88dSmrg			output.array_base = 61;
2770af69d88dSmrg			if (next_clip_pos == 61)
2771af69d88dSmrg				next_clip_pos = 62;
2772af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2773af69d88dSmrg			ctx.shader->vs_out_misc_write = 1;
2774af69d88dSmrg			ctx.shader->vs_out_viewport = 1;
2775af69d88dSmrg			output.swizzle_x = 7;
2776af69d88dSmrg			output.swizzle_y = 7;
2777af69d88dSmrg			output.swizzle_z = 7;
2778af69d88dSmrg			output.swizzle_w = 0;
2779af69d88dSmrg			break;
2780af69d88dSmrg		case TGSI_SEMANTIC_CLIPDIST:
2781af69d88dSmrg			/* spi_sid is 0 for clipdistance outputs that were generated
2782af69d88dSmrg			 * for clipvertex - we don't need to pass them to PS */
2783af69d88dSmrg			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
278401e04c3fSmrg			ctx.shader->cull_dist_write = gs->shader.cull_dist_write;
278501e04c3fSmrg			ctx.shader->cc_dist_mask = gs->shader.cc_dist_mask;
2786af69d88dSmrg			if (out->spi_sid) {
2787af69d88dSmrg				/* duplicate it as PARAM to pass to the pixel shader */
2788af69d88dSmrg				output.array_base = next_param++;
2789af69d88dSmrg				r600_bytecode_add_output(ctx.bc, &output);
2790af69d88dSmrg				last_exp_param = ctx.bc->cf_last;
2791af69d88dSmrg			}
2792af69d88dSmrg			output.array_base = next_clip_pos++;
2793af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2794af69d88dSmrg			break;
2795af69d88dSmrg		case TGSI_SEMANTIC_FOG:
2796af69d88dSmrg			output.swizzle_y = 4; /* 0 */
2797af69d88dSmrg			output.swizzle_z = 4; /* 0 */
2798af69d88dSmrg			output.swizzle_w = 5; /* 1 */
2799af69d88dSmrg			break;
2800af69d88dSmrg		default:
2801af69d88dSmrg			output.array_base = next_param++;
2802af69d88dSmrg			break;
2803af69d88dSmrg		}
2804af69d88dSmrg		r600_bytecode_add_output(ctx.bc, &output);
2805af69d88dSmrg		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
2806af69d88dSmrg			last_exp_param = ctx.bc->cf_last;
2807af69d88dSmrg		else
2808af69d88dSmrg			last_exp_pos = ctx.bc->cf_last;
2809af69d88dSmrg	}
2810af69d88dSmrg
2811af69d88dSmrg	if (!last_exp_pos) {
2812af69d88dSmrg		memset(&output, 0, sizeof(output));
2813af69d88dSmrg		output.gpr = 0;
2814af69d88dSmrg		output.elem_size = 3;
2815af69d88dSmrg		output.swizzle_x = 7;
2816af69d88dSmrg		output.swizzle_y = 7;
2817af69d88dSmrg		output.swizzle_z = 7;
2818af69d88dSmrg		output.swizzle_w = 7;
2819af69d88dSmrg		output.burst_count = 1;
2820af69d88dSmrg		output.type = 2;
2821af69d88dSmrg		output.op = CF_OP_EXPORT;
2822af69d88dSmrg		output.array_base = 60;
2823af69d88dSmrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
2824af69d88dSmrg		r600_bytecode_add_output(ctx.bc, &output);
2825af69d88dSmrg		last_exp_pos = ctx.bc->cf_last;
2826af69d88dSmrg	}
2827af69d88dSmrg
2828af69d88dSmrg	if (!last_exp_param) {
2829af69d88dSmrg		memset(&output, 0, sizeof(output));
2830af69d88dSmrg		output.gpr = 0;
2831af69d88dSmrg		output.elem_size = 3;
2832af69d88dSmrg		output.swizzle_x = 7;
2833af69d88dSmrg		output.swizzle_y = 7;
2834af69d88dSmrg		output.swizzle_z = 7;
2835af69d88dSmrg		output.swizzle_w = 7;
2836af69d88dSmrg		output.burst_count = 1;
2837af69d88dSmrg		output.type = 2;
2838af69d88dSmrg		output.op = CF_OP_EXPORT;
2839af69d88dSmrg		output.array_base = next_param++;
2840af69d88dSmrg		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
2841af69d88dSmrg		r600_bytecode_add_output(ctx.bc, &output);
2842af69d88dSmrg		last_exp_param = ctx.bc->cf_last;
2843af69d88dSmrg	}
2844af69d88dSmrg
2845af69d88dSmrg	last_exp_pos->op = CF_OP_EXPORT_DONE;
2846af69d88dSmrg	last_exp_param->op = CF_OP_EXPORT_DONE;
2847af69d88dSmrg
2848af69d88dSmrg	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
2849af69d88dSmrg	cf_pop = ctx.bc->cf_last;
2850af69d88dSmrg
2851af69d88dSmrg	cf_jump->cf_addr = cf_pop->id + 2;
2852af69d88dSmrg	cf_jump->pop_count = 1;
2853af69d88dSmrg	cf_pop->cf_addr = cf_pop->id + 2;
2854af69d88dSmrg	cf_pop->pop_count = 1;
2855af69d88dSmrg
2856af69d88dSmrg	if (ctx.bc->chip_class == CAYMAN)
2857af69d88dSmrg		cm_bytecode_add_cf_end(ctx.bc);
2858af69d88dSmrg	else {
2859af69d88dSmrg		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
2860af69d88dSmrg		ctx.bc->cf_last->end_of_program = 1;
2861af69d88dSmrg	}
2862af69d88dSmrg
2863af69d88dSmrg	gs->gs_copy_shader = cshader;
286401e04c3fSmrg	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
2865af69d88dSmrg
2866af69d88dSmrg	ctx.bc->nstack = 1;
2867af69d88dSmrg
2868af69d88dSmrg	return r600_bytecode_build(ctx.bc);
2869af69d88dSmrg}
2870af69d88dSmrg
287101e04c3fSmrgstatic int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind)
287201e04c3fSmrg{
287301e04c3fSmrg	if (ind) {
287401e04c3fSmrg		struct r600_bytecode_alu alu;
287501e04c3fSmrg		int r;
287601e04c3fSmrg
287701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
287801e04c3fSmrg		alu.op = ALU_OP2_ADD_INT;
287901e04c3fSmrg		alu.src[0].sel = ctx->gs_export_gpr_tregs[idx];
288001e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
288101e04c3fSmrg		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
288201e04c3fSmrg		alu.dst.sel = ctx->gs_export_gpr_tregs[idx];
288301e04c3fSmrg		alu.dst.write = 1;
288401e04c3fSmrg		alu.last = 1;
288501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
288601e04c3fSmrg		if (r)
288701e04c3fSmrg			return r;
288801e04c3fSmrg	}
288901e04c3fSmrg	return 0;
289001e04c3fSmrg}
289101e04c3fSmrg
289201e04c3fSmrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so UNUSED, int stream, bool ind)
2893af69d88dSmrg{
2894af69d88dSmrg	struct r600_bytecode_output output;
289501e04c3fSmrg	int ring_offset;
289601e04c3fSmrg	unsigned i, k;
289701e04c3fSmrg	int effective_stream = stream == -1 ? 0 : stream;
289801e04c3fSmrg	int idx = 0;
2899af69d88dSmrg
2900af69d88dSmrg	for (i = 0; i < ctx->shader->noutput; i++) {
2901af69d88dSmrg		if (ctx->gs_for_vs) {
2902af69d88dSmrg			/* for ES we need to lookup corresponding ring offset expected by GS
2903af69d88dSmrg			 * (map this output to GS input by name and sid) */
2904af69d88dSmrg			/* FIXME precompute offsets */
2905af69d88dSmrg			ring_offset = -1;
2906af69d88dSmrg			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
2907af69d88dSmrg				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
2908af69d88dSmrg				struct r600_shader_io *out = &ctx->shader->output[i];
2909af69d88dSmrg				if (in->name == out->name && in->sid == out->sid)
2910af69d88dSmrg					ring_offset = in->ring_offset;
2911af69d88dSmrg			}
2912af69d88dSmrg
2913af69d88dSmrg			if (ring_offset == -1)
2914af69d88dSmrg				continue;
291501e04c3fSmrg		} else {
291601e04c3fSmrg			ring_offset = idx * 16;
291701e04c3fSmrg			idx++;
291801e04c3fSmrg		}
2919af69d88dSmrg
292001e04c3fSmrg		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
292101e04c3fSmrg			continue;
2922af69d88dSmrg		/* next_ring_offset after parsing input decls contains total size of
2923af69d88dSmrg		 * single vertex data, gs_next_vertex - current vertex index */
2924af69d88dSmrg		if (!ind)
2925af69d88dSmrg			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
2926af69d88dSmrg
2927af69d88dSmrg		memset(&output, 0, sizeof(struct r600_bytecode_output));
2928af69d88dSmrg		output.gpr = ctx->shader->output[i].gpr;
2929af69d88dSmrg		output.elem_size = 3;
2930af69d88dSmrg		output.comp_mask = 0xF;
2931af69d88dSmrg		output.burst_count = 1;
2932af69d88dSmrg
2933af69d88dSmrg		if (ind)
2934af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
2935af69d88dSmrg		else
2936af69d88dSmrg			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
2937af69d88dSmrg
293801e04c3fSmrg		switch (stream) {
293901e04c3fSmrg		default:
294001e04c3fSmrg		case 0:
294101e04c3fSmrg			output.op = CF_OP_MEM_RING; break;
294201e04c3fSmrg		case 1:
294301e04c3fSmrg			output.op = CF_OP_MEM_RING1; break;
294401e04c3fSmrg		case 2:
294501e04c3fSmrg			output.op = CF_OP_MEM_RING2; break;
294601e04c3fSmrg		case 3:
294701e04c3fSmrg			output.op = CF_OP_MEM_RING3; break;
294801e04c3fSmrg		}
2949af69d88dSmrg
2950af69d88dSmrg		if (ind) {
2951af69d88dSmrg			output.array_base = ring_offset >> 2; /* in dwords */
2952af69d88dSmrg			output.array_size = 0xfff;
295301e04c3fSmrg			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
2954af69d88dSmrg		} else
2955af69d88dSmrg			output.array_base = ring_offset >> 2; /* in dwords */
2956af69d88dSmrg		r600_bytecode_add_output(ctx->bc, &output);
2957af69d88dSmrg	}
2958af69d88dSmrg
2959af69d88dSmrg	++ctx->gs_next_vertex;
2960af69d88dSmrg	return 0;
2961af69d88dSmrg}
2962af69d88dSmrg
296301e04c3fSmrg
296401e04c3fSmrgstatic int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
2965af69d88dSmrg{
296601e04c3fSmrg	int r;
296701e04c3fSmrg	struct r600_bytecode_vtx vtx;
296801e04c3fSmrg	int temp_val = ctx->temp_reg;
296901e04c3fSmrg	/* need to store the TCS output somewhere */
297001e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
297101e04c3fSmrg			   temp_val, 0,
297201e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 0,
297301e04c3fSmrg			   0, 0);
297401e04c3fSmrg	if (r)
297501e04c3fSmrg		return r;
2976af69d88dSmrg
297701e04c3fSmrg	/* used by VS/TCS */
297801e04c3fSmrg	if (ctx->tess_input_info) {
297901e04c3fSmrg		/* fetch tcs input values into resv space */
298001e04c3fSmrg		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
298101e04c3fSmrg		vtx.op = FETCH_OP_VFETCH;
298201e04c3fSmrg		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
298301e04c3fSmrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
298401e04c3fSmrg		vtx.mega_fetch_count = 16;
298501e04c3fSmrg		vtx.data_format = FMT_32_32_32_32;
298601e04c3fSmrg		vtx.num_format_all = 2;
298701e04c3fSmrg		vtx.format_comp_all = 1;
298801e04c3fSmrg		vtx.use_const_fields = 0;
298901e04c3fSmrg		vtx.endian = r600_endian_swap(32);
299001e04c3fSmrg		vtx.srf_mode_all = 1;
299101e04c3fSmrg		vtx.offset = 0;
299201e04c3fSmrg		vtx.dst_gpr = ctx->tess_input_info;
299301e04c3fSmrg		vtx.dst_sel_x = 0;
299401e04c3fSmrg		vtx.dst_sel_y = 1;
299501e04c3fSmrg		vtx.dst_sel_z = 2;
299601e04c3fSmrg		vtx.dst_sel_w = 3;
299701e04c3fSmrg		vtx.src_gpr = temp_val;
299801e04c3fSmrg		vtx.src_sel_x = 0;
299901e04c3fSmrg
300001e04c3fSmrg		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
300101e04c3fSmrg		if (r)
300201e04c3fSmrg			return r;
300301e04c3fSmrg	}
300401e04c3fSmrg
300501e04c3fSmrg	/* used by TCS/TES */
300601e04c3fSmrg	if (ctx->tess_output_info) {
300701e04c3fSmrg		/* fetch tcs output values into resv space */
300801e04c3fSmrg		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
300901e04c3fSmrg		vtx.op = FETCH_OP_VFETCH;
301001e04c3fSmrg		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
301101e04c3fSmrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
301201e04c3fSmrg		vtx.mega_fetch_count = 16;
301301e04c3fSmrg		vtx.data_format = FMT_32_32_32_32;
301401e04c3fSmrg		vtx.num_format_all = 2;
301501e04c3fSmrg		vtx.format_comp_all = 1;
301601e04c3fSmrg		vtx.use_const_fields = 0;
301701e04c3fSmrg		vtx.endian = r600_endian_swap(32);
301801e04c3fSmrg		vtx.srf_mode_all = 1;
301901e04c3fSmrg		vtx.offset = 16;
302001e04c3fSmrg		vtx.dst_gpr = ctx->tess_output_info;
302101e04c3fSmrg		vtx.dst_sel_x = 0;
302201e04c3fSmrg		vtx.dst_sel_y = 1;
302301e04c3fSmrg		vtx.dst_sel_z = 2;
302401e04c3fSmrg		vtx.dst_sel_w = 3;
302501e04c3fSmrg		vtx.src_gpr = temp_val;
302601e04c3fSmrg		vtx.src_sel_x = 0;
302701e04c3fSmrg
302801e04c3fSmrg		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
302901e04c3fSmrg		if (r)
303001e04c3fSmrg			return r;
303101e04c3fSmrg	}
303201e04c3fSmrg	return 0;
303301e04c3fSmrg}
303401e04c3fSmrg
303501e04c3fSmrgstatic int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
303601e04c3fSmrg{
303701e04c3fSmrg	int j, r;
303801e04c3fSmrg	int temp_reg;
303901e04c3fSmrg	unsigned i;
304001e04c3fSmrg
304101e04c3fSmrg	/* fetch tcs input values into input_vals */
304201e04c3fSmrg	ctx->tess_input_info = r600_get_temp(ctx);
304301e04c3fSmrg	ctx->tess_output_info = 0;
304401e04c3fSmrg	r = r600_fetch_tess_io_info(ctx);
304501e04c3fSmrg	if (r)
304601e04c3fSmrg		return r;
304701e04c3fSmrg
304801e04c3fSmrg	temp_reg = r600_get_temp(ctx);
304901e04c3fSmrg	/* dst reg contains LDS address stride * idx */
305001e04c3fSmrg	/* MUL vertexID, vertex_dw_stride */
305101e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
305201e04c3fSmrg			   temp_reg, 0,
305301e04c3fSmrg			   ctx->tess_input_info, 1,
305401e04c3fSmrg			   0, 1); /* rel id in r0.y? */
305501e04c3fSmrg	if (r)
305601e04c3fSmrg		return r;
305701e04c3fSmrg
305801e04c3fSmrg	for (i = 0; i < ctx->shader->noutput; i++) {
305901e04c3fSmrg		struct r600_bytecode_alu alu;
30607ec681f3Smrg		int param = r600_get_lds_unique_index(ctx->shader->output[i].name,
30617ec681f3Smrg						      ctx->shader->output[i].sid);
306201e04c3fSmrg
306301e04c3fSmrg		if (param) {
306401e04c3fSmrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
306501e04c3fSmrg					   temp_reg, 1,
306601e04c3fSmrg					   temp_reg, 0,
306701e04c3fSmrg					   V_SQ_ALU_SRC_LITERAL, param * 16);
306801e04c3fSmrg			if (r)
306901e04c3fSmrg				return r;
307001e04c3fSmrg		}
307101e04c3fSmrg
307201e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
307301e04c3fSmrg				   temp_reg, 2,
307401e04c3fSmrg				   temp_reg, param ? 1 : 0,
307501e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 8);
307601e04c3fSmrg		if (r)
307701e04c3fSmrg			return r;
307801e04c3fSmrg
307901e04c3fSmrg
308001e04c3fSmrg		for (j = 0; j < 2; j++) {
308101e04c3fSmrg			int chan = (j == 1) ? 2 : (param ? 1 : 0);
308201e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
308301e04c3fSmrg			alu.op = LDS_OP3_LDS_WRITE_REL;
308401e04c3fSmrg			alu.src[0].sel = temp_reg;
308501e04c3fSmrg			alu.src[0].chan = chan;
308601e04c3fSmrg			alu.src[1].sel = ctx->shader->output[i].gpr;
308701e04c3fSmrg			alu.src[1].chan = j * 2;
308801e04c3fSmrg			alu.src[2].sel = ctx->shader->output[i].gpr;
308901e04c3fSmrg			alu.src[2].chan = (j * 2) + 1;
309001e04c3fSmrg			alu.last = 1;
309101e04c3fSmrg			alu.dst.chan = 0;
309201e04c3fSmrg			alu.lds_idx = 1;
309301e04c3fSmrg			alu.is_lds_idx_op = true;
309401e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
309501e04c3fSmrg			if (r)
309601e04c3fSmrg				return r;
309701e04c3fSmrg		}
309801e04c3fSmrg	}
309901e04c3fSmrg	return 0;
310001e04c3fSmrg}
310101e04c3fSmrg
310201e04c3fSmrgstatic int r600_store_tcs_output(struct r600_shader_ctx *ctx)
310301e04c3fSmrg{
310401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
310501e04c3fSmrg	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
310601e04c3fSmrg	int i, r, lasti;
310701e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
310801e04c3fSmrg	struct r600_bytecode_alu alu;
310901e04c3fSmrg	unsigned write_mask = dst->Register.WriteMask;
311001e04c3fSmrg
311101e04c3fSmrg	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
311201e04c3fSmrg		return 0;
311301e04c3fSmrg
311401e04c3fSmrg	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true);
311501e04c3fSmrg	if (r)
311601e04c3fSmrg		return r;
311701e04c3fSmrg
311801e04c3fSmrg	/* the base address is now in temp.x */
311901e04c3fSmrg	r = r600_get_byte_address(ctx, temp_reg,
312001e04c3fSmrg				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
312101e04c3fSmrg	if (r)
312201e04c3fSmrg		return r;
312301e04c3fSmrg
312401e04c3fSmrg	/* LDS write */
312501e04c3fSmrg	lasti = tgsi_last_instruction(write_mask);
312601e04c3fSmrg	for (i = 1; i <= lasti; i++) {
312701e04c3fSmrg
312801e04c3fSmrg		if (!(write_mask & (1 << i)))
312901e04c3fSmrg			continue;
313001e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
313101e04c3fSmrg				   temp_reg, i,
313201e04c3fSmrg				   temp_reg, 0,
313301e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
313401e04c3fSmrg		if (r)
313501e04c3fSmrg			return r;
313601e04c3fSmrg	}
313701e04c3fSmrg
313801e04c3fSmrg	for (i = 0; i <= lasti; i++) {
313901e04c3fSmrg		if (!(write_mask & (1 << i)))
314001e04c3fSmrg			continue;
314101e04c3fSmrg
314201e04c3fSmrg		if ((i == 0 && ((write_mask & 3) == 3)) ||
314301e04c3fSmrg		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
314401e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
314501e04c3fSmrg			alu.op = LDS_OP3_LDS_WRITE_REL;
314601e04c3fSmrg			alu.src[0].sel = temp_reg;
314701e04c3fSmrg			alu.src[0].chan = i;
314801e04c3fSmrg
314901e04c3fSmrg			alu.src[1].sel = dst->Register.Index;
315001e04c3fSmrg			alu.src[1].sel += ctx->file_offset[dst->Register.File];
315101e04c3fSmrg			alu.src[1].chan = i;
315201e04c3fSmrg
315301e04c3fSmrg			alu.src[2].sel = dst->Register.Index;
315401e04c3fSmrg			alu.src[2].sel += ctx->file_offset[dst->Register.File];
315501e04c3fSmrg			alu.src[2].chan = i + 1;
315601e04c3fSmrg			alu.lds_idx = 1;
315701e04c3fSmrg			alu.dst.chan = 0;
315801e04c3fSmrg			alu.last = 1;
315901e04c3fSmrg			alu.is_lds_idx_op = true;
316001e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
316101e04c3fSmrg			if (r)
316201e04c3fSmrg				return r;
316301e04c3fSmrg			i += 1;
316401e04c3fSmrg			continue;
316501e04c3fSmrg		}
316601e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
316701e04c3fSmrg		alu.op = LDS_OP2_LDS_WRITE;
316801e04c3fSmrg		alu.src[0].sel = temp_reg;
316901e04c3fSmrg		alu.src[0].chan = i;
317001e04c3fSmrg
317101e04c3fSmrg		alu.src[1].sel = dst->Register.Index;
317201e04c3fSmrg		alu.src[1].sel += ctx->file_offset[dst->Register.File];
317301e04c3fSmrg		alu.src[1].chan = i;
317401e04c3fSmrg
317501e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
317601e04c3fSmrg		alu.dst.chan = 0;
317701e04c3fSmrg		alu.last = 1;
317801e04c3fSmrg		alu.is_lds_idx_op = true;
317901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
318001e04c3fSmrg		if (r)
318101e04c3fSmrg			return r;
318201e04c3fSmrg	}
318301e04c3fSmrg	return 0;
318401e04c3fSmrg}
318501e04c3fSmrg
318601e04c3fSmrgstatic int r600_tess_factor_read(struct r600_shader_ctx *ctx,
318701e04c3fSmrg				 int output_idx, int nc)
318801e04c3fSmrg{
318901e04c3fSmrg	int param;
319001e04c3fSmrg	unsigned temp_reg = r600_get_temp(ctx);
319101e04c3fSmrg	unsigned name = ctx->shader->output[output_idx].name;
319201e04c3fSmrg	int dreg = ctx->shader->output[output_idx].gpr;
319301e04c3fSmrg	int r;
319401e04c3fSmrg
319501e04c3fSmrg	param = r600_get_lds_unique_index(name, 0);
319601e04c3fSmrg	r = get_lds_offset0(ctx, 1, temp_reg, true);
319701e04c3fSmrg	if (r)
319801e04c3fSmrg		return r;
319901e04c3fSmrg
320001e04c3fSmrg	if (param) {
320101e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
320201e04c3fSmrg				   temp_reg, 0,
320301e04c3fSmrg				   temp_reg, 0,
320401e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, param * 16);
320501e04c3fSmrg		if (r)
320601e04c3fSmrg			return r;
320701e04c3fSmrg	}
320801e04c3fSmrg
320901e04c3fSmrg	do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1));
321001e04c3fSmrg	return 0;
321101e04c3fSmrg}
321201e04c3fSmrg
321301e04c3fSmrgstatic int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
321401e04c3fSmrg{
321501e04c3fSmrg	int stride, outer_comps, inner_comps;
321601e04c3fSmrg	int tessinner_idx = -1, tessouter_idx = -1;
321701e04c3fSmrg	int i, r;
321801e04c3fSmrg	unsigned j;
321901e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
322001e04c3fSmrg	int treg[3] = {-1, -1, -1};
322101e04c3fSmrg	struct r600_bytecode_alu alu;
322201e04c3fSmrg	struct r600_bytecode_cf *cf_jump, *cf_pop;
322301e04c3fSmrg
322401e04c3fSmrg	/* only execute factor emission for invocation 0 */
322501e04c3fSmrg	/* PRED_SETE_INT __, R0.x, 0 */
322601e04c3fSmrg	memset(&alu, 0, sizeof(alu));
322701e04c3fSmrg	alu.op = ALU_OP2_PRED_SETE_INT;
322801e04c3fSmrg	alu.src[0].chan = 2;
322901e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
323001e04c3fSmrg	alu.execute_mask = 1;
323101e04c3fSmrg	alu.update_pred = 1;
323201e04c3fSmrg	alu.last = 1;
323301e04c3fSmrg	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
323401e04c3fSmrg
323501e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
323601e04c3fSmrg	cf_jump = ctx->bc->cf_last;
323701e04c3fSmrg
323801e04c3fSmrg	treg[0] = r600_get_temp(ctx);
323901e04c3fSmrg	switch (ctx->shader->tcs_prim_mode) {
324001e04c3fSmrg	case PIPE_PRIM_LINES:
324101e04c3fSmrg		stride = 8; /* 2 dwords, 1 vec2 store */
324201e04c3fSmrg		outer_comps = 2;
324301e04c3fSmrg		inner_comps = 0;
324401e04c3fSmrg		break;
324501e04c3fSmrg	case PIPE_PRIM_TRIANGLES:
324601e04c3fSmrg		stride = 16; /* 4 dwords, 1 vec4 store */
324701e04c3fSmrg		outer_comps = 3;
324801e04c3fSmrg		inner_comps = 1;
324901e04c3fSmrg		treg[1] = r600_get_temp(ctx);
325001e04c3fSmrg		break;
325101e04c3fSmrg	case PIPE_PRIM_QUADS:
325201e04c3fSmrg		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
325301e04c3fSmrg		outer_comps = 4;
325401e04c3fSmrg		inner_comps = 2;
325501e04c3fSmrg		treg[1] = r600_get_temp(ctx);
325601e04c3fSmrg		treg[2] = r600_get_temp(ctx);
325701e04c3fSmrg		break;
325801e04c3fSmrg	default:
325901e04c3fSmrg		assert(0);
326001e04c3fSmrg		return -1;
326101e04c3fSmrg	}
326201e04c3fSmrg
326301e04c3fSmrg	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
326401e04c3fSmrg	/* TF_WRITE takes index in R.x, value in R.y */
326501e04c3fSmrg	for (j = 0; j < ctx->shader->noutput; j++) {
326601e04c3fSmrg		if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSINNER)
326701e04c3fSmrg			tessinner_idx = j;
326801e04c3fSmrg		if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSOUTER)
326901e04c3fSmrg			tessouter_idx = j;
327001e04c3fSmrg	}
327101e04c3fSmrg
327201e04c3fSmrg	if (tessouter_idx == -1)
327301e04c3fSmrg		return -1;
327401e04c3fSmrg
327501e04c3fSmrg	if (tessinner_idx == -1 && inner_comps)
327601e04c3fSmrg		return -1;
327701e04c3fSmrg
327801e04c3fSmrg	if (tessouter_idx != -1) {
327901e04c3fSmrg		r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps);
328001e04c3fSmrg		if (r)
328101e04c3fSmrg			return r;
328201e04c3fSmrg	}
328301e04c3fSmrg
328401e04c3fSmrg	if (tessinner_idx != -1) {
328501e04c3fSmrg		r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps);
328601e04c3fSmrg		if (r)
328701e04c3fSmrg			return r;
328801e04c3fSmrg	}
328901e04c3fSmrg
329001e04c3fSmrg	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
329101e04c3fSmrg	/* r.x = relpatchid(r0.y) * tf_stride */
329201e04c3fSmrg
329301e04c3fSmrg	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
329401e04c3fSmrg	/* add incoming r0.w to it: t.x = t.x + r0.w */
329501e04c3fSmrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
329601e04c3fSmrg			   temp_reg, 0,
329701e04c3fSmrg			   0, 1,
329801e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, stride,
329901e04c3fSmrg			   0, 3);
330001e04c3fSmrg	if (r)
330101e04c3fSmrg		return r;
330201e04c3fSmrg
330301e04c3fSmrg	for (i = 0; i < outer_comps + inner_comps; i++) {
330401e04c3fSmrg		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
330501e04c3fSmrg		int out_comp = i >= outer_comps ? i - outer_comps : i;
330601e04c3fSmrg
330701e04c3fSmrg		if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) {
330801e04c3fSmrg			if (out_comp == 1)
330901e04c3fSmrg				out_comp = 0;
331001e04c3fSmrg			else if (out_comp == 0)
331101e04c3fSmrg				out_comp = 1;
331201e04c3fSmrg		}
331301e04c3fSmrg
331401e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
331501e04c3fSmrg				   treg[i / 2], (2 * (i % 2)),
331601e04c3fSmrg				   temp_reg, 0,
331701e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
331801e04c3fSmrg		if (r)
331901e04c3fSmrg			return r;
332001e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
332101e04c3fSmrg				   treg[i / 2], 1 + (2 * (i%2)),
332201e04c3fSmrg				   ctx->shader->output[out_idx].gpr, out_comp,
332301e04c3fSmrg				   0, 0);
332401e04c3fSmrg		if (r)
332501e04c3fSmrg			return r;
332601e04c3fSmrg	}
332701e04c3fSmrg	for (i = 0; i < outer_comps + inner_comps; i++) {
332801e04c3fSmrg		struct r600_bytecode_gds gds;
332901e04c3fSmrg
333001e04c3fSmrg		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
333101e04c3fSmrg		gds.src_gpr = treg[i / 2];
333201e04c3fSmrg		gds.src_sel_x = 2 * (i % 2);
333301e04c3fSmrg		gds.src_sel_y = 1 + (2 * (i % 2));
333401e04c3fSmrg		gds.src_sel_z = 4;
333501e04c3fSmrg		gds.dst_sel_x = 7;
333601e04c3fSmrg		gds.dst_sel_y = 7;
333701e04c3fSmrg		gds.dst_sel_z = 7;
333801e04c3fSmrg		gds.dst_sel_w = 7;
333901e04c3fSmrg		gds.op = FETCH_OP_TF_WRITE;
334001e04c3fSmrg		r = r600_bytecode_add_gds(ctx->bc, &gds);
334101e04c3fSmrg		if (r)
334201e04c3fSmrg			return r;
334301e04c3fSmrg	}
334401e04c3fSmrg
334501e04c3fSmrg	// Patch up jump label
334601e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
334701e04c3fSmrg	cf_pop = ctx->bc->cf_last;
334801e04c3fSmrg
334901e04c3fSmrg	cf_jump->cf_addr = cf_pop->id + 2;
335001e04c3fSmrg	cf_jump->pop_count = 1;
335101e04c3fSmrg	cf_pop->cf_addr = cf_pop->id + 2;
335201e04c3fSmrg	cf_pop->pop_count = 1;
335301e04c3fSmrg
335401e04c3fSmrg	return 0;
335501e04c3fSmrg}
335601e04c3fSmrg
335701e04c3fSmrg/*
335801e04c3fSmrg * We have to work out the thread ID for load and atomic
335901e04c3fSmrg * operations, which store the returned value to an index
336001e04c3fSmrg * in an intermediate buffer.
336101e04c3fSmrg * The index is calculated by taking the thread id,
336201e04c3fSmrg * calculated from the MBCNT instructions.
336301e04c3fSmrg * Then the shader engine ID is multiplied by 256,
336401e04c3fSmrg * and the wave id is added.
336501e04c3fSmrg * Then the result is multipled by 64 and thread id is
336601e04c3fSmrg * added.
336701e04c3fSmrg */
336801e04c3fSmrgstatic int load_thread_id_gpr(struct r600_shader_ctx *ctx)
336901e04c3fSmrg{
337001e04c3fSmrg	struct r600_bytecode_alu alu;
337101e04c3fSmrg	int r;
337201e04c3fSmrg
337301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
337401e04c3fSmrg	alu.op = ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT;
337501e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
337601e04c3fSmrg	alu.dst.chan = 0;
337701e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
337801e04c3fSmrg	alu.src[0].value = 0xffffffff;
337901e04c3fSmrg	alu.dst.write = 1;
338001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
338101e04c3fSmrg	if (r)
338201e04c3fSmrg		return r;
338301e04c3fSmrg
338401e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
338501e04c3fSmrg	alu.op = ALU_OP1_MBCNT_32HI_INT;
338601e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
338701e04c3fSmrg	alu.dst.chan = 1;
338801e04c3fSmrg	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
338901e04c3fSmrg	alu.src[0].value = 0xffffffff;
339001e04c3fSmrg	alu.dst.write = 1;
339101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
339201e04c3fSmrg	if (r)
339301e04c3fSmrg		return r;
339401e04c3fSmrg
339501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
339601e04c3fSmrg	alu.op = ALU_OP3_MULADD_UINT24;
339701e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
339801e04c3fSmrg	alu.dst.chan = 2;
339901e04c3fSmrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_SE_ID;
340001e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
340101e04c3fSmrg	alu.src[1].value = 256;
340201e04c3fSmrg	alu.src[2].sel = EG_V_SQ_ALU_SRC_HW_WAVE_ID;
340301e04c3fSmrg	alu.dst.write = 1;
340401e04c3fSmrg	alu.is_op3 = 1;
340501e04c3fSmrg	alu.last = 1;
340601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
340701e04c3fSmrg	if (r)
340801e04c3fSmrg		return r;
340901e04c3fSmrg
341001e04c3fSmrg	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
341101e04c3fSmrg			   ctx->thread_id_gpr, 1,
341201e04c3fSmrg			   ctx->temp_reg, 2,
341301e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 0x40,
341401e04c3fSmrg			   ctx->temp_reg, 0);
341501e04c3fSmrg	if (r)
341601e04c3fSmrg		return r;
341701e04c3fSmrg	return 0;
341801e04c3fSmrg}
341901e04c3fSmrg
342001e04c3fSmrgstatic int r600_shader_from_tgsi(struct r600_context *rctx,
342101e04c3fSmrg				 struct r600_pipe_shader *pipeshader,
342201e04c3fSmrg				 union r600_shader_key key)
342301e04c3fSmrg{
342401e04c3fSmrg	struct r600_screen *rscreen = rctx->screen;
342501e04c3fSmrg	struct r600_shader *shader = &pipeshader->shader;
342601e04c3fSmrg	struct tgsi_token *tokens = pipeshader->selector->tokens;
342701e04c3fSmrg	struct pipe_stream_output_info so = pipeshader->selector->so;
342801e04c3fSmrg	struct tgsi_full_immediate *immediate;
342901e04c3fSmrg	struct r600_shader_ctx ctx;
343001e04c3fSmrg	struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
343101e04c3fSmrg	unsigned output_done, noutput;
343201e04c3fSmrg	unsigned opcode;
343301e04c3fSmrg	int j, k, r = 0;
343401e04c3fSmrg	unsigned i;
343501e04c3fSmrg	int next_param_base = 0, next_clip_base;
343601e04c3fSmrg	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
343701e04c3fSmrg	bool indirect_gprs;
343801e04c3fSmrg	bool ring_outputs = false;
343901e04c3fSmrg	bool lds_outputs = false;
344001e04c3fSmrg	bool lds_inputs = false;
344101e04c3fSmrg	bool pos_emitted = false;
3442af69d88dSmrg
344301e04c3fSmrg	ctx.bc = &shader->bc;
344401e04c3fSmrg	ctx.shader = shader;
3445af69d88dSmrg
3446af69d88dSmrg	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
3447af69d88dSmrg			   rscreen->has_compressed_msaa_texturing);
3448af69d88dSmrg	ctx.tokens = tokens;
3449af69d88dSmrg	tgsi_scan_shader(tokens, &ctx.info);
3450af69d88dSmrg	shader->indirect_files = ctx.info.indirect_files;
345101e04c3fSmrg
345201e04c3fSmrg	int narrays = ctx.info.array_max[TGSI_FILE_TEMPORARY];
345301e04c3fSmrg	ctx.array_infos = calloc(narrays, sizeof(*ctx.array_infos));
345401e04c3fSmrg	ctx.spilled_arrays = calloc(narrays, sizeof(bool));
345501e04c3fSmrg	tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, narrays, ctx.array_infos);
345601e04c3fSmrg
345701e04c3fSmrg	shader->uses_helper_invocation = false;
345801e04c3fSmrg	shader->uses_doubles = ctx.info.uses_doubles;
345901e04c3fSmrg	shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
346001e04c3fSmrg	shader->nsys_inputs = 0;
346101e04c3fSmrg
346201e04c3fSmrg	shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 ||
346301e04c3fSmrg		ctx.info.file_count[TGSI_FILE_BUFFER] > 0;
346401e04c3fSmrg	indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
3465af69d88dSmrg	tgsi_parse_init(&ctx.parse, tokens);
346601e04c3fSmrg	ctx.type = ctx.info.processor;
34673464ebd5Sriastradh	shader->processor_type = ctx.type;
34683464ebd5Sriastradh	ctx.bc->type = shader->processor_type;
34693464ebd5Sriastradh
347001e04c3fSmrg	switch (ctx.type) {
347101e04c3fSmrg	case PIPE_SHADER_VERTEX:
347201e04c3fSmrg		shader->vs_as_gs_a = key.vs.as_gs_a;
347301e04c3fSmrg		shader->vs_as_es = key.vs.as_es;
347401e04c3fSmrg		shader->vs_as_ls = key.vs.as_ls;
347501e04c3fSmrg		shader->atomic_base = key.vs.first_atomic_counter;
347601e04c3fSmrg		if (shader->vs_as_es)
347701e04c3fSmrg			ring_outputs = true;
347801e04c3fSmrg		if (shader->vs_as_ls)
347901e04c3fSmrg			lds_outputs = true;
348001e04c3fSmrg		break;
348101e04c3fSmrg	case PIPE_SHADER_GEOMETRY:
348201e04c3fSmrg		ring_outputs = true;
348301e04c3fSmrg		shader->atomic_base = key.gs.first_atomic_counter;
348401e04c3fSmrg		shader->gs_tri_strip_adj_fix = key.gs.tri_strip_adj_fix;
348501e04c3fSmrg		break;
348601e04c3fSmrg	case PIPE_SHADER_TESS_CTRL:
348701e04c3fSmrg		shader->tcs_prim_mode = key.tcs.prim_mode;
348801e04c3fSmrg		shader->atomic_base = key.tcs.first_atomic_counter;
348901e04c3fSmrg		lds_outputs = true;
349001e04c3fSmrg		lds_inputs = true;
349101e04c3fSmrg		break;
349201e04c3fSmrg	case PIPE_SHADER_TESS_EVAL:
349301e04c3fSmrg		shader->tes_as_es = key.tes.as_es;
349401e04c3fSmrg		shader->atomic_base = key.tes.first_atomic_counter;
349501e04c3fSmrg		lds_inputs = true;
349601e04c3fSmrg		if (shader->tes_as_es)
349701e04c3fSmrg			ring_outputs = true;
349801e04c3fSmrg		break;
349901e04c3fSmrg	case PIPE_SHADER_FRAGMENT:
350001e04c3fSmrg		shader->two_side = key.ps.color_two_side;
350101e04c3fSmrg		shader->atomic_base = key.ps.first_atomic_counter;
350201e04c3fSmrg		shader->rat_base = key.ps.nr_cbufs;
350301e04c3fSmrg		shader->image_size_const_offset = key.ps.image_size_const_offset;
350401e04c3fSmrg		break;
350501e04c3fSmrg	case PIPE_SHADER_COMPUTE:
350601e04c3fSmrg		shader->rat_base = 0;
350701e04c3fSmrg		shader->image_size_const_offset = ctx.info.file_count[TGSI_FILE_SAMPLER];
350801e04c3fSmrg		break;
350901e04c3fSmrg	default:
351001e04c3fSmrg		break;
351101e04c3fSmrg	}
3512af69d88dSmrg
351301e04c3fSmrg	if (shader->vs_as_es || shader->tes_as_es) {
3514af69d88dSmrg		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
3515af69d88dSmrg	} else {
3516af69d88dSmrg		ctx.gs_for_vs = NULL;
3517af69d88dSmrg	}
3518af69d88dSmrg
3519af69d88dSmrg	ctx.next_ring_offset = 0;
3520af69d88dSmrg	ctx.gs_out_ring_offset = 0;
3521af69d88dSmrg	ctx.gs_next_vertex = 0;
352201e04c3fSmrg	ctx.gs_stream_output_info = &so;
3523af69d88dSmrg
352401e04c3fSmrg	ctx.thread_id_gpr = -1;
3525af69d88dSmrg	ctx.face_gpr = -1;
352601e04c3fSmrg	ctx.fixed_pt_position_gpr = -1;
3527af69d88dSmrg	ctx.fragcoord_input = -1;
3528af69d88dSmrg	ctx.colors_used = 0;
3529af69d88dSmrg	ctx.clip_vertex_write = 0;
3530af69d88dSmrg
353101e04c3fSmrg	ctx.helper_invoc_reg = -1;
353201e04c3fSmrg	ctx.cs_block_size_reg = -1;
353301e04c3fSmrg	ctx.cs_grid_size_reg = -1;
353401e04c3fSmrg	ctx.cs_block_size_loaded = false;
353501e04c3fSmrg	ctx.cs_grid_size_loaded = false;
353601e04c3fSmrg
3537af69d88dSmrg	shader->nr_ps_color_exports = 0;
3538af69d88dSmrg	shader->nr_ps_max_color_exports = 0;
35393464ebd5Sriastradh
35403464ebd5Sriastradh
35413464ebd5Sriastradh	/* register allocations */
35423464ebd5Sriastradh	/* Values [0,127] correspond to GPR[0..127].
35433464ebd5Sriastradh	 * Values [128,159] correspond to constant buffer bank 0
35443464ebd5Sriastradh	 * Values [160,191] correspond to constant buffer bank 1
35453464ebd5Sriastradh	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
35463464ebd5Sriastradh	 * Values [256,287] correspond to constant buffer bank 2 (EG)
35473464ebd5Sriastradh	 * Values [288,319] correspond to constant buffer bank 3 (EG)
35483464ebd5Sriastradh	 * Other special values are shown in the list below.
35493464ebd5Sriastradh	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
35503464ebd5Sriastradh	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
35513464ebd5Sriastradh	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
35523464ebd5Sriastradh	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
35533464ebd5Sriastradh	 * 248	SQ_ALU_SRC_0: special constant 0.0.
35543464ebd5Sriastradh	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
35553464ebd5Sriastradh	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
35563464ebd5Sriastradh	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
35573464ebd5Sriastradh	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
35583464ebd5Sriastradh	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
35593464ebd5Sriastradh	 * 254	SQ_ALU_SRC_PV: previous vector result.
35603464ebd5Sriastradh	 * 255	SQ_ALU_SRC_PS: previous scalar result.
35613464ebd5Sriastradh	 */
35623464ebd5Sriastradh	for (i = 0; i < TGSI_FILE_COUNT; i++) {
35633464ebd5Sriastradh		ctx.file_offset[i] = 0;
35643464ebd5Sriastradh	}
3565af69d88dSmrg
356601e04c3fSmrg	if (ctx.type == PIPE_SHADER_VERTEX)  {
356701e04c3fSmrg
35683464ebd5Sriastradh		ctx.file_offset[TGSI_FILE_INPUT] = 1;
356901e04c3fSmrg		if (ctx.info.num_inputs)
3570af69d88dSmrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
35713464ebd5Sriastradh	}
357201e04c3fSmrg	if (ctx.type == PIPE_SHADER_FRAGMENT) {
357301e04c3fSmrg		if (ctx.bc->chip_class >= EVERGREEN)
357401e04c3fSmrg			ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
357501e04c3fSmrg		else
357601e04c3fSmrg			ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
357701e04c3fSmrg
357801e04c3fSmrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
357901e04c3fSmrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
358001e04c3fSmrg				ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
358101e04c3fSmrg				shader->uses_helper_invocation = true;
358201e04c3fSmrg			}
358301e04c3fSmrg		}
35843464ebd5Sriastradh	}
358501e04c3fSmrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
3586af69d88dSmrg		/* FIXME 1 would be enough in some cases (3 or less input vertices) */
3587af69d88dSmrg		ctx.file_offset[TGSI_FILE_INPUT] = 2;
3588af69d88dSmrg	}
358901e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
359001e04c3fSmrg		ctx.file_offset[TGSI_FILE_INPUT] = 1;
359101e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_EVAL) {
359201e04c3fSmrg		bool add_tesscoord = false, add_tess_inout = false;
359301e04c3fSmrg		ctx.file_offset[TGSI_FILE_INPUT] = 1;
359401e04c3fSmrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
359501e04c3fSmrg			/* if we have tesscoord save one reg */
359601e04c3fSmrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD)
359701e04c3fSmrg				add_tesscoord = true;
359801e04c3fSmrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER ||
359901e04c3fSmrg			    ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER)
360001e04c3fSmrg				add_tess_inout = true;
360101e04c3fSmrg		}
360201e04c3fSmrg		if (add_tesscoord || add_tess_inout)
360301e04c3fSmrg			ctx.file_offset[TGSI_FILE_INPUT]++;
360401e04c3fSmrg		if (add_tess_inout)
360501e04c3fSmrg			ctx.file_offset[TGSI_FILE_INPUT]+=2;
360601e04c3fSmrg	}
360701e04c3fSmrg	if (ctx.type == PIPE_SHADER_COMPUTE) {
360801e04c3fSmrg		ctx.file_offset[TGSI_FILE_INPUT] = 2;
360901e04c3fSmrg		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
361001e04c3fSmrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE)
361101e04c3fSmrg				ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
361201e04c3fSmrg			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE)
361301e04c3fSmrg				ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
361401e04c3fSmrg		}
361501e04c3fSmrg	}
3616af69d88dSmrg
361701e04c3fSmrg	ctx.file_offset[TGSI_FILE_OUTPUT] =
3618af69d88dSmrg			ctx.file_offset[TGSI_FILE_INPUT] +
3619af69d88dSmrg			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
36203464ebd5Sriastradh	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
3621af69d88dSmrg						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
36223464ebd5Sriastradh
36233464ebd5Sriastradh	/* Outside the GPR range. This will be translated to one of the
36243464ebd5Sriastradh	 * kcache banks later. */
36253464ebd5Sriastradh	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
36263464ebd5Sriastradh	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
3627af69d88dSmrg
362801e04c3fSmrg	pipeshader->scratch_space_needed = 0;
362901e04c3fSmrg	int regno = ctx.file_offset[TGSI_FILE_TEMPORARY] +
363001e04c3fSmrg			ctx.info.file_max[TGSI_FILE_TEMPORARY];
363101e04c3fSmrg	if (regno > 124) {
363201e04c3fSmrg		choose_spill_arrays(&ctx, &regno, &pipeshader->scratch_space_needed);
363301e04c3fSmrg		shader->indirect_files = ctx.info.indirect_files;
363401e04c3fSmrg	}
363501e04c3fSmrg	shader->needs_scratch_space = pipeshader->scratch_space_needed != 0;
363601e04c3fSmrg
363701e04c3fSmrg	ctx.bc->ar_reg = ++regno;
363801e04c3fSmrg	ctx.bc->index_reg[0] = ++regno;
363901e04c3fSmrg	ctx.bc->index_reg[1] = ++regno;
364001e04c3fSmrg
364101e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_CTRL) {
364201e04c3fSmrg		ctx.tess_input_info = ++regno;
364301e04c3fSmrg		ctx.tess_output_info = ++regno;
364401e04c3fSmrg	} else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
364501e04c3fSmrg		ctx.tess_input_info = ++regno;
364601e04c3fSmrg		ctx.tess_output_info = ++regno;
364701e04c3fSmrg	} else if (ctx.type == PIPE_SHADER_GEOMETRY) {
364801e04c3fSmrg		ctx.gs_export_gpr_tregs[0] = ++regno;
364901e04c3fSmrg		ctx.gs_export_gpr_tregs[1] = ++regno;
365001e04c3fSmrg		ctx.gs_export_gpr_tregs[2] = ++regno;
365101e04c3fSmrg		ctx.gs_export_gpr_tregs[3] = ++regno;
365201e04c3fSmrg		if (ctx.shader->gs_tri_strip_adj_fix) {
365301e04c3fSmrg			ctx.gs_rotated_input[0] = ++regno;
365401e04c3fSmrg			ctx.gs_rotated_input[1] = ++regno;
365501e04c3fSmrg		} else {
365601e04c3fSmrg			ctx.gs_rotated_input[0] = 0;
365701e04c3fSmrg			ctx.gs_rotated_input[1] = 1;
365801e04c3fSmrg		}
365901e04c3fSmrg	}
366001e04c3fSmrg
366101e04c3fSmrg	if (shader->uses_images) {
366201e04c3fSmrg		ctx.thread_id_gpr = ++regno;
366301e04c3fSmrg	}
366401e04c3fSmrg	ctx.temp_reg = ++regno;
366501e04c3fSmrg
366601e04c3fSmrg	shader->max_arrays = 0;
366701e04c3fSmrg	shader->num_arrays = 0;
3668af69d88dSmrg	if (indirect_gprs) {
3669af69d88dSmrg
3670af69d88dSmrg		if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
3671af69d88dSmrg			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
3672af69d88dSmrg			                   ctx.file_offset[TGSI_FILE_OUTPUT] -
3673af69d88dSmrg			                   ctx.file_offset[TGSI_FILE_INPUT],
3674af69d88dSmrg			                   0x0F);
3675af69d88dSmrg		}
3676af69d88dSmrg		if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3677af69d88dSmrg			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
3678af69d88dSmrg			                   ctx.file_offset[TGSI_FILE_TEMPORARY] -
3679af69d88dSmrg			                   ctx.file_offset[TGSI_FILE_OUTPUT],
3680af69d88dSmrg			                   0x0F);
3681af69d88dSmrg		}
3682af69d88dSmrg	}
36833464ebd5Sriastradh
36843464ebd5Sriastradh	ctx.nliterals = 0;
36853464ebd5Sriastradh	ctx.literals = NULL;
368601e04c3fSmrg	ctx.max_driver_temp_used = 0;
368701e04c3fSmrg
368801e04c3fSmrg	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
368901e04c3fSmrg			       ctx.info.colors_written == 1;
369001e04c3fSmrg	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
369101e04c3fSmrg	shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
369201e04c3fSmrg
369301e04c3fSmrg	if (ctx.type == PIPE_SHADER_VERTEX ||
369401e04c3fSmrg	    ctx.type == PIPE_SHADER_GEOMETRY ||
369501e04c3fSmrg	    ctx.type == PIPE_SHADER_TESS_EVAL) {
369601e04c3fSmrg		shader->cc_dist_mask = (1 << (ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED] +
369701e04c3fSmrg					      ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED])) - 1;
369801e04c3fSmrg		shader->clip_dist_write = (1 << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]) - 1;
369901e04c3fSmrg		shader->cull_dist_write = ((1 << ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED]) - 1) << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED];
370001e04c3fSmrg	}
370101e04c3fSmrg
370201e04c3fSmrg	if (shader->vs_as_gs_a)
370301e04c3fSmrg		vs_add_primid_output(&ctx, key.vs.prim_id_out);
370401e04c3fSmrg
370501e04c3fSmrg	if (ctx.thread_id_gpr != -1) {
370601e04c3fSmrg		r = load_thread_id_gpr(&ctx);
370701e04c3fSmrg		if (r)
370801e04c3fSmrg			return r;
370901e04c3fSmrg	}
371001e04c3fSmrg
371101e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_EVAL)
371201e04c3fSmrg		r600_fetch_tess_io_info(&ctx);
371301e04c3fSmrg
37143464ebd5Sriastradh	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
37153464ebd5Sriastradh		tgsi_parse_token(&ctx.parse);
37163464ebd5Sriastradh		switch (ctx.parse.FullToken.Token.Type) {
37173464ebd5Sriastradh		case TGSI_TOKEN_TYPE_IMMEDIATE:
37183464ebd5Sriastradh			immediate = &ctx.parse.FullToken.FullImmediate;
37193464ebd5Sriastradh			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
37203464ebd5Sriastradh			if(ctx.literals == NULL) {
37213464ebd5Sriastradh				r = -ENOMEM;
37223464ebd5Sriastradh				goto out_err;
37233464ebd5Sriastradh			}
37243464ebd5Sriastradh			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
37253464ebd5Sriastradh			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
37263464ebd5Sriastradh			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
37273464ebd5Sriastradh			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
37283464ebd5Sriastradh			ctx.nliterals++;
37293464ebd5Sriastradh			break;
37303464ebd5Sriastradh		case TGSI_TOKEN_TYPE_DECLARATION:
37313464ebd5Sriastradh			r = tgsi_declaration(&ctx);
37323464ebd5Sriastradh			if (r)
37333464ebd5Sriastradh				goto out_err;
37343464ebd5Sriastradh			break;
37353464ebd5Sriastradh		case TGSI_TOKEN_TYPE_INSTRUCTION:
37363464ebd5Sriastradh		case TGSI_TOKEN_TYPE_PROPERTY:
37373464ebd5Sriastradh			break;
37383464ebd5Sriastradh		default:
37393464ebd5Sriastradh			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
37403464ebd5Sriastradh			r = -EINVAL;
37413464ebd5Sriastradh			goto out_err;
37423464ebd5Sriastradh		}
37433464ebd5Sriastradh	}
3744af69d88dSmrg
374501e04c3fSmrg	shader->ring_item_sizes[0] = ctx.next_ring_offset;
374601e04c3fSmrg	shader->ring_item_sizes[1] = 0;
374701e04c3fSmrg	shader->ring_item_sizes[2] = 0;
374801e04c3fSmrg	shader->ring_item_sizes[3] = 0;
37493464ebd5Sriastradh
3750af69d88dSmrg	/* Process two side if needed */
3751af69d88dSmrg	if (shader->two_side && ctx.colors_used) {
3752af69d88dSmrg		int i, count = ctx.shader->ninput;
3753af69d88dSmrg		unsigned next_lds_loc = ctx.shader->nlds;
3754af69d88dSmrg
3755af69d88dSmrg		/* additional inputs will be allocated right after the existing inputs,
3756af69d88dSmrg		 * we won't need them after the color selection, so we don't need to
3757af69d88dSmrg		 * reserve these gprs for the rest of the shader code and to adjust
3758af69d88dSmrg		 * output offsets etc. */
3759af69d88dSmrg		int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
3760af69d88dSmrg				ctx.info.file_max[TGSI_FILE_INPUT] + 1;
3761af69d88dSmrg
376201e04c3fSmrg		/* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */
3763af69d88dSmrg		if (ctx.face_gpr == -1) {
3764af69d88dSmrg			i = ctx.shader->ninput++;
3765af69d88dSmrg			ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
3766af69d88dSmrg			ctx.shader->input[i].spi_sid = 0;
3767af69d88dSmrg			ctx.shader->input[i].gpr = gpr++;
3768af69d88dSmrg			ctx.face_gpr = ctx.shader->input[i].gpr;
3769af69d88dSmrg		}
37703464ebd5Sriastradh
3771af69d88dSmrg		for (i = 0; i < count; i++) {
3772af69d88dSmrg			if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
3773af69d88dSmrg				int ni = ctx.shader->ninput++;
3774af69d88dSmrg				memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
3775af69d88dSmrg				ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
3776af69d88dSmrg				ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
3777af69d88dSmrg				ctx.shader->input[ni].gpr = gpr++;
3778af69d88dSmrg				// TGSI to LLVM needs to know the lds position of inputs.
3779af69d88dSmrg				// Non LLVM path computes it later (in process_twoside_color)
3780af69d88dSmrg				ctx.shader->input[ni].lds_pos = next_lds_loc++;
3781af69d88dSmrg				ctx.shader->input[i].back_color_input = ni;
3782af69d88dSmrg				if (ctx.bc->chip_class >= EVERGREEN) {
3783af69d88dSmrg					if ((r = evergreen_interp_input(&ctx, ni)))
37843464ebd5Sriastradh						return r;
37853464ebd5Sriastradh				}
37863464ebd5Sriastradh			}
37873464ebd5Sriastradh		}
37883464ebd5Sriastradh	}
37893464ebd5Sriastradh
3790af69d88dSmrg	if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
3791af69d88dSmrg		shader->nr_ps_max_color_exports = 8;
3792af69d88dSmrg
379301e04c3fSmrg	if (ctx.shader->uses_helper_invocation) {
379401e04c3fSmrg		if (ctx.bc->chip_class == CAYMAN)
379501e04c3fSmrg			r = cm_load_helper_invocation(&ctx);
379601e04c3fSmrg		else
379701e04c3fSmrg			r = eg_load_helper_invocation(&ctx);
379801e04c3fSmrg		if (r)
379901e04c3fSmrg			return r;
380001e04c3fSmrg	}
3801af69d88dSmrg
380201e04c3fSmrg	/*
380301e04c3fSmrg	 * XXX this relies on fixed_pt_position_gpr only being present when
380401e04c3fSmrg	 * this shader should be executed per sample. Should be the case for now...
380501e04c3fSmrg	 */
380601e04c3fSmrg	if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) {
380701e04c3fSmrg		/*
380801e04c3fSmrg		 * Fix up sample mask. The hw always gives us coverage mask for
380901e04c3fSmrg		 * the pixel. However, for per-sample shading, we need the
381001e04c3fSmrg		 * coverage for the shader invocation only.
381101e04c3fSmrg		 * Also, with disabled msaa, only the first bit should be set
381201e04c3fSmrg		 * (luckily the same fixup works for both problems).
381301e04c3fSmrg		 * For now, we can only do it if we know this shader is always
381401e04c3fSmrg		 * executed per sample (due to usage of bits in the shader
381501e04c3fSmrg		 * forcing per-sample execution).
381601e04c3fSmrg		 * If the fb is not multisampled, we'd do unnecessary work but
381701e04c3fSmrg		 * it should still be correct.
381801e04c3fSmrg		 * It will however do nothing for sample shading according
381901e04c3fSmrg		 * to MinSampleShading.
382001e04c3fSmrg		 */
382101e04c3fSmrg		struct r600_bytecode_alu alu;
382201e04c3fSmrg		int tmp = r600_get_temp(&ctx);
382301e04c3fSmrg		assert(ctx.face_gpr != -1);
382401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
382501e04c3fSmrg
382601e04c3fSmrg		alu.op = ALU_OP2_LSHL_INT;
382701e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
382801e04c3fSmrg		alu.src[0].value = 0x1;
382901e04c3fSmrg		alu.src[1].sel = ctx.fixed_pt_position_gpr;
383001e04c3fSmrg		alu.src[1].chan = 3;
383101e04c3fSmrg		alu.dst.sel = tmp;
383201e04c3fSmrg		alu.dst.chan = 0;
383301e04c3fSmrg		alu.dst.write = 1;
383401e04c3fSmrg		alu.last = 1;
383501e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
383601e04c3fSmrg			return r;
383701e04c3fSmrg
383801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
383901e04c3fSmrg		alu.op = ALU_OP2_AND_INT;
384001e04c3fSmrg		alu.src[0].sel = tmp;
384101e04c3fSmrg		alu.src[1].sel = ctx.face_gpr;
384201e04c3fSmrg		alu.src[1].chan = 2;
384301e04c3fSmrg		alu.dst.sel = ctx.face_gpr;
384401e04c3fSmrg		alu.dst.chan = 2;
384501e04c3fSmrg		alu.dst.write = 1;
384601e04c3fSmrg		alu.last = 1;
384701e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
384801e04c3fSmrg			return r;
384901e04c3fSmrg	}
385001e04c3fSmrg
385101e04c3fSmrg	if (ctx.fragcoord_input >= 0) {
385201e04c3fSmrg		if (ctx.bc->chip_class == CAYMAN) {
385301e04c3fSmrg			for (j = 0 ; j < 4; j++) {
3854af69d88dSmrg				struct r600_bytecode_alu alu;
3855af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3856af69d88dSmrg				alu.op = ALU_OP1_RECIP_IEEE;
3857af69d88dSmrg				alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
3858af69d88dSmrg				alu.src[0].chan = 3;
3859af69d88dSmrg
3860af69d88dSmrg				alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
386101e04c3fSmrg				alu.dst.chan = j;
386201e04c3fSmrg				alu.dst.write = (j == 3);
386301e04c3fSmrg				alu.last = (j == 3);
3864af69d88dSmrg				if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
3865af69d88dSmrg					return r;
38663464ebd5Sriastradh			}
386701e04c3fSmrg		} else {
386801e04c3fSmrg			struct r600_bytecode_alu alu;
386901e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
387001e04c3fSmrg			alu.op = ALU_OP1_RECIP_IEEE;
387101e04c3fSmrg			alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
387201e04c3fSmrg			alu.src[0].chan = 3;
387301e04c3fSmrg
387401e04c3fSmrg			alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
387501e04c3fSmrg			alu.dst.chan = 3;
387601e04c3fSmrg			alu.dst.write = 1;
387701e04c3fSmrg			alu.last = 1;
387801e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
387901e04c3fSmrg				return r;
38803464ebd5Sriastradh		}
388101e04c3fSmrg	}
3882af69d88dSmrg
388301e04c3fSmrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
388401e04c3fSmrg		struct r600_bytecode_alu alu;
388501e04c3fSmrg		int r;
3886af69d88dSmrg
388701e04c3fSmrg		/* GS thread with no output workaround - emit a cut at start of GS */
388801e04c3fSmrg		if (ctx.bc->chip_class == R600)
388901e04c3fSmrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
389001e04c3fSmrg
389101e04c3fSmrg		for (j = 0; j < 4; j++) {
3892af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3893af69d88dSmrg			alu.op = ALU_OP1_MOV;
3894af69d88dSmrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
3895af69d88dSmrg			alu.src[0].value = 0;
389601e04c3fSmrg			alu.dst.sel = ctx.gs_export_gpr_tregs[j];
3897af69d88dSmrg			alu.dst.write = 1;
3898af69d88dSmrg			alu.last = 1;
3899af69d88dSmrg			r = r600_bytecode_add_alu(ctx.bc, &alu);
3900af69d88dSmrg			if (r)
3901af69d88dSmrg				return r;
3902af69d88dSmrg		}
390301e04c3fSmrg
390401e04c3fSmrg		if (ctx.shader->gs_tri_strip_adj_fix) {
390501e04c3fSmrg			r = single_alu_op2(&ctx, ALU_OP2_AND_INT,
390601e04c3fSmrg					   ctx.gs_rotated_input[0], 2,
390701e04c3fSmrg					   0, 2,
390801e04c3fSmrg					   V_SQ_ALU_SRC_LITERAL, 1);
390901e04c3fSmrg			if (r)
3910af69d88dSmrg				return r;
3911af69d88dSmrg
391201e04c3fSmrg			for (i = 0; i < 6; i++) {
391301e04c3fSmrg				int rotated = (i + 4) % 6;
391401e04c3fSmrg				int offset_reg = i / 3;
391501e04c3fSmrg				int offset_chan = i % 3;
391601e04c3fSmrg				int rotated_offset_reg = rotated / 3;
391701e04c3fSmrg				int rotated_offset_chan = rotated % 3;
391801e04c3fSmrg
391901e04c3fSmrg				if (offset_reg == 0 && offset_chan == 2)
392001e04c3fSmrg					offset_chan = 3;
392101e04c3fSmrg				if (rotated_offset_reg == 0 && rotated_offset_chan == 2)
392201e04c3fSmrg					rotated_offset_chan = 3;
392301e04c3fSmrg
392401e04c3fSmrg				r = single_alu_op3(&ctx, ALU_OP3_CNDE_INT,
392501e04c3fSmrg						   ctx.gs_rotated_input[offset_reg], offset_chan,
392601e04c3fSmrg						   ctx.gs_rotated_input[0], 2,
392701e04c3fSmrg						   offset_reg, offset_chan,
392801e04c3fSmrg						   rotated_offset_reg, rotated_offset_chan);
3929af69d88dSmrg				if (r)
393001e04c3fSmrg					return r;
393101e04c3fSmrg			}
393201e04c3fSmrg		}
393301e04c3fSmrg	}
393401e04c3fSmrg
393501e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
393601e04c3fSmrg		r600_fetch_tess_io_info(&ctx);
393701e04c3fSmrg
393801e04c3fSmrg	if (shader->two_side && ctx.colors_used) {
393901e04c3fSmrg		if ((r = process_twoside_color_inputs(&ctx)))
394001e04c3fSmrg			return r;
394101e04c3fSmrg	}
394201e04c3fSmrg
394301e04c3fSmrg	tgsi_parse_init(&ctx.parse, tokens);
394401e04c3fSmrg	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
394501e04c3fSmrg		tgsi_parse_token(&ctx.parse);
394601e04c3fSmrg		switch (ctx.parse.FullToken.Token.Type) {
394701e04c3fSmrg		case TGSI_TOKEN_TYPE_INSTRUCTION:
394801e04c3fSmrg			r = tgsi_is_supported(&ctx);
394901e04c3fSmrg			if (r)
395001e04c3fSmrg				goto out_err;
395101e04c3fSmrg			ctx.max_driver_temp_used = 0;
395201e04c3fSmrg			/* reserve first tmp for everyone */
395301e04c3fSmrg			r600_get_temp(&ctx);
3954af69d88dSmrg
395501e04c3fSmrg			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
395601e04c3fSmrg			if ((r = tgsi_split_constant(&ctx)))
395701e04c3fSmrg				goto out_err;
395801e04c3fSmrg			if ((r = tgsi_split_literal_constant(&ctx)))
395901e04c3fSmrg				goto out_err;
396001e04c3fSmrg			if (ctx.type == PIPE_SHADER_GEOMETRY) {
396101e04c3fSmrg				if ((r = tgsi_split_gs_inputs(&ctx)))
3962af69d88dSmrg					goto out_err;
396301e04c3fSmrg			} else if (lds_inputs) {
396401e04c3fSmrg				if ((r = tgsi_split_lds_inputs(&ctx)))
3965af69d88dSmrg					goto out_err;
396601e04c3fSmrg			}
396701e04c3fSmrg			if (ctx.bc->chip_class == CAYMAN)
396801e04c3fSmrg				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
396901e04c3fSmrg			else if (ctx.bc->chip_class >= EVERGREEN)
397001e04c3fSmrg				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
397101e04c3fSmrg			else
397201e04c3fSmrg				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
397301e04c3fSmrg
397401e04c3fSmrg			ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise;
397501e04c3fSmrg
397601e04c3fSmrg			r = ctx.inst_info->process(&ctx);
397701e04c3fSmrg			if (r)
397801e04c3fSmrg				goto out_err;
397901e04c3fSmrg
398001e04c3fSmrg			if (ctx.type == PIPE_SHADER_TESS_CTRL) {
398101e04c3fSmrg				r = r600_store_tcs_output(&ctx);
3982af69d88dSmrg				if (r)
3983af69d88dSmrg					goto out_err;
39843464ebd5Sriastradh			}
398501e04c3fSmrg			break;
398601e04c3fSmrg		default:
398701e04c3fSmrg			break;
39883464ebd5Sriastradh		}
39893464ebd5Sriastradh	}
3990af69d88dSmrg
3991af69d88dSmrg	/* Reset the temporary register counter. */
3992af69d88dSmrg	ctx.max_driver_temp_used = 0;
3993af69d88dSmrg
3994af69d88dSmrg	noutput = shader->noutput;
3995af69d88dSmrg
3996af69d88dSmrg	if (!ring_outputs && ctx.clip_vertex_write) {
3997af69d88dSmrg		unsigned clipdist_temp[2];
3998af69d88dSmrg
3999af69d88dSmrg		clipdist_temp[0] = r600_get_temp(&ctx);
4000af69d88dSmrg		clipdist_temp[1] = r600_get_temp(&ctx);
4001af69d88dSmrg
4002af69d88dSmrg		/* need to convert a clipvertex write into clipdistance writes and not export
4003af69d88dSmrg		   the clip vertex anymore */
4004af69d88dSmrg
4005af69d88dSmrg		memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
4006af69d88dSmrg		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
4007af69d88dSmrg		shader->output[noutput].gpr = clipdist_temp[0];
4008af69d88dSmrg		noutput++;
4009af69d88dSmrg		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
4010af69d88dSmrg		shader->output[noutput].gpr = clipdist_temp[1];
4011af69d88dSmrg		noutput++;
4012af69d88dSmrg
4013af69d88dSmrg		/* reset spi_sid for clipvertex output to avoid confusing spi */
4014af69d88dSmrg		shader->output[ctx.cv_output].spi_sid = 0;
4015af69d88dSmrg
4016af69d88dSmrg		shader->clip_dist_write = 0xFF;
401701e04c3fSmrg		shader->cc_dist_mask = 0xFF;
4018af69d88dSmrg
4019af69d88dSmrg		for (i = 0; i < 8; i++) {
4020af69d88dSmrg			int oreg = i >> 2;
4021af69d88dSmrg			int ochan = i & 3;
4022af69d88dSmrg
4023af69d88dSmrg			for (j = 0; j < 4; j++) {
4024af69d88dSmrg				struct r600_bytecode_alu alu;
4025af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4026af69d88dSmrg				alu.op = ALU_OP2_DOT4;
4027af69d88dSmrg				alu.src[0].sel = shader->output[ctx.cv_output].gpr;
4028af69d88dSmrg				alu.src[0].chan = j;
4029af69d88dSmrg
4030af69d88dSmrg				alu.src[1].sel = 512 + i;
403101e04c3fSmrg				alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
4032af69d88dSmrg				alu.src[1].chan = j;
4033af69d88dSmrg
4034af69d88dSmrg				alu.dst.sel = clipdist_temp[oreg];
4035af69d88dSmrg				alu.dst.chan = j;
4036af69d88dSmrg				alu.dst.write = (j == ochan);
4037af69d88dSmrg				if (j == 3)
4038af69d88dSmrg					alu.last = 1;
403901e04c3fSmrg				r = r600_bytecode_add_alu(ctx.bc, &alu);
4040af69d88dSmrg				if (r)
4041af69d88dSmrg					return r;
40423464ebd5Sriastradh			}
40433464ebd5Sriastradh		}
40443464ebd5Sriastradh	}
40453464ebd5Sriastradh
4046af69d88dSmrg	/* Add stream outputs. */
404701e04c3fSmrg	if (so.num_outputs) {
404801e04c3fSmrg		bool emit = false;
404901e04c3fSmrg		if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX)
405001e04c3fSmrg			emit = true;
405101e04c3fSmrg		if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL)
405201e04c3fSmrg			emit = true;
405301e04c3fSmrg		if (emit)
405401e04c3fSmrg			emit_streamout(&ctx, &so, -1, NULL);
405501e04c3fSmrg	}
405601e04c3fSmrg	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
4057af69d88dSmrg	convert_edgeflag_to_int(&ctx);
4058af69d88dSmrg
405901e04c3fSmrg	if (ctx.type == PIPE_SHADER_TESS_CTRL)
406001e04c3fSmrg		r600_emit_tess_factor(&ctx);
406101e04c3fSmrg
406201e04c3fSmrg	if (lds_outputs) {
406301e04c3fSmrg		if (ctx.type == PIPE_SHADER_VERTEX) {
406401e04c3fSmrg			if (ctx.shader->noutput)
406501e04c3fSmrg				emit_lds_vs_writes(&ctx);
406601e04c3fSmrg		}
406701e04c3fSmrg	} else if (ring_outputs) {
406801e04c3fSmrg		if (shader->vs_as_es || shader->tes_as_es) {
406901e04c3fSmrg			ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
407001e04c3fSmrg			ctx.gs_export_gpr_tregs[1] = -1;
407101e04c3fSmrg			ctx.gs_export_gpr_tregs[2] = -1;
407201e04c3fSmrg			ctx.gs_export_gpr_tregs[3] = -1;
407301e04c3fSmrg
407401e04c3fSmrg			emit_gs_ring_writes(&ctx, &so, -1, FALSE);
407501e04c3fSmrg		}
4076af69d88dSmrg	} else {
4077af69d88dSmrg		/* Export output */
4078af69d88dSmrg		next_clip_base = shader->vs_out_misc_write ? 62 : 61;
4079af69d88dSmrg
4080af69d88dSmrg		for (i = 0, j = 0; i < noutput; i++, j++) {
4081af69d88dSmrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4082af69d88dSmrg			output[j].gpr = shader->output[i].gpr;
4083af69d88dSmrg			output[j].elem_size = 3;
4084af69d88dSmrg			output[j].swizzle_x = 0;
4085af69d88dSmrg			output[j].swizzle_y = 1;
4086af69d88dSmrg			output[j].swizzle_z = 2;
4087af69d88dSmrg			output[j].swizzle_w = 3;
4088af69d88dSmrg			output[j].burst_count = 1;
408901e04c3fSmrg			output[j].type = 0xffffffff;
4090af69d88dSmrg			output[j].op = CF_OP_EXPORT;
4091af69d88dSmrg			switch (ctx.type) {
409201e04c3fSmrg			case PIPE_SHADER_VERTEX:
409301e04c3fSmrg			case PIPE_SHADER_TESS_EVAL:
4094af69d88dSmrg				switch (shader->output[i].name) {
4095af69d88dSmrg				case TGSI_SEMANTIC_POSITION:
4096af69d88dSmrg					output[j].array_base = 60;
4097af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4098af69d88dSmrg					pos_emitted = true;
4099af69d88dSmrg					break;
4100af69d88dSmrg
4101af69d88dSmrg				case TGSI_SEMANTIC_PSIZE:
4102af69d88dSmrg					output[j].array_base = 61;
4103af69d88dSmrg					output[j].swizzle_y = 7;
4104af69d88dSmrg					output[j].swizzle_z = 7;
4105af69d88dSmrg					output[j].swizzle_w = 7;
4106af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4107af69d88dSmrg					pos_emitted = true;
4108af69d88dSmrg					break;
4109af69d88dSmrg				case TGSI_SEMANTIC_EDGEFLAG:
4110af69d88dSmrg					output[j].array_base = 61;
4111af69d88dSmrg					output[j].swizzle_x = 7;
4112af69d88dSmrg					output[j].swizzle_y = 0;
4113af69d88dSmrg					output[j].swizzle_z = 7;
4114af69d88dSmrg					output[j].swizzle_w = 7;
4115af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4116af69d88dSmrg					pos_emitted = true;
4117af69d88dSmrg					break;
4118af69d88dSmrg				case TGSI_SEMANTIC_LAYER:
4119af69d88dSmrg					/* spi_sid is 0 for outputs that are
4120af69d88dSmrg					 * not consumed by PS */
4121af69d88dSmrg					if (shader->output[i].spi_sid) {
4122af69d88dSmrg						output[j].array_base = next_param_base++;
4123af69d88dSmrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4124af69d88dSmrg						j++;
4125af69d88dSmrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4126af69d88dSmrg					}
4127af69d88dSmrg					output[j].array_base = 61;
4128af69d88dSmrg					output[j].swizzle_x = 7;
4129af69d88dSmrg					output[j].swizzle_y = 7;
4130af69d88dSmrg					output[j].swizzle_z = 0;
4131af69d88dSmrg					output[j].swizzle_w = 7;
4132af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4133af69d88dSmrg					pos_emitted = true;
4134af69d88dSmrg					break;
4135af69d88dSmrg				case TGSI_SEMANTIC_VIEWPORT_INDEX:
4136af69d88dSmrg					/* spi_sid is 0 for outputs that are
4137af69d88dSmrg					 * not consumed by PS */
4138af69d88dSmrg					if (shader->output[i].spi_sid) {
4139af69d88dSmrg						output[j].array_base = next_param_base++;
4140af69d88dSmrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4141af69d88dSmrg						j++;
4142af69d88dSmrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4143af69d88dSmrg					}
4144af69d88dSmrg					output[j].array_base = 61;
4145af69d88dSmrg					output[j].swizzle_x = 7;
4146af69d88dSmrg					output[j].swizzle_y = 7;
4147af69d88dSmrg					output[j].swizzle_z = 7;
4148af69d88dSmrg					output[j].swizzle_w = 0;
4149af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4150af69d88dSmrg					pos_emitted = true;
4151af69d88dSmrg					break;
4152af69d88dSmrg				case TGSI_SEMANTIC_CLIPVERTEX:
4153af69d88dSmrg					j--;
4154af69d88dSmrg					break;
4155af69d88dSmrg				case TGSI_SEMANTIC_CLIPDIST:
4156af69d88dSmrg					output[j].array_base = next_clip_base++;
4157af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4158af69d88dSmrg					pos_emitted = true;
4159af69d88dSmrg					/* spi_sid is 0 for clipdistance outputs that were generated
4160af69d88dSmrg					 * for clipvertex - we don't need to pass them to PS */
4161af69d88dSmrg					if (shader->output[i].spi_sid) {
4162af69d88dSmrg						j++;
4163af69d88dSmrg						/* duplicate it as PARAM to pass to the pixel shader */
4164af69d88dSmrg						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
4165af69d88dSmrg						output[j].array_base = next_param_base++;
4166af69d88dSmrg						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4167af69d88dSmrg					}
4168af69d88dSmrg					break;
4169af69d88dSmrg				case TGSI_SEMANTIC_FOG:
4170af69d88dSmrg					output[j].swizzle_y = 4; /* 0 */
4171af69d88dSmrg					output[j].swizzle_z = 4; /* 0 */
4172af69d88dSmrg					output[j].swizzle_w = 5; /* 1 */
4173af69d88dSmrg					break;
417401e04c3fSmrg				case TGSI_SEMANTIC_PRIMID:
417501e04c3fSmrg					output[j].swizzle_x = 2;
417601e04c3fSmrg					output[j].swizzle_y = 4; /* 0 */
417701e04c3fSmrg					output[j].swizzle_z = 4; /* 0 */
417801e04c3fSmrg					output[j].swizzle_w = 4; /* 0 */
417901e04c3fSmrg					break;
4180af69d88dSmrg				}
418101e04c3fSmrg
4182af69d88dSmrg				break;
418301e04c3fSmrg			case PIPE_SHADER_FRAGMENT:
4184af69d88dSmrg				if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
4185af69d88dSmrg					/* never export more colors than the number of CBs */
4186af69d88dSmrg					if (shader->output[i].sid >= max_color_exports) {
4187af69d88dSmrg						/* skip export */
4188af69d88dSmrg						j--;
4189af69d88dSmrg						continue;
4190af69d88dSmrg					}
419101e04c3fSmrg					output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
4192af69d88dSmrg					output[j].array_base = shader->output[i].sid;
4193af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4194af69d88dSmrg					shader->nr_ps_color_exports++;
419501e04c3fSmrg					shader->ps_color_export_mask |= (0xf << (shader->output[i].sid * 4));
419601e04c3fSmrg
419701e04c3fSmrg					/* If the i-th target format is set, all previous target formats must
419801e04c3fSmrg					 * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well.
419901e04c3fSmrg					 */
420001e04c3fSmrg					if (shader->output[i].sid > 0)
420101e04c3fSmrg						for (unsigned x = 0; x < shader->output[i].sid; x++)
420201e04c3fSmrg							shader->ps_color_export_mask |= (1 << (x*4));
420301e04c3fSmrg
420401e04c3fSmrg					if (shader->output[i].sid > shader->ps_export_highest)
420501e04c3fSmrg						shader->ps_export_highest = shader->output[i].sid;
4206af69d88dSmrg					if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
4207af69d88dSmrg						for (k = 1; k < max_color_exports; k++) {
4208af69d88dSmrg							j++;
4209af69d88dSmrg							memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4210af69d88dSmrg							output[j].gpr = shader->output[i].gpr;
4211af69d88dSmrg							output[j].elem_size = 3;
4212af69d88dSmrg							output[j].swizzle_x = 0;
4213af69d88dSmrg							output[j].swizzle_y = 1;
4214af69d88dSmrg							output[j].swizzle_z = 2;
421501e04c3fSmrg							output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
4216af69d88dSmrg							output[j].burst_count = 1;
4217af69d88dSmrg							output[j].array_base = k;
4218af69d88dSmrg							output[j].op = CF_OP_EXPORT;
4219af69d88dSmrg							output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4220af69d88dSmrg							shader->nr_ps_color_exports++;
422101e04c3fSmrg							if (k > shader->ps_export_highest)
422201e04c3fSmrg								shader->ps_export_highest = k;
422301e04c3fSmrg							shader->ps_color_export_mask |= (0xf << (j * 4));
4224af69d88dSmrg						}
4225af69d88dSmrg					}
4226af69d88dSmrg				} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
4227af69d88dSmrg					output[j].array_base = 61;
4228af69d88dSmrg					output[j].swizzle_x = 2;
4229af69d88dSmrg					output[j].swizzle_y = 7;
4230af69d88dSmrg					output[j].swizzle_z = output[j].swizzle_w = 7;
4231af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4232af69d88dSmrg				} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
4233af69d88dSmrg					output[j].array_base = 61;
4234af69d88dSmrg					output[j].swizzle_x = 7;
4235af69d88dSmrg					output[j].swizzle_y = 1;
4236af69d88dSmrg					output[j].swizzle_z = output[j].swizzle_w = 7;
4237af69d88dSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
423801e04c3fSmrg				} else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
423901e04c3fSmrg					output[j].array_base = 61;
424001e04c3fSmrg					output[j].swizzle_x = 7;
424101e04c3fSmrg					output[j].swizzle_y = 7;
424201e04c3fSmrg					output[j].swizzle_z = 0;
424301e04c3fSmrg					output[j].swizzle_w = 7;
424401e04c3fSmrg					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4245af69d88dSmrg				} else {
4246af69d88dSmrg					R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
4247af69d88dSmrg					r = -EINVAL;
4248af69d88dSmrg					goto out_err;
4249af69d88dSmrg				}
4250af69d88dSmrg				break;
425101e04c3fSmrg			case PIPE_SHADER_TESS_CTRL:
425201e04c3fSmrg				break;
4253af69d88dSmrg			default:
4254af69d88dSmrg				R600_ERR("unsupported processor type %d\n", ctx.type);
4255af69d88dSmrg				r = -EINVAL;
4256af69d88dSmrg				goto out_err;
4257af69d88dSmrg			}
4258af69d88dSmrg
425901e04c3fSmrg			if (output[j].type == 0xffffffff) {
4260af69d88dSmrg				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4261af69d88dSmrg				output[j].array_base = next_param_base++;
4262af69d88dSmrg			}
4263af69d88dSmrg		}
4264af69d88dSmrg
4265af69d88dSmrg		/* add fake position export */
426601e04c3fSmrg		if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) {
4267af69d88dSmrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4268af69d88dSmrg			output[j].gpr = 0;
4269af69d88dSmrg			output[j].elem_size = 3;
4270af69d88dSmrg			output[j].swizzle_x = 7;
4271af69d88dSmrg			output[j].swizzle_y = 7;
4272af69d88dSmrg			output[j].swizzle_z = 7;
4273af69d88dSmrg			output[j].swizzle_w = 7;
4274af69d88dSmrg			output[j].burst_count = 1;
4275af69d88dSmrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
4276af69d88dSmrg			output[j].array_base = 60;
4277af69d88dSmrg			output[j].op = CF_OP_EXPORT;
4278af69d88dSmrg			j++;
4279af69d88dSmrg		}
4280af69d88dSmrg
4281af69d88dSmrg		/* add fake param output for vertex shader if no param is exported */
428201e04c3fSmrg		if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) {
4283af69d88dSmrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4284af69d88dSmrg			output[j].gpr = 0;
4285af69d88dSmrg			output[j].elem_size = 3;
4286af69d88dSmrg			output[j].swizzle_x = 7;
4287af69d88dSmrg			output[j].swizzle_y = 7;
4288af69d88dSmrg			output[j].swizzle_z = 7;
4289af69d88dSmrg			output[j].swizzle_w = 7;
4290af69d88dSmrg			output[j].burst_count = 1;
4291af69d88dSmrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
4292af69d88dSmrg			output[j].array_base = 0;
4293af69d88dSmrg			output[j].op = CF_OP_EXPORT;
4294af69d88dSmrg			j++;
4295af69d88dSmrg		}
4296af69d88dSmrg
4297af69d88dSmrg		/* add fake pixel export */
429801e04c3fSmrg		if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) {
4299af69d88dSmrg			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
4300af69d88dSmrg			output[j].gpr = 0;
4301af69d88dSmrg			output[j].elem_size = 3;
4302af69d88dSmrg			output[j].swizzle_x = 7;
4303af69d88dSmrg			output[j].swizzle_y = 7;
4304af69d88dSmrg			output[j].swizzle_z = 7;
4305af69d88dSmrg			output[j].swizzle_w = 7;
4306af69d88dSmrg			output[j].burst_count = 1;
4307af69d88dSmrg			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
4308af69d88dSmrg			output[j].array_base = 0;
4309af69d88dSmrg			output[j].op = CF_OP_EXPORT;
4310af69d88dSmrg			j++;
431101e04c3fSmrg			shader->nr_ps_color_exports++;
431201e04c3fSmrg			shader->ps_color_export_mask = 0xf;
4313af69d88dSmrg		}
4314af69d88dSmrg
4315af69d88dSmrg		noutput = j;
4316af69d88dSmrg
4317af69d88dSmrg		/* set export done on last export of each type */
431801e04c3fSmrg		for (k = noutput - 1, output_done = 0; k >= 0; k--) {
431901e04c3fSmrg			if (!(output_done & (1 << output[k].type))) {
432001e04c3fSmrg				output_done |= (1 << output[k].type);
432101e04c3fSmrg				output[k].op = CF_OP_EXPORT_DONE;
4322af69d88dSmrg			}
4323af69d88dSmrg		}
4324af69d88dSmrg		/* add output to bytecode */
432501e04c3fSmrg		for (i = 0; i < noutput; i++) {
432601e04c3fSmrg			r = r600_bytecode_add_output(ctx.bc, &output[i]);
432701e04c3fSmrg			if (r)
432801e04c3fSmrg				goto out_err;
4329af69d88dSmrg		}
4330af69d88dSmrg	}
4331af69d88dSmrg
4332af69d88dSmrg	/* add program end */
433301e04c3fSmrg	if (ctx.bc->chip_class == CAYMAN)
433401e04c3fSmrg		cm_bytecode_add_cf_end(ctx.bc);
433501e04c3fSmrg	else {
433601e04c3fSmrg		const struct cf_op_info *last = NULL;
4337af69d88dSmrg
433801e04c3fSmrg		if (ctx.bc->cf_last)
433901e04c3fSmrg			last = r600_isa_cf(ctx.bc->cf_last->op);
4340af69d88dSmrg
434101e04c3fSmrg		/* alu clause instructions don't have EOP bit, so add NOP */
434201e04c3fSmrg		if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_POP)
434301e04c3fSmrg			r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
4344af69d88dSmrg
434501e04c3fSmrg		ctx.bc->cf_last->end_of_program = 1;
4346af69d88dSmrg	}
4347af69d88dSmrg
4348af69d88dSmrg	/* check GPR limit - we have 124 = 128 - 4
4349af69d88dSmrg	 * (4 are reserved as alu clause temporary registers) */
4350af69d88dSmrg	if (ctx.bc->ngpr > 124) {
4351af69d88dSmrg		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
4352af69d88dSmrg		r = -ENOMEM;
4353af69d88dSmrg		goto out_err;
4354af69d88dSmrg	}
4355af69d88dSmrg
435601e04c3fSmrg	if (ctx.type == PIPE_SHADER_GEOMETRY) {
4357af69d88dSmrg		if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
4358af69d88dSmrg			return r;
4359af69d88dSmrg	}
4360af69d88dSmrg
436101e04c3fSmrg	free(ctx.spilled_arrays);
436201e04c3fSmrg	free(ctx.array_infos);
4363af69d88dSmrg	free(ctx.literals);
4364af69d88dSmrg	tgsi_parse_free(&ctx.parse);
4365af69d88dSmrg	return 0;
43663464ebd5Sriastradhout_err:
436701e04c3fSmrg	free(ctx.spilled_arrays);
436801e04c3fSmrg	free(ctx.array_infos);
43693464ebd5Sriastradh	free(ctx.literals);
43703464ebd5Sriastradh	tgsi_parse_free(&ctx.parse);
43713464ebd5Sriastradh	return r;
43723464ebd5Sriastradh}
43733464ebd5Sriastradh
43743464ebd5Sriastradhstatic int tgsi_unsupported(struct r600_shader_ctx *ctx)
43753464ebd5Sriastradh{
437601e04c3fSmrg	const unsigned tgsi_opcode =
437701e04c3fSmrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode;
43783464ebd5Sriastradh	R600_ERR("%s tgsi opcode unsupported\n",
437901e04c3fSmrg		 tgsi_get_opcode_name(tgsi_opcode));
43803464ebd5Sriastradh	return -EINVAL;
43813464ebd5Sriastradh}
43823464ebd5Sriastradh
438301e04c3fSmrgstatic int tgsi_end(struct r600_shader_ctx *ctx UNUSED)
43843464ebd5Sriastradh{
43853464ebd5Sriastradh	return 0;
43863464ebd5Sriastradh}
43873464ebd5Sriastradh
4388af69d88dSmrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
43893464ebd5Sriastradh			const struct r600_shader_src *shader_src,
43903464ebd5Sriastradh			unsigned chan)
43913464ebd5Sriastradh{
43923464ebd5Sriastradh	bc_src->sel = shader_src->sel;
43933464ebd5Sriastradh	bc_src->chan = shader_src->swizzle[chan];
43943464ebd5Sriastradh	bc_src->neg = shader_src->neg;
43953464ebd5Sriastradh	bc_src->abs = shader_src->abs;
43963464ebd5Sriastradh	bc_src->rel = shader_src->rel;
43973464ebd5Sriastradh	bc_src->value = shader_src->value[bc_src->chan];
4398af69d88dSmrg	bc_src->kc_bank = shader_src->kc_bank;
439901e04c3fSmrg	bc_src->kc_rel = shader_src->kc_rel;
44003464ebd5Sriastradh}
44013464ebd5Sriastradh
4402af69d88dSmrgstatic void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
44033464ebd5Sriastradh{
44043464ebd5Sriastradh	bc_src->abs = 1;
44053464ebd5Sriastradh	bc_src->neg = 0;
44063464ebd5Sriastradh}
44073464ebd5Sriastradh
4408af69d88dSmrgstatic void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
44093464ebd5Sriastradh{
44103464ebd5Sriastradh	bc_src->neg = !bc_src->neg;
44113464ebd5Sriastradh}
44123464ebd5Sriastradh
44133464ebd5Sriastradhstatic void tgsi_dst(struct r600_shader_ctx *ctx,
44143464ebd5Sriastradh		     const struct tgsi_full_dst_register *tgsi_dst,
44153464ebd5Sriastradh		     unsigned swizzle,
4416af69d88dSmrg		     struct r600_bytecode_alu_dst *r600_dst)
44173464ebd5Sriastradh{
44183464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
44193464ebd5Sriastradh
442001e04c3fSmrg	if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) {
442101e04c3fSmrg		bool spilled;
442201e04c3fSmrg		unsigned idx;
442301e04c3fSmrg
442401e04c3fSmrg		idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_dst->Register.Index, &spilled);
442501e04c3fSmrg
442601e04c3fSmrg		if (spilled) {
442701e04c3fSmrg			struct r600_bytecode_output cf;
442801e04c3fSmrg			int reg = 0;
442901e04c3fSmrg			int r;
443001e04c3fSmrg			bool add_pending_output = true;
443101e04c3fSmrg
443201e04c3fSmrg			memset(&cf, 0, sizeof(struct r600_bytecode_output));
443301e04c3fSmrg			get_spilled_array_base_and_size(ctx, tgsi_dst->Register.Index,
443401e04c3fSmrg				&cf.array_base, &cf.array_size);
443501e04c3fSmrg
443601e04c3fSmrg			/* If no component has spilled, reserve a register and add the spill code
443701e04c3fSmrg			 *  ctx->bc->n_pending_outputs is cleared after each instruction group */
443801e04c3fSmrg			if (ctx->bc->n_pending_outputs == 0) {
443901e04c3fSmrg				reg = r600_get_temp(ctx);
444001e04c3fSmrg			} else {
444101e04c3fSmrg				/* If we are already spilling and the output address is the same like
444201e04c3fSmrg				* before then just reuse the same slot */
444301e04c3fSmrg				struct r600_bytecode_output *tmpl = &ctx->bc->pending_outputs[ctx->bc->n_pending_outputs-1];
444401e04c3fSmrg				if ((cf.array_base + idx == tmpl->array_base) ||
444501e04c3fSmrg				    (cf.array_base == tmpl->array_base &&
444601e04c3fSmrg				     tmpl->index_gpr == ctx->bc->ar_reg &&
444701e04c3fSmrg				     tgsi_dst->Register.Indirect)) {
444801e04c3fSmrg					reg = ctx->bc->pending_outputs[0].gpr;
444901e04c3fSmrg					add_pending_output = false;
445001e04c3fSmrg				} else {
445101e04c3fSmrg					reg = r600_get_temp(ctx);
445201e04c3fSmrg				}
445301e04c3fSmrg			}
445401e04c3fSmrg
445501e04c3fSmrg			r600_dst->sel = reg;
445601e04c3fSmrg			r600_dst->chan = swizzle;
445701e04c3fSmrg			r600_dst->write = 1;
445801e04c3fSmrg			if (inst->Instruction.Saturate) {
445901e04c3fSmrg				r600_dst->clamp = 1;
446001e04c3fSmrg			}
446101e04c3fSmrg
446201e04c3fSmrg			/* Add new outputs as pending */
446301e04c3fSmrg			if (add_pending_output) {
446401e04c3fSmrg				cf.op = CF_OP_MEM_SCRATCH;
446501e04c3fSmrg				cf.elem_size = 3;
446601e04c3fSmrg				cf.gpr = reg;
446701e04c3fSmrg				cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
446801e04c3fSmrg				cf.mark = 1;
446901e04c3fSmrg				cf.comp_mask = inst->Dst[0].Register.WriteMask;
447001e04c3fSmrg				cf.swizzle_x = 0;
447101e04c3fSmrg				cf.swizzle_y = 1;
447201e04c3fSmrg				cf.swizzle_z = 2;
447301e04c3fSmrg				cf.swizzle_w = 3;
447401e04c3fSmrg				cf.burst_count = 1;
447501e04c3fSmrg
447601e04c3fSmrg				if (tgsi_dst->Register.Indirect) {
447701e04c3fSmrg					if (ctx->bc->chip_class < R700)
447801e04c3fSmrg						cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
447901e04c3fSmrg					else
448001e04c3fSmrg						cf.type = 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
448101e04c3fSmrg					cf.index_gpr = ctx->bc->ar_reg;
448201e04c3fSmrg			}
448301e04c3fSmrg			else {
448401e04c3fSmrg				cf.array_base += idx;
448501e04c3fSmrg				cf.array_size = 0;
448601e04c3fSmrg			}
448701e04c3fSmrg
448801e04c3fSmrg			r = r600_bytecode_add_pending_output(ctx->bc, &cf);
448901e04c3fSmrg			if (r)
449001e04c3fSmrg				return;
449101e04c3fSmrg
449201e04c3fSmrg			if (ctx->bc->chip_class >= R700)
449301e04c3fSmrg				r600_bytecode_need_wait_ack(ctx->bc, true);
449401e04c3fSmrg			}
449501e04c3fSmrg			return;
449601e04c3fSmrg		}
449701e04c3fSmrg		else {
449801e04c3fSmrg			r600_dst->sel = idx;
449901e04c3fSmrg		}
450001e04c3fSmrg	}
450101e04c3fSmrg	else {
450201e04c3fSmrg		r600_dst->sel = tgsi_dst->Register.Index;
450301e04c3fSmrg		r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
450401e04c3fSmrg	}
45053464ebd5Sriastradh	r600_dst->chan = swizzle;
45063464ebd5Sriastradh	r600_dst->write = 1;
45073464ebd5Sriastradh	if (inst->Instruction.Saturate) {
45083464ebd5Sriastradh		r600_dst->clamp = 1;
45093464ebd5Sriastradh	}
451001e04c3fSmrg	if (ctx->type == PIPE_SHADER_TESS_CTRL) {
451101e04c3fSmrg		if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
451201e04c3fSmrg			return;
45133464ebd5Sriastradh		}
45143464ebd5Sriastradh	}
451501e04c3fSmrg	if (tgsi_dst->Register.Indirect)
451601e04c3fSmrg		r600_dst->rel = V_SQ_REL_RELATIVE;
451701e04c3fSmrg
45183464ebd5Sriastradh}
45193464ebd5Sriastradh
452001e04c3fSmrgstatic int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap, int dest_temp, int op_override)
45213464ebd5Sriastradh{
45223464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4523af69d88dSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
452401e04c3fSmrg	struct r600_bytecode_alu alu;
4525af69d88dSmrg	int i, j, r, lasti = tgsi_last_instruction(write_mask);
452601e04c3fSmrg	int use_tmp = 0;
452701e04c3fSmrg	int swizzle_x = inst->Src[0].Register.SwizzleX;
452801e04c3fSmrg
452901e04c3fSmrg	if (singledest) {
453001e04c3fSmrg		switch (write_mask) {
453101e04c3fSmrg		case 0x1:
453201e04c3fSmrg			if (swizzle_x == 2) {
453301e04c3fSmrg				write_mask = 0xc;
453401e04c3fSmrg				use_tmp = 3;
453501e04c3fSmrg			} else
453601e04c3fSmrg				write_mask = 0x3;
453701e04c3fSmrg			break;
453801e04c3fSmrg		case 0x2:
453901e04c3fSmrg			if (swizzle_x == 2) {
454001e04c3fSmrg				write_mask = 0xc;
454101e04c3fSmrg				use_tmp = 3;
454201e04c3fSmrg			} else {
454301e04c3fSmrg				write_mask = 0x3;
454401e04c3fSmrg				use_tmp = 1;
454501e04c3fSmrg			}
454601e04c3fSmrg			break;
454701e04c3fSmrg		case 0x4:
454801e04c3fSmrg			if (swizzle_x == 0) {
454901e04c3fSmrg				write_mask = 0x3;
455001e04c3fSmrg				use_tmp = 1;
455101e04c3fSmrg			} else
455201e04c3fSmrg				write_mask = 0xc;
455301e04c3fSmrg			break;
455401e04c3fSmrg		case 0x8:
455501e04c3fSmrg			if (swizzle_x == 0) {
455601e04c3fSmrg				write_mask = 0x3;
455701e04c3fSmrg				use_tmp = 1;
455801e04c3fSmrg			} else {
455901e04c3fSmrg				write_mask = 0xc;
456001e04c3fSmrg				use_tmp = 3;
456101e04c3fSmrg			}
456201e04c3fSmrg			break;
456301e04c3fSmrg		}
456401e04c3fSmrg	}
4565af69d88dSmrg
456601e04c3fSmrg	lasti = tgsi_last_instruction(write_mask);
4567af69d88dSmrg	for (i = 0; i <= lasti; i++) {
456801e04c3fSmrg
4569af69d88dSmrg		if (!(write_mask & (1 << i)))
45703464ebd5Sriastradh			continue;
45713464ebd5Sriastradh
4572af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
457301e04c3fSmrg
457401e04c3fSmrg		if (singledest) {
457501e04c3fSmrg			if (use_tmp || dest_temp) {
457601e04c3fSmrg				alu.dst.sel = use_tmp ? ctx->temp_reg : dest_temp;
457701e04c3fSmrg				alu.dst.chan = i;
457801e04c3fSmrg				alu.dst.write = 1;
457901e04c3fSmrg			} else {
458001e04c3fSmrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
458101e04c3fSmrg			}
458201e04c3fSmrg			if (i == 1 || i == 3)
458301e04c3fSmrg				alu.dst.write = 0;
4584af69d88dSmrg		} else
4585af69d88dSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
45863464ebd5Sriastradh
458701e04c3fSmrg		alu.op = op_override ? op_override : ctx->inst_info->op;
458801e04c3fSmrg		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
458901e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
459001e04c3fSmrg		} else if (!swap) {
45913464ebd5Sriastradh			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
459201e04c3fSmrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
45933464ebd5Sriastradh			}
45943464ebd5Sriastradh		} else {
459501e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
459601e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
45973464ebd5Sriastradh		}
459801e04c3fSmrg
45993464ebd5Sriastradh		/* handle some special cases */
460001e04c3fSmrg		if (i == 1 || i == 3) {
460101e04c3fSmrg			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
460201e04c3fSmrg			case TGSI_OPCODE_DABS:
460301e04c3fSmrg				r600_bytecode_src_set_abs(&alu.src[0]);
460401e04c3fSmrg				break;
460501e04c3fSmrg			default:
460601e04c3fSmrg				break;
460701e04c3fSmrg			}
46083464ebd5Sriastradh		}
460901e04c3fSmrg		if (i == lasti) {
46103464ebd5Sriastradh			alu.last = 1;
46113464ebd5Sriastradh		}
4612af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
46133464ebd5Sriastradh		if (r)
46143464ebd5Sriastradh			return r;
46153464ebd5Sriastradh	}
4616af69d88dSmrg
4617af69d88dSmrg	if (use_tmp) {
461801e04c3fSmrg		write_mask = inst->Dst[0].Register.WriteMask;
461901e04c3fSmrg
462001e04c3fSmrg		lasti = tgsi_last_instruction(write_mask);
4621af69d88dSmrg		/* move result from temp to dst */
4622af69d88dSmrg		for (i = 0; i <= lasti; i++) {
4623af69d88dSmrg			if (!(write_mask & (1 << i)))
4624af69d88dSmrg				continue;
4625af69d88dSmrg
4626af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4627af69d88dSmrg			alu.op = ALU_OP1_MOV;
462801e04c3fSmrg
462901e04c3fSmrg			if (dest_temp) {
463001e04c3fSmrg				alu.dst.sel = dest_temp;
463101e04c3fSmrg				alu.dst.chan = i;
463201e04c3fSmrg				alu.dst.write = 1;
463301e04c3fSmrg			} else
463401e04c3fSmrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4635af69d88dSmrg			alu.src[0].sel = ctx->temp_reg;
463601e04c3fSmrg			alu.src[0].chan = use_tmp - 1;
4637af69d88dSmrg			alu.last = (i == lasti);
4638af69d88dSmrg
4639af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4640af69d88dSmrg			if (r)
4641af69d88dSmrg				return r;
4642af69d88dSmrg		}
4643af69d88dSmrg	}
46443464ebd5Sriastradh	return 0;
46453464ebd5Sriastradh}
46463464ebd5Sriastradh
464701e04c3fSmrgstatic int tgsi_op2_64(struct r600_shader_ctx *ctx)
46483464ebd5Sriastradh{
464901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
465001e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
465101e04c3fSmrg	/* confirm writemasking */
465201e04c3fSmrg	if ((write_mask & 0x3) != 0x3 &&
465301e04c3fSmrg	    (write_mask & 0xc) != 0xc) {
465401e04c3fSmrg		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
465501e04c3fSmrg		return -1;
465601e04c3fSmrg	}
465701e04c3fSmrg	return tgsi_op2_64_params(ctx, false, false, 0, 0);
465801e04c3fSmrg}
465901e04c3fSmrg
/*
 * 64-bit op with a single 32-bit destination channel: forwards to
 * tgsi_op2_64_params() with singledest enabled, sources in order, no
 * dest_temp and no op override.
 */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	const bool singledest = true;
	const bool swap = false;

	return tgsi_op2_64_params(ctx, singledest, swap, 0, 0);
}
4664af69d88dSmrg
/*
 * Same as the single-destination 64-bit op, but with the two sources
 * swapped: forwards to tgsi_op2_64_params() with singledest and swap both
 * enabled, no dest_temp and no op override.
 */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	const bool singledest = true;
	const bool swap = true;

	return tgsi_op2_64_params(ctx, singledest, swap, 0, 0);
}
4669af69d88dSmrg
467001e04c3fSmrgstatic int tgsi_op3_64(struct r600_shader_ctx *ctx)
4671af69d88dSmrg{
4672af69d88dSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4673af69d88dSmrg	struct r600_bytecode_alu alu;
467401e04c3fSmrg	int i, j, r;
467501e04c3fSmrg	int lasti = 3;
467601e04c3fSmrg	int tmp = r600_get_temp(ctx);
4677af69d88dSmrg
4678af69d88dSmrg	for (i = 0; i < lasti + 1; i++) {
4679af69d88dSmrg
4680af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4681af69d88dSmrg		alu.op = ctx->inst_info->op;
468201e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
468301e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1);
468401e04c3fSmrg		}
4685af69d88dSmrg
468601e04c3fSmrg		if (inst->Dst[0].Register.WriteMask & (1 << i))
468701e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
468801e04c3fSmrg		else
468901e04c3fSmrg			alu.dst.sel = tmp;
4690af69d88dSmrg
469101e04c3fSmrg		alu.dst.chan = i;
469201e04c3fSmrg		alu.is_op3 = 1;
4693af69d88dSmrg		if (i == lasti) {
4694af69d88dSmrg			alu.last = 1;
4695af69d88dSmrg		}
4696af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4697af69d88dSmrg		if (r)
4698af69d88dSmrg			return r;
4699af69d88dSmrg	}
4700af69d88dSmrg	return 0;
47013464ebd5Sriastradh}
47023464ebd5Sriastradh
470301e04c3fSmrgstatic int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
47043464ebd5Sriastradh{
47053464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4706af69d88dSmrg	struct r600_bytecode_alu alu;
470701e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
470801e04c3fSmrg	int i, j, r, lasti = tgsi_last_instruction(write_mask);
470901e04c3fSmrg	/* use temp register if trans_only and more than one dst component */
471001e04c3fSmrg	int use_tmp = trans_only && (write_mask ^ (1 << lasti));
471101e04c3fSmrg	unsigned op = ctx->inst_info->op;
471201e04c3fSmrg
471301e04c3fSmrg	if (op == ALU_OP2_MUL_IEEE &&
471401e04c3fSmrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
471501e04c3fSmrg		op = ALU_OP2_MUL;
471601e04c3fSmrg
47177ec681f3Smrg	/* nir_to_tgsi lowers nir_op_isub to UADD + negate, since r600 doesn't support
47187ec681f3Smrg	 * source modifiers with integer ops we switch back to SUB_INT */
47197ec681f3Smrg	bool src1_neg = ctx->src[1].neg;
47207ec681f3Smrg	if (op == ALU_OP2_ADD_INT && src1_neg) {
47217ec681f3Smrg		src1_neg = false;
47227ec681f3Smrg		op = ALU_OP2_SUB_INT;
47237ec681f3Smrg	}
47247ec681f3Smrg
472501e04c3fSmrg	for (i = 0; i <= lasti; i++) {
472601e04c3fSmrg		if (!(write_mask & (1 << i)))
472701e04c3fSmrg			continue;
472801e04c3fSmrg
4729af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
473001e04c3fSmrg		if (use_tmp) {
473101e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
473201e04c3fSmrg			alu.dst.chan = i;
473301e04c3fSmrg			alu.dst.write = 1;
473401e04c3fSmrg		} else
473501e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4736af69d88dSmrg
473701e04c3fSmrg		alu.op = op;
473801e04c3fSmrg		if (!swap) {
473901e04c3fSmrg			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
474001e04c3fSmrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
4741af69d88dSmrg			}
47427ec681f3Smrg			alu.src[1].neg = src1_neg;
474301e04c3fSmrg		} else {
474401e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
474501e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
47463464ebd5Sriastradh		}
474701e04c3fSmrg		if (i == lasti || trans_only) {
47483464ebd5Sriastradh			alu.last = 1;
474901e04c3fSmrg		}
4750af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
4751af69d88dSmrg		if (r)
4752af69d88dSmrg			return r;
4753af69d88dSmrg	}
4754af69d88dSmrg
475501e04c3fSmrg	if (use_tmp) {
475601e04c3fSmrg		/* move result from temp to dst */
475701e04c3fSmrg		for (i = 0; i <= lasti; i++) {
475801e04c3fSmrg			if (!(write_mask & (1 << i)))
475901e04c3fSmrg				continue;
4760af69d88dSmrg
4761af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
476201e04c3fSmrg			alu.op = ALU_OP1_MOV;
476301e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
476401e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
476501e04c3fSmrg			alu.src[0].chan = i;
476601e04c3fSmrg			alu.last = (i == lasti);
476701e04c3fSmrg
4768af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
4769af69d88dSmrg			if (r)
4770af69d88dSmrg				return r;
4771af69d88dSmrg		}
4772af69d88dSmrg	}
47733464ebd5Sriastradh	return 0;
47743464ebd5Sriastradh}
47753464ebd5Sriastradh
/* Plain per-channel op: tgsi_op2_s() with sources in order and trans_only off. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
47803464ebd5Sriastradh
/* Per-channel op with the two sources exchanged: tgsi_op2_s() with swap on
 * and trans_only off. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
47853464ebd5Sriastradh
/* Per-channel op emitted one instruction per group: tgsi_op2_s() with
 * sources in order and trans_only on. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
47903464ebd5Sriastradh
479101e04c3fSmrgstatic int tgsi_ineg(struct r600_shader_ctx *ctx)
47923464ebd5Sriastradh{
47933464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4794af69d88dSmrg	struct r600_bytecode_alu alu;
47953464ebd5Sriastradh	int i, r;
479601e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
47973464ebd5Sriastradh
479801e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
47993464ebd5Sriastradh
480001e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
480101e04c3fSmrg			continue;
4802af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4803af69d88dSmrg		alu.op = ctx->inst_info->op;
480401e04c3fSmrg
480501e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
480601e04c3fSmrg
480701e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
48083464ebd5Sriastradh
48093464ebd5Sriastradh		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
48103464ebd5Sriastradh
481101e04c3fSmrg		if (i == lasti) {
48123464ebd5Sriastradh			alu.last = 1;
481301e04c3fSmrg		}
4814af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
48153464ebd5Sriastradh		if (r)
48163464ebd5Sriastradh			return r;
48173464ebd5Sriastradh	}
48183464ebd5Sriastradh	return 0;
481901e04c3fSmrg
48203464ebd5Sriastradh}
48213464ebd5Sriastradh
482201e04c3fSmrgstatic int tgsi_dneg(struct r600_shader_ctx *ctx)
48233464ebd5Sriastradh{
48243464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4825af69d88dSmrg	struct r600_bytecode_alu alu;
48263464ebd5Sriastradh	int i, r;
48273464ebd5Sriastradh	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
48283464ebd5Sriastradh
48293464ebd5Sriastradh	for (i = 0; i < lasti + 1; i++) {
483001e04c3fSmrg
48313464ebd5Sriastradh		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
48323464ebd5Sriastradh			continue;
4833af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4834af69d88dSmrg		alu.op = ALU_OP1_MOV;
48353464ebd5Sriastradh
483601e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
483701e04c3fSmrg
483801e04c3fSmrg		if (i == 1 || i == 3)
483901e04c3fSmrg			r600_bytecode_src_toggle_neg(&alu.src[0]);
48403464ebd5Sriastradh		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
484101e04c3fSmrg
484201e04c3fSmrg		if (i == lasti) {
48433464ebd5Sriastradh			alu.last = 1;
484401e04c3fSmrg		}
4845af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
48463464ebd5Sriastradh		if (r)
48473464ebd5Sriastradh			return r;
48483464ebd5Sriastradh	}
48493464ebd5Sriastradh	return 0;
485001e04c3fSmrg
48513464ebd5Sriastradh}
48523464ebd5Sriastradh
485301e04c3fSmrgstatic int tgsi_dfracexp(struct r600_shader_ctx *ctx)
48543464ebd5Sriastradh{
48553464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4856af69d88dSmrg	struct r600_bytecode_alu alu;
485701e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
485801e04c3fSmrg	int i, j, r;
48593464ebd5Sriastradh
486001e04c3fSmrg	for (i = 0; i <= 3; i++) {
486101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
486201e04c3fSmrg		alu.op = ctx->inst_info->op;
48633464ebd5Sriastradh
486401e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
486501e04c3fSmrg		alu.dst.chan = i;
486601e04c3fSmrg		alu.dst.write = 1;
486701e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
486801e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
486901e04c3fSmrg		}
48703464ebd5Sriastradh
487101e04c3fSmrg		if (i == 3)
487201e04c3fSmrg			alu.last = 1;
487301e04c3fSmrg
487401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
487501e04c3fSmrg		if (r)
487601e04c3fSmrg			return r;
487701e04c3fSmrg	}
487801e04c3fSmrg
487901e04c3fSmrg	/* Replicate significand result across channels. */
488001e04c3fSmrg	for (i = 0; i <= 3; i++) {
488101e04c3fSmrg		if (!(write_mask & (1 << i)))
488201e04c3fSmrg			continue;
488301e04c3fSmrg
488401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
488501e04c3fSmrg		alu.op = ALU_OP1_MOV;
488601e04c3fSmrg		alu.src[0].chan = (i & 1) + 2;
488701e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
488801e04c3fSmrg
488901e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
489001e04c3fSmrg		alu.dst.write = 1;
489101e04c3fSmrg		alu.last = 1;
489201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
489301e04c3fSmrg		if (r)
489401e04c3fSmrg			return r;
489501e04c3fSmrg	}
48963464ebd5Sriastradh
489701e04c3fSmrg	for (i = 0; i <= 3; i++) {
489801e04c3fSmrg		if (inst->Dst[1].Register.WriteMask & (1 << i)) {
489901e04c3fSmrg			/* MOV third channels to writemask dst1 */
490001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
490101e04c3fSmrg			alu.op = ALU_OP1_MOV;
490201e04c3fSmrg			alu.src[0].chan = 1;
49033464ebd5Sriastradh			alu.src[0].sel = ctx->temp_reg;
490401e04c3fSmrg
490501e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
49063464ebd5Sriastradh			alu.last = 1;
4907af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
49083464ebd5Sriastradh			if (r)
49093464ebd5Sriastradh				return r;
491001e04c3fSmrg			break;
49113464ebd5Sriastradh		}
49123464ebd5Sriastradh	}
491301e04c3fSmrg	return 0;
491401e04c3fSmrg}
49153464ebd5Sriastradh
491601e04c3fSmrg
491701e04c3fSmrgstatic int egcm_int_to_double(struct r600_shader_ctx *ctx)
491801e04c3fSmrg{
491901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
492001e04c3fSmrg	struct r600_bytecode_alu alu;
492101e04c3fSmrg	int i, c, r;
492201e04c3fSmrg	int write_mask = inst->Dst[0].Register.WriteMask;
492301e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
492401e04c3fSmrg
492501e04c3fSmrg	assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
492601e04c3fSmrg		inst->Instruction.Opcode == TGSI_OPCODE_U2D);
492701e04c3fSmrg
492801e04c3fSmrg	for (c = 0; c < 2; c++) {
492901e04c3fSmrg		int dchan = c * 2;
493001e04c3fSmrg		if (write_mask & (0x3 << dchan)) {
493101e04c3fSmrg	/* split into 24-bit int and 8-bit int */
4932af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
493301e04c3fSmrg			alu.op = ALU_OP2_AND_INT;
493401e04c3fSmrg			alu.dst.sel = temp_reg;
493501e04c3fSmrg			alu.dst.chan = dchan;
493601e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
493701e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
493801e04c3fSmrg			alu.src[1].value = 0xffffff00;
493901e04c3fSmrg			alu.dst.write = 1;
494001e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
494101e04c3fSmrg			if (r)
494201e04c3fSmrg				return r;
49433464ebd5Sriastradh
494401e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
494501e04c3fSmrg			alu.op = ALU_OP2_AND_INT;
494601e04c3fSmrg			alu.dst.sel = temp_reg;
494701e04c3fSmrg			alu.dst.chan = dchan + 1;
494801e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
494901e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
495001e04c3fSmrg			alu.src[1].value = 0xff;
495101e04c3fSmrg			alu.dst.write = 1;
49523464ebd5Sriastradh			alu.last = 1;
4953af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
49543464ebd5Sriastradh			if (r)
49553464ebd5Sriastradh				return r;
49563464ebd5Sriastradh		}
49573464ebd5Sriastradh	}
49583464ebd5Sriastradh
495901e04c3fSmrg	for (c = 0; c < 2; c++) {
496001e04c3fSmrg		int dchan = c * 2;
496101e04c3fSmrg		if (write_mask & (0x3 << dchan)) {
496201e04c3fSmrg			for (i = dchan; i <= dchan + 1; i++) {
496301e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
496401e04c3fSmrg				alu.op = i == dchan ? ctx->inst_info->op : ALU_OP1_UINT_TO_FLT;
49653464ebd5Sriastradh
496601e04c3fSmrg				alu.src[0].sel = temp_reg;
496701e04c3fSmrg				alu.src[0].chan = i;
496801e04c3fSmrg				alu.dst.sel = temp_reg;
496901e04c3fSmrg				alu.dst.chan = i;
497001e04c3fSmrg				alu.dst.write = 1;
497101e04c3fSmrg				if (ctx->bc->chip_class == CAYMAN)
497201e04c3fSmrg					alu.last = i == dchan + 1;
497301e04c3fSmrg				else
497401e04c3fSmrg					alu.last = 1; /* trans only ops on evergreen */
497501e04c3fSmrg
497601e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
497701e04c3fSmrg				if (r)
497801e04c3fSmrg					return r;
497901e04c3fSmrg			}
498001e04c3fSmrg		}
498101e04c3fSmrg	}
49823464ebd5Sriastradh
498301e04c3fSmrg	for (c = 0; c < 2; c++) {
498401e04c3fSmrg		int dchan = c * 2;
498501e04c3fSmrg		if (write_mask & (0x3 << dchan)) {
498601e04c3fSmrg			for (i = 0; i < 4; i++) {
498701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
498801e04c3fSmrg				alu.op = ALU_OP1_FLT32_TO_FLT64;
498901e04c3fSmrg
499001e04c3fSmrg				alu.src[0].chan = dchan + (i / 2);
499101e04c3fSmrg				if (i == 0 || i == 2)
499201e04c3fSmrg					alu.src[0].sel = temp_reg;
499301e04c3fSmrg				else {
499401e04c3fSmrg					alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
499501e04c3fSmrg					alu.src[0].value = 0x0;
499601e04c3fSmrg				}
499701e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
499801e04c3fSmrg				alu.dst.chan = i;
499901e04c3fSmrg				alu.last = i == 3;
500001e04c3fSmrg				alu.dst.write = 1;
50013464ebd5Sriastradh
500201e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
500301e04c3fSmrg				if (r)
500401e04c3fSmrg					return r;
500501e04c3fSmrg			}
50063464ebd5Sriastradh
500701e04c3fSmrg			for (i = 0; i <= 1; i++) {
500801e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
500901e04c3fSmrg				alu.op = ALU_OP2_ADD_64;
50103464ebd5Sriastradh
501101e04c3fSmrg				alu.src[0].chan = fp64_switch(i);
501201e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
501301e04c3fSmrg
501401e04c3fSmrg				alu.src[1].chan = fp64_switch(i + 2);
501501e04c3fSmrg				alu.src[1].sel = ctx->temp_reg;
501601e04c3fSmrg				tgsi_dst(ctx, &inst->Dst[0], dchan + i, &alu.dst);
501701e04c3fSmrg				alu.last = i == 1;
501801e04c3fSmrg
501901e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
502001e04c3fSmrg				if (r)
502101e04c3fSmrg					return r;
502201e04c3fSmrg			}
502301e04c3fSmrg		}
50243464ebd5Sriastradh	}
50253464ebd5Sriastradh
502601e04c3fSmrg	return 0;
502701e04c3fSmrg}
50283464ebd5Sriastradh
502901e04c3fSmrgstatic int egcm_double_to_int(struct r600_shader_ctx *ctx)
503001e04c3fSmrg{
503101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
503201e04c3fSmrg	struct r600_bytecode_alu alu;
503301e04c3fSmrg	int i, r;
503401e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
503501e04c3fSmrg	int treg = r600_get_temp(ctx);
503601e04c3fSmrg	assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
503701e04c3fSmrg		inst->Instruction.Opcode == TGSI_OPCODE_D2U);
50383464ebd5Sriastradh
503901e04c3fSmrg	/* do a 64->32 into a temp register */
504001e04c3fSmrg	r = tgsi_op2_64_params(ctx, true, false, treg, ALU_OP1_FLT64_TO_FLT32);
504101e04c3fSmrg	if (r)
504201e04c3fSmrg		return r;
50433464ebd5Sriastradh
504401e04c3fSmrg	for (i = 0; i <= lasti; i++) {
504501e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
504601e04c3fSmrg			continue;
504701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
504801e04c3fSmrg		alu.op = ctx->inst_info->op;
50493464ebd5Sriastradh
505001e04c3fSmrg		alu.src[0].chan = i;
505101e04c3fSmrg		alu.src[0].sel = treg;
505201e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
505301e04c3fSmrg		alu.last = (i == lasti);
50543464ebd5Sriastradh
5055af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
50563464ebd5Sriastradh		if (r)
50573464ebd5Sriastradh			return r;
50583464ebd5Sriastradh	}
50593464ebd5Sriastradh
50603464ebd5Sriastradh	return 0;
50613464ebd5Sriastradh}
50623464ebd5Sriastradh
506301e04c3fSmrgstatic int cayman_emit_unary_double_raw(struct r600_bytecode *bc,
506401e04c3fSmrg					unsigned op,
506501e04c3fSmrg					int dst_reg,
506601e04c3fSmrg					struct r600_shader_src *src,
506701e04c3fSmrg					bool abs)
50683464ebd5Sriastradh{
5069af69d88dSmrg	struct r600_bytecode_alu alu;
507001e04c3fSmrg	const int last_slot = 3;
507101e04c3fSmrg	int r;
50723464ebd5Sriastradh
507301e04c3fSmrg	/* these have to write the result to X/Y by the looks of it */
507401e04c3fSmrg	for (int i = 0 ; i < last_slot; i++) {
5075af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
507601e04c3fSmrg		alu.op = op;
50773464ebd5Sriastradh
507801e04c3fSmrg		r600_bytecode_src(&alu.src[0], src, 1);
507901e04c3fSmrg		r600_bytecode_src(&alu.src[1], src, 0);
50803464ebd5Sriastradh
508101e04c3fSmrg		if (abs)
508201e04c3fSmrg			r600_bytecode_src_set_abs(&alu.src[1]);
50833464ebd5Sriastradh
508401e04c3fSmrg		alu.dst.sel = dst_reg;
508501e04c3fSmrg		alu.dst.chan = i;
508601e04c3fSmrg		alu.dst.write = (i == 0 || i == 1);
508701e04c3fSmrg
508801e04c3fSmrg		if (bc->chip_class != CAYMAN || i == last_slot - 1)
50893464ebd5Sriastradh			alu.last = 1;
509001e04c3fSmrg		r = r600_bytecode_add_alu(bc, &alu);
50913464ebd5Sriastradh		if (r)
50923464ebd5Sriastradh			return r;
50933464ebd5Sriastradh	}
50943464ebd5Sriastradh
50953464ebd5Sriastradh	return 0;
50963464ebd5Sriastradh}
50973464ebd5Sriastradh
509801e04c3fSmrgstatic int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
50993464ebd5Sriastradh{
51003464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
510101e04c3fSmrg	int i, r;
5102af69d88dSmrg	struct r600_bytecode_alu alu;
510301e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
510401e04c3fSmrg	int t1 = ctx->temp_reg;
51053464ebd5Sriastradh
510601e04c3fSmrg	/* should only be one src regs */
510701e04c3fSmrg	assert(inst->Instruction.NumSrcRegs == 1);
51083464ebd5Sriastradh
510901e04c3fSmrg	/* only support one double at a time */
511001e04c3fSmrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
511101e04c3fSmrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
51123464ebd5Sriastradh
511301e04c3fSmrg	r = cayman_emit_unary_double_raw(
511401e04c3fSmrg		ctx->bc, ctx->inst_info->op, t1,
511501e04c3fSmrg		&ctx->src[0],
511601e04c3fSmrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
511701e04c3fSmrg		ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT);
51183464ebd5Sriastradh	if (r)
51193464ebd5Sriastradh		return r;
51203464ebd5Sriastradh
512101e04c3fSmrg	for (i = 0 ; i <= lasti; i++) {
512201e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
512301e04c3fSmrg			continue;
5124af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
512501e04c3fSmrg		alu.op = ALU_OP1_MOV;
512601e04c3fSmrg		alu.src[0].sel = t1;
512701e04c3fSmrg		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
512801e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
51293464ebd5Sriastradh		alu.dst.write = 1;
513001e04c3fSmrg		if (i == lasti)
513101e04c3fSmrg			alu.last = 1;
5132af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
51333464ebd5Sriastradh		if (r)
51343464ebd5Sriastradh			return r;
513501e04c3fSmrg	}
513601e04c3fSmrg	return 0;
513701e04c3fSmrg}
51383464ebd5Sriastradh
513901e04c3fSmrgstatic int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
514001e04c3fSmrg{
514101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
514201e04c3fSmrg	int i, j, r;
514301e04c3fSmrg	struct r600_bytecode_alu alu;
514401e04c3fSmrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
51453464ebd5Sriastradh
514601e04c3fSmrg	for (i = 0 ; i < last_slot; i++) {
514701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
514801e04c3fSmrg		alu.op = ctx->inst_info->op;
514901e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
515001e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
51513464ebd5Sriastradh
515201e04c3fSmrg			/* RSQ should take the absolute value of src */
515301e04c3fSmrg			if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) {
515401e04c3fSmrg				r600_bytecode_src_set_abs(&alu.src[j]);
515501e04c3fSmrg			}
515601e04c3fSmrg		}
515701e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
515801e04c3fSmrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
51593464ebd5Sriastradh
516001e04c3fSmrg		if (i == last_slot - 1)
516101e04c3fSmrg			alu.last = 1;
516201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
516301e04c3fSmrg		if (r)
516401e04c3fSmrg			return r;
516501e04c3fSmrg	}
51663464ebd5Sriastradh	return 0;
51673464ebd5Sriastradh}
51683464ebd5Sriastradh
516901e04c3fSmrgstatic int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
51703464ebd5Sriastradh{
51713464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
517201e04c3fSmrg	int i, j, k, r;
5173af69d88dSmrg	struct r600_bytecode_alu alu;
517401e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
517501e04c3fSmrg	int t1 = ctx->temp_reg;
51763464ebd5Sriastradh
517701e04c3fSmrg	for (k = 0; k <= lasti; k++) {
517801e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
517901e04c3fSmrg			continue;
51803464ebd5Sriastradh
518101e04c3fSmrg		for (i = 0 ; i < 4; i++) {
518201e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
518301e04c3fSmrg			alu.op = ctx->inst_info->op;
518401e04c3fSmrg			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
518501e04c3fSmrg				r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
518601e04c3fSmrg			}
518701e04c3fSmrg			alu.dst.sel = t1;
518801e04c3fSmrg			alu.dst.chan = i;
518901e04c3fSmrg			alu.dst.write = (i == k);
519001e04c3fSmrg			if (i == 3)
519101e04c3fSmrg				alu.last = 1;
519201e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
519301e04c3fSmrg			if (r)
519401e04c3fSmrg				return r;
519501e04c3fSmrg		}
519601e04c3fSmrg	}
51973464ebd5Sriastradh
519801e04c3fSmrg	for (i = 0 ; i <= lasti; i++) {
519901e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
520001e04c3fSmrg			continue;
520101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
520201e04c3fSmrg		alu.op = ALU_OP1_MOV;
520301e04c3fSmrg		alu.src[0].sel = t1;
520401e04c3fSmrg		alu.src[0].chan = i;
520501e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
520601e04c3fSmrg		alu.dst.write = 1;
520701e04c3fSmrg		if (i == lasti)
520801e04c3fSmrg			alu.last = 1;
520901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
521001e04c3fSmrg		if (r)
521101e04c3fSmrg			return r;
52123464ebd5Sriastradh	}
521301e04c3fSmrg
521401e04c3fSmrg	return 0;
52153464ebd5Sriastradh}
52163464ebd5Sriastradh
521701e04c3fSmrg
521801e04c3fSmrgstatic int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
52193464ebd5Sriastradh{
52203464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
522101e04c3fSmrg	int i, j, k, r;
5222af69d88dSmrg	struct r600_bytecode_alu alu;
522301e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
522401e04c3fSmrg	int t1 = ctx->temp_reg;
522501e04c3fSmrg
522601e04c3fSmrg	/* t1 would get overwritten below if we actually tried to
522701e04c3fSmrg	 * multiply two pairs of doubles at a time. */
522801e04c3fSmrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
522901e04c3fSmrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
523001e04c3fSmrg
523101e04c3fSmrg	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
52323464ebd5Sriastradh
52333464ebd5Sriastradh	for (i = 0; i < 4; i++) {
5234af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
523501e04c3fSmrg		alu.op = ctx->inst_info->op;
523601e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
523701e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
523801e04c3fSmrg		}
523901e04c3fSmrg		alu.dst.sel = t1;
52403464ebd5Sriastradh		alu.dst.chan = i;
524101e04c3fSmrg		alu.dst.write = 1;
52423464ebd5Sriastradh		if (i == 3)
52433464ebd5Sriastradh			alu.last = 1;
5244af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
52453464ebd5Sriastradh		if (r)
52463464ebd5Sriastradh			return r;
52473464ebd5Sriastradh	}
524801e04c3fSmrg
524901e04c3fSmrg	for (i = 0; i <= lasti; i++) {
525001e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
525101e04c3fSmrg			continue;
525201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
525301e04c3fSmrg		alu.op = ALU_OP1_MOV;
525401e04c3fSmrg		alu.src[0].sel = t1;
525501e04c3fSmrg		alu.src[0].chan = i;
525601e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
525701e04c3fSmrg		alu.dst.write = 1;
525801e04c3fSmrg		if (i == lasti)
525901e04c3fSmrg			alu.last = 1;
526001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
526101e04c3fSmrg		if (r)
526201e04c3fSmrg			return r;
526301e04c3fSmrg	}
526401e04c3fSmrg
52653464ebd5Sriastradh	return 0;
52663464ebd5Sriastradh}
52673464ebd5Sriastradh
526801e04c3fSmrg/*
526901e04c3fSmrg * Emit RECIP_64 + MUL_64 to implement division.
527001e04c3fSmrg */
527101e04c3fSmrgstatic int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
52723464ebd5Sriastradh{
52733464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
527401e04c3fSmrg	int r;
5275af69d88dSmrg	struct r600_bytecode_alu alu;
527601e04c3fSmrg	int t1 = ctx->temp_reg;
527701e04c3fSmrg	int k;
52783464ebd5Sriastradh
527901e04c3fSmrg	/* Only support one double at a time. This is the same constraint as
528001e04c3fSmrg	 * in DMUL lowering. */
528101e04c3fSmrg	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
528201e04c3fSmrg	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
528301e04c3fSmrg
528401e04c3fSmrg	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
528501e04c3fSmrg
528601e04c3fSmrg	r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
52873464ebd5Sriastradh	if (r)
52883464ebd5Sriastradh		return r;
52893464ebd5Sriastradh
529001e04c3fSmrg	for (int i = 0; i < 4; i++) {
5291af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
529201e04c3fSmrg		alu.op = ALU_OP2_MUL_64;
529301e04c3fSmrg
529401e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
529501e04c3fSmrg
529601e04c3fSmrg		alu.src[1].sel = t1;
529701e04c3fSmrg		alu.src[1].chan = (i == 3) ? 0 : 1;
529801e04c3fSmrg
529901e04c3fSmrg		alu.dst.sel = t1;
53003464ebd5Sriastradh		alu.dst.chan = i;
53013464ebd5Sriastradh		alu.dst.write = 1;
530201e04c3fSmrg		if (i == 3)
53033464ebd5Sriastradh			alu.last = 1;
5304af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
53053464ebd5Sriastradh		if (r)
53063464ebd5Sriastradh			return r;
53073464ebd5Sriastradh	}
53083464ebd5Sriastradh
530901e04c3fSmrg	for (int i = 0; i < 2; i++) {
5310af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
531101e04c3fSmrg		alu.op = ALU_OP1_MOV;
531201e04c3fSmrg		alu.src[0].sel = t1;
531301e04c3fSmrg		alu.src[0].chan = i;
531401e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
531501e04c3fSmrg		alu.dst.write = 1;
531601e04c3fSmrg		if (i == 1)
53173464ebd5Sriastradh			alu.last = 1;
5318af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
53193464ebd5Sriastradh		if (r)
53203464ebd5Sriastradh			return r;
53213464ebd5Sriastradh	}
53223464ebd5Sriastradh	return 0;
53233464ebd5Sriastradh}
53243464ebd5Sriastradh
532501e04c3fSmrg/*
532601e04c3fSmrg * r600 - trunc to -PI..PI range
532701e04c3fSmrg * r700 - normalize by dividing by 2PI
532801e04c3fSmrg * see fdo bug 27901
532901e04c3fSmrg */
533001e04c3fSmrgstatic int tgsi_setup_trig(struct r600_shader_ctx *ctx)
53313464ebd5Sriastradh{
53323464ebd5Sriastradh	int r;
533301e04c3fSmrg	struct r600_bytecode_alu alu;
53343464ebd5Sriastradh
5335af69d88dSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
533601e04c3fSmrg	alu.op = ALU_OP3_MULADD;
533701e04c3fSmrg	alu.is_op3 = 1;
533801e04c3fSmrg
533901e04c3fSmrg	alu.dst.chan = 0;
53403464ebd5Sriastradh	alu.dst.sel = ctx->temp_reg;
53413464ebd5Sriastradh	alu.dst.write = 1;
534201e04c3fSmrg
534301e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
534401e04c3fSmrg
534501e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
534601e04c3fSmrg	alu.src[1].chan = 0;
534701e04c3fSmrg	alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI);
534801e04c3fSmrg	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
534901e04c3fSmrg	alu.src[2].chan = 0;
53503464ebd5Sriastradh	alu.last = 1;
5351af69d88dSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
53523464ebd5Sriastradh	if (r)
53533464ebd5Sriastradh		return r;
535401e04c3fSmrg
5355af69d88dSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
535601e04c3fSmrg	alu.op = ALU_OP1_FRACT;
535701e04c3fSmrg
535801e04c3fSmrg	alu.dst.chan = 0;
53593464ebd5Sriastradh	alu.dst.sel = ctx->temp_reg;
53603464ebd5Sriastradh	alu.dst.write = 1;
536101e04c3fSmrg
536201e04c3fSmrg	alu.src[0].sel = ctx->temp_reg;
536301e04c3fSmrg	alu.src[0].chan = 0;
53643464ebd5Sriastradh	alu.last = 1;
5365af69d88dSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
53663464ebd5Sriastradh	if (r)
53673464ebd5Sriastradh		return r;
536801e04c3fSmrg
5369af69d88dSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
537001e04c3fSmrg	alu.op = ALU_OP3_MULADD;
537101e04c3fSmrg	alu.is_op3 = 1;
537201e04c3fSmrg
537301e04c3fSmrg	alu.dst.chan = 0;
53743464ebd5Sriastradh	alu.dst.sel = ctx->temp_reg;
53753464ebd5Sriastradh	alu.dst.write = 1;
537601e04c3fSmrg
537701e04c3fSmrg	alu.src[0].sel = ctx->temp_reg;
537801e04c3fSmrg	alu.src[0].chan = 0;
537901e04c3fSmrg
538001e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
538101e04c3fSmrg	alu.src[1].chan = 0;
538201e04c3fSmrg	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
538301e04c3fSmrg	alu.src[2].chan = 0;
538401e04c3fSmrg
538501e04c3fSmrg	if (ctx->bc->chip_class == R600) {
538601e04c3fSmrg		alu.src[1].value = u_bitcast_f2u(2.0f * M_PI);
538701e04c3fSmrg		alu.src[2].value = u_bitcast_f2u(-M_PI);
538801e04c3fSmrg	} else {
538901e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
539001e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
539101e04c3fSmrg		alu.src[2].neg = 1;
539201e04c3fSmrg	}
539301e04c3fSmrg
53943464ebd5Sriastradh	alu.last = 1;
5395af69d88dSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
53963464ebd5Sriastradh	if (r)
53973464ebd5Sriastradh		return r;
539801e04c3fSmrg	return 0;
53993464ebd5Sriastradh}
54003464ebd5Sriastradh
540101e04c3fSmrgstatic int cayman_trig(struct r600_shader_ctx *ctx)
5402af69d88dSmrg{
5403af69d88dSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5404af69d88dSmrg	struct r600_bytecode_alu alu;
540501e04c3fSmrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
540601e04c3fSmrg	int i, r;
5407af69d88dSmrg
540801e04c3fSmrg	r = tgsi_setup_trig(ctx);
540901e04c3fSmrg	if (r)
541001e04c3fSmrg		return r;
5411af69d88dSmrg
5412af69d88dSmrg
541301e04c3fSmrg	for (i = 0; i < last_slot; i++) {
541401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
541501e04c3fSmrg		alu.op = ctx->inst_info->op;
541601e04c3fSmrg		alu.dst.chan = i;
5417af69d88dSmrg
541801e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
541901e04c3fSmrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
5420af69d88dSmrg
542101e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
542201e04c3fSmrg		alu.src[0].chan = 0;
542301e04c3fSmrg		if (i == last_slot - 1)
5424af69d88dSmrg			alu.last = 1;
542501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
542601e04c3fSmrg		if (r)
542701e04c3fSmrg			return r;
542801e04c3fSmrg	}
542901e04c3fSmrg	return 0;
543001e04c3fSmrg}
5431af69d88dSmrg
543201e04c3fSmrgstatic int tgsi_trig(struct r600_shader_ctx *ctx)
543301e04c3fSmrg{
543401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
543501e04c3fSmrg	struct r600_bytecode_alu alu;
543601e04c3fSmrg	int i, r;
543701e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5438af69d88dSmrg
543901e04c3fSmrg	r = tgsi_setup_trig(ctx);
544001e04c3fSmrg	if (r)
544101e04c3fSmrg		return r;
5442af69d88dSmrg
544301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
544401e04c3fSmrg	alu.op = ctx->inst_info->op;
544501e04c3fSmrg	alu.dst.chan = 0;
544601e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
544701e04c3fSmrg	alu.dst.write = 1;
5448af69d88dSmrg
544901e04c3fSmrg	alu.src[0].sel = ctx->temp_reg;
545001e04c3fSmrg	alu.src[0].chan = 0;
545101e04c3fSmrg	alu.last = 1;
545201e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
545301e04c3fSmrg	if (r)
545401e04c3fSmrg		return r;
5455af69d88dSmrg
545601e04c3fSmrg	/* replicate result */
545701e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
545801e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
545901e04c3fSmrg			continue;
5460af69d88dSmrg
546101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
546201e04c3fSmrg		alu.op = ALU_OP1_MOV;
5463af69d88dSmrg
546401e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
546501e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
546601e04c3fSmrg		if (i == lasti)
546701e04c3fSmrg			alu.last = 1;
546801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
546901e04c3fSmrg		if (r)
547001e04c3fSmrg			return r;
547101e04c3fSmrg	}
547201e04c3fSmrg	return 0;
547301e04c3fSmrg}
5474af69d88dSmrg
547501e04c3fSmrgstatic int tgsi_kill(struct r600_shader_ctx *ctx)
547601e04c3fSmrg{
547701e04c3fSmrg	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
547801e04c3fSmrg	struct r600_bytecode_alu alu;
547901e04c3fSmrg	int i, r;
5480af69d88dSmrg
548101e04c3fSmrg	for (i = 0; i < 4; i++) {
548201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
548301e04c3fSmrg		alu.op = ctx->inst_info->op;
5484af69d88dSmrg
548501e04c3fSmrg		alu.dst.chan = i;
5486af69d88dSmrg
548701e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
5488af69d88dSmrg
548901e04c3fSmrg		if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) {
549001e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_1;
549101e04c3fSmrg			alu.src[1].neg = 1;
549201e04c3fSmrg		} else {
5493af69d88dSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
549401e04c3fSmrg		}
549501e04c3fSmrg		if (i == 3) {
5496af69d88dSmrg			alu.last = 1;
549701e04c3fSmrg		}
549801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
549901e04c3fSmrg		if (r)
550001e04c3fSmrg			return r;
550101e04c3fSmrg	}
5502af69d88dSmrg
550301e04c3fSmrg	/* kill must be last in ALU */
550401e04c3fSmrg	ctx->bc->force_add_cf = 1;
550501e04c3fSmrg	ctx->shader->uses_kill = TRUE;
550601e04c3fSmrg	return 0;
550701e04c3fSmrg}
5508af69d88dSmrg
550901e04c3fSmrgstatic int tgsi_lit(struct r600_shader_ctx *ctx)
551001e04c3fSmrg{
551101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
551201e04c3fSmrg	struct r600_bytecode_alu alu;
551301e04c3fSmrg	int r;
5514af69d88dSmrg
551501e04c3fSmrg	/* tmp.x = max(src.y, 0.0) */
551601e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
551701e04c3fSmrg	alu.op = ALU_OP2_MAX;
551801e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
551901e04c3fSmrg	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
552001e04c3fSmrg	alu.src[1].chan = 1;
5521af69d88dSmrg
552201e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
552301e04c3fSmrg	alu.dst.chan = 0;
552401e04c3fSmrg	alu.dst.write = 1;
5525af69d88dSmrg
552601e04c3fSmrg	alu.last = 1;
552701e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
552801e04c3fSmrg	if (r)
552901e04c3fSmrg		return r;
553001e04c3fSmrg
553101e04c3fSmrg	if (inst->Dst[0].Register.WriteMask & (1 << 2))
553201e04c3fSmrg	{
553301e04c3fSmrg		int chan;
553401e04c3fSmrg		int sel;
553501e04c3fSmrg		unsigned i;
5536af69d88dSmrg
5537af69d88dSmrg		if (ctx->bc->chip_class == CAYMAN) {
553801e04c3fSmrg			for (i = 0; i < 3; i++) {
553901e04c3fSmrg				/* tmp.z = log(tmp.x) */
5540af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
554101e04c3fSmrg				alu.op = ALU_OP1_LOG_CLAMPED;
554201e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
5543af69d88dSmrg				alu.src[0].chan = 0;
554401e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
554501e04c3fSmrg				alu.dst.chan = i;
554601e04c3fSmrg				if (i == 2) {
554701e04c3fSmrg					alu.dst.write = 1;
5548af69d88dSmrg					alu.last = 1;
554901e04c3fSmrg				} else
555001e04c3fSmrg					alu.dst.write = 0;
555101e04c3fSmrg
555201e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
555301e04c3fSmrg				if (r)
5554af69d88dSmrg					return r;
5555af69d88dSmrg			}
555601e04c3fSmrg		} else {
555701e04c3fSmrg			/* tmp.z = log(tmp.x) */
5558af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
555901e04c3fSmrg			alu.op = ALU_OP1_LOG_CLAMPED;
556001e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
5561af69d88dSmrg			alu.src[0].chan = 0;
556201e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
556301e04c3fSmrg			alu.dst.chan = 2;
5564af69d88dSmrg			alu.dst.write = 1;
5565af69d88dSmrg			alu.last = 1;
5566af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
5567af69d88dSmrg			if (r)
5568af69d88dSmrg				return r;
5569af69d88dSmrg		}
5570af69d88dSmrg
557101e04c3fSmrg		chan = alu.dst.chan;
557201e04c3fSmrg		sel = alu.dst.sel;
5573af69d88dSmrg
557401e04c3fSmrg		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
5575af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
557601e04c3fSmrg		alu.op = ALU_OP3_MUL_LIT;
557701e04c3fSmrg		alu.src[0].sel  = sel;
557801e04c3fSmrg		alu.src[0].chan = chan;
557901e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
558001e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
558101e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
558201e04c3fSmrg		alu.dst.chan = 0;
5583af69d88dSmrg		alu.dst.write = 1;
558401e04c3fSmrg		alu.is_op3 = 1;
5585af69d88dSmrg		alu.last = 1;
558601e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
558701e04c3fSmrg		if (r)
5588af69d88dSmrg			return r;
5589af69d88dSmrg
5590af69d88dSmrg		if (ctx->bc->chip_class == CAYMAN) {
559101e04c3fSmrg			for (i = 0; i < 3; i++) {
559201e04c3fSmrg				/* dst.z = exp(tmp.x) */
5593af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
559401e04c3fSmrg				alu.op = ALU_OP1_EXP_IEEE;
559501e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
5596af69d88dSmrg				alu.src[0].chan = 0;
559701e04c3fSmrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
559801e04c3fSmrg				if (i == 2) {
559901e04c3fSmrg					alu.dst.write = 1;
560001e04c3fSmrg					alu.last = 1;
560101e04c3fSmrg				} else
560201e04c3fSmrg					alu.dst.write = 0;
560301e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
560401e04c3fSmrg				if (r)
5605af69d88dSmrg					return r;
5606af69d88dSmrg			}
5607af69d88dSmrg		} else {
560801e04c3fSmrg			/* dst.z = exp(tmp.x) */
5609af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
561001e04c3fSmrg			alu.op = ALU_OP1_EXP_IEEE;
561101e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
5612af69d88dSmrg			alu.src[0].chan = 0;
561301e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
5614af69d88dSmrg			alu.last = 1;
561501e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
561601e04c3fSmrg			if (r)
5617af69d88dSmrg				return r;
5618af69d88dSmrg		}
561901e04c3fSmrg	}
5620af69d88dSmrg
562101e04c3fSmrg	/* dst.x, <- 1.0  */
562201e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
562301e04c3fSmrg	alu.op = ALU_OP1_MOV;
562401e04c3fSmrg	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
562501e04c3fSmrg	alu.src[0].chan = 0;
562601e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
562701e04c3fSmrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
562801e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
562901e04c3fSmrg	if (r)
563001e04c3fSmrg		return r;
5631af69d88dSmrg
563201e04c3fSmrg	/* dst.y = max(src.x, 0.0) */
563301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
563401e04c3fSmrg	alu.op = ALU_OP2_MAX;
563501e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
563601e04c3fSmrg	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
563701e04c3fSmrg	alu.src[1].chan = 0;
563801e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
563901e04c3fSmrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
564001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
564101e04c3fSmrg	if (r)
564201e04c3fSmrg		return r;
5643af69d88dSmrg
564401e04c3fSmrg	/* dst.w, <- 1.0  */
564501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
564601e04c3fSmrg	alu.op = ALU_OP1_MOV;
564701e04c3fSmrg	alu.src[0].sel  = V_SQ_ALU_SRC_1;
564801e04c3fSmrg	alu.src[0].chan = 0;
564901e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
565001e04c3fSmrg	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
565101e04c3fSmrg	alu.last = 1;
565201e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
565301e04c3fSmrg	if (r)
565401e04c3fSmrg		return r;
5655af69d88dSmrg
565601e04c3fSmrg	return 0;
565701e04c3fSmrg}
5658af69d88dSmrg
565901e04c3fSmrgstatic int tgsi_rsq(struct r600_shader_ctx *ctx)
566001e04c3fSmrg{
566101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
566201e04c3fSmrg	struct r600_bytecode_alu alu;
566301e04c3fSmrg	int i, r;
5664af69d88dSmrg
566501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5666af69d88dSmrg
566701e04c3fSmrg	alu.op = ALU_OP1_RECIPSQRT_IEEE;
5668af69d88dSmrg
566901e04c3fSmrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
567001e04c3fSmrg		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
567101e04c3fSmrg		r600_bytecode_src_set_abs(&alu.src[i]);
567201e04c3fSmrg	}
567301e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
567401e04c3fSmrg	alu.dst.write = 1;
567501e04c3fSmrg	alu.last = 1;
567601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
567701e04c3fSmrg	if (r)
567801e04c3fSmrg		return r;
567901e04c3fSmrg	/* replicate result */
568001e04c3fSmrg	return tgsi_helper_tempx_replicate(ctx);
568101e04c3fSmrg}
5682af69d88dSmrg
568301e04c3fSmrgstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
568401e04c3fSmrg{
568501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
568601e04c3fSmrg	struct r600_bytecode_alu alu;
568701e04c3fSmrg	int i, r;
5688af69d88dSmrg
568901e04c3fSmrg	for (i = 0; i < 4; i++) {
5690af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
569101e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
569201e04c3fSmrg		alu.op = ALU_OP1_MOV;
569301e04c3fSmrg		alu.dst.chan = i;
569401e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
569501e04c3fSmrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
569601e04c3fSmrg		if (i == 3)
569701e04c3fSmrg			alu.last = 1;
569801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
569901e04c3fSmrg		if (r)
5700af69d88dSmrg			return r;
570101e04c3fSmrg	}
570201e04c3fSmrg	return 0;
570301e04c3fSmrg}
5704af69d88dSmrg
570501e04c3fSmrgstatic int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
570601e04c3fSmrg{
570701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
570801e04c3fSmrg	struct r600_bytecode_alu alu;
570901e04c3fSmrg	int i, r;
5710af69d88dSmrg
571101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
571201e04c3fSmrg	alu.op = ctx->inst_info->op;
571301e04c3fSmrg	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
571401e04c3fSmrg		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
571501e04c3fSmrg	}
571601e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
571701e04c3fSmrg	alu.dst.write = 1;
571801e04c3fSmrg	alu.last = 1;
571901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
572001e04c3fSmrg	if (r)
572101e04c3fSmrg		return r;
572201e04c3fSmrg	/* replicate result */
572301e04c3fSmrg	return tgsi_helper_tempx_replicate(ctx);
572401e04c3fSmrg}
5725af69d88dSmrg
572601e04c3fSmrgstatic int cayman_pow(struct r600_shader_ctx *ctx)
572701e04c3fSmrg{
572801e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
572901e04c3fSmrg	int i, r;
573001e04c3fSmrg	struct r600_bytecode_alu alu;
573101e04c3fSmrg	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
5732af69d88dSmrg
573301e04c3fSmrg	for (i = 0; i < 3; i++) {
5734af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
573501e04c3fSmrg		alu.op = ALU_OP1_LOG_IEEE;
573601e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
573701e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
573801e04c3fSmrg		alu.dst.chan = i;
5739af69d88dSmrg		alu.dst.write = 1;
574001e04c3fSmrg		if (i == 2)
574101e04c3fSmrg			alu.last = 1;
574201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
574301e04c3fSmrg		if (r)
5744af69d88dSmrg			return r;
574501e04c3fSmrg	}
5746af69d88dSmrg
574701e04c3fSmrg	/* b * LOG2(a) */
574801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
574901e04c3fSmrg	alu.op = ALU_OP2_MUL;
575001e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
575101e04c3fSmrg	alu.src[1].sel = ctx->temp_reg;
575201e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
575301e04c3fSmrg	alu.dst.write = 1;
575401e04c3fSmrg	alu.last = 1;
575501e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
575601e04c3fSmrg	if (r)
575701e04c3fSmrg		return r;
5758af69d88dSmrg
575901e04c3fSmrg	for (i = 0; i < last_slot; i++) {
576001e04c3fSmrg		/* POW(a,b) = EXP2(b * LOG2(a))*/
576101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
576201e04c3fSmrg		alu.op = ALU_OP1_EXP_IEEE;
576301e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
5764af69d88dSmrg
576501e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
576601e04c3fSmrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
576701e04c3fSmrg		if (i == last_slot - 1)
576801e04c3fSmrg			alu.last = 1;
576901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
577001e04c3fSmrg		if (r)
577101e04c3fSmrg			return r;
577201e04c3fSmrg	}
577301e04c3fSmrg	return 0;
577401e04c3fSmrg}
5775af69d88dSmrg
577601e04c3fSmrgstatic int tgsi_pow(struct r600_shader_ctx *ctx)
577701e04c3fSmrg{
577801e04c3fSmrg	struct r600_bytecode_alu alu;
577901e04c3fSmrg	int r;
5780af69d88dSmrg
578101e04c3fSmrg	/* LOG2(a) */
578201e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
578301e04c3fSmrg	alu.op = ALU_OP1_LOG_IEEE;
578401e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
578501e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
578601e04c3fSmrg	alu.dst.write = 1;
578701e04c3fSmrg	alu.last = 1;
578801e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
578901e04c3fSmrg	if (r)
579001e04c3fSmrg		return r;
579101e04c3fSmrg	/* b * LOG2(a) */
579201e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
579301e04c3fSmrg	alu.op = ALU_OP2_MUL;
579401e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
579501e04c3fSmrg	alu.src[1].sel = ctx->temp_reg;
579601e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
579701e04c3fSmrg	alu.dst.write = 1;
579801e04c3fSmrg	alu.last = 1;
579901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
580001e04c3fSmrg	if (r)
580101e04c3fSmrg		return r;
580201e04c3fSmrg	/* POW(a,b) = EXP2(b * LOG2(a))*/
580301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
580401e04c3fSmrg	alu.op = ALU_OP1_EXP_IEEE;
580501e04c3fSmrg	alu.src[0].sel = ctx->temp_reg;
580601e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
580701e04c3fSmrg	alu.dst.write = 1;
580801e04c3fSmrg	alu.last = 1;
580901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
581001e04c3fSmrg	if (r)
581101e04c3fSmrg		return r;
581201e04c3fSmrg	return tgsi_helper_tempx_replicate(ctx);
581301e04c3fSmrg}
5814af69d88dSmrg
581501e04c3fSmrgstatic int emit_mul_int_op(struct r600_bytecode *bc,
581601e04c3fSmrg			   struct r600_bytecode_alu *alu_src)
581701e04c3fSmrg{
581801e04c3fSmrg	struct r600_bytecode_alu alu;
581901e04c3fSmrg	int i, r;
582001e04c3fSmrg	alu = *alu_src;
582101e04c3fSmrg	if (bc->chip_class == CAYMAN) {
582201e04c3fSmrg		for (i = 0; i < 4; i++) {
582301e04c3fSmrg			alu.dst.chan = i;
582401e04c3fSmrg			alu.dst.write = (i == alu_src->dst.chan);
582501e04c3fSmrg			alu.last = (i == 3);
5826af69d88dSmrg
582701e04c3fSmrg			r = r600_bytecode_add_alu(bc, &alu);
582801e04c3fSmrg			if (r)
5829af69d88dSmrg				return r;
5830af69d88dSmrg		}
583101e04c3fSmrg	} else {
583201e04c3fSmrg		alu.last = 1;
583301e04c3fSmrg		r = r600_bytecode_add_alu(bc, &alu);
583401e04c3fSmrg		if (r)
583501e04c3fSmrg			return r;
583601e04c3fSmrg	}
583701e04c3fSmrg	return 0;
583801e04c3fSmrg}
5839af69d88dSmrg
584001e04c3fSmrgstatic int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
584101e04c3fSmrg{
584201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
584301e04c3fSmrg	struct r600_bytecode_alu alu;
584401e04c3fSmrg	int i, r, j;
584501e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
584601e04c3fSmrg	int lasti = tgsi_last_instruction(write_mask);
584701e04c3fSmrg	int tmp0 = ctx->temp_reg;
584801e04c3fSmrg	int tmp1 = r600_get_temp(ctx);
584901e04c3fSmrg	int tmp2 = r600_get_temp(ctx);
585001e04c3fSmrg	int tmp3 = r600_get_temp(ctx);
585101e04c3fSmrg	int tmp4 = 0;
5852af69d88dSmrg
585301e04c3fSmrg	/* Use additional temp if dst register and src register are the same */
585401e04c3fSmrg	if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index ||
585501e04c3fSmrg	    inst->Src[1].Register.Index == inst->Dst[0].Register.Index) {
585601e04c3fSmrg		tmp4 = r600_get_temp(ctx);
585701e04c3fSmrg	}
5858af69d88dSmrg
585901e04c3fSmrg	/* Unsigned path:
586001e04c3fSmrg	 *
586101e04c3fSmrg	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
586201e04c3fSmrg	 *
586301e04c3fSmrg	 * 1. tmp0.x = rcp (src2)     = 2^32/src2 + e, where e is rounding error
586401e04c3fSmrg	 * 2. tmp0.z = lo (tmp0.x * src2)
586501e04c3fSmrg	 * 3. tmp0.w = -tmp0.z
586601e04c3fSmrg	 * 4. tmp0.y = hi (tmp0.x * src2)
586701e04c3fSmrg	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src2))
586801e04c3fSmrg	 * 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error
586901e04c3fSmrg	 * 7. tmp1.x = tmp0.x - tmp0.w
587001e04c3fSmrg	 * 8. tmp1.y = tmp0.x + tmp0.w
587101e04c3fSmrg	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
587201e04c3fSmrg	 * 10. tmp0.z = hi(tmp0.x * src1)     = q
587301e04c3fSmrg	 * 11. tmp0.y = lo (tmp0.z * src2)     = src2*q = src1 - r
587401e04c3fSmrg	 *
587501e04c3fSmrg	 * 12. tmp0.w = src1 - tmp0.y       = r
587601e04c3fSmrg	 * 13. tmp1.x = tmp0.w >= src2		= r >= src2 (uint comparison)
587701e04c3fSmrg	 * 14. tmp1.y = src1 >= tmp0.y      = r >= 0 (uint comparison)
587801e04c3fSmrg	 *
587901e04c3fSmrg	 * if DIV
588001e04c3fSmrg	 *
588101e04c3fSmrg	 *   15. tmp1.z = tmp0.z + 1			= q + 1
588201e04c3fSmrg	 *   16. tmp1.w = tmp0.z - 1			= q - 1
588301e04c3fSmrg	 *
588401e04c3fSmrg	 * else MOD
588501e04c3fSmrg	 *
588601e04c3fSmrg	 *   15. tmp1.z = tmp0.w - src2			= r - src2
588701e04c3fSmrg	 *   16. tmp1.w = tmp0.w + src2			= r + src2
588801e04c3fSmrg	 *
588901e04c3fSmrg	 * endif
589001e04c3fSmrg	 *
589101e04c3fSmrg	 * 17. tmp1.x = tmp1.x & tmp1.y
589201e04c3fSmrg	 *
589301e04c3fSmrg	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
589401e04c3fSmrg	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
589501e04c3fSmrg	 *
589601e04c3fSmrg	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
589701e04c3fSmrg	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
589801e04c3fSmrg	 *
589901e04c3fSmrg	 * Signed path:
590001e04c3fSmrg	 *
590101e04c3fSmrg	 * Same as unsigned, using abs values of the operands,
590201e04c3fSmrg	 * and fixing the sign of the result in the end.
590301e04c3fSmrg	 */
5904af69d88dSmrg
590501e04c3fSmrg	for (i = 0; i < 4; i++) {
590601e04c3fSmrg		if (!(write_mask & (1<<i)))
590701e04c3fSmrg			continue;
5908af69d88dSmrg
590901e04c3fSmrg		if (signed_op) {
591001e04c3fSmrg
591101e04c3fSmrg			/* tmp2.x = -src0 */
5912af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
591301e04c3fSmrg			alu.op = ALU_OP2_SUB_INT;
5914af69d88dSmrg
591501e04c3fSmrg			alu.dst.sel = tmp2;
591601e04c3fSmrg			alu.dst.chan = 0;
5917af69d88dSmrg			alu.dst.write = 1;
5918af69d88dSmrg
591901e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_0;
592001e04c3fSmrg
592101e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
5922af69d88dSmrg
5923af69d88dSmrg			alu.last = 1;
5924af69d88dSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5925af69d88dSmrg				return r;
5926af69d88dSmrg
592701e04c3fSmrg			/* tmp2.y = -src1 */
592801e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
592901e04c3fSmrg			alu.op = ALU_OP2_SUB_INT;
5930af69d88dSmrg
593101e04c3fSmrg			alu.dst.sel = tmp2;
593201e04c3fSmrg			alu.dst.chan = 1;
593301e04c3fSmrg			alu.dst.write = 1;
5934af69d88dSmrg
593501e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_0;
5936af69d88dSmrg
593701e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5938af69d88dSmrg
593901e04c3fSmrg			alu.last = 1;
594001e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
594101e04c3fSmrg				return r;
5942af69d88dSmrg
594301e04c3fSmrg			/* tmp2.z sign bit is set if src0 and src2 signs are different */
594401e04c3fSmrg			/* it will be a sign of the quotient */
594501e04c3fSmrg			if (!mod) {
5946af69d88dSmrg
594701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
594801e04c3fSmrg				alu.op = ALU_OP2_XOR_INT;
5949af69d88dSmrg
595001e04c3fSmrg				alu.dst.sel = tmp2;
595101e04c3fSmrg				alu.dst.chan = 2;
595201e04c3fSmrg				alu.dst.write = 1;
5953af69d88dSmrg
595401e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
595501e04c3fSmrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
5956af69d88dSmrg
595701e04c3fSmrg				alu.last = 1;
595801e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
595901e04c3fSmrg					return r;
596001e04c3fSmrg			}
5961af69d88dSmrg
596201e04c3fSmrg			/* tmp2.x = |src0| */
596301e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
596401e04c3fSmrg			alu.op = ALU_OP3_CNDGE_INT;
596501e04c3fSmrg			alu.is_op3 = 1;
5966af69d88dSmrg
596701e04c3fSmrg			alu.dst.sel = tmp2;
596801e04c3fSmrg			alu.dst.chan = 0;
596901e04c3fSmrg			alu.dst.write = 1;
5970af69d88dSmrg
597101e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
597201e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
597301e04c3fSmrg			alu.src[2].sel = tmp2;
597401e04c3fSmrg			alu.src[2].chan = 0;
5975af69d88dSmrg
597601e04c3fSmrg			alu.last = 1;
597701e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
597801e04c3fSmrg				return r;
5979af69d88dSmrg
598001e04c3fSmrg			/* tmp2.y = |src1| */
5981af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
598201e04c3fSmrg			alu.op = ALU_OP3_CNDGE_INT;
598301e04c3fSmrg			alu.is_op3 = 1;
5984af69d88dSmrg
598501e04c3fSmrg			alu.dst.sel = tmp2;
598601e04c3fSmrg			alu.dst.chan = 1;
5987af69d88dSmrg			alu.dst.write = 1;
5988af69d88dSmrg
598901e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
599001e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
599101e04c3fSmrg			alu.src[2].sel = tmp2;
599201e04c3fSmrg			alu.src[2].chan = 1;
5993af69d88dSmrg
5994af69d88dSmrg			alu.last = 1;
5995af69d88dSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
5996af69d88dSmrg				return r;
5997af69d88dSmrg
599801e04c3fSmrg		}
599901e04c3fSmrg
600001e04c3fSmrg		/* 1. tmp0.x = rcp_u (src2)     = 2^32/src2 + e, where e is rounding error */
600101e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
600201e04c3fSmrg			/* tmp3.x = u2f(src2) */
6003af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
600401e04c3fSmrg			alu.op = ALU_OP1_UINT_TO_FLT;
6005af69d88dSmrg
600601e04c3fSmrg			alu.dst.sel = tmp3;
600701e04c3fSmrg			alu.dst.chan = 0;
6008af69d88dSmrg			alu.dst.write = 1;
6009af69d88dSmrg
6010af69d88dSmrg			if (signed_op) {
601101e04c3fSmrg				alu.src[0].sel = tmp2;
601201e04c3fSmrg				alu.src[0].chan = 1;
6013af69d88dSmrg			} else {
601401e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
6015af69d88dSmrg			}
6016af69d88dSmrg
6017af69d88dSmrg			alu.last = 1;
6018af69d88dSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6019af69d88dSmrg				return r;
6020af69d88dSmrg
602101e04c3fSmrg			/* tmp0.x = recip(tmp3.x) */
602201e04c3fSmrg			for (j = 0 ; j < 3; j++) {
602301e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
602401e04c3fSmrg				alu.op = ALU_OP1_RECIP_IEEE;
6025af69d88dSmrg
602601e04c3fSmrg				alu.dst.sel = tmp0;
602701e04c3fSmrg				alu.dst.chan = j;
602801e04c3fSmrg				alu.dst.write = (j == 0);
6029af69d88dSmrg
603001e04c3fSmrg				alu.src[0].sel = tmp3;
603101e04c3fSmrg				alu.src[0].chan = 0;
603201e04c3fSmrg
603301e04c3fSmrg				if (j == 2)
603401e04c3fSmrg					alu.last = 1;
603501e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
603601e04c3fSmrg					return r;
603701e04c3fSmrg			}
603801e04c3fSmrg
603901e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
604001e04c3fSmrg			alu.op = ALU_OP2_MUL;
6041af69d88dSmrg
6042af69d88dSmrg			alu.src[0].sel = tmp0;
604301e04c3fSmrg			alu.src[0].chan = 0;
604401e04c3fSmrg
604501e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
604601e04c3fSmrg			alu.src[1].value = 0x4f800000;
6047af69d88dSmrg
604801e04c3fSmrg			alu.dst.sel = tmp3;
604901e04c3fSmrg			alu.dst.write = 1;
6050af69d88dSmrg			alu.last = 1;
605101e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
605201e04c3fSmrg			if (r)
6053af69d88dSmrg				return r;
6054af69d88dSmrg
6055af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
605601e04c3fSmrg			alu.op = ALU_OP1_FLT_TO_UINT;
6057af69d88dSmrg
605801e04c3fSmrg			alu.dst.sel = tmp0;
605901e04c3fSmrg			alu.dst.chan = 0;
6060af69d88dSmrg			alu.dst.write = 1;
6061af69d88dSmrg
606201e04c3fSmrg			alu.src[0].sel = tmp3;
606301e04c3fSmrg			alu.src[0].chan = 0;
6064af69d88dSmrg
6065af69d88dSmrg			alu.last = 1;
6066af69d88dSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6067af69d88dSmrg				return r;
6068af69d88dSmrg
606901e04c3fSmrg		} else {
607001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
607101e04c3fSmrg			alu.op = ALU_OP1_RECIP_UINT;
607201e04c3fSmrg
607301e04c3fSmrg			alu.dst.sel = tmp0;
607401e04c3fSmrg			alu.dst.chan = 0;
607501e04c3fSmrg			alu.dst.write = 1;
607601e04c3fSmrg
607701e04c3fSmrg			if (signed_op) {
607801e04c3fSmrg				alu.src[0].sel = tmp2;
607901e04c3fSmrg				alu.src[0].chan = 1;
608001e04c3fSmrg			} else {
608101e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
608201e04c3fSmrg			}
608301e04c3fSmrg
608401e04c3fSmrg			alu.last = 1;
608501e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
608601e04c3fSmrg				return r;
6087af69d88dSmrg		}
6088af69d88dSmrg
608901e04c3fSmrg		/* 2. tmp0.z = lo (tmp0.x * src2) */
6090af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
609101e04c3fSmrg		alu.op = ALU_OP2_MULLO_UINT;
6092af69d88dSmrg
609301e04c3fSmrg		alu.dst.sel = tmp0;
609401e04c3fSmrg		alu.dst.chan = 2;
6095af69d88dSmrg		alu.dst.write = 1;
6096af69d88dSmrg
609701e04c3fSmrg		alu.src[0].sel = tmp0;
6098af69d88dSmrg		alu.src[0].chan = 0;
609901e04c3fSmrg		if (signed_op) {
610001e04c3fSmrg			alu.src[1].sel = tmp2;
610101e04c3fSmrg			alu.src[1].chan = 1;
610201e04c3fSmrg		} else {
610301e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
610401e04c3fSmrg		}
6105af69d88dSmrg
610601e04c3fSmrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
6107af69d88dSmrg			return r;
6108af69d88dSmrg
610901e04c3fSmrg		/* 3. tmp0.w = -tmp0.z */
6110af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
611101e04c3fSmrg		alu.op = ALU_OP2_SUB_INT;
6112af69d88dSmrg
6113af69d88dSmrg		alu.dst.sel = tmp0;
611401e04c3fSmrg		alu.dst.chan = 3;
6115af69d88dSmrg		alu.dst.write = 1;
6116af69d88dSmrg
611701e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
6118af69d88dSmrg		alu.src[1].sel = tmp0;
611901e04c3fSmrg		alu.src[1].chan = 2;
6120af69d88dSmrg
6121af69d88dSmrg		alu.last = 1;
6122af69d88dSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6123af69d88dSmrg			return r;
6124af69d88dSmrg
612501e04c3fSmrg		/* 4. tmp0.y = hi (tmp0.x * src2) */
6126af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
612701e04c3fSmrg		alu.op = ALU_OP2_MULHI_UINT;
612801e04c3fSmrg
612901e04c3fSmrg		alu.dst.sel = tmp0;
613001e04c3fSmrg		alu.dst.chan = 1;
613101e04c3fSmrg		alu.dst.write = 1;
613201e04c3fSmrg
613301e04c3fSmrg		alu.src[0].sel = tmp0;
613401e04c3fSmrg		alu.src[0].chan = 0;
6135af69d88dSmrg
6136af69d88dSmrg		if (signed_op) {
613701e04c3fSmrg			alu.src[1].sel = tmp2;
613801e04c3fSmrg			alu.src[1].chan = 1;
6139af69d88dSmrg		} else {
614001e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
6141af69d88dSmrg		}
6142af69d88dSmrg
614301e04c3fSmrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
614401e04c3fSmrg			return r;
614501e04c3fSmrg
614601e04c3fSmrg		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src)) */
614701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
614801e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
614901e04c3fSmrg		alu.is_op3 = 1;
615001e04c3fSmrg
615101e04c3fSmrg		alu.dst.sel = tmp0;
615201e04c3fSmrg		alu.dst.chan = 2;
615301e04c3fSmrg		alu.dst.write = 1;
615401e04c3fSmrg
615501e04c3fSmrg		alu.src[0].sel = tmp0;
6156af69d88dSmrg		alu.src[0].chan = 1;
615701e04c3fSmrg		alu.src[1].sel = tmp0;
6158af69d88dSmrg		alu.src[1].chan = 3;
6159af69d88dSmrg		alu.src[2].sel = tmp0;
6160af69d88dSmrg		alu.src[2].chan = 2;
6161af69d88dSmrg
6162af69d88dSmrg		alu.last = 1;
6163af69d88dSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6164af69d88dSmrg			return r;
6165af69d88dSmrg
616601e04c3fSmrg		/* 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error */
616701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
616801e04c3fSmrg		alu.op = ALU_OP2_MULHI_UINT;
6169af69d88dSmrg
617001e04c3fSmrg		alu.dst.sel = tmp0;
617101e04c3fSmrg		alu.dst.chan = 3;
617201e04c3fSmrg		alu.dst.write = 1;
6173af69d88dSmrg
617401e04c3fSmrg		alu.src[0].sel = tmp0;
617501e04c3fSmrg		alu.src[0].chan = 2;
6176af69d88dSmrg
617701e04c3fSmrg		alu.src[1].sel = tmp0;
617801e04c3fSmrg		alu.src[1].chan = 0;
6179af69d88dSmrg
618001e04c3fSmrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
618101e04c3fSmrg				return r;
6182af69d88dSmrg
618301e04c3fSmrg		/* 7. tmp1.x = tmp0.x - tmp0.w */
618401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
618501e04c3fSmrg		alu.op = ALU_OP2_SUB_INT;
6186af69d88dSmrg
618701e04c3fSmrg		alu.dst.sel = tmp1;
618801e04c3fSmrg		alu.dst.chan = 0;
618901e04c3fSmrg		alu.dst.write = 1;
6190af69d88dSmrg
619101e04c3fSmrg		alu.src[0].sel = tmp0;
619201e04c3fSmrg		alu.src[0].chan = 0;
619301e04c3fSmrg		alu.src[1].sel = tmp0;
619401e04c3fSmrg		alu.src[1].chan = 3;
6195af69d88dSmrg
619601e04c3fSmrg		alu.last = 1;
619701e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
619801e04c3fSmrg			return r;
6199af69d88dSmrg
620001e04c3fSmrg		/* 8. tmp1.y = tmp0.x + tmp0.w */
620101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
620201e04c3fSmrg		alu.op = ALU_OP2_ADD_INT;
6203af69d88dSmrg
620401e04c3fSmrg		alu.dst.sel = tmp1;
620501e04c3fSmrg		alu.dst.chan = 1;
620601e04c3fSmrg		alu.dst.write = 1;
6207af69d88dSmrg
620801e04c3fSmrg		alu.src[0].sel = tmp0;
620901e04c3fSmrg		alu.src[0].chan = 0;
621001e04c3fSmrg		alu.src[1].sel = tmp0;
621101e04c3fSmrg		alu.src[1].chan = 3;
6212af69d88dSmrg
621301e04c3fSmrg		alu.last = 1;
621401e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
621501e04c3fSmrg			return r;
6216af69d88dSmrg
621701e04c3fSmrg		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
621801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
621901e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
622001e04c3fSmrg		alu.is_op3 = 1;
6221af69d88dSmrg
622201e04c3fSmrg		alu.dst.sel = tmp0;
622301e04c3fSmrg		alu.dst.chan = 0;
622401e04c3fSmrg		alu.dst.write = 1;
6225af69d88dSmrg
622601e04c3fSmrg		alu.src[0].sel = tmp0;
622701e04c3fSmrg		alu.src[0].chan = 1;
622801e04c3fSmrg		alu.src[1].sel = tmp1;
622901e04c3fSmrg		alu.src[1].chan = 1;
623001e04c3fSmrg		alu.src[2].sel = tmp1;
623101e04c3fSmrg		alu.src[2].chan = 0;
6232af69d88dSmrg
623301e04c3fSmrg		alu.last = 1;
623401e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
623501e04c3fSmrg			return r;
6236af69d88dSmrg
623701e04c3fSmrg		/* 10. tmp0.z = hi(tmp0.x * src1)     = q */
623801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
623901e04c3fSmrg		alu.op = ALU_OP2_MULHI_UINT;
6240af69d88dSmrg
624101e04c3fSmrg		alu.dst.sel = tmp0;
624201e04c3fSmrg		alu.dst.chan = 2;
624301e04c3fSmrg		alu.dst.write = 1;
6244af69d88dSmrg
624501e04c3fSmrg		alu.src[0].sel = tmp0;
624601e04c3fSmrg		alu.src[0].chan = 0;
6247af69d88dSmrg
624801e04c3fSmrg		if (signed_op) {
624901e04c3fSmrg			alu.src[1].sel = tmp2;
625001e04c3fSmrg			alu.src[1].chan = 0;
625101e04c3fSmrg		} else {
625201e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
625301e04c3fSmrg		}
6254af69d88dSmrg
625501e04c3fSmrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
625601e04c3fSmrg			return r;
6257af69d88dSmrg
625801e04c3fSmrg		/* 11. tmp0.y = lo (src2 * tmp0.z)     = src2*q = src1 - r */
625901e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
626001e04c3fSmrg		alu.op = ALU_OP2_MULLO_UINT;
6261af69d88dSmrg
626201e04c3fSmrg		alu.dst.sel = tmp0;
626301e04c3fSmrg		alu.dst.chan = 1;
626401e04c3fSmrg		alu.dst.write = 1;
6265af69d88dSmrg
626601e04c3fSmrg		if (signed_op) {
626701e04c3fSmrg			alu.src[0].sel = tmp2;
626801e04c3fSmrg			alu.src[0].chan = 1;
626901e04c3fSmrg		} else {
627001e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
627101e04c3fSmrg		}
6272af69d88dSmrg
627301e04c3fSmrg		alu.src[1].sel = tmp0;
627401e04c3fSmrg		alu.src[1].chan = 2;
6275af69d88dSmrg
627601e04c3fSmrg		if ((r = emit_mul_int_op(ctx->bc, &alu)))
627701e04c3fSmrg			return r;
6278af69d88dSmrg
627901e04c3fSmrg		/* 12. tmp0.w = src1 - tmp0.y       = r */
6280af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
628101e04c3fSmrg		alu.op = ALU_OP2_SUB_INT;
6282af69d88dSmrg
628301e04c3fSmrg		alu.dst.sel = tmp0;
628401e04c3fSmrg		alu.dst.chan = 3;
6285af69d88dSmrg		alu.dst.write = 1;
6286af69d88dSmrg
628701e04c3fSmrg		if (signed_op) {
628801e04c3fSmrg			alu.src[0].sel = tmp2;
628901e04c3fSmrg			alu.src[0].chan = 0;
629001e04c3fSmrg		} else {
629101e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
629201e04c3fSmrg		}
6293af69d88dSmrg
629401e04c3fSmrg		alu.src[1].sel = tmp0;
629501e04c3fSmrg		alu.src[1].chan = 1;
6296af69d88dSmrg
629701e04c3fSmrg		alu.last = 1;
629801e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6299af69d88dSmrg			return r;
6300af69d88dSmrg
630101e04c3fSmrg		/* 13. tmp1.x = tmp0.w >= src2		= r >= src2 */
6302af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
630301e04c3fSmrg		alu.op = ALU_OP2_SETGE_UINT;
6304af69d88dSmrg
630501e04c3fSmrg		alu.dst.sel = tmp1;
630601e04c3fSmrg		alu.dst.chan = 0;
6307af69d88dSmrg		alu.dst.write = 1;
6308af69d88dSmrg
630901e04c3fSmrg		alu.src[0].sel = tmp0;
631001e04c3fSmrg		alu.src[0].chan = 3;
631101e04c3fSmrg		if (signed_op) {
631201e04c3fSmrg			alu.src[1].sel = tmp2;
631301e04c3fSmrg			alu.src[1].chan = 1;
631401e04c3fSmrg		} else {
631501e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
631601e04c3fSmrg		}
6317af69d88dSmrg
631801e04c3fSmrg		alu.last = 1;
631901e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6320af69d88dSmrg			return r;
6321af69d88dSmrg
632201e04c3fSmrg		/* 14. tmp1.y = src1 >= tmp0.y       = r >= 0 */
6323af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
632401e04c3fSmrg		alu.op = ALU_OP2_SETGE_UINT;
632501e04c3fSmrg
632601e04c3fSmrg		alu.dst.sel = tmp1;
632701e04c3fSmrg		alu.dst.chan = 1;
6328af69d88dSmrg		alu.dst.write = 1;
6329af69d88dSmrg
633001e04c3fSmrg		if (signed_op) {
633101e04c3fSmrg			alu.src[0].sel = tmp2;
633201e04c3fSmrg			alu.src[0].chan = 0;
633301e04c3fSmrg		} else {
633401e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
633501e04c3fSmrg		}
6336af69d88dSmrg
633701e04c3fSmrg		alu.src[1].sel = tmp0;
633801e04c3fSmrg		alu.src[1].chan = 1;
6339af69d88dSmrg
634001e04c3fSmrg		alu.last = 1;
634101e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6342af69d88dSmrg			return r;
6343af69d88dSmrg
634401e04c3fSmrg		if (mod) { /* UMOD */
6345af69d88dSmrg
634601e04c3fSmrg			/* 15. tmp1.z = tmp0.w - src2			= r - src2 */
634701e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
634801e04c3fSmrg			alu.op = ALU_OP2_SUB_INT;
6349af69d88dSmrg
635001e04c3fSmrg			alu.dst.sel = tmp1;
635101e04c3fSmrg			alu.dst.chan = 2;
635201e04c3fSmrg			alu.dst.write = 1;
6353af69d88dSmrg
635401e04c3fSmrg			alu.src[0].sel = tmp0;
635501e04c3fSmrg			alu.src[0].chan = 3;
6356af69d88dSmrg
635701e04c3fSmrg			if (signed_op) {
635801e04c3fSmrg				alu.src[1].sel = tmp2;
635901e04c3fSmrg				alu.src[1].chan = 1;
636001e04c3fSmrg			} else {
636101e04c3fSmrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
636201e04c3fSmrg			}
6363af69d88dSmrg
6364af69d88dSmrg			alu.last = 1;
636501e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
636601e04c3fSmrg				return r;
6367af69d88dSmrg
636801e04c3fSmrg			/* 16. tmp1.w = tmp0.w + src2			= r + src2 */
636901e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
637001e04c3fSmrg			alu.op = ALU_OP2_ADD_INT;
6371af69d88dSmrg
637201e04c3fSmrg			alu.dst.sel = tmp1;
637301e04c3fSmrg			alu.dst.chan = 3;
637401e04c3fSmrg			alu.dst.write = 1;
6375af69d88dSmrg
637601e04c3fSmrg			alu.src[0].sel = tmp0;
637701e04c3fSmrg			alu.src[0].chan = 3;
637801e04c3fSmrg			if (signed_op) {
637901e04c3fSmrg				alu.src[1].sel = tmp2;
638001e04c3fSmrg				alu.src[1].chan = 1;
638101e04c3fSmrg			} else {
638201e04c3fSmrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
638301e04c3fSmrg			}
6384af69d88dSmrg
638501e04c3fSmrg			alu.last = 1;
638601e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
638701e04c3fSmrg				return r;
6388af69d88dSmrg
638901e04c3fSmrg		} else { /* UDIV */
6390af69d88dSmrg
639101e04c3fSmrg			/* 15. tmp1.z = tmp0.z + 1       = q + 1       DIV */
639201e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
639301e04c3fSmrg			alu.op = ALU_OP2_ADD_INT;
639401e04c3fSmrg
639501e04c3fSmrg			alu.dst.sel = tmp1;
639601e04c3fSmrg			alu.dst.chan = 2;
639701e04c3fSmrg			alu.dst.write = 1;
639801e04c3fSmrg
639901e04c3fSmrg			alu.src[0].sel = tmp0;
640001e04c3fSmrg			alu.src[0].chan = 2;
640101e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
6402af69d88dSmrg
6403af69d88dSmrg			alu.last = 1;
640401e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
640501e04c3fSmrg				return r;
6406af69d88dSmrg
640701e04c3fSmrg			/* 16. tmp1.w = tmp0.z - 1			= q - 1 */
640801e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
640901e04c3fSmrg			alu.op = ALU_OP2_ADD_INT;
6410af69d88dSmrg
641101e04c3fSmrg			alu.dst.sel = tmp1;
641201e04c3fSmrg			alu.dst.chan = 3;
641301e04c3fSmrg			alu.dst.write = 1;
6414af69d88dSmrg
641501e04c3fSmrg			alu.src[0].sel = tmp0;
641601e04c3fSmrg			alu.src[0].chan = 2;
641701e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
6418af69d88dSmrg
641901e04c3fSmrg			alu.last = 1;
642001e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
642101e04c3fSmrg				return r;
642201e04c3fSmrg
642301e04c3fSmrg		}
642401e04c3fSmrg
642501e04c3fSmrg		/* 17. tmp1.x = tmp1.x & tmp1.y */
6426af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
642701e04c3fSmrg		alu.op = ALU_OP2_AND_INT;
6428af69d88dSmrg
642901e04c3fSmrg		alu.dst.sel = tmp1;
643001e04c3fSmrg		alu.dst.chan = 0;
643101e04c3fSmrg		alu.dst.write = 1;
6432af69d88dSmrg
643301e04c3fSmrg		alu.src[0].sel = tmp1;
643401e04c3fSmrg		alu.src[0].chan = 0;
643501e04c3fSmrg		alu.src[1].sel = tmp1;
643601e04c3fSmrg		alu.src[1].chan = 1;
6437af69d88dSmrg
643801e04c3fSmrg		alu.last = 1;
643901e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6440af69d88dSmrg			return r;
6441af69d88dSmrg
644201e04c3fSmrg		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z    DIV */
644301e04c3fSmrg		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z    MOD */
6444af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
644501e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
6446af69d88dSmrg		alu.is_op3 = 1;
6447af69d88dSmrg
644801e04c3fSmrg		alu.dst.sel = tmp0;
644901e04c3fSmrg		alu.dst.chan = 2;
645001e04c3fSmrg		alu.dst.write = 1;
6451af69d88dSmrg
645201e04c3fSmrg		alu.src[0].sel = tmp1;
645301e04c3fSmrg		alu.src[0].chan = 0;
645401e04c3fSmrg		alu.src[1].sel = tmp0;
645501e04c3fSmrg		alu.src[1].chan = mod ? 3 : 2;
645601e04c3fSmrg		alu.src[2].sel = tmp1;
645701e04c3fSmrg		alu.src[2].chan = 2;
6458af69d88dSmrg
645901e04c3fSmrg		alu.last = 1;
646001e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
646101e04c3fSmrg			return r;
6462af69d88dSmrg
646301e04c3fSmrg		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
646401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
646501e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
646601e04c3fSmrg		alu.is_op3 = 1;
646701e04c3fSmrg
646801e04c3fSmrg		if (signed_op) {
646901e04c3fSmrg			alu.dst.sel = tmp0;
647001e04c3fSmrg			alu.dst.chan = 2;
647101e04c3fSmrg			alu.dst.write = 1;
647201e04c3fSmrg		} else {
647301e04c3fSmrg			if (tmp4 > 0) {
647401e04c3fSmrg				alu.dst.sel = tmp4;
647501e04c3fSmrg				alu.dst.chan = i;
647601e04c3fSmrg				alu.dst.write = 1;
647701e04c3fSmrg			} else {
647801e04c3fSmrg				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
647901e04c3fSmrg			}
648001e04c3fSmrg		}
648101e04c3fSmrg
648201e04c3fSmrg		alu.src[0].sel = tmp1;
648301e04c3fSmrg		alu.src[0].chan = 1;
648401e04c3fSmrg		alu.src[1].sel = tmp1;
648501e04c3fSmrg		alu.src[1].chan = 3;
648601e04c3fSmrg		alu.src[2].sel = tmp0;
648701e04c3fSmrg		alu.src[2].chan = 2;
648801e04c3fSmrg
648901e04c3fSmrg		alu.last = 1;
649001e04c3fSmrg		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
6491af69d88dSmrg			return r;
649201e04c3fSmrg
649301e04c3fSmrg		if (signed_op) {
649401e04c3fSmrg
649501e04c3fSmrg			/* fix the sign of the result */
649601e04c3fSmrg
649701e04c3fSmrg			if (mod) {
649801e04c3fSmrg
649901e04c3fSmrg				/* tmp0.x = -tmp0.z */
650001e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
650101e04c3fSmrg				alu.op = ALU_OP2_SUB_INT;
650201e04c3fSmrg
650301e04c3fSmrg				alu.dst.sel = tmp0;
650401e04c3fSmrg				alu.dst.chan = 0;
650501e04c3fSmrg				alu.dst.write = 1;
650601e04c3fSmrg
650701e04c3fSmrg				alu.src[0].sel = V_SQ_ALU_SRC_0;
650801e04c3fSmrg				alu.src[1].sel = tmp0;
650901e04c3fSmrg				alu.src[1].chan = 2;
651001e04c3fSmrg
651101e04c3fSmrg				alu.last = 1;
651201e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
651301e04c3fSmrg					return r;
651401e04c3fSmrg
651501e04c3fSmrg				/* sign of the remainder is the same as the sign of src0 */
651601e04c3fSmrg				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
651701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
651801e04c3fSmrg				alu.op = ALU_OP3_CNDGE_INT;
651901e04c3fSmrg				alu.is_op3 = 1;
652001e04c3fSmrg
652101e04c3fSmrg				if (tmp4 > 0) {
652201e04c3fSmrg					alu.dst.sel = tmp4;
652301e04c3fSmrg					alu.dst.chan = i;
652401e04c3fSmrg					alu.dst.write = 1;
652501e04c3fSmrg				} else {
652601e04c3fSmrg					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
652701e04c3fSmrg				}
652801e04c3fSmrg
652901e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
653001e04c3fSmrg				alu.src[1].sel = tmp0;
653101e04c3fSmrg				alu.src[1].chan = 2;
653201e04c3fSmrg				alu.src[2].sel = tmp0;
653301e04c3fSmrg				alu.src[2].chan = 0;
653401e04c3fSmrg
653501e04c3fSmrg				alu.last = 1;
653601e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
653701e04c3fSmrg					return r;
653801e04c3fSmrg
653901e04c3fSmrg			} else {
654001e04c3fSmrg
654101e04c3fSmrg				/* tmp0.x = -tmp0.z */
654201e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
654301e04c3fSmrg				alu.op = ALU_OP2_SUB_INT;
654401e04c3fSmrg
654501e04c3fSmrg				alu.dst.sel = tmp0;
654601e04c3fSmrg				alu.dst.chan = 0;
654701e04c3fSmrg				alu.dst.write = 1;
654801e04c3fSmrg
654901e04c3fSmrg				alu.src[0].sel = V_SQ_ALU_SRC_0;
655001e04c3fSmrg				alu.src[1].sel = tmp0;
655101e04c3fSmrg				alu.src[1].chan = 2;
655201e04c3fSmrg
655301e04c3fSmrg				alu.last = 1;
655401e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
655501e04c3fSmrg					return r;
655601e04c3fSmrg
655701e04c3fSmrg				/* fix the quotient sign (same as the sign of src0*src1) */
655801e04c3fSmrg				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
655901e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
656001e04c3fSmrg				alu.op = ALU_OP3_CNDGE_INT;
656101e04c3fSmrg				alu.is_op3 = 1;
656201e04c3fSmrg
656301e04c3fSmrg				if (tmp4 > 0) {
656401e04c3fSmrg					alu.dst.sel = tmp4;
656501e04c3fSmrg					alu.dst.chan = i;
656601e04c3fSmrg					alu.dst.write = 1;
656701e04c3fSmrg				} else {
656801e04c3fSmrg					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
656901e04c3fSmrg				}
657001e04c3fSmrg
657101e04c3fSmrg				alu.src[0].sel = tmp2;
657201e04c3fSmrg				alu.src[0].chan = 2;
657301e04c3fSmrg				alu.src[1].sel = tmp0;
657401e04c3fSmrg				alu.src[1].chan = 2;
657501e04c3fSmrg				alu.src[2].sel = tmp0;
657601e04c3fSmrg				alu.src[2].chan = 0;
657701e04c3fSmrg
657801e04c3fSmrg				alu.last = 1;
657901e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
658001e04c3fSmrg					return r;
658101e04c3fSmrg			}
658201e04c3fSmrg		}
658301e04c3fSmrg	}
658401e04c3fSmrg
658501e04c3fSmrg	if (tmp4 > 0) {
658601e04c3fSmrg		for (i = 0; i <= lasti; ++i) {
658701e04c3fSmrg			if (!(write_mask & (1<<i)))
658801e04c3fSmrg				continue;
658901e04c3fSmrg
659001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
659101e04c3fSmrg			alu.op = ALU_OP1_MOV;
659201e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
659301e04c3fSmrg			alu.src[0].sel = tmp4;
659401e04c3fSmrg			alu.src[0].chan = i;
659501e04c3fSmrg
659601e04c3fSmrg			if (i == lasti)
659701e04c3fSmrg				alu.last = 1;
659801e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
659901e04c3fSmrg				return r;
660001e04c3fSmrg		}
6601af69d88dSmrg	}
660201e04c3fSmrg
6603af69d88dSmrg	return 0;
6604af69d88dSmrg}
6605af69d88dSmrg
/* TGSI UDIV: unsigned quotient, delegated to the shared div/mod emitter. */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;       /* quotient, not remainder */
	const int signed_op = 0; /* no sign fix-up */

	return tgsi_divmod(ctx, mod, signed_op);
}
6610af69d88dSmrg
/* TGSI UMOD: unsigned remainder, delegated to the shared div/mod emitter. */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;       /* remainder, not quotient */
	const int signed_op = 0; /* no sign fix-up */

	return tgsi_divmod(ctx, mod, signed_op);
}
6615af69d88dSmrg
/* TGSI IDIV: signed quotient, delegated to the shared div/mod emitter. */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int mod = 0;       /* quotient, not remainder */
	const int signed_op = 1; /* result sign is fixed up afterwards */

	return tgsi_divmod(ctx, mod, signed_op);
}
6620af69d88dSmrg
/* TGSI IMOD: signed remainder, delegated to the shared div/mod emitter. */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int mod = 1;       /* remainder, not quotient */
	const int signed_op = 1; /* result sign is fixed up afterwards */

	return tgsi_divmod(ctx, mod, signed_op);
}
6625af69d88dSmrg
6626af69d88dSmrg
662701e04c3fSmrgstatic int tgsi_f2i(struct r600_shader_ctx *ctx)
662801e04c3fSmrg{
662901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
663001e04c3fSmrg	struct r600_bytecode_alu alu;
663101e04c3fSmrg	int i, r;
663201e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
663301e04c3fSmrg	int last_inst = tgsi_last_instruction(write_mask);
6634af69d88dSmrg
6635af69d88dSmrg	for (i = 0; i < 4; i++) {
6636af69d88dSmrg		if (!(write_mask & (1<<i)))
6637af69d88dSmrg			continue;
6638af69d88dSmrg
6639af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
664001e04c3fSmrg		alu.op = ALU_OP1_TRUNC;
664101e04c3fSmrg
664201e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
6643af69d88dSmrg		alu.dst.chan = i;
6644af69d88dSmrg		alu.dst.write = 1;
6645af69d88dSmrg
664601e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
664701e04c3fSmrg		if (i == last_inst)
664801e04c3fSmrg			alu.last = 1;
6649af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6650af69d88dSmrg		if (r)
6651af69d88dSmrg			return r;
6652af69d88dSmrg	}
6653af69d88dSmrg
6654af69d88dSmrg	for (i = 0; i < 4; i++) {
6655af69d88dSmrg		if (!(write_mask & (1<<i)))
6656af69d88dSmrg			continue;
6657af69d88dSmrg
6658af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
665901e04c3fSmrg		alu.op = ctx->inst_info->op;
666001e04c3fSmrg
6661af69d88dSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6662af69d88dSmrg
666301e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
6664af69d88dSmrg		alu.src[0].chan = i;
6665af69d88dSmrg
666601e04c3fSmrg		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
666701e04c3fSmrg			alu.last = 1;
6668af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6669af69d88dSmrg		if (r)
6670af69d88dSmrg			return r;
6671af69d88dSmrg	}
6672af69d88dSmrg
6673af69d88dSmrg	return 0;
6674af69d88dSmrg}
6675af69d88dSmrg
667601e04c3fSmrgstatic int tgsi_iabs(struct r600_shader_ctx *ctx)
66773464ebd5Sriastradh{
66783464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6679af69d88dSmrg	struct r600_bytecode_alu alu;
668001e04c3fSmrg	int i, r;
6681af69d88dSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
6682af69d88dSmrg	int last_inst = tgsi_last_instruction(write_mask);
6683af69d88dSmrg
668401e04c3fSmrg	/* tmp = -src */
66853464ebd5Sriastradh	for (i = 0; i < 4; i++) {
6686af69d88dSmrg		if (!(write_mask & (1<<i)))
6687af69d88dSmrg			continue;
66883464ebd5Sriastradh
6689af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
669001e04c3fSmrg		alu.op = ALU_OP2_SUB_INT;
669101e04c3fSmrg
669201e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
66933464ebd5Sriastradh		alu.dst.chan = i;
6694af69d88dSmrg		alu.dst.write = 1;
66953464ebd5Sriastradh
669601e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
669701e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
66983464ebd5Sriastradh
669901e04c3fSmrg		if (i == last_inst)
670001e04c3fSmrg			alu.last = 1;
6701af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6702af69d88dSmrg		if (r)
6703af69d88dSmrg			return r;
6704af69d88dSmrg	}
6705af69d88dSmrg
670601e04c3fSmrg	/* dst = (src >= 0 ? src : tmp) */
6707af69d88dSmrg	for (i = 0; i < 4; i++) {
6708af69d88dSmrg		if (!(write_mask & (1<<i)))
6709af69d88dSmrg			continue;
6710af69d88dSmrg
6711af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
671201e04c3fSmrg		alu.op = ALU_OP3_CNDGE_INT;
671301e04c3fSmrg		alu.is_op3 = 1;
6714af69d88dSmrg		alu.dst.write = 1;
6715af69d88dSmrg
671601e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
6717af69d88dSmrg
671801e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
671901e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
672001e04c3fSmrg		alu.src[2].sel = ctx->temp_reg;
672101e04c3fSmrg		alu.src[2].chan = i;
672201e04c3fSmrg
672301e04c3fSmrg		if (i == last_inst)
672401e04c3fSmrg			alu.last = 1;
6725af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
67263464ebd5Sriastradh		if (r)
67273464ebd5Sriastradh			return r;
67283464ebd5Sriastradh	}
672901e04c3fSmrg	return 0;
673001e04c3fSmrg}
673101e04c3fSmrg
673201e04c3fSmrgstatic int tgsi_issg(struct r600_shader_ctx *ctx)
673301e04c3fSmrg{
673401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
673501e04c3fSmrg	struct r600_bytecode_alu alu;
673601e04c3fSmrg	int i, r;
673701e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
673801e04c3fSmrg	int last_inst = tgsi_last_instruction(write_mask);
67393464ebd5Sriastradh
674001e04c3fSmrg	/* tmp = (src >= 0 ? src : -1) */
67413464ebd5Sriastradh	for (i = 0; i < 4; i++) {
6742af69d88dSmrg		if (!(write_mask & (1<<i)))
6743af69d88dSmrg			continue;
6744af69d88dSmrg
6745af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
6746af69d88dSmrg		alu.op = ALU_OP3_CNDGE_INT;
67473464ebd5Sriastradh		alu.is_op3 = 1;
674801e04c3fSmrg
674901e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
6750af69d88dSmrg		alu.dst.chan = i;
6751af69d88dSmrg		alu.dst.write = 1;
67523464ebd5Sriastradh
675301e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
675401e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
675501e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
67563464ebd5Sriastradh
675701e04c3fSmrg		if (i == last_inst)
675801e04c3fSmrg			alu.last = 1;
6759af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
67603464ebd5Sriastradh		if (r)
67613464ebd5Sriastradh			return r;
67623464ebd5Sriastradh	}
6763af69d88dSmrg
676401e04c3fSmrg	/* dst = (tmp > 0 ? 1 : tmp) */
67653464ebd5Sriastradh	for (i = 0; i < 4; i++) {
676601e04c3fSmrg		if (!(write_mask & (1<<i)))
676701e04c3fSmrg			continue;
676801e04c3fSmrg
6769af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
677001e04c3fSmrg		alu.op = ALU_OP3_CNDGT_INT;
677101e04c3fSmrg		alu.is_op3 = 1;
677201e04c3fSmrg		alu.dst.write = 1;
677301e04c3fSmrg
677401e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
677501e04c3fSmrg
677601e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
677701e04c3fSmrg		alu.src[0].chan = i;
677801e04c3fSmrg
677901e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
678001e04c3fSmrg
678101e04c3fSmrg		alu.src[2].sel = ctx->temp_reg;
678201e04c3fSmrg		alu.src[2].chan = i;
678301e04c3fSmrg
678401e04c3fSmrg		if (i == last_inst)
67853464ebd5Sriastradh			alu.last = 1;
6786af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
67873464ebd5Sriastradh		if (r)
67883464ebd5Sriastradh			return r;
67893464ebd5Sriastradh	}
67903464ebd5Sriastradh	return 0;
67913464ebd5Sriastradh}
67923464ebd5Sriastradh
679301e04c3fSmrg
679401e04c3fSmrg
679501e04c3fSmrgstatic int tgsi_ssg(struct r600_shader_ctx *ctx)
67963464ebd5Sriastradh{
67973464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
679801e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
679901e04c3fSmrg	int last_inst = tgsi_last_instruction(write_mask);
6800af69d88dSmrg	struct r600_bytecode_alu alu;
680101e04c3fSmrg	int i, r;
68023464ebd5Sriastradh
680301e04c3fSmrg	/* tmp = (src > 0 ? 1 : src) */
680401e04c3fSmrg	for (i = 0; i <= last_inst; i++) {
680501e04c3fSmrg		if (!(write_mask & (1 << i)))
68063464ebd5Sriastradh			continue;
6807af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
680801e04c3fSmrg		alu.op = ALU_OP3_CNDGT;
680901e04c3fSmrg		alu.is_op3 = 1;
68103464ebd5Sriastradh
681101e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
68123464ebd5Sriastradh		alu.dst.chan = i;
681301e04c3fSmrg
681401e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
681501e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
681601e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
681701e04c3fSmrg
681801e04c3fSmrg		if (i == last_inst)
68193464ebd5Sriastradh			alu.last = 1;
6820af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
68213464ebd5Sriastradh		if (r)
68223464ebd5Sriastradh			return r;
68233464ebd5Sriastradh	}
68243464ebd5Sriastradh
682501e04c3fSmrg	/* dst = (-tmp > 0 ? -1 : tmp) */
682601e04c3fSmrg	for (i = 0; i <= last_inst; i++) {
682701e04c3fSmrg		if (!(write_mask & (1 << i)))
682801e04c3fSmrg			continue;
6829af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
683001e04c3fSmrg		alu.op = ALU_OP3_CNDGT;
683101e04c3fSmrg		alu.is_op3 = 1;
68323464ebd5Sriastradh		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
683301e04c3fSmrg
683401e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
683501e04c3fSmrg		alu.src[0].chan = i;
683601e04c3fSmrg		alu.src[0].neg = 1;
683701e04c3fSmrg
683801e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_1;
683901e04c3fSmrg		alu.src[1].neg = 1;
684001e04c3fSmrg
684101e04c3fSmrg		alu.src[2].sel = ctx->temp_reg;
684201e04c3fSmrg		alu.src[2].chan = i;
684301e04c3fSmrg
684401e04c3fSmrg		if (i == last_inst)
68453464ebd5Sriastradh			alu.last = 1;
6846af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
68473464ebd5Sriastradh		if (r)
68483464ebd5Sriastradh			return r;
68493464ebd5Sriastradh	}
68503464ebd5Sriastradh	return 0;
68513464ebd5Sriastradh}
68523464ebd5Sriastradh
685301e04c3fSmrgstatic int tgsi_bfi(struct r600_shader_ctx *ctx)
68543464ebd5Sriastradh{
68553464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6856af69d88dSmrg	struct r600_bytecode_alu alu;
685701e04c3fSmrg	int i, r, t1, t2;
6858af69d88dSmrg
685901e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
686001e04c3fSmrg	int last_inst = tgsi_last_instruction(write_mask);
6861af69d88dSmrg
686201e04c3fSmrg	t1 = r600_get_temp(ctx);
6863af69d88dSmrg
686401e04c3fSmrg	for (i = 0; i < 4; i++) {
686501e04c3fSmrg		if (!(write_mask & (1<<i)))
686601e04c3fSmrg			continue;
6867af69d88dSmrg
686801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
686901e04c3fSmrg		alu.op = ALU_OP2_SETGE_INT;
687001e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
687101e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
687201e04c3fSmrg		alu.src[1].value = 32;
687301e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
687401e04c3fSmrg		alu.dst.chan = i;
687501e04c3fSmrg		alu.dst.write = 1;
687601e04c3fSmrg		alu.last = i == last_inst;
687701e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
687801e04c3fSmrg		if (r)
687901e04c3fSmrg			return r;
688001e04c3fSmrg	}
6881af69d88dSmrg
6882af69d88dSmrg	for (i = 0; i < 4; i++) {
688301e04c3fSmrg		if (!(write_mask & (1<<i)))
6884af69d88dSmrg			continue;
6885af69d88dSmrg
688601e04c3fSmrg		/* create mask tmp */
6887af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
688801e04c3fSmrg		alu.op = ALU_OP2_BFM_INT;
688901e04c3fSmrg		alu.dst.sel = t1;
6890af69d88dSmrg		alu.dst.chan = i;
6891af69d88dSmrg		alu.dst.write = 1;
689201e04c3fSmrg		alu.last = i == last_inst;
6893af69d88dSmrg
689401e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
689501e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
6896af69d88dSmrg
6897af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6898af69d88dSmrg		if (r)
6899af69d88dSmrg			return r;
6900af69d88dSmrg	}
6901af69d88dSmrg
690201e04c3fSmrg	t2 = r600_get_temp(ctx);
690301e04c3fSmrg
690401e04c3fSmrg	for (i = 0; i < 4; i++) {
690501e04c3fSmrg		if (!(write_mask & (1<<i)))
690601e04c3fSmrg			continue;
690701e04c3fSmrg
690801e04c3fSmrg		/* shift insert left */
6909af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
691001e04c3fSmrg		alu.op = ALU_OP2_LSHL_INT;
691101e04c3fSmrg		alu.dst.sel = t2;
691201e04c3fSmrg		alu.dst.chan = i;
691301e04c3fSmrg		alu.dst.write = 1;
691401e04c3fSmrg		alu.last = i == last_inst;
6915af69d88dSmrg
691601e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
691701e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
691801e04c3fSmrg
691901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
692001e04c3fSmrg		if (r)
692101e04c3fSmrg			return r;
692201e04c3fSmrg	}
692301e04c3fSmrg
692401e04c3fSmrg	for (i = 0; i < 4; i++) {
692501e04c3fSmrg		if (!(write_mask & (1<<i)))
692601e04c3fSmrg			continue;
692701e04c3fSmrg
692801e04c3fSmrg		/* actual bitfield insert */
692901e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
693001e04c3fSmrg		alu.op = ALU_OP3_BFI_INT;
693101e04c3fSmrg		alu.is_op3 = 1;
693201e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
693301e04c3fSmrg		alu.dst.chan = i;
6934af69d88dSmrg		alu.dst.write = 1;
693501e04c3fSmrg		alu.last = i == last_inst;
6936af69d88dSmrg
693701e04c3fSmrg		alu.src[0].sel = t1;
693801e04c3fSmrg		alu.src[0].chan = i;
693901e04c3fSmrg		alu.src[1].sel = t2;
694001e04c3fSmrg		alu.src[1].chan = i;
694101e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
6942af69d88dSmrg
694301e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
694401e04c3fSmrg		if (r)
694501e04c3fSmrg			return r;
694601e04c3fSmrg	}
6947af69d88dSmrg
694801e04c3fSmrg	for (i = 0; i < 4; i++) {
694901e04c3fSmrg		if (!(write_mask & (1<<i)))
695001e04c3fSmrg			continue;
695101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
695201e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
695301e04c3fSmrg		alu.is_op3 = 1;
695401e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
695501e04c3fSmrg		alu.src[0].chan = i;
695601e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
695701e04c3fSmrg
695801e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
695901e04c3fSmrg
696001e04c3fSmrg		alu.src[1].sel = alu.dst.sel;
696101e04c3fSmrg		alu.src[1].chan = i;
696201e04c3fSmrg
696301e04c3fSmrg		alu.last = i == last_inst;
6964af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
6965af69d88dSmrg		if (r)
6966af69d88dSmrg			return r;
6967af69d88dSmrg	}
6968af69d88dSmrg	return 0;
6969af69d88dSmrg}
6970af69d88dSmrg
697101e04c3fSmrgstatic int tgsi_msb(struct r600_shader_ctx *ctx)
6972af69d88dSmrg{
6973af69d88dSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
6974af69d88dSmrg	struct r600_bytecode_alu alu;
697501e04c3fSmrg	int i, r, t1, t2;
6976af69d88dSmrg
697701e04c3fSmrg	unsigned write_mask = inst->Dst[0].Register.WriteMask;
697801e04c3fSmrg	int last_inst = tgsi_last_instruction(write_mask);
6979af69d88dSmrg
698001e04c3fSmrg	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
698101e04c3fSmrg		ctx->inst_info->op == ALU_OP1_FFBH_UINT);
698201e04c3fSmrg
698301e04c3fSmrg	t1 = ctx->temp_reg;
698401e04c3fSmrg
698501e04c3fSmrg	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
698601e04c3fSmrg	for (i = 0; i < 4; i++) {
698701e04c3fSmrg		if (!(write_mask & (1<<i)))
698801e04c3fSmrg			continue;
698901e04c3fSmrg
699001e04c3fSmrg		/* t1 = FFBH_INT / FFBH_UINT */
699101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
699201e04c3fSmrg		alu.op = ctx->inst_info->op;
699301e04c3fSmrg		alu.dst.sel = t1;
699401e04c3fSmrg		alu.dst.chan = i;
699501e04c3fSmrg		alu.dst.write = 1;
699601e04c3fSmrg		alu.last = i == last_inst;
699701e04c3fSmrg
699801e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
699901e04c3fSmrg
700001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
700101e04c3fSmrg		if (r)
700201e04c3fSmrg			return r;
7003af69d88dSmrg	}
7004af69d88dSmrg
700501e04c3fSmrg	t2 = r600_get_temp(ctx);
7006af69d88dSmrg
700701e04c3fSmrg	for (i = 0; i < 4; i++) {
700801e04c3fSmrg		if (!(write_mask & (1<<i)))
700901e04c3fSmrg			continue;
7010af69d88dSmrg
701101e04c3fSmrg		/* t2 = 31 - t1 */
701201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
701301e04c3fSmrg		alu.op = ALU_OP2_SUB_INT;
701401e04c3fSmrg		alu.dst.sel = t2;
701501e04c3fSmrg		alu.dst.chan = i;
701601e04c3fSmrg		alu.dst.write = 1;
701701e04c3fSmrg		alu.last = i == last_inst;
7018af69d88dSmrg
701901e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
702001e04c3fSmrg		alu.src[0].value = 31;
702101e04c3fSmrg		alu.src[1].sel = t1;
702201e04c3fSmrg		alu.src[1].chan = i;
7023af69d88dSmrg
702401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
702501e04c3fSmrg		if (r)
702601e04c3fSmrg			return r;
702701e04c3fSmrg	}
7028af69d88dSmrg
702901e04c3fSmrg	for (i = 0; i < 4; i++) {
703001e04c3fSmrg		if (!(write_mask & (1<<i)))
703101e04c3fSmrg			continue;
70323464ebd5Sriastradh
703301e04c3fSmrg		/* result = t1 >= 0 ? t2 : t1 */
703401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
703501e04c3fSmrg		alu.op = ALU_OP3_CNDGE_INT;
703601e04c3fSmrg		alu.is_op3 = 1;
703701e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
703801e04c3fSmrg		alu.dst.chan = i;
703901e04c3fSmrg		alu.dst.write = 1;
704001e04c3fSmrg		alu.last = i == last_inst;
70413464ebd5Sriastradh
704201e04c3fSmrg		alu.src[0].sel = t1;
704301e04c3fSmrg		alu.src[0].chan = i;
704401e04c3fSmrg		alu.src[1].sel = t2;
704501e04c3fSmrg		alu.src[1].chan = i;
704601e04c3fSmrg		alu.src[2].sel = t1;
704701e04c3fSmrg		alu.src[2].chan = i;
704801e04c3fSmrg
704901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
705001e04c3fSmrg		if (r)
705101e04c3fSmrg			return r;
7052af69d88dSmrg	}
7053af69d88dSmrg
705401e04c3fSmrg	return 0;
705501e04c3fSmrg}
70563464ebd5Sriastradh
705701e04c3fSmrgstatic int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
705801e04c3fSmrg{
705901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
706001e04c3fSmrg	struct r600_bytecode_alu alu;
706101e04c3fSmrg	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
706201e04c3fSmrg	unsigned location;
706301e04c3fSmrg	const int input = inst->Src[0].Register.Index + ctx->shader->nsys_inputs;
70643464ebd5Sriastradh
706501e04c3fSmrg	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
70663464ebd5Sriastradh
706701e04c3fSmrg	/* Interpolators have been marked for use already by allocate_system_value_inputs */
706801e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
706901e04c3fSmrg		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
707001e04c3fSmrg		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
707101e04c3fSmrg	}
707201e04c3fSmrg	else {
707301e04c3fSmrg		location = TGSI_INTERPOLATE_LOC_CENTROID;
70747ec681f3Smrg		ctx->shader->input[input].uses_interpolate_at_centroid = 1;
707501e04c3fSmrg	}
70763464ebd5Sriastradh
707701e04c3fSmrg	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
707801e04c3fSmrg	if (k < 0)
707901e04c3fSmrg		k = 0;
708001e04c3fSmrg	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
708101e04c3fSmrg	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);
708201e04c3fSmrg
708301e04c3fSmrg	/* NOTE: currently offset is not perspective correct */
708401e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
708501e04c3fSmrg		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
708601e04c3fSmrg		int sample_gpr = -1;
708701e04c3fSmrg		int gradientsH, gradientsV;
708801e04c3fSmrg		struct r600_bytecode_tex tex;
708901e04c3fSmrg
709001e04c3fSmrg		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
709101e04c3fSmrg			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
709201e04c3fSmrg		}
709301e04c3fSmrg
709401e04c3fSmrg		gradientsH = r600_get_temp(ctx);
709501e04c3fSmrg		gradientsV = r600_get_temp(ctx);
709601e04c3fSmrg		for (i = 0; i < 2; i++) {
709701e04c3fSmrg			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
709801e04c3fSmrg			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
709901e04c3fSmrg			tex.src_gpr = interp_gpr;
710001e04c3fSmrg			tex.src_sel_x = interp_base_chan + 0;
710101e04c3fSmrg			tex.src_sel_y = interp_base_chan + 1;
710201e04c3fSmrg			tex.src_sel_z = 0;
710301e04c3fSmrg			tex.src_sel_w = 0;
710401e04c3fSmrg			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
710501e04c3fSmrg			tex.dst_sel_x = 0;
710601e04c3fSmrg			tex.dst_sel_y = 1;
710701e04c3fSmrg			tex.dst_sel_z = 7;
710801e04c3fSmrg			tex.dst_sel_w = 7;
710901e04c3fSmrg			tex.inst_mod = 1; // Use per pixel gradient calculation
711001e04c3fSmrg			tex.sampler_id = 0;
711101e04c3fSmrg			tex.resource_id = tex.sampler_id;
7112af69d88dSmrg			r = r600_bytecode_add_tex(ctx->bc, &tex);
71133464ebd5Sriastradh			if (r)
71143464ebd5Sriastradh				return r;
71153464ebd5Sriastradh		}
71163464ebd5Sriastradh
711701e04c3fSmrg		for (i = 0; i < 2; i++) {
7118af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
711901e04c3fSmrg			alu.op = ALU_OP3_MULADD;
712001e04c3fSmrg			alu.is_op3 = 1;
712101e04c3fSmrg			alu.src[0].sel = gradientsH;
712201e04c3fSmrg			alu.src[0].chan = i;
712301e04c3fSmrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
712401e04c3fSmrg				alu.src[1].sel = sample_gpr;
712501e04c3fSmrg				alu.src[1].chan = 2;
712601e04c3fSmrg			}
712701e04c3fSmrg			else {
712801e04c3fSmrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
712901e04c3fSmrg			}
713001e04c3fSmrg			alu.src[2].sel = interp_gpr;
713101e04c3fSmrg			alu.src[2].chan = interp_base_chan + i;
71323464ebd5Sriastradh			alu.dst.sel = ctx->temp_reg;
713301e04c3fSmrg			alu.dst.chan = i;
713401e04c3fSmrg			alu.last = i == 1;
713501e04c3fSmrg
7136af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
71373464ebd5Sriastradh			if (r)
71383464ebd5Sriastradh				return r;
71393464ebd5Sriastradh		}
71403464ebd5Sriastradh
714101e04c3fSmrg		for (i = 0; i < 2; i++) {
7142af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
714301e04c3fSmrg			alu.op = ALU_OP3_MULADD;
714401e04c3fSmrg			alu.is_op3 = 1;
714501e04c3fSmrg			alu.src[0].sel = gradientsV;
714601e04c3fSmrg			alu.src[0].chan = i;
714701e04c3fSmrg			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
714801e04c3fSmrg				alu.src[1].sel = sample_gpr;
714901e04c3fSmrg				alu.src[1].chan = 3;
715001e04c3fSmrg			}
715101e04c3fSmrg			else {
715201e04c3fSmrg				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
715301e04c3fSmrg			}
715401e04c3fSmrg			alu.src[2].sel = ctx->temp_reg;
715501e04c3fSmrg			alu.src[2].chan = i;
71563464ebd5Sriastradh			alu.dst.sel = ctx->temp_reg;
71573464ebd5Sriastradh			alu.dst.chan = i;
715801e04c3fSmrg			alu.last = i == 1;
715901e04c3fSmrg
7160af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
71613464ebd5Sriastradh			if (r)
71623464ebd5Sriastradh				return r;
71633464ebd5Sriastradh		}
71643464ebd5Sriastradh	}
71653464ebd5Sriastradh
716601e04c3fSmrg	tmp = r600_get_temp(ctx);
716701e04c3fSmrg	for (i = 0; i < 8; i++) {
716801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
716901e04c3fSmrg		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;
71703464ebd5Sriastradh
717101e04c3fSmrg		alu.dst.sel = tmp;
717201e04c3fSmrg		if ((i > 1 && i < 6)) {
71733464ebd5Sriastradh			alu.dst.write = 1;
71743464ebd5Sriastradh		}
717501e04c3fSmrg		else {
717601e04c3fSmrg			alu.dst.write = 0;
717701e04c3fSmrg		}
717801e04c3fSmrg		alu.dst.chan = i % 4;
71793464ebd5Sriastradh
718001e04c3fSmrg		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
718101e04c3fSmrg			inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
71823464ebd5Sriastradh			alu.src[0].sel = ctx->temp_reg;
718301e04c3fSmrg			alu.src[0].chan = 1 - (i % 2);
718401e04c3fSmrg		} else {
718501e04c3fSmrg			alu.src[0].sel = interp_gpr;
718601e04c3fSmrg			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
71873464ebd5Sriastradh		}
718801e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
718901e04c3fSmrg		alu.src[1].chan = 0;
71903464ebd5Sriastradh
719101e04c3fSmrg		alu.last = i % 4 == 3;
719201e04c3fSmrg		alu.bank_swizzle_force = SQ_ALU_VEC_210;
71933464ebd5Sriastradh
719401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
719501e04c3fSmrg		if (r)
719601e04c3fSmrg			return r;
719701e04c3fSmrg	}
71983464ebd5Sriastradh
719901e04c3fSmrg	// INTERP can't swizzle dst
720001e04c3fSmrg	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
720101e04c3fSmrg	for (i = 0; i <= lasti; i++) {
720201e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
720301e04c3fSmrg			continue;
72043464ebd5Sriastradh
720501e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
720601e04c3fSmrg		alu.op = ALU_OP1_MOV;
720701e04c3fSmrg		alu.src[0].sel = tmp;
720801e04c3fSmrg		alu.src[0].chan = ctx->src[0].swizzle[i];
720901e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
72103464ebd5Sriastradh		alu.dst.write = 1;
721101e04c3fSmrg		alu.last = i == lasti;
7212af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
72133464ebd5Sriastradh		if (r)
72143464ebd5Sriastradh			return r;
721501e04c3fSmrg	}
72163464ebd5Sriastradh
721701e04c3fSmrg	return 0;
721801e04c3fSmrg}
72193464ebd5Sriastradh
72203464ebd5Sriastradh
722101e04c3fSmrgstatic int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
722201e04c3fSmrg{
722301e04c3fSmrg	struct r600_bytecode_alu alu;
722401e04c3fSmrg	int i, r;
7225af69d88dSmrg
722601e04c3fSmrg	for (i = 0; i < 4; i++) {
722701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
722801e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
722901e04c3fSmrg			alu.op = ALU_OP0_NOP;
723001e04c3fSmrg			alu.dst.chan = i;
723101e04c3fSmrg		} else {
7232af69d88dSmrg			alu.op = ALU_OP1_MOV;
723301e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
723401e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
723501e04c3fSmrg			alu.src[0].chan = i;
723601e04c3fSmrg		}
723701e04c3fSmrg		if (i == 3) {
7238af69d88dSmrg			alu.last = 1;
7239af69d88dSmrg		}
724001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
724101e04c3fSmrg		if (r)
724201e04c3fSmrg			return r;
724301e04c3fSmrg	}
724401e04c3fSmrg	return 0;
724501e04c3fSmrg}
7246af69d88dSmrg
724701e04c3fSmrgstatic int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
724801e04c3fSmrg                                 unsigned writemask,
724901e04c3fSmrg                                 struct r600_bytecode_alu_src *bc_src,
725001e04c3fSmrg                                 const struct r600_shader_src *shader_src)
725101e04c3fSmrg{
725201e04c3fSmrg	struct r600_bytecode_alu alu;
725301e04c3fSmrg	int i, r;
725401e04c3fSmrg	int lasti = tgsi_last_instruction(writemask);
725501e04c3fSmrg	int temp_reg = 0;
7256af69d88dSmrg
725701e04c3fSmrg	r600_bytecode_src(&bc_src[0], shader_src, 0);
725801e04c3fSmrg	r600_bytecode_src(&bc_src[1], shader_src, 1);
725901e04c3fSmrg	r600_bytecode_src(&bc_src[2], shader_src, 2);
726001e04c3fSmrg	r600_bytecode_src(&bc_src[3], shader_src, 3);
7261af69d88dSmrg
726201e04c3fSmrg	if (bc_src->abs) {
726301e04c3fSmrg		temp_reg = r600_get_temp(ctx);
7264af69d88dSmrg
726501e04c3fSmrg		for (i = 0; i < lasti + 1; i++) {
726601e04c3fSmrg			if (!(writemask & (1 << i)))
726701e04c3fSmrg				continue;
7268af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7269af69d88dSmrg			alu.op = ALU_OP1_MOV;
727001e04c3fSmrg			alu.dst.sel = temp_reg;
727101e04c3fSmrg			alu.dst.chan = i;
7272af69d88dSmrg			alu.dst.write = 1;
727301e04c3fSmrg			alu.src[0] = bc_src[i];
727401e04c3fSmrg			if (i == lasti) {
727501e04c3fSmrg				alu.last = 1;
727601e04c3fSmrg			}
7277af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7278af69d88dSmrg			if (r)
7279af69d88dSmrg				return r;
728001e04c3fSmrg			memset(&bc_src[i], 0, sizeof(*bc_src));
728101e04c3fSmrg			bc_src[i].sel = temp_reg;
728201e04c3fSmrg			bc_src[i].chan = i;
7283af69d88dSmrg		}
7284af69d88dSmrg	}
728501e04c3fSmrg	return 0;
728601e04c3fSmrg}
7287af69d88dSmrg
728801e04c3fSmrgstatic int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst)
728901e04c3fSmrg{
729001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
729101e04c3fSmrg	struct r600_bytecode_alu alu;
729201e04c3fSmrg	struct r600_bytecode_alu_src srcs[4][4];
729301e04c3fSmrg	int i, j, r;
729401e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
729501e04c3fSmrg	unsigned op = ctx->inst_info->op;
729601e04c3fSmrg
729701e04c3fSmrg	if (op == ALU_OP3_MULADD_IEEE &&
729801e04c3fSmrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
729901e04c3fSmrg		op = ALU_OP3_MULADD;
730001e04c3fSmrg
730101e04c3fSmrg	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
730201e04c3fSmrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
730301e04c3fSmrg					  srcs[j], &ctx->src[j]);
730401e04c3fSmrg		if (r)
730501e04c3fSmrg			return r;
730601e04c3fSmrg	}
730701e04c3fSmrg
730801e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
730901e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
731001e04c3fSmrg			continue;
731101e04c3fSmrg
731201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
731301e04c3fSmrg		alu.op = op;
731401e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
731501e04c3fSmrg			alu.src[j] = srcs[j][i];
731601e04c3fSmrg		}
731701e04c3fSmrg
731801e04c3fSmrg		if (dst == -1) {
731901e04c3fSmrg			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
732001e04c3fSmrg		} else {
732101e04c3fSmrg			alu.dst.sel = dst;
732201e04c3fSmrg		}
732301e04c3fSmrg		alu.dst.chan = i;
732401e04c3fSmrg		alu.dst.write = 1;
732501e04c3fSmrg		alu.is_op3 = 1;
732601e04c3fSmrg		if (i == lasti) {
732701e04c3fSmrg			alu.last = 1;
732801e04c3fSmrg		}
732901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
733001e04c3fSmrg		if (r)
733101e04c3fSmrg			return r;
733201e04c3fSmrg	}
733301e04c3fSmrg	return 0;
733401e04c3fSmrg}
733501e04c3fSmrg
/*
 * Translate a three-source TGSI instruction, writing the result to the
 * instruction's own destination register.
 *
 * Returns whatever tgsi_op3_dst() returns (0 on success).
 */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	/* -1 tells tgsi_op3_dst() to take the destination from Dst[0]. */
	const int use_tgsi_dst = -1;

	return tgsi_op3_dst(ctx, use_tgsi_dst);
}
734001e04c3fSmrg
734101e04c3fSmrgstatic int tgsi_dp(struct r600_shader_ctx *ctx)
734201e04c3fSmrg{
734301e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
734401e04c3fSmrg	struct r600_bytecode_alu alu;
734501e04c3fSmrg	int i, j, r;
734601e04c3fSmrg	unsigned op = ctx->inst_info->op;
734701e04c3fSmrg	if (op == ALU_OP2_DOT4_IEEE &&
734801e04c3fSmrg	    ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
734901e04c3fSmrg		op = ALU_OP2_DOT4;
735001e04c3fSmrg
735101e04c3fSmrg	for (i = 0; i < 4; i++) {
735201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
735301e04c3fSmrg		alu.op = op;
735401e04c3fSmrg		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
735501e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
735601e04c3fSmrg		}
735701e04c3fSmrg
735801e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
735901e04c3fSmrg		alu.dst.chan = i;
736001e04c3fSmrg		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
736101e04c3fSmrg		/* handle some special cases */
736201e04c3fSmrg		switch (inst->Instruction.Opcode) {
736301e04c3fSmrg		case TGSI_OPCODE_DP2:
736401e04c3fSmrg			if (i > 1) {
736501e04c3fSmrg				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
736601e04c3fSmrg				alu.src[0].chan = alu.src[1].chan = 0;
736701e04c3fSmrg			}
736801e04c3fSmrg			break;
736901e04c3fSmrg		case TGSI_OPCODE_DP3:
737001e04c3fSmrg			if (i > 2) {
737101e04c3fSmrg				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
737201e04c3fSmrg				alu.src[0].chan = alu.src[1].chan = 0;
737301e04c3fSmrg			}
737401e04c3fSmrg			break;
737501e04c3fSmrg		default:
737601e04c3fSmrg			break;
737701e04c3fSmrg		}
737801e04c3fSmrg		if (i == 3) {
737901e04c3fSmrg			alu.last = 1;
738001e04c3fSmrg		}
738101e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
738201e04c3fSmrg		if (r)
738301e04c3fSmrg			return r;
738401e04c3fSmrg	}
738501e04c3fSmrg	return 0;
738601e04c3fSmrg}
738701e04c3fSmrg
738801e04c3fSmrgstatic inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
738901e04c3fSmrg						    unsigned index)
739001e04c3fSmrg{
739101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
739201e04c3fSmrg	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
739301e04c3fSmrg		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
739401e04c3fSmrg		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
739501e04c3fSmrg		ctx->src[index].neg || ctx->src[index].abs ||
739601e04c3fSmrg		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY);
739701e04c3fSmrg}
739801e04c3fSmrg
739901e04c3fSmrgstatic inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
740001e04c3fSmrg					unsigned index)
740101e04c3fSmrg{
740201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
740301e04c3fSmrg	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
740401e04c3fSmrg}
740501e04c3fSmrg
740601e04c3fSmrgstatic int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
740701e04c3fSmrg{
740801e04c3fSmrg	struct r600_bytecode_vtx vtx;
740901e04c3fSmrg	struct r600_bytecode_alu alu;
741001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
741101e04c3fSmrg	int src_gpr, r, i;
741201e04c3fSmrg	int id = tgsi_tex_get_src_gpr(ctx, 1);
741301e04c3fSmrg	int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
741401e04c3fSmrg
741501e04c3fSmrg	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
741601e04c3fSmrg	if (src_requires_loading) {
741701e04c3fSmrg		for (i = 0; i < 4; i++) {
7418af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
7419af69d88dSmrg			alu.op = ALU_OP1_MOV;
7420af69d88dSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
7421af69d88dSmrg			alu.dst.sel = ctx->temp_reg;
7422af69d88dSmrg			alu.dst.chan = i;
7423af69d88dSmrg			if (i == 3)
7424af69d88dSmrg				alu.last = 1;
7425af69d88dSmrg			alu.dst.write = 1;
7426af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7427af69d88dSmrg			if (r)
7428af69d88dSmrg				return r;
7429af69d88dSmrg		}
7430af69d88dSmrg		src_gpr = ctx->temp_reg;
7431af69d88dSmrg	}
7432af69d88dSmrg
743301e04c3fSmrg	memset(&vtx, 0, sizeof(vtx));
743401e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
743501e04c3fSmrg	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
743601e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
743701e04c3fSmrg	vtx.src_gpr = src_gpr;
743801e04c3fSmrg	vtx.mega_fetch_count = 16;
743901e04c3fSmrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
744001e04c3fSmrg	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
744101e04c3fSmrg	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
744201e04c3fSmrg	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
744301e04c3fSmrg	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
744401e04c3fSmrg	vtx.use_const_fields = 1;
744501e04c3fSmrg	vtx.buffer_index_mode = sampler_index_mode;
7446af69d88dSmrg
744701e04c3fSmrg	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
744801e04c3fSmrg		return r;
7449af69d88dSmrg
745001e04c3fSmrg	if (ctx->bc->chip_class >= EVERGREEN)
745101e04c3fSmrg		return 0;
7452af69d88dSmrg
745301e04c3fSmrg	for (i = 0; i < 4; i++) {
745401e04c3fSmrg		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
745501e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
745601e04c3fSmrg			continue;
7457af69d88dSmrg
745801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
745901e04c3fSmrg		alu.op = ALU_OP2_AND_INT;
7460af69d88dSmrg
746101e04c3fSmrg		alu.dst.chan = i;
746201e04c3fSmrg		alu.dst.sel = vtx.dst_gpr;
746301e04c3fSmrg		alu.dst.write = 1;
7464af69d88dSmrg
746501e04c3fSmrg		alu.src[0].sel = vtx.dst_gpr;
746601e04c3fSmrg		alu.src[0].chan = i;
7467af69d88dSmrg
746801e04c3fSmrg		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
746901e04c3fSmrg		alu.src[1].sel += (id * 2);
747001e04c3fSmrg		alu.src[1].chan = i % 4;
747101e04c3fSmrg		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
747201e04c3fSmrg
747301e04c3fSmrg		if (i == lasti)
747401e04c3fSmrg			alu.last = 1;
747501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
747601e04c3fSmrg		if (r)
747701e04c3fSmrg			return r;
747801e04c3fSmrg	}
747901e04c3fSmrg
748001e04c3fSmrg	if (inst->Dst[0].Register.WriteMask & 3) {
748101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
748201e04c3fSmrg		alu.op = ALU_OP2_OR_INT;
748301e04c3fSmrg
748401e04c3fSmrg		alu.dst.chan = 3;
748501e04c3fSmrg		alu.dst.sel = vtx.dst_gpr;
748601e04c3fSmrg		alu.dst.write = 1;
748701e04c3fSmrg
748801e04c3fSmrg		alu.src[0].sel = vtx.dst_gpr;
748901e04c3fSmrg		alu.src[0].chan = 3;
749001e04c3fSmrg
749101e04c3fSmrg		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
749201e04c3fSmrg		alu.src[1].chan = 0;
749301e04c3fSmrg		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
749401e04c3fSmrg
749501e04c3fSmrg		alu.last = 1;
749601e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
749701e04c3fSmrg		if (r)
749801e04c3fSmrg			return r;
749901e04c3fSmrg	}
750001e04c3fSmrg	return 0;
750101e04c3fSmrg}
750201e04c3fSmrg
750301e04c3fSmrgstatic int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset, int eg_buffer_base)
750401e04c3fSmrg{
750501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
750601e04c3fSmrg	int r;
750701e04c3fSmrg	int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
750801e04c3fSmrg	int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
750901e04c3fSmrg
751001e04c3fSmrg	if (ctx->bc->chip_class < EVERGREEN) {
751101e04c3fSmrg		struct r600_bytecode_alu alu;
751201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
751301e04c3fSmrg		alu.op = ALU_OP1_MOV;
751401e04c3fSmrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
751501e04c3fSmrg		/* r600 we have them at channel 2 of the second dword */
751601e04c3fSmrg		alu.src[0].sel += (id * 2) + 1;
751701e04c3fSmrg		alu.src[0].chan = 1;
751801e04c3fSmrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
751901e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
752001e04c3fSmrg		alu.last = 1;
752101e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
752201e04c3fSmrg		if (r)
752301e04c3fSmrg			return r;
752401e04c3fSmrg		return 0;
752501e04c3fSmrg	} else {
752601e04c3fSmrg		struct r600_bytecode_vtx vtx;
752701e04c3fSmrg		memset(&vtx, 0, sizeof(vtx));
752801e04c3fSmrg		vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
752901e04c3fSmrg		vtx.buffer_id = id + eg_buffer_base;
753001e04c3fSmrg		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
753101e04c3fSmrg		vtx.src_gpr = 0;
753201e04c3fSmrg		vtx.mega_fetch_count = 16; /* no idea here really... */
753301e04c3fSmrg		vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
753401e04c3fSmrg		vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
753501e04c3fSmrg		vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7;		/* SEL_Y */
753601e04c3fSmrg		vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7;		/* SEL_Z */
753701e04c3fSmrg		vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7;		/* SEL_W */
753801e04c3fSmrg		vtx.data_format = FMT_32_32_32_32;
753901e04c3fSmrg		vtx.buffer_index_mode = sampler_index_mode;
754001e04c3fSmrg
754101e04c3fSmrg		if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
754201e04c3fSmrg			return r;
754301e04c3fSmrg		return 0;
754401e04c3fSmrg	}
754501e04c3fSmrg}
754601e04c3fSmrg
754701e04c3fSmrg
754801e04c3fSmrgstatic int tgsi_tex(struct r600_shader_ctx *ctx)
754901e04c3fSmrg{
755001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
755101e04c3fSmrg	struct r600_bytecode_tex tex;
755201e04c3fSmrg	struct r600_bytecode_tex grad_offs[3];
755301e04c3fSmrg	struct r600_bytecode_alu alu;
755401e04c3fSmrg	unsigned src_gpr;
755501e04c3fSmrg	int r, i, j, n_grad_offs = 0;
755601e04c3fSmrg	int opcode;
755701e04c3fSmrg	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
755801e04c3fSmrg				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
755901e04c3fSmrg				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
756001e04c3fSmrg				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
756101e04c3fSmrg
756201e04c3fSmrg	bool txf_add_offsets = inst->Texture.NumOffsets &&
756301e04c3fSmrg			     inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
756401e04c3fSmrg			     inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
756501e04c3fSmrg
756601e04c3fSmrg	/* Texture fetch instructions can only use gprs as source.
756701e04c3fSmrg	 * Also they cannot negate the source or take the absolute value */
756801e04c3fSmrg	const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
756901e04c3fSmrg                                              tgsi_tex_src_requires_loading(ctx, 0)) ||
757001e04c3fSmrg					     read_compressed_msaa || txf_add_offsets;
757101e04c3fSmrg
757201e04c3fSmrg	boolean src_loaded = FALSE;
757301e04c3fSmrg	unsigned sampler_src_reg = 1;
757401e04c3fSmrg	int8_t offset_x = 0, offset_y = 0, offset_z = 0;
757501e04c3fSmrg	boolean has_txq_cube_array_z = false;
757601e04c3fSmrg	unsigned sampler_index_mode;
757701e04c3fSmrg	int array_index_offset_channel = -1;
757801e04c3fSmrg
757901e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
758001e04c3fSmrg	    ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
758101e04c3fSmrg	      inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
758201e04c3fSmrg		if (inst->Dst[0].Register.WriteMask & 4) {
758301e04c3fSmrg			ctx->shader->has_txq_cube_array_z_comp = true;
758401e04c3fSmrg			has_txq_cube_array_z = true;
758501e04c3fSmrg		}
758601e04c3fSmrg
758701e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
758801e04c3fSmrg	    inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
758901e04c3fSmrg	    inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
759001e04c3fSmrg	    inst->Instruction.Opcode == TGSI_OPCODE_TG4)
759101e04c3fSmrg		sampler_src_reg = 2;
759201e04c3fSmrg
759301e04c3fSmrg	/* TGSI moves the sampler to src reg 3 for TXD */
759401e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
759501e04c3fSmrg		sampler_src_reg = 3;
759601e04c3fSmrg
759701e04c3fSmrg	sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
759801e04c3fSmrg
759901e04c3fSmrg	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
760001e04c3fSmrg
760101e04c3fSmrg	if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
760201e04c3fSmrg		if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
760301e04c3fSmrg			if (ctx->bc->chip_class < EVERGREEN)
760401e04c3fSmrg				ctx->shader->uses_tex_buffers = true;
760501e04c3fSmrg			return r600_do_buffer_txq(ctx, 1, 0, R600_MAX_CONST_BUFFERS);
760601e04c3fSmrg		}
760701e04c3fSmrg		else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
760801e04c3fSmrg			if (ctx->bc->chip_class < EVERGREEN)
760901e04c3fSmrg				ctx->shader->uses_tex_buffers = true;
761001e04c3fSmrg			return do_vtx_fetch_inst(ctx, src_requires_loading);
761101e04c3fSmrg		}
761201e04c3fSmrg	}
761301e04c3fSmrg
761401e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
761501e04c3fSmrg		int out_chan;
761601e04c3fSmrg		/* Add perspective divide */
761701e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
761801e04c3fSmrg			out_chan = 2;
761901e04c3fSmrg			for (i = 0; i < 3; i++) {
7620af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
762101e04c3fSmrg				alu.op = ALU_OP1_RECIP_IEEE;
762201e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
762301e04c3fSmrg
762401e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
7625af69d88dSmrg				alu.dst.chan = i;
762601e04c3fSmrg				if (i == 2)
7627af69d88dSmrg					alu.last = 1;
762801e04c3fSmrg				if (out_chan == i)
762901e04c3fSmrg					alu.dst.write = 1;
7630af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
7631af69d88dSmrg				if (r)
7632af69d88dSmrg					return r;
7633af69d88dSmrg			}
763401e04c3fSmrg
7635af69d88dSmrg		} else {
763601e04c3fSmrg			out_chan = 3;
7637af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
763801e04c3fSmrg			alu.op = ALU_OP1_RECIP_IEEE;
763901e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
764001e04c3fSmrg
764101e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
764201e04c3fSmrg			alu.dst.chan = out_chan;
764301e04c3fSmrg			alu.last = 1;
764401e04c3fSmrg			alu.dst.write = 1;
764501e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
764601e04c3fSmrg			if (r)
764701e04c3fSmrg				return r;
764801e04c3fSmrg		}
764901e04c3fSmrg
765001e04c3fSmrg		for (i = 0; i < 3; i++) {
765101e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
765201e04c3fSmrg			alu.op = ALU_OP2_MUL;
765301e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
765401e04c3fSmrg			alu.src[0].chan = out_chan;
765501e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
765601e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
765701e04c3fSmrg			alu.dst.chan = i;
765801e04c3fSmrg			alu.dst.write = 1;
765901e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
766001e04c3fSmrg			if (r)
766101e04c3fSmrg				return r;
766201e04c3fSmrg		}
766301e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
766401e04c3fSmrg		alu.op = ALU_OP1_MOV;
766501e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
766601e04c3fSmrg		alu.src[0].chan = 0;
766701e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
766801e04c3fSmrg		alu.dst.chan = 3;
766901e04c3fSmrg		alu.last = 1;
767001e04c3fSmrg		alu.dst.write = 1;
767101e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
767201e04c3fSmrg		if (r)
767301e04c3fSmrg			return r;
767401e04c3fSmrg		src_loaded = TRUE;
767501e04c3fSmrg		src_gpr = ctx->temp_reg;
767601e04c3fSmrg	}
767701e04c3fSmrg
767801e04c3fSmrg
767901e04c3fSmrg	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
768001e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
768101e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
768201e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
768301e04c3fSmrg	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
768401e04c3fSmrg
768501e04c3fSmrg		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
768601e04c3fSmrg		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
768701e04c3fSmrg
768801e04c3fSmrg		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
768901e04c3fSmrg		for (i = 0; i < 4; i++) {
769001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
769101e04c3fSmrg			alu.op = ALU_OP2_CUBE;
769201e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
769301e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
769401e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
769501e04c3fSmrg			alu.dst.chan = i;
769601e04c3fSmrg			if (i == 3)
769701e04c3fSmrg				alu.last = 1;
769801e04c3fSmrg			alu.dst.write = 1;
769901e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
770001e04c3fSmrg			if (r)
770101e04c3fSmrg				return r;
770201e04c3fSmrg		}
770301e04c3fSmrg
770401e04c3fSmrg		/* tmp1.z = RCP_e(|tmp1.z|) */
770501e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
770601e04c3fSmrg			for (i = 0; i < 3; i++) {
770701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
770801e04c3fSmrg				alu.op = ALU_OP1_RECIP_IEEE;
770901e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
771001e04c3fSmrg				alu.src[0].chan = 2;
771101e04c3fSmrg				alu.src[0].abs = 1;
771201e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
771301e04c3fSmrg				alu.dst.chan = i;
771401e04c3fSmrg				if (i == 2)
771501e04c3fSmrg					alu.dst.write = 1;
771601e04c3fSmrg				if (i == 2)
771701e04c3fSmrg					alu.last = 1;
771801e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
771901e04c3fSmrg				if (r)
772001e04c3fSmrg					return r;
772101e04c3fSmrg			}
772201e04c3fSmrg		} else {
772301e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
772401e04c3fSmrg			alu.op = ALU_OP1_RECIP_IEEE;
772501e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
772601e04c3fSmrg			alu.src[0].chan = 2;
772701e04c3fSmrg			alu.src[0].abs = 1;
772801e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
772901e04c3fSmrg			alu.dst.chan = 2;
7730af69d88dSmrg			alu.dst.write = 1;
7731af69d88dSmrg			alu.last = 1;
7732af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
7733af69d88dSmrg			if (r)
7734af69d88dSmrg				return r;
7735af69d88dSmrg		}
7736af69d88dSmrg
773701e04c3fSmrg		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
773801e04c3fSmrg		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
773901e04c3fSmrg		 * muladd has no writemask, have to use another temp
774001e04c3fSmrg		 */
774101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
774201e04c3fSmrg		alu.op = ALU_OP3_MULADD;
774301e04c3fSmrg		alu.is_op3 = 1;
774401e04c3fSmrg
774501e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
774601e04c3fSmrg		alu.src[0].chan = 0;
774701e04c3fSmrg		alu.src[1].sel = ctx->temp_reg;
774801e04c3fSmrg		alu.src[1].chan = 2;
774901e04c3fSmrg
775001e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
775101e04c3fSmrg		alu.src[2].chan = 0;
775201e04c3fSmrg		alu.src[2].value = u_bitcast_f2u(1.5f);
775301e04c3fSmrg
775401e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
775501e04c3fSmrg		alu.dst.chan = 0;
775601e04c3fSmrg		alu.dst.write = 1;
775701e04c3fSmrg
775801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
775901e04c3fSmrg		if (r)
776001e04c3fSmrg			return r;
776101e04c3fSmrg
776201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
776301e04c3fSmrg		alu.op = ALU_OP3_MULADD;
776401e04c3fSmrg		alu.is_op3 = 1;
776501e04c3fSmrg
776601e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
776701e04c3fSmrg		alu.src[0].chan = 1;
776801e04c3fSmrg		alu.src[1].sel = ctx->temp_reg;
776901e04c3fSmrg		alu.src[1].chan = 2;
777001e04c3fSmrg
777101e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
777201e04c3fSmrg		alu.src[2].chan = 0;
777301e04c3fSmrg		alu.src[2].value = u_bitcast_f2u(1.5f);
777401e04c3fSmrg
777501e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
777601e04c3fSmrg		alu.dst.chan = 1;
777701e04c3fSmrg		alu.dst.write = 1;
777801e04c3fSmrg
777901e04c3fSmrg		alu.last = 1;
778001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
778101e04c3fSmrg		if (r)
778201e04c3fSmrg			return r;
778301e04c3fSmrg		/* write initial compare value into Z component
778401e04c3fSmrg		  - W src 0 for shadow cube
778501e04c3fSmrg		  - X src 1 for shadow cube array */
778601e04c3fSmrg		if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
778701e04c3fSmrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
778801e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
778901e04c3fSmrg			alu.op = ALU_OP1_MOV;
779001e04c3fSmrg			if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
779101e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
779201e04c3fSmrg			else
779301e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
779401e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
779501e04c3fSmrg			alu.dst.chan = 2;
779601e04c3fSmrg			alu.dst.write = 1;
779701e04c3fSmrg			alu.last = 1;
779801e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
779901e04c3fSmrg			if (r)
780001e04c3fSmrg				return r;
780101e04c3fSmrg		}
780201e04c3fSmrg
780301e04c3fSmrg		if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
780401e04c3fSmrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
780501e04c3fSmrg			if (ctx->bc->chip_class >= EVERGREEN) {
780601e04c3fSmrg				int mytmp = r600_get_temp(ctx);
780701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
780801e04c3fSmrg				alu.op = ALU_OP1_MOV;
780901e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
781001e04c3fSmrg				alu.src[0].chan = 3;
781101e04c3fSmrg				alu.dst.sel = mytmp;
781201e04c3fSmrg				alu.dst.chan = 0;
781301e04c3fSmrg				alu.dst.write = 1;
781401e04c3fSmrg				alu.last = 1;
781501e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
781601e04c3fSmrg				if (r)
781701e04c3fSmrg					return r;
781801e04c3fSmrg
781901e04c3fSmrg				/* Evaluate the array index according to floor(idx + 0.5). This
782001e04c3fSmrg				 * needs to be done before merging the face select value, because
782101e04c3fSmrg				 * otherwise the fractional part of the array index will interfere
782201e04c3fSmrg				 * with the face select value */
782301e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
782401e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
782501e04c3fSmrg				alu.op = ALU_OP1_RNDNE;
782601e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
782701e04c3fSmrg				alu.dst.chan = 3;
782801e04c3fSmrg				alu.dst.write = 1;
782901e04c3fSmrg				alu.last = 1;
783001e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
783101e04c3fSmrg				if (r)
783201e04c3fSmrg					return r;
783301e04c3fSmrg
783401e04c3fSmrg				/* Because the array slice index and the cube face index are merged
783501e04c3fSmrg				 * into one value we have to make sure the array slice index is >= 0,
783601e04c3fSmrg				 * otherwise the face selection will fail */
783701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
783801e04c3fSmrg				alu.op = ALU_OP2_MAX;
783901e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
784001e04c3fSmrg				alu.src[0].chan = 3;
784101e04c3fSmrg				alu.src[1].sel = V_SQ_ALU_SRC_0;
784201e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
784301e04c3fSmrg				alu.dst.chan = 3;
784401e04c3fSmrg				alu.dst.write = 1;
784501e04c3fSmrg				alu.last = 1;
784601e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
784701e04c3fSmrg				if (r)
784801e04c3fSmrg					return r;
784901e04c3fSmrg
785001e04c3fSmrg				/* have to multiply original layer by 8 and add to face id (temp.w) in Z */
785101e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
785201e04c3fSmrg				alu.op = ALU_OP3_MULADD;
785301e04c3fSmrg				alu.is_op3 = 1;
785401e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
785501e04c3fSmrg				alu.src[0].chan = 3;
785601e04c3fSmrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
785701e04c3fSmrg				alu.src[1].chan = 0;
785801e04c3fSmrg				alu.src[1].value = u_bitcast_f2u(8.0f);
785901e04c3fSmrg				alu.src[2].sel = mytmp;
786001e04c3fSmrg				alu.src[2].chan = 0;
786101e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
786201e04c3fSmrg				alu.dst.chan = 3;
786301e04c3fSmrg				alu.dst.write = 1;
786401e04c3fSmrg				alu.last = 1;
786501e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
786601e04c3fSmrg				if (r)
786701e04c3fSmrg					return r;
786801e04c3fSmrg			} else if (ctx->bc->chip_class < EVERGREEN) {
786901e04c3fSmrg				memset(&tex, 0, sizeof(struct r600_bytecode_tex));
787001e04c3fSmrg				tex.op = FETCH_OP_SET_CUBEMAP_INDEX;
787101e04c3fSmrg				tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
787201e04c3fSmrg				tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
787301e04c3fSmrg				tex.src_gpr = r600_get_temp(ctx);
787401e04c3fSmrg				tex.src_sel_x = 0;
787501e04c3fSmrg				tex.src_sel_y = 0;
787601e04c3fSmrg				tex.src_sel_z = 0;
787701e04c3fSmrg				tex.src_sel_w = 0;
787801e04c3fSmrg				tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
787901e04c3fSmrg				tex.coord_type_x = 1;
788001e04c3fSmrg				tex.coord_type_y = 1;
788101e04c3fSmrg				tex.coord_type_z = 1;
788201e04c3fSmrg				tex.coord_type_w = 1;
788301e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
788401e04c3fSmrg				alu.op = ALU_OP1_MOV;
788501e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
788601e04c3fSmrg				alu.dst.sel = tex.src_gpr;
788701e04c3fSmrg				alu.dst.chan = 0;
788801e04c3fSmrg				alu.last = 1;
788901e04c3fSmrg				alu.dst.write = 1;
789001e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
789101e04c3fSmrg				if (r)
789201e04c3fSmrg					return r;
789301e04c3fSmrg
789401e04c3fSmrg				r = r600_bytecode_add_tex(ctx->bc, &tex);
789501e04c3fSmrg				if (r)
789601e04c3fSmrg					return r;
789701e04c3fSmrg			}
789801e04c3fSmrg
789901e04c3fSmrg		}
790001e04c3fSmrg
790101e04c3fSmrg		/* for cube forms of lod and bias we need to route things */
790201e04c3fSmrg		if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
790301e04c3fSmrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
790401e04c3fSmrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
790501e04c3fSmrg		    inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
790601e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
790701e04c3fSmrg			alu.op = ALU_OP1_MOV;
790801e04c3fSmrg			if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
790901e04c3fSmrg			    inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
791001e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
791101e04c3fSmrg			else
791201e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
791301e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
791401e04c3fSmrg			alu.dst.chan = 2;
791501e04c3fSmrg			alu.last = 1;
791601e04c3fSmrg			alu.dst.write = 1;
791701e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
791801e04c3fSmrg			if (r)
791901e04c3fSmrg				return r;
792001e04c3fSmrg		}
792101e04c3fSmrg
792201e04c3fSmrg		src_loaded = TRUE;
792301e04c3fSmrg		src_gpr = ctx->temp_reg;
792401e04c3fSmrg	}
792501e04c3fSmrg
792601e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
792701e04c3fSmrg		int temp_h = 0, temp_v = 0;
792801e04c3fSmrg		int start_val = 0;
792901e04c3fSmrg
793001e04c3fSmrg		/* if we've already loaded the src (i.e. CUBE don't reload it). */
793101e04c3fSmrg		if (src_loaded == TRUE)
793201e04c3fSmrg			start_val = 1;
793301e04c3fSmrg		else
793401e04c3fSmrg			src_loaded = TRUE;
793501e04c3fSmrg		for (i = start_val; i < 3; i++) {
793601e04c3fSmrg			int treg = r600_get_temp(ctx);
793701e04c3fSmrg
793801e04c3fSmrg			if (i == 0)
793901e04c3fSmrg				src_gpr = treg;
794001e04c3fSmrg			else if (i == 1)
794101e04c3fSmrg				temp_h = treg;
794201e04c3fSmrg			else
794301e04c3fSmrg				temp_v = treg;
794401e04c3fSmrg
794501e04c3fSmrg			for (j = 0; j < 4; j++) {
794601e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
794701e04c3fSmrg				alu.op = ALU_OP1_MOV;
794801e04c3fSmrg                                r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
794901e04c3fSmrg                                alu.dst.sel = treg;
795001e04c3fSmrg                                alu.dst.chan = j;
795101e04c3fSmrg                                if (j == 3)
795201e04c3fSmrg                                   alu.last = 1;
795301e04c3fSmrg                                alu.dst.write = 1;
795401e04c3fSmrg                                r = r600_bytecode_add_alu(ctx->bc, &alu);
795501e04c3fSmrg                                if (r)
795601e04c3fSmrg                                    return r;
795701e04c3fSmrg			}
795801e04c3fSmrg		}
795901e04c3fSmrg		for (i = 1; i < 3; i++) {
796001e04c3fSmrg			/* set gradients h/v */
796101e04c3fSmrg			struct r600_bytecode_tex *t = &grad_offs[n_grad_offs++];
796201e04c3fSmrg			memset(t, 0, sizeof(struct r600_bytecode_tex));
796301e04c3fSmrg			t->op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
796401e04c3fSmrg				FETCH_OP_SET_GRADIENTS_V;
796501e04c3fSmrg			t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
796601e04c3fSmrg			t->sampler_index_mode = sampler_index_mode;
796701e04c3fSmrg			t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS;
796801e04c3fSmrg			t->resource_index_mode = sampler_index_mode;
796901e04c3fSmrg
797001e04c3fSmrg			t->src_gpr = (i == 1) ? temp_h : temp_v;
797101e04c3fSmrg			t->src_sel_x = 0;
797201e04c3fSmrg			t->src_sel_y = 1;
797301e04c3fSmrg			t->src_sel_z = 2;
797401e04c3fSmrg			t->src_sel_w = 3;
797501e04c3fSmrg
797601e04c3fSmrg			t->dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */
797701e04c3fSmrg			t->dst_sel_x = t->dst_sel_y = t->dst_sel_z = t->dst_sel_w = 7;
797801e04c3fSmrg			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
797901e04c3fSmrg				t->coord_type_x = 1;
798001e04c3fSmrg				t->coord_type_y = 1;
798101e04c3fSmrg				t->coord_type_z = 1;
798201e04c3fSmrg				t->coord_type_w = 1;
798301e04c3fSmrg			}
798401e04c3fSmrg		}
798501e04c3fSmrg	}
798601e04c3fSmrg
798701e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
798801e04c3fSmrg		/* Gather4 should follow the same rules as bilinear filtering, but the hardware
798901e04c3fSmrg		 * incorrectly forces nearest filtering if the texture format is integer.
799001e04c3fSmrg		 * The only effect it has on Gather4, which always returns 4 texels for
799101e04c3fSmrg		 * bilinear filtering, is that the final coordinates are off by 0.5 of
799201e04c3fSmrg		 * the texel size.
799301e04c3fSmrg		 *
799401e04c3fSmrg		 * The workaround is to subtract 0.5 from the unnormalized coordinates,
799501e04c3fSmrg		 * or (0.5 / size) from the normalized coordinates.
799601e04c3fSmrg		 */
799701e04c3fSmrg		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
799801e04c3fSmrg		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
799901e04c3fSmrg			int treg = r600_get_temp(ctx);
800001e04c3fSmrg
			/* mov array and comparison coordinate to temp_reg if needed */
800201e04c3fSmrg			if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
800301e04c3fSmrg			     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
800401e04c3fSmrg			     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) && !src_loaded) {
800501e04c3fSmrg				int end = inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ? 3 : 2;
800601e04c3fSmrg				for (i = 2; i <= end; i++) {
800701e04c3fSmrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
800801e04c3fSmrg					alu.op = ALU_OP1_MOV;
800901e04c3fSmrg					alu.dst.sel = ctx->temp_reg;
801001e04c3fSmrg					alu.dst.chan = i;
801101e04c3fSmrg					alu.dst.write = 1;
801201e04c3fSmrg					alu.last = (i == end);
801301e04c3fSmrg					r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
801401e04c3fSmrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
801501e04c3fSmrg					if (r)
801601e04c3fSmrg						return r;
801701e04c3fSmrg				}
801801e04c3fSmrg			}
801901e04c3fSmrg
802001e04c3fSmrg			if (inst->Texture.Texture == TGSI_TEXTURE_RECT ||
802101e04c3fSmrg			    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
802201e04c3fSmrg				for (i = 0; i < 2; i++) {
802301e04c3fSmrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
802401e04c3fSmrg					alu.op = ALU_OP2_ADD;
802501e04c3fSmrg					alu.dst.sel = ctx->temp_reg;
802601e04c3fSmrg					alu.dst.chan = i;
802701e04c3fSmrg					alu.dst.write = 1;
802801e04c3fSmrg					alu.last = i == 1;
802901e04c3fSmrg					if (src_loaded) {
803001e04c3fSmrg						alu.src[0].sel = ctx->temp_reg;
803101e04c3fSmrg						alu.src[0].chan = i;
803201e04c3fSmrg					} else
803301e04c3fSmrg						r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
803401e04c3fSmrg					alu.src[1].sel = V_SQ_ALU_SRC_0_5;
803501e04c3fSmrg					alu.src[1].neg = 1;
803601e04c3fSmrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
803701e04c3fSmrg					if (r)
803801e04c3fSmrg						return r;
803901e04c3fSmrg				}
804001e04c3fSmrg			} else {
804101e04c3fSmrg				/* execute a TXQ */
804201e04c3fSmrg				memset(&tex, 0, sizeof(struct r600_bytecode_tex));
804301e04c3fSmrg				tex.op = FETCH_OP_GET_TEXTURE_RESINFO;
804401e04c3fSmrg				tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
804501e04c3fSmrg				tex.sampler_index_mode = sampler_index_mode;
804601e04c3fSmrg				tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
804701e04c3fSmrg				tex.resource_index_mode = sampler_index_mode;
804801e04c3fSmrg				tex.dst_gpr = treg;
804901e04c3fSmrg				tex.src_sel_x = 4;
805001e04c3fSmrg				tex.src_sel_y = 4;
805101e04c3fSmrg				tex.src_sel_z = 4;
805201e04c3fSmrg				tex.src_sel_w = 4;
805301e04c3fSmrg				tex.dst_sel_x = 0;
805401e04c3fSmrg				tex.dst_sel_y = 1;
805501e04c3fSmrg				tex.dst_sel_z = 7;
805601e04c3fSmrg				tex.dst_sel_w = 7;
805701e04c3fSmrg				r = r600_bytecode_add_tex(ctx->bc, &tex);
805801e04c3fSmrg				if (r)
805901e04c3fSmrg					return r;
806001e04c3fSmrg
806101e04c3fSmrg				/* coord.xy = -0.5 * (1.0/int_to_flt(size)) + coord.xy */
806201e04c3fSmrg				if (ctx->bc->chip_class == CAYMAN) {
806301e04c3fSmrg					/* */
806401e04c3fSmrg					for (i = 0; i < 2; i++) {
806501e04c3fSmrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
806601e04c3fSmrg						alu.op = ALU_OP1_INT_TO_FLT;
806701e04c3fSmrg						alu.dst.sel = treg;
806801e04c3fSmrg						alu.dst.chan = i;
806901e04c3fSmrg						alu.dst.write = 1;
807001e04c3fSmrg						alu.src[0].sel = treg;
807101e04c3fSmrg						alu.src[0].chan = i;
807201e04c3fSmrg						alu.last = (i == 1) ? 1 : 0;
807301e04c3fSmrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
807401e04c3fSmrg						if (r)
807501e04c3fSmrg							return r;
807601e04c3fSmrg					}
807701e04c3fSmrg					for (j = 0; j < 2; j++) {
807801e04c3fSmrg						for (i = 0; i < 3; i++) {
807901e04c3fSmrg							memset(&alu, 0, sizeof(struct r600_bytecode_alu));
808001e04c3fSmrg							alu.op = ALU_OP1_RECIP_IEEE;
808101e04c3fSmrg							alu.src[0].sel = treg;
808201e04c3fSmrg							alu.src[0].chan = j;
808301e04c3fSmrg							alu.dst.sel = treg;
808401e04c3fSmrg							alu.dst.chan = i;
808501e04c3fSmrg							if (i == 2)
808601e04c3fSmrg								alu.last = 1;
808701e04c3fSmrg							if (i == j)
808801e04c3fSmrg								alu.dst.write = 1;
808901e04c3fSmrg							r = r600_bytecode_add_alu(ctx->bc, &alu);
809001e04c3fSmrg							if (r)
809101e04c3fSmrg								return r;
809201e04c3fSmrg						}
809301e04c3fSmrg					}
809401e04c3fSmrg				} else {
809501e04c3fSmrg					for (i = 0; i < 2; i++) {
809601e04c3fSmrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
809701e04c3fSmrg						alu.op = ALU_OP1_INT_TO_FLT;
809801e04c3fSmrg						alu.dst.sel = treg;
809901e04c3fSmrg						alu.dst.chan = i;
810001e04c3fSmrg						alu.dst.write = 1;
810101e04c3fSmrg						alu.src[0].sel = treg;
810201e04c3fSmrg						alu.src[0].chan = i;
810301e04c3fSmrg						alu.last = 1;
810401e04c3fSmrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
810501e04c3fSmrg						if (r)
810601e04c3fSmrg							return r;
810701e04c3fSmrg					}
810801e04c3fSmrg					for (i = 0; i < 2; i++) {
810901e04c3fSmrg						memset(&alu, 0, sizeof(struct r600_bytecode_alu));
811001e04c3fSmrg						alu.op = ALU_OP1_RECIP_IEEE;
811101e04c3fSmrg						alu.src[0].sel = treg;
811201e04c3fSmrg						alu.src[0].chan = i;
811301e04c3fSmrg						alu.dst.sel = treg;
811401e04c3fSmrg						alu.dst.chan = i;
811501e04c3fSmrg						alu.last = 1;
811601e04c3fSmrg						alu.dst.write = 1;
811701e04c3fSmrg						r = r600_bytecode_add_alu(ctx->bc, &alu);
811801e04c3fSmrg						if (r)
811901e04c3fSmrg							return r;
812001e04c3fSmrg					}
812101e04c3fSmrg				}
812201e04c3fSmrg				for (i = 0; i < 2; i++) {
812301e04c3fSmrg					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
812401e04c3fSmrg					alu.op = ALU_OP3_MULADD;
812501e04c3fSmrg					alu.is_op3 = 1;
812601e04c3fSmrg					alu.dst.sel = ctx->temp_reg;
812701e04c3fSmrg					alu.dst.chan = i;
812801e04c3fSmrg					alu.dst.write = 1;
812901e04c3fSmrg					alu.last = i == 1;
813001e04c3fSmrg					alu.src[0].sel = treg;
813101e04c3fSmrg					alu.src[0].chan = i;
813201e04c3fSmrg					alu.src[1].sel = V_SQ_ALU_SRC_0_5;
813301e04c3fSmrg					alu.src[1].neg = 1;
813401e04c3fSmrg					if (src_loaded) {
813501e04c3fSmrg						alu.src[2].sel = ctx->temp_reg;
813601e04c3fSmrg						alu.src[2].chan = i;
813701e04c3fSmrg					} else
813801e04c3fSmrg						r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
813901e04c3fSmrg					r = r600_bytecode_add_alu(ctx->bc, &alu);
814001e04c3fSmrg					if (r)
814101e04c3fSmrg						return r;
814201e04c3fSmrg				}
814301e04c3fSmrg			}
814401e04c3fSmrg			src_loaded = TRUE;
814501e04c3fSmrg			src_gpr = ctx->temp_reg;
814601e04c3fSmrg		}
814701e04c3fSmrg	}
814801e04c3fSmrg
814901e04c3fSmrg	if (src_requires_loading && !src_loaded) {
815001e04c3fSmrg		for (i = 0; i < 4; i++) {
815101e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
815201e04c3fSmrg			alu.op = ALU_OP1_MOV;
815301e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
815401e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
815501e04c3fSmrg			alu.dst.chan = i;
815601e04c3fSmrg			if (i == 3)
815701e04c3fSmrg				alu.last = 1;
815801e04c3fSmrg			alu.dst.write = 1;
815901e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
816001e04c3fSmrg			if (r)
816101e04c3fSmrg				return r;
816201e04c3fSmrg		}
816301e04c3fSmrg		src_loaded = TRUE;
816401e04c3fSmrg		src_gpr = ctx->temp_reg;
816501e04c3fSmrg	}
816601e04c3fSmrg
816701e04c3fSmrg	/* get offset values */
816801e04c3fSmrg	if (inst->Texture.NumOffsets) {
816901e04c3fSmrg		assert(inst->Texture.NumOffsets == 1);
817001e04c3fSmrg
817101e04c3fSmrg		/* The texture offset feature doesn't work with the TXF instruction
817201e04c3fSmrg		 * and must be emulated by adding the offset to the texture coordinates. */
817301e04c3fSmrg		if (txf_add_offsets) {
817401e04c3fSmrg			const struct tgsi_texture_offset *off = inst->TexOffsets;
817501e04c3fSmrg
817601e04c3fSmrg			switch (inst->Texture.Texture) {
817701e04c3fSmrg			case TGSI_TEXTURE_3D:
817801e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
817901e04c3fSmrg				alu.op = ALU_OP2_ADD_INT;
818001e04c3fSmrg				alu.src[0].sel = src_gpr;
818101e04c3fSmrg				alu.src[0].chan = 2;
818201e04c3fSmrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
818301e04c3fSmrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
818401e04c3fSmrg				alu.dst.sel = src_gpr;
818501e04c3fSmrg				alu.dst.chan = 2;
818601e04c3fSmrg				alu.dst.write = 1;
818701e04c3fSmrg				alu.last = 1;
818801e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
818901e04c3fSmrg				if (r)
819001e04c3fSmrg					return r;
81917ec681f3Smrg				FALLTHROUGH;
819201e04c3fSmrg
819301e04c3fSmrg			case TGSI_TEXTURE_2D:
819401e04c3fSmrg			case TGSI_TEXTURE_SHADOW2D:
819501e04c3fSmrg			case TGSI_TEXTURE_RECT:
819601e04c3fSmrg			case TGSI_TEXTURE_SHADOWRECT:
819701e04c3fSmrg			case TGSI_TEXTURE_2D_ARRAY:
819801e04c3fSmrg			case TGSI_TEXTURE_SHADOW2D_ARRAY:
819901e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
820001e04c3fSmrg				alu.op = ALU_OP2_ADD_INT;
820101e04c3fSmrg				alu.src[0].sel = src_gpr;
820201e04c3fSmrg				alu.src[0].chan = 1;
820301e04c3fSmrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
820401e04c3fSmrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
820501e04c3fSmrg				alu.dst.sel = src_gpr;
820601e04c3fSmrg				alu.dst.chan = 1;
820701e04c3fSmrg				alu.dst.write = 1;
820801e04c3fSmrg				alu.last = 1;
820901e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
821001e04c3fSmrg				if (r)
821101e04c3fSmrg					return r;
82127ec681f3Smrg				FALLTHROUGH;
821301e04c3fSmrg
821401e04c3fSmrg			case TGSI_TEXTURE_1D:
821501e04c3fSmrg			case TGSI_TEXTURE_SHADOW1D:
821601e04c3fSmrg			case TGSI_TEXTURE_1D_ARRAY:
821701e04c3fSmrg			case TGSI_TEXTURE_SHADOW1D_ARRAY:
821801e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
821901e04c3fSmrg				alu.op = ALU_OP2_ADD_INT;
822001e04c3fSmrg				alu.src[0].sel = src_gpr;
822101e04c3fSmrg				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
822201e04c3fSmrg				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
822301e04c3fSmrg				alu.dst.sel = src_gpr;
822401e04c3fSmrg				alu.dst.write = 1;
822501e04c3fSmrg				alu.last = 1;
822601e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
822701e04c3fSmrg				if (r)
822801e04c3fSmrg					return r;
822901e04c3fSmrg				break;
823001e04c3fSmrg				/* texture offsets do not apply to other texture targets */
823101e04c3fSmrg			}
823201e04c3fSmrg		} else {
823301e04c3fSmrg			switch (inst->Texture.Texture) {
823401e04c3fSmrg			case TGSI_TEXTURE_3D:
823501e04c3fSmrg				offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
82367ec681f3Smrg				FALLTHROUGH;
823701e04c3fSmrg			case TGSI_TEXTURE_2D:
823801e04c3fSmrg			case TGSI_TEXTURE_SHADOW2D:
823901e04c3fSmrg			case TGSI_TEXTURE_RECT:
824001e04c3fSmrg			case TGSI_TEXTURE_SHADOWRECT:
824101e04c3fSmrg			case TGSI_TEXTURE_2D_ARRAY:
824201e04c3fSmrg			case TGSI_TEXTURE_SHADOW2D_ARRAY:
824301e04c3fSmrg				offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
82447ec681f3Smrg				FALLTHROUGH;
824501e04c3fSmrg			case TGSI_TEXTURE_1D:
824601e04c3fSmrg			case TGSI_TEXTURE_SHADOW1D:
824701e04c3fSmrg			case TGSI_TEXTURE_1D_ARRAY:
824801e04c3fSmrg			case TGSI_TEXTURE_SHADOW1D_ARRAY:
824901e04c3fSmrg				offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
825001e04c3fSmrg			}
825101e04c3fSmrg		}
825201e04c3fSmrg	}
825301e04c3fSmrg
825401e04c3fSmrg	/* Obtain the sample index for reading a compressed MSAA color texture.
825501e04c3fSmrg	 * To read the FMASK, we use the ldfptr instruction, which tells us
825601e04c3fSmrg	 * where the samples are stored.
825701e04c3fSmrg	 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
825801e04c3fSmrg	 * which is the identity mapping. Each nibble says which physical sample
825901e04c3fSmrg	 * should be fetched to get that sample.
826001e04c3fSmrg	 *
826101e04c3fSmrg	 * Assume src.z contains the sample index. It should be modified like this:
826201e04c3fSmrg	 *   src.z = (ldfptr() >> (src.z * 4)) & 0xF;
826301e04c3fSmrg	 * Then fetch the texel with src.
826401e04c3fSmrg	 */
826501e04c3fSmrg	if (read_compressed_msaa) {
826601e04c3fSmrg		unsigned sample_chan = 3;
826701e04c3fSmrg		unsigned temp = r600_get_temp(ctx);
826801e04c3fSmrg		assert(src_loaded);
826901e04c3fSmrg
827001e04c3fSmrg		/* temp.w = ldfptr() */
827101e04c3fSmrg		memset(&tex, 0, sizeof(struct r600_bytecode_tex));
827201e04c3fSmrg		tex.op = FETCH_OP_LD;
827301e04c3fSmrg		tex.inst_mod = 1; /* to indicate this is ldfptr */
827401e04c3fSmrg		tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
827501e04c3fSmrg		tex.sampler_index_mode = sampler_index_mode;
827601e04c3fSmrg		tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
827701e04c3fSmrg		tex.resource_index_mode = sampler_index_mode;
827801e04c3fSmrg		tex.src_gpr = src_gpr;
827901e04c3fSmrg		tex.dst_gpr = temp;
828001e04c3fSmrg		tex.dst_sel_x = 7; /* mask out these components */
828101e04c3fSmrg		tex.dst_sel_y = 7;
828201e04c3fSmrg		tex.dst_sel_z = 7;
828301e04c3fSmrg		tex.dst_sel_w = 0; /* store X */
828401e04c3fSmrg		tex.src_sel_x = 0;
828501e04c3fSmrg		tex.src_sel_y = 1;
828601e04c3fSmrg		tex.src_sel_z = 2;
828701e04c3fSmrg		tex.src_sel_w = 3;
828801e04c3fSmrg		tex.offset_x = offset_x;
828901e04c3fSmrg		tex.offset_y = offset_y;
829001e04c3fSmrg		tex.offset_z = offset_z;
829101e04c3fSmrg		r = r600_bytecode_add_tex(ctx->bc, &tex);
829201e04c3fSmrg		if (r)
829301e04c3fSmrg			return r;
829401e04c3fSmrg
829501e04c3fSmrg		/* temp.x = sample_index*4 */
829601e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
829701e04c3fSmrg		alu.op = ALU_OP2_MULLO_INT;
829801e04c3fSmrg		alu.src[0].sel = src_gpr;
829901e04c3fSmrg		alu.src[0].chan = sample_chan;
830001e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
830101e04c3fSmrg		alu.src[1].value = 4;
830201e04c3fSmrg		alu.dst.sel = temp;
830301e04c3fSmrg		alu.dst.chan = 0;
830401e04c3fSmrg		alu.dst.write = 1;
830501e04c3fSmrg		r = emit_mul_int_op(ctx->bc, &alu);
830601e04c3fSmrg		if (r)
830701e04c3fSmrg			return r;
830801e04c3fSmrg
830901e04c3fSmrg		/* sample_index = temp.w >> temp.x */
831001e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
831101e04c3fSmrg		alu.op = ALU_OP2_LSHR_INT;
831201e04c3fSmrg		alu.src[0].sel = temp;
831301e04c3fSmrg		alu.src[0].chan = 3;
831401e04c3fSmrg		alu.src[1].sel = temp;
831501e04c3fSmrg		alu.src[1].chan = 0;
831601e04c3fSmrg		alu.dst.sel = src_gpr;
831701e04c3fSmrg		alu.dst.chan = sample_chan;
831801e04c3fSmrg		alu.dst.write = 1;
831901e04c3fSmrg		alu.last = 1;
832001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
832101e04c3fSmrg		if (r)
832201e04c3fSmrg			return r;
832301e04c3fSmrg
832401e04c3fSmrg		/* sample_index & 0xF */
832501e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
832601e04c3fSmrg		alu.op = ALU_OP2_AND_INT;
832701e04c3fSmrg		alu.src[0].sel = src_gpr;
832801e04c3fSmrg		alu.src[0].chan = sample_chan;
832901e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
833001e04c3fSmrg		alu.src[1].value = 0xF;
833101e04c3fSmrg		alu.dst.sel = src_gpr;
833201e04c3fSmrg		alu.dst.chan = sample_chan;
833301e04c3fSmrg		alu.dst.write = 1;
833401e04c3fSmrg		alu.last = 1;
833501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
833601e04c3fSmrg		if (r)
833701e04c3fSmrg			return r;
833801e04c3fSmrg#if 0
833901e04c3fSmrg		/* visualize the FMASK */
834001e04c3fSmrg		for (i = 0; i < 4; i++) {
834101e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
834201e04c3fSmrg			alu.op = ALU_OP1_INT_TO_FLT;
834301e04c3fSmrg			alu.src[0].sel = src_gpr;
834401e04c3fSmrg			alu.src[0].chan = sample_chan;
834501e04c3fSmrg			alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
834601e04c3fSmrg			alu.dst.chan = i;
834701e04c3fSmrg			alu.dst.write = 1;
834801e04c3fSmrg			alu.last = 1;
834901e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
835001e04c3fSmrg			if (r)
835101e04c3fSmrg				return r;
835201e04c3fSmrg		}
835301e04c3fSmrg		return 0;
835401e04c3fSmrg#endif
835501e04c3fSmrg	}
835601e04c3fSmrg
835701e04c3fSmrg	/* does this shader want a num layers from TXQ for a cube array? */
835801e04c3fSmrg	if (has_txq_cube_array_z) {
835901e04c3fSmrg		int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
836001e04c3fSmrg
836101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
836201e04c3fSmrg		alu.op = ALU_OP1_MOV;
836301e04c3fSmrg
836401e04c3fSmrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
836501e04c3fSmrg		if (ctx->bc->chip_class >= EVERGREEN) {
836601e04c3fSmrg			/* with eg each dword is number of cubes */
836701e04c3fSmrg			alu.src[0].sel += id / 4;
836801e04c3fSmrg			alu.src[0].chan = id % 4;
836901e04c3fSmrg		} else {
837001e04c3fSmrg			/* r600 we have them at channel 2 of the second dword */
837101e04c3fSmrg			alu.src[0].sel += (id * 2) + 1;
837201e04c3fSmrg			alu.src[0].chan = 2;
837301e04c3fSmrg		}
837401e04c3fSmrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
837501e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
837601e04c3fSmrg		alu.last = 1;
837701e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
837801e04c3fSmrg		if (r)
837901e04c3fSmrg			return r;
838001e04c3fSmrg		/* disable writemask from texture instruction */
838101e04c3fSmrg		inst->Dst[0].Register.WriteMask &= ~4;
838201e04c3fSmrg	}
838301e04c3fSmrg
838401e04c3fSmrg	opcode = ctx->inst_info->op;
838501e04c3fSmrg	if (opcode == FETCH_OP_GATHER4 &&
838601e04c3fSmrg		inst->TexOffsets[0].File != TGSI_FILE_NULL &&
838701e04c3fSmrg		inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
838801e04c3fSmrg		struct r600_bytecode_tex *t;
838901e04c3fSmrg		opcode = FETCH_OP_GATHER4_O;
839001e04c3fSmrg
839101e04c3fSmrg		/* GATHER4_O/GATHER4_C_O use offset values loaded by
839201e04c3fSmrg		   SET_TEXTURE_OFFSETS instruction. The immediate offset values
839301e04c3fSmrg		   encoded in the instruction are ignored. */
839401e04c3fSmrg		t = &grad_offs[n_grad_offs++];
839501e04c3fSmrg		memset(t, 0, sizeof(struct r600_bytecode_tex));
839601e04c3fSmrg		t->op = FETCH_OP_SET_TEXTURE_OFFSETS;
839701e04c3fSmrg		t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
839801e04c3fSmrg		t->sampler_index_mode = sampler_index_mode;
839901e04c3fSmrg		t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS;
840001e04c3fSmrg		t->resource_index_mode = sampler_index_mode;
840101e04c3fSmrg
840201e04c3fSmrg		t->src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
840301e04c3fSmrg		t->src_sel_x = inst->TexOffsets[0].SwizzleX;
840401e04c3fSmrg		t->src_sel_y = inst->TexOffsets[0].SwizzleY;
840501e04c3fSmrg		if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
840601e04c3fSmrg			 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
840701e04c3fSmrg			/* make sure array index selector is 0, this is just a safety
840801e04c3fSmrg			 * precausion because TGSI seems to emit something strange here */
840901e04c3fSmrg			t->src_sel_z = 4;
841001e04c3fSmrg		else
841101e04c3fSmrg			t->src_sel_z = inst->TexOffsets[0].SwizzleZ;
841201e04c3fSmrg
841301e04c3fSmrg		t->src_sel_w = 4;
841401e04c3fSmrg
841501e04c3fSmrg		t->dst_sel_x = 7;
841601e04c3fSmrg		t->dst_sel_y = 7;
841701e04c3fSmrg		t->dst_sel_z = 7;
841801e04c3fSmrg		t->dst_sel_w = 7;
841901e04c3fSmrg	}
842001e04c3fSmrg
842101e04c3fSmrg	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
842201e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
842301e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
842401e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
842501e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
842601e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
842701e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
842801e04c3fSmrg		switch (opcode) {
842901e04c3fSmrg		case FETCH_OP_SAMPLE:
843001e04c3fSmrg			opcode = FETCH_OP_SAMPLE_C;
843101e04c3fSmrg			break;
843201e04c3fSmrg		case FETCH_OP_SAMPLE_L:
843301e04c3fSmrg			opcode = FETCH_OP_SAMPLE_C_L;
843401e04c3fSmrg			break;
843501e04c3fSmrg		case FETCH_OP_SAMPLE_LB:
843601e04c3fSmrg			opcode = FETCH_OP_SAMPLE_C_LB;
843701e04c3fSmrg			break;
843801e04c3fSmrg		case FETCH_OP_SAMPLE_G:
843901e04c3fSmrg			opcode = FETCH_OP_SAMPLE_C_G;
844001e04c3fSmrg			break;
844101e04c3fSmrg		/* Texture gather variants */
844201e04c3fSmrg		case FETCH_OP_GATHER4:
844301e04c3fSmrg			opcode = FETCH_OP_GATHER4_C;
844401e04c3fSmrg			break;
844501e04c3fSmrg		case FETCH_OP_GATHER4_O:
844601e04c3fSmrg			opcode = FETCH_OP_GATHER4_C_O;
844701e04c3fSmrg			break;
844801e04c3fSmrg		}
844901e04c3fSmrg	}
845001e04c3fSmrg
845101e04c3fSmrg	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
845201e04c3fSmrg	tex.op = opcode;
845301e04c3fSmrg
845401e04c3fSmrg	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
845501e04c3fSmrg	tex.sampler_index_mode = sampler_index_mode;
845601e04c3fSmrg	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
845701e04c3fSmrg	tex.resource_index_mode = sampler_index_mode;
845801e04c3fSmrg	tex.src_gpr = src_gpr;
845901e04c3fSmrg	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
846001e04c3fSmrg
846101e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
846201e04c3fSmrg		inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
846301e04c3fSmrg		tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */
846401e04c3fSmrg	}
846501e04c3fSmrg
846601e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
846701e04c3fSmrg		int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
846801e04c3fSmrg		tex.inst_mod = texture_component_select;
846901e04c3fSmrg
847001e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
847101e04c3fSmrg			tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
847201e04c3fSmrg			tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
847301e04c3fSmrg			tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
847401e04c3fSmrg			tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
847501e04c3fSmrg		} else {
847601e04c3fSmrg			/* GATHER4 result order is different from TGSI TG4 */
847701e04c3fSmrg			tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 1 : 7;
847801e04c3fSmrg			tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 2 : 7;
847901e04c3fSmrg			tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 0 : 7;
848001e04c3fSmrg			tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
848101e04c3fSmrg		}
848201e04c3fSmrg	}
848301e04c3fSmrg	else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
848401e04c3fSmrg		tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
848501e04c3fSmrg		tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
848601e04c3fSmrg		tex.dst_sel_z = 7;
848701e04c3fSmrg		tex.dst_sel_w = 7;
848801e04c3fSmrg	}
848901e04c3fSmrg	else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
849001e04c3fSmrg		tex.dst_sel_x = 3;
849101e04c3fSmrg		tex.dst_sel_y = 7;
849201e04c3fSmrg		tex.dst_sel_z = 7;
849301e04c3fSmrg		tex.dst_sel_w = 7;
849401e04c3fSmrg	}
849501e04c3fSmrg	else {
849601e04c3fSmrg		tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
849701e04c3fSmrg		tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
849801e04c3fSmrg		tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
849901e04c3fSmrg		tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
850001e04c3fSmrg	}
850101e04c3fSmrg
850201e04c3fSmrg
850301e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
850401e04c3fSmrg		tex.src_sel_x = 4;
850501e04c3fSmrg		tex.src_sel_y = 4;
850601e04c3fSmrg		tex.src_sel_z = 4;
850701e04c3fSmrg		tex.src_sel_w = 4;
850801e04c3fSmrg	} else if (src_loaded) {
850901e04c3fSmrg		tex.src_sel_x = 0;
851001e04c3fSmrg		tex.src_sel_y = 1;
851101e04c3fSmrg		tex.src_sel_z = 2;
851201e04c3fSmrg		tex.src_sel_w = 3;
851301e04c3fSmrg	} else {
851401e04c3fSmrg		tex.src_sel_x = ctx->src[0].swizzle[0];
851501e04c3fSmrg		tex.src_sel_y = ctx->src[0].swizzle[1];
851601e04c3fSmrg		tex.src_sel_z = ctx->src[0].swizzle[2];
851701e04c3fSmrg		tex.src_sel_w = ctx->src[0].swizzle[3];
851801e04c3fSmrg		tex.src_rel = ctx->src[0].rel;
851901e04c3fSmrg	}
852001e04c3fSmrg
852101e04c3fSmrg	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
852201e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
852301e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
852401e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
852501e04c3fSmrg		tex.src_sel_x = 1;
852601e04c3fSmrg		tex.src_sel_y = 0;
852701e04c3fSmrg		tex.src_sel_z = 3;
852801e04c3fSmrg		tex.src_sel_w = 2; /* route Z compare or Lod value into W */
852901e04c3fSmrg	}
853001e04c3fSmrg
853101e04c3fSmrg	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
853201e04c3fSmrg	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
853301e04c3fSmrg		tex.coord_type_x = 1;
853401e04c3fSmrg		tex.coord_type_y = 1;
853501e04c3fSmrg	}
853601e04c3fSmrg	tex.coord_type_z = 1;
853701e04c3fSmrg	tex.coord_type_w = 1;
853801e04c3fSmrg
853901e04c3fSmrg	tex.offset_x = offset_x;
854001e04c3fSmrg	tex.offset_y = offset_y;
854101e04c3fSmrg	if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
854201e04c3fSmrg		(inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
854301e04c3fSmrg		 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
854401e04c3fSmrg		tex.offset_z = 0;
854501e04c3fSmrg	}
854601e04c3fSmrg	else {
854701e04c3fSmrg		tex.offset_z = offset_z;
854801e04c3fSmrg	}
854901e04c3fSmrg
855001e04c3fSmrg	/* Put the depth for comparison in W.
855101e04c3fSmrg	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
855201e04c3fSmrg	 * Some instructions expect the depth in Z. */
855301e04c3fSmrg	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
855401e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
855501e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
855601e04c3fSmrg	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
855701e04c3fSmrg	    opcode != FETCH_OP_SAMPLE_C_L &&
855801e04c3fSmrg	    opcode != FETCH_OP_SAMPLE_C_LB) {
855901e04c3fSmrg		tex.src_sel_w = tex.src_sel_z;
856001e04c3fSmrg	}
856101e04c3fSmrg
856201e04c3fSmrg	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
856301e04c3fSmrg	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
856401e04c3fSmrg		if (opcode == FETCH_OP_SAMPLE_C_L ||
856501e04c3fSmrg		    opcode == FETCH_OP_SAMPLE_C_LB) {
856601e04c3fSmrg			/* the array index is read from Y */
856701e04c3fSmrg			tex.coord_type_y = 0;
856801e04c3fSmrg			array_index_offset_channel = tex.src_sel_y;
856901e04c3fSmrg		} else {
857001e04c3fSmrg			/* the array index is read from Z */
857101e04c3fSmrg			tex.coord_type_z = 0;
857201e04c3fSmrg			tex.src_sel_z = tex.src_sel_y;
857301e04c3fSmrg			array_index_offset_channel = tex.src_sel_z;
857401e04c3fSmrg		}
857501e04c3fSmrg	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
857601e04c3fSmrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
857701e04c3fSmrg		tex.coord_type_z = 0;
857801e04c3fSmrg		array_index_offset_channel = tex.src_sel_z;
857901e04c3fSmrg	} else if  ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
858001e04c3fSmrg		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
858101e04c3fSmrg		    (ctx->bc->chip_class >= EVERGREEN))
858201e04c3fSmrg		/* the array index is read from Z, coordinate will be corrected elsewhere  */
858301e04c3fSmrg		tex.coord_type_z = 0;
858401e04c3fSmrg
858501e04c3fSmrg	/* We have array access to 1D or 2D ARRAY, the coordinates are not int ->
858601e04c3fSmrg	 * evaluate the array index  */
858701e04c3fSmrg	if (array_index_offset_channel >= 0 &&
858801e04c3fSmrg		 opcode != FETCH_OP_LD &&
858901e04c3fSmrg		 opcode != FETCH_OP_GET_TEXTURE_RESINFO) {
859001e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
859101e04c3fSmrg		alu.src[0].sel =  tex.src_gpr;
859201e04c3fSmrg		alu.src[0].chan =  array_index_offset_channel;
859301e04c3fSmrg		alu.src[0].rel = tex.src_rel;
859401e04c3fSmrg		alu.op = ALU_OP1_RNDNE;
859501e04c3fSmrg		alu.dst.sel = tex.src_gpr;
859601e04c3fSmrg		alu.dst.chan = array_index_offset_channel;
859701e04c3fSmrg		alu.dst.rel = tex.src_rel;
859801e04c3fSmrg		alu.dst.write = 1;
859901e04c3fSmrg		alu.last = 1;
860001e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
860101e04c3fSmrg		if (r)
860201e04c3fSmrg			return r;
860301e04c3fSmrg	}
860401e04c3fSmrg
860501e04c3fSmrg	/* mask unused source components */
860601e04c3fSmrg	if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
860701e04c3fSmrg		switch (inst->Texture.Texture) {
860801e04c3fSmrg		case TGSI_TEXTURE_2D:
860901e04c3fSmrg		case TGSI_TEXTURE_RECT:
861001e04c3fSmrg			tex.src_sel_z = 7;
861101e04c3fSmrg			tex.src_sel_w = 7;
861201e04c3fSmrg			break;
861301e04c3fSmrg		case TGSI_TEXTURE_1D_ARRAY:
861401e04c3fSmrg			tex.src_sel_y = 7;
861501e04c3fSmrg			tex.src_sel_w = 7;
861601e04c3fSmrg			break;
861701e04c3fSmrg		case TGSI_TEXTURE_1D:
861801e04c3fSmrg			tex.src_sel_y = 7;
861901e04c3fSmrg			tex.src_sel_z = 7;
862001e04c3fSmrg			tex.src_sel_w = 7;
862101e04c3fSmrg			break;
862201e04c3fSmrg		}
862301e04c3fSmrg	}
862401e04c3fSmrg
862501e04c3fSmrg	/* Emit set gradient and offset instructions. */
862601e04c3fSmrg	for (i = 0; i < n_grad_offs; ++i) {
862701e04c3fSmrg		r = r600_bytecode_add_tex(ctx->bc, &grad_offs[i]);
862801e04c3fSmrg		if (r)
862901e04c3fSmrg			return r;
863001e04c3fSmrg	}
863101e04c3fSmrg
863201e04c3fSmrg	r = r600_bytecode_add_tex(ctx->bc, &tex);
863301e04c3fSmrg	if (r)
863401e04c3fSmrg		return r;
863501e04c3fSmrg
863601e04c3fSmrg	/* add shadow ambient support  - gallium doesn't do it yet */
863701e04c3fSmrg	return 0;
863801e04c3fSmrg}
863901e04c3fSmrg
864001e04c3fSmrgstatic int find_hw_atomic_counter(struct r600_shader_ctx *ctx,
864101e04c3fSmrg				  struct tgsi_full_src_register *src)
864201e04c3fSmrg{
864301e04c3fSmrg	unsigned i;
864401e04c3fSmrg
864501e04c3fSmrg	if (src->Register.Indirect) {
864601e04c3fSmrg		for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
864701e04c3fSmrg			if (src->Indirect.ArrayID == ctx->shader->atomics[i].array_id)
864801e04c3fSmrg				return ctx->shader->atomics[i].hw_idx;
864901e04c3fSmrg		}
865001e04c3fSmrg	} else {
865101e04c3fSmrg		uint32_t index = src->Register.Index;
865201e04c3fSmrg		for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
865301e04c3fSmrg			if (ctx->shader->atomics[i].buffer_id != (unsigned)src->Dimension.Index)
865401e04c3fSmrg				continue;
865501e04c3fSmrg			if (index > ctx->shader->atomics[i].end)
865601e04c3fSmrg				continue;
865701e04c3fSmrg			if (index < ctx->shader->atomics[i].start)
865801e04c3fSmrg				continue;
865901e04c3fSmrg			uint32_t offset = (index - ctx->shader->atomics[i].start);
866001e04c3fSmrg			return ctx->shader->atomics[i].hw_idx + offset;
866101e04c3fSmrg		}
866201e04c3fSmrg	}
866301e04c3fSmrg	assert(0);
866401e04c3fSmrg	return -1;
866501e04c3fSmrg}
866601e04c3fSmrg
866701e04c3fSmrgstatic int tgsi_set_gds_temp(struct r600_shader_ctx *ctx,
866801e04c3fSmrg			     int *uav_id_p, int *uav_index_mode_p)
866901e04c3fSmrg{
867001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
867101e04c3fSmrg	int uav_id, uav_index_mode = 0;
867201e04c3fSmrg	int r;
867301e04c3fSmrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
867401e04c3fSmrg
867501e04c3fSmrg	uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]);
867601e04c3fSmrg
867701e04c3fSmrg	if (inst->Src[0].Register.Indirect) {
867801e04c3fSmrg		if (is_cm) {
867901e04c3fSmrg			struct r600_bytecode_alu alu;
868001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
868101e04c3fSmrg			alu.op = ALU_OP2_LSHL_INT;
868201e04c3fSmrg			alu.src[0].sel = get_address_file_reg(ctx, inst->Src[0].Indirect.Index);
868301e04c3fSmrg			alu.src[0].chan = 0;
868401e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
868501e04c3fSmrg			alu.src[1].value = 2;
868601e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
868701e04c3fSmrg			alu.dst.chan = 0;
868801e04c3fSmrg			alu.dst.write = 1;
868901e04c3fSmrg			alu.last = 1;
869001e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
869101e04c3fSmrg			if (r)
869201e04c3fSmrg				return r;
869301e04c3fSmrg
869401e04c3fSmrg			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
869501e04c3fSmrg					   ctx->temp_reg, 0,
869601e04c3fSmrg					   ctx->temp_reg, 0,
869701e04c3fSmrg					   V_SQ_ALU_SRC_LITERAL, uav_id * 4);
869801e04c3fSmrg			if (r)
869901e04c3fSmrg				return r;
870001e04c3fSmrg		} else
870101e04c3fSmrg			uav_index_mode = 2;
870201e04c3fSmrg	} else if (is_cm) {
870301e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
870401e04c3fSmrg				   ctx->temp_reg, 0,
870501e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, uav_id * 4,
870601e04c3fSmrg				   0, 0);
870701e04c3fSmrg		if (r)
870801e04c3fSmrg			return r;
870901e04c3fSmrg	}
871001e04c3fSmrg	*uav_id_p = uav_id;
871101e04c3fSmrg	*uav_index_mode_p = uav_index_mode;
871201e04c3fSmrg	return 0;
871301e04c3fSmrg}
871401e04c3fSmrg
871501e04c3fSmrgstatic int tgsi_load_gds(struct r600_shader_ctx *ctx)
871601e04c3fSmrg{
871701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
871801e04c3fSmrg	int r;
871901e04c3fSmrg	struct r600_bytecode_gds gds;
872001e04c3fSmrg	int uav_id = 0;
872101e04c3fSmrg	int uav_index_mode = 0;
872201e04c3fSmrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
872301e04c3fSmrg
872401e04c3fSmrg	r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
872501e04c3fSmrg	if (r)
872601e04c3fSmrg		return r;
872701e04c3fSmrg
872801e04c3fSmrg	memset(&gds, 0, sizeof(struct r600_bytecode_gds));
872901e04c3fSmrg	gds.op = FETCH_OP_GDS_READ_RET;
873001e04c3fSmrg	gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
873101e04c3fSmrg	gds.uav_id = is_cm ? 0 : uav_id;
873201e04c3fSmrg	gds.uav_index_mode = is_cm ? 0 : uav_index_mode;
873301e04c3fSmrg	gds.src_gpr = ctx->temp_reg;
873401e04c3fSmrg	gds.src_sel_x = (is_cm) ? 0 : 4;
873501e04c3fSmrg	gds.src_sel_y = 4;
873601e04c3fSmrg	gds.src_sel_z = 4;
873701e04c3fSmrg	gds.dst_sel_x = 0;
873801e04c3fSmrg	gds.dst_sel_y = 7;
873901e04c3fSmrg	gds.dst_sel_z = 7;
874001e04c3fSmrg	gds.dst_sel_w = 7;
874101e04c3fSmrg	gds.src_gpr2 = 0;
874201e04c3fSmrg	gds.alloc_consume = !is_cm;
874301e04c3fSmrg	r = r600_bytecode_add_gds(ctx->bc, &gds);
874401e04c3fSmrg	if (r)
874501e04c3fSmrg		return r;
874601e04c3fSmrg
874701e04c3fSmrg	ctx->bc->cf_last->vpm = 1;
874801e04c3fSmrg	return 0;
874901e04c3fSmrg}
875001e04c3fSmrg
875101e04c3fSmrg/* this fixes up 1D arrays properly */
875201e04c3fSmrgstatic int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
875301e04c3fSmrg{
875401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
875501e04c3fSmrg	int r, i;
875601e04c3fSmrg	struct r600_bytecode_alu alu;
875701e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
875801e04c3fSmrg
875901e04c3fSmrg	for (i = 0; i < 4; i++) {
876001e04c3fSmrg		bool def_val = true, write_zero = false;
876101e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
876201e04c3fSmrg		alu.op = ALU_OP1_MOV;
876301e04c3fSmrg		alu.dst.sel = temp_reg;
876401e04c3fSmrg		alu.dst.chan = i;
876501e04c3fSmrg
876601e04c3fSmrg		switch (inst->Memory.Texture) {
876701e04c3fSmrg		case TGSI_TEXTURE_BUFFER:
876801e04c3fSmrg		case TGSI_TEXTURE_1D:
876901e04c3fSmrg			if (i == 1 || i == 2 || i == 3) {
877001e04c3fSmrg				write_zero = true;
877101e04c3fSmrg			}
877201e04c3fSmrg			break;
877301e04c3fSmrg		case TGSI_TEXTURE_1D_ARRAY:
877401e04c3fSmrg			if (i == 1 || i == 3)
877501e04c3fSmrg				write_zero = true;
877601e04c3fSmrg			else if (i == 2) {
877701e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
877801e04c3fSmrg				def_val = false;
877901e04c3fSmrg			}
878001e04c3fSmrg			break;
878101e04c3fSmrg		case TGSI_TEXTURE_2D:
878201e04c3fSmrg			if (i == 2 || i == 3)
878301e04c3fSmrg				write_zero = true;
878401e04c3fSmrg			break;
878501e04c3fSmrg		default:
878601e04c3fSmrg			if (i == 3)
878701e04c3fSmrg				write_zero = true;
878801e04c3fSmrg			break;
878901e04c3fSmrg		}
879001e04c3fSmrg
879101e04c3fSmrg		if (write_zero) {
879201e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
879301e04c3fSmrg			alu.src[0].value = 0;
879401e04c3fSmrg		} else if (def_val) {
879501e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
879601e04c3fSmrg		}
879701e04c3fSmrg
879801e04c3fSmrg		if (i == 3)
879901e04c3fSmrg			alu.last = 1;
880001e04c3fSmrg		alu.dst.write = 1;
880101e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
880201e04c3fSmrg		if (r)
880301e04c3fSmrg			return r;
880401e04c3fSmrg	}
880501e04c3fSmrg	*idx_gpr = temp_reg;
880601e04c3fSmrg	return 0;
880701e04c3fSmrg}
880801e04c3fSmrg
880901e04c3fSmrgstatic int load_buffer_coord(struct r600_shader_ctx *ctx, int src_idx,
881001e04c3fSmrg			     int temp_reg)
881101e04c3fSmrg{
881201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
881301e04c3fSmrg	int r;
881401e04c3fSmrg	if (inst->Src[src_idx].Register.File == TGSI_FILE_IMMEDIATE) {
881501e04c3fSmrg		int value = (ctx->literals[4 * inst->Src[src_idx].Register.Index + inst->Src[src_idx].Register.SwizzleX]);
881601e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
881701e04c3fSmrg				   temp_reg, 0,
881801e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, value >> 2,
881901e04c3fSmrg				   0, 0);
882001e04c3fSmrg		if (r)
882101e04c3fSmrg			return r;
882201e04c3fSmrg	} else {
882301e04c3fSmrg		struct r600_bytecode_alu alu;
882401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
882501e04c3fSmrg		alu.op = ALU_OP2_LSHR_INT;
882601e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[src_idx], 0);
882701e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
882801e04c3fSmrg		alu.src[1].value = 2;
882901e04c3fSmrg		alu.dst.sel = temp_reg;
883001e04c3fSmrg		alu.dst.write = 1;
883101e04c3fSmrg		alu.last = 1;
883201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
883301e04c3fSmrg		if (r)
883401e04c3fSmrg			return r;
883501e04c3fSmrg	}
883601e04c3fSmrg	return 0;
883701e04c3fSmrg}
883801e04c3fSmrg
883901e04c3fSmrgstatic int tgsi_load_buffer(struct r600_shader_ctx *ctx)
884001e04c3fSmrg{
884101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
884201e04c3fSmrg	/* have to work out the offset into the RAT immediate return buffer */
884301e04c3fSmrg	struct r600_bytecode_vtx vtx;
884401e04c3fSmrg	struct r600_bytecode_cf *cf;
884501e04c3fSmrg	int r;
884601e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
884701e04c3fSmrg	unsigned rat_index_mode;
884801e04c3fSmrg	unsigned base;
884901e04c3fSmrg
885001e04c3fSmrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
885101e04c3fSmrg	base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE];
885201e04c3fSmrg
885301e04c3fSmrg	r = load_buffer_coord(ctx, 1, temp_reg);
885401e04c3fSmrg	if (r)
885501e04c3fSmrg		return r;
885601e04c3fSmrg	ctx->bc->cf_last->barrier = 1;
885701e04c3fSmrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
885801e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
885901e04c3fSmrg	vtx.buffer_id = inst->Src[0].Register.Index + base;
886001e04c3fSmrg	vtx.buffer_index_mode = rat_index_mode;
886101e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
886201e04c3fSmrg	vtx.src_gpr = temp_reg;
886301e04c3fSmrg	vtx.src_sel_x = 0;
886401e04c3fSmrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
886501e04c3fSmrg	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
886601e04c3fSmrg	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
886701e04c3fSmrg	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
886801e04c3fSmrg	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
886901e04c3fSmrg	vtx.num_format_all = 1;
887001e04c3fSmrg	vtx.format_comp_all = 1;
887101e04c3fSmrg	vtx.srf_mode_all = 0;
887201e04c3fSmrg
887301e04c3fSmrg	if (inst->Dst[0].Register.WriteMask & 8) {
887401e04c3fSmrg		vtx.data_format = FMT_32_32_32_32;
887501e04c3fSmrg		vtx.use_const_fields = 0;
887601e04c3fSmrg	} else if (inst->Dst[0].Register.WriteMask & 4) {
887701e04c3fSmrg		vtx.data_format = FMT_32_32_32;
887801e04c3fSmrg		vtx.use_const_fields = 0;
887901e04c3fSmrg	} else if (inst->Dst[0].Register.WriteMask & 2) {
888001e04c3fSmrg		vtx.data_format = FMT_32_32;
888101e04c3fSmrg		vtx.use_const_fields = 0;
888201e04c3fSmrg	} else {
888301e04c3fSmrg		vtx.data_format = FMT_32;
888401e04c3fSmrg		vtx.use_const_fields = 0;
888501e04c3fSmrg	}
888601e04c3fSmrg
888701e04c3fSmrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
888801e04c3fSmrg	if (r)
888901e04c3fSmrg		return r;
889001e04c3fSmrg	cf = ctx->bc->cf_last;
889101e04c3fSmrg	cf->barrier = 1;
889201e04c3fSmrg	return 0;
889301e04c3fSmrg}
889401e04c3fSmrg
889501e04c3fSmrgstatic int tgsi_load_rat(struct r600_shader_ctx *ctx)
889601e04c3fSmrg{
889701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
889801e04c3fSmrg	/* have to work out the offset into the RAT immediate return buffer */
889901e04c3fSmrg	struct r600_bytecode_vtx vtx;
890001e04c3fSmrg	struct r600_bytecode_cf *cf;
890101e04c3fSmrg	int r;
890201e04c3fSmrg	int idx_gpr;
890301e04c3fSmrg	unsigned format, num_format, format_comp, endian;
890401e04c3fSmrg	const struct util_format_description *desc;
890501e04c3fSmrg	unsigned rat_index_mode;
890601e04c3fSmrg	unsigned immed_base;
890701e04c3fSmrg
890801e04c3fSmrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
890901e04c3fSmrg
891001e04c3fSmrg	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
891101e04c3fSmrg	r = load_index_src(ctx, 1, &idx_gpr);
891201e04c3fSmrg	if (r)
891301e04c3fSmrg		return r;
891401e04c3fSmrg
891501e04c3fSmrg	if (rat_index_mode)
891601e04c3fSmrg		egcm_load_index_reg(ctx->bc, 1, false);
891701e04c3fSmrg
891801e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
891901e04c3fSmrg	cf = ctx->bc->cf_last;
892001e04c3fSmrg
892101e04c3fSmrg	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
892201e04c3fSmrg	cf->rat.inst = V_RAT_INST_NOP_RTN;
892301e04c3fSmrg	cf->rat.index_mode = rat_index_mode;
892401e04c3fSmrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
892501e04c3fSmrg	cf->output.gpr = ctx->thread_id_gpr;
892601e04c3fSmrg	cf->output.index_gpr = idx_gpr;
892701e04c3fSmrg	cf->output.comp_mask = 0xf;
892801e04c3fSmrg	cf->output.burst_count = 1;
892901e04c3fSmrg	cf->vpm = 1;
893001e04c3fSmrg	cf->barrier = 1;
893101e04c3fSmrg	cf->mark = 1;
893201e04c3fSmrg	cf->output.elem_size = 0;
893301e04c3fSmrg
893401e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
893501e04c3fSmrg	cf = ctx->bc->cf_last;
893601e04c3fSmrg	cf->barrier = 1;
893701e04c3fSmrg
893801e04c3fSmrg	desc = util_format_description(inst->Memory.Format);
893901e04c3fSmrg	r600_vertex_data_type(inst->Memory.Format,
894001e04c3fSmrg			      &format, &num_format, &format_comp, &endian);
894101e04c3fSmrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
894201e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
894301e04c3fSmrg	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
894401e04c3fSmrg	vtx.buffer_index_mode = rat_index_mode;
894501e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
894601e04c3fSmrg	vtx.src_gpr = ctx->thread_id_gpr;
894701e04c3fSmrg	vtx.src_sel_x = 1;
894801e04c3fSmrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
894901e04c3fSmrg	vtx.dst_sel_x = desc->swizzle[0];
895001e04c3fSmrg	vtx.dst_sel_y = desc->swizzle[1];
895101e04c3fSmrg	vtx.dst_sel_z = desc->swizzle[2];
895201e04c3fSmrg	vtx.dst_sel_w = desc->swizzle[3];
895301e04c3fSmrg	vtx.srf_mode_all = 1;
895401e04c3fSmrg	vtx.data_format = format;
895501e04c3fSmrg	vtx.num_format_all = num_format;
895601e04c3fSmrg	vtx.format_comp_all = format_comp;
895701e04c3fSmrg	vtx.endian = endian;
895801e04c3fSmrg	vtx.offset = 0;
895901e04c3fSmrg	vtx.mega_fetch_count = 3;
896001e04c3fSmrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
896101e04c3fSmrg	if (r)
896201e04c3fSmrg		return r;
896301e04c3fSmrg	cf = ctx->bc->cf_last;
896401e04c3fSmrg	cf->barrier = 1;
896501e04c3fSmrg	return 0;
896601e04c3fSmrg}
896701e04c3fSmrg
896801e04c3fSmrgstatic int tgsi_load_lds(struct r600_shader_ctx *ctx)
896901e04c3fSmrg{
897001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
897101e04c3fSmrg	struct r600_bytecode_alu alu;
897201e04c3fSmrg	int r;
897301e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
897401e04c3fSmrg
897501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
897601e04c3fSmrg	alu.op = ALU_OP1_MOV;
897701e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
897801e04c3fSmrg	alu.dst.sel = temp_reg;
897901e04c3fSmrg	alu.dst.write = 1;
898001e04c3fSmrg	alu.last = 1;
898101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
898201e04c3fSmrg	if (r)
898301e04c3fSmrg		return r;
898401e04c3fSmrg
898501e04c3fSmrg	r = do_lds_fetch_values(ctx, temp_reg,
898601e04c3fSmrg				ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index, inst->Dst[0].Register.WriteMask);
898701e04c3fSmrg	if (r)
898801e04c3fSmrg		return r;
898901e04c3fSmrg	return 0;
899001e04c3fSmrg}
899101e04c3fSmrg
899201e04c3fSmrgstatic int tgsi_load(struct r600_shader_ctx *ctx)
899301e04c3fSmrg{
899401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
899501e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
899601e04c3fSmrg		return tgsi_load_rat(ctx);
899701e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
899801e04c3fSmrg		return tgsi_load_gds(ctx);
899901e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
900001e04c3fSmrg		return tgsi_load_buffer(ctx);
900101e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
900201e04c3fSmrg		return tgsi_load_lds(ctx);
900301e04c3fSmrg	return 0;
900401e04c3fSmrg}
900501e04c3fSmrg
900601e04c3fSmrgstatic int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx)
900701e04c3fSmrg{
900801e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
900901e04c3fSmrg	struct r600_bytecode_cf *cf;
901001e04c3fSmrg	int r, i;
901101e04c3fSmrg	unsigned rat_index_mode;
901201e04c3fSmrg	int lasti;
901301e04c3fSmrg	int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx);
901401e04c3fSmrg
901501e04c3fSmrg	r = load_buffer_coord(ctx, 0, treg2);
901601e04c3fSmrg	if (r)
901701e04c3fSmrg		return r;
901801e04c3fSmrg
901901e04c3fSmrg	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
902001e04c3fSmrg	if (rat_index_mode)
902101e04c3fSmrg		egcm_load_index_reg(ctx->bc, 1, false);
902201e04c3fSmrg
902301e04c3fSmrg	for (i = 0; i <= 3; i++) {
902401e04c3fSmrg		struct r600_bytecode_alu alu;
902501e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
902601e04c3fSmrg		alu.op = ALU_OP1_MOV;
902701e04c3fSmrg		alu.dst.sel = temp_reg;
902801e04c3fSmrg		alu.dst.chan = i;
902901e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_0;
903001e04c3fSmrg		alu.last = (i == 3);
903101e04c3fSmrg		alu.dst.write = 1;
903201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
903301e04c3fSmrg		if (r)
903401e04c3fSmrg			return r;
903501e04c3fSmrg	}
903601e04c3fSmrg
903701e04c3fSmrg	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
903801e04c3fSmrg	for (i = 0; i <= lasti; i++) {
903901e04c3fSmrg		struct r600_bytecode_alu alu;
904001e04c3fSmrg		if (!((1 << i) & inst->Dst[0].Register.WriteMask))
904101e04c3fSmrg			continue;
904201e04c3fSmrg
904301e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
904401e04c3fSmrg				   temp_reg, 0,
904501e04c3fSmrg				   treg2, 0,
904601e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, i);
904701e04c3fSmrg		if (r)
904801e04c3fSmrg			return r;
904901e04c3fSmrg
905001e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
905101e04c3fSmrg		alu.op = ALU_OP1_MOV;
905201e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
905301e04c3fSmrg		alu.dst.chan = 0;
905401e04c3fSmrg
905501e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
905601e04c3fSmrg		alu.last = 1;
905701e04c3fSmrg		alu.dst.write = 1;
905801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
905901e04c3fSmrg		if (r)
906001e04c3fSmrg			return r;
906101e04c3fSmrg
906201e04c3fSmrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
906301e04c3fSmrg		cf = ctx->bc->cf_last;
906401e04c3fSmrg
906501e04c3fSmrg		cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE];
906601e04c3fSmrg		cf->rat.inst = V_RAT_INST_STORE_TYPED;
906701e04c3fSmrg		cf->rat.index_mode = rat_index_mode;
906801e04c3fSmrg		cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
906901e04c3fSmrg		cf->output.gpr = ctx->temp_reg;
907001e04c3fSmrg		cf->output.index_gpr = temp_reg;
907101e04c3fSmrg		cf->output.comp_mask = 1;
907201e04c3fSmrg		cf->output.burst_count = 1;
907301e04c3fSmrg		cf->vpm = 1;
907401e04c3fSmrg		cf->barrier = 1;
907501e04c3fSmrg		cf->output.elem_size = 0;
907601e04c3fSmrg	}
907701e04c3fSmrg	return 0;
907801e04c3fSmrg}
907901e04c3fSmrg
908001e04c3fSmrgstatic int tgsi_store_rat(struct r600_shader_ctx *ctx)
908101e04c3fSmrg{
908201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
908301e04c3fSmrg	struct r600_bytecode_cf *cf;
908401e04c3fSmrg	bool src_requires_loading = false;
908501e04c3fSmrg	int val_gpr, idx_gpr;
908601e04c3fSmrg	int r, i;
908701e04c3fSmrg	unsigned rat_index_mode;
908801e04c3fSmrg
908901e04c3fSmrg	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
909001e04c3fSmrg
909101e04c3fSmrg	r = load_index_src(ctx, 0, &idx_gpr);
909201e04c3fSmrg	if (r)
909301e04c3fSmrg		return r;
909401e04c3fSmrg
909501e04c3fSmrg	if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY)
909601e04c3fSmrg		src_requires_loading = true;
909701e04c3fSmrg
909801e04c3fSmrg	if (src_requires_loading) {
909901e04c3fSmrg		struct r600_bytecode_alu alu;
910001e04c3fSmrg		for (i = 0; i < 4; i++) {
910101e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
910201e04c3fSmrg			alu.op = ALU_OP1_MOV;
910301e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
910401e04c3fSmrg			alu.dst.chan = i;
910501e04c3fSmrg
910601e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
910701e04c3fSmrg			if (i == 3)
910801e04c3fSmrg				alu.last = 1;
910901e04c3fSmrg			alu.dst.write = 1;
911001e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
911101e04c3fSmrg			if (r)
911201e04c3fSmrg				return r;
911301e04c3fSmrg		}
911401e04c3fSmrg		val_gpr = ctx->temp_reg;
911501e04c3fSmrg	} else
911601e04c3fSmrg		val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
911701e04c3fSmrg	if (rat_index_mode)
911801e04c3fSmrg		egcm_load_index_reg(ctx->bc, 1, false);
911901e04c3fSmrg
912001e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
912101e04c3fSmrg	cf = ctx->bc->cf_last;
912201e04c3fSmrg
912301e04c3fSmrg	cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
912401e04c3fSmrg	cf->rat.inst = V_RAT_INST_STORE_TYPED;
912501e04c3fSmrg	cf->rat.index_mode = rat_index_mode;
912601e04c3fSmrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
912701e04c3fSmrg	cf->output.gpr = val_gpr;
912801e04c3fSmrg	cf->output.index_gpr = idx_gpr;
912901e04c3fSmrg	cf->output.comp_mask = 0xf;
913001e04c3fSmrg	cf->output.burst_count = 1;
913101e04c3fSmrg	cf->vpm = 1;
913201e04c3fSmrg	cf->barrier = 1;
913301e04c3fSmrg	cf->output.elem_size = 0;
913401e04c3fSmrg	return 0;
913501e04c3fSmrg}
913601e04c3fSmrg
913701e04c3fSmrgstatic int tgsi_store_lds(struct r600_shader_ctx *ctx)
913801e04c3fSmrg{
913901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
914001e04c3fSmrg	struct r600_bytecode_alu alu;
914101e04c3fSmrg	int r, i, lasti;
914201e04c3fSmrg	int write_mask = inst->Dst[0].Register.WriteMask;
914301e04c3fSmrg	int temp_reg = r600_get_temp(ctx);
914401e04c3fSmrg
914501e04c3fSmrg	/* LDS write */
914601e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
914701e04c3fSmrg	alu.op = ALU_OP1_MOV;
914801e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
914901e04c3fSmrg	alu.dst.sel = temp_reg;
915001e04c3fSmrg	alu.dst.write = 1;
915101e04c3fSmrg	alu.last = 1;
915201e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
915301e04c3fSmrg	if (r)
915401e04c3fSmrg		return r;
915501e04c3fSmrg
915601e04c3fSmrg	lasti = tgsi_last_instruction(write_mask);
915701e04c3fSmrg	for (i = 1; i <= lasti; i++) {
915801e04c3fSmrg		if (!(write_mask & (1 << i)))
915901e04c3fSmrg			continue;
916001e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
916101e04c3fSmrg				   temp_reg, i,
916201e04c3fSmrg				   temp_reg, 0,
916301e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 4 * i);
916401e04c3fSmrg		if (r)
916501e04c3fSmrg			return r;
916601e04c3fSmrg	}
916701e04c3fSmrg	for (i = 0; i <= lasti; i++) {
916801e04c3fSmrg		if (!(write_mask & (1 << i)))
916901e04c3fSmrg			continue;
917001e04c3fSmrg
917101e04c3fSmrg		if ((i == 0 && ((write_mask & 3) == 3)) ||
917201e04c3fSmrg		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
917301e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
917401e04c3fSmrg			alu.op = LDS_OP3_LDS_WRITE_REL;
917501e04c3fSmrg
917601e04c3fSmrg			alu.src[0].sel = temp_reg;
917701e04c3fSmrg			alu.src[0].chan = i;
917801e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
917901e04c3fSmrg			r600_bytecode_src(&alu.src[2], &ctx->src[1], i + 1);
918001e04c3fSmrg			alu.last = 1;
918101e04c3fSmrg			alu.is_lds_idx_op = true;
918201e04c3fSmrg			alu.lds_idx = 1;
918301e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
918401e04c3fSmrg			if (r)
918501e04c3fSmrg				return r;
918601e04c3fSmrg			i += 1;
918701e04c3fSmrg			continue;
918801e04c3fSmrg		}
918901e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
919001e04c3fSmrg		alu.op = LDS_OP2_LDS_WRITE;
919101e04c3fSmrg
919201e04c3fSmrg		alu.src[0].sel = temp_reg;
919301e04c3fSmrg		alu.src[0].chan = i;
919401e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
919501e04c3fSmrg
919601e04c3fSmrg		alu.last = 1;
919701e04c3fSmrg		alu.is_lds_idx_op = true;
919801e04c3fSmrg
919901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
920001e04c3fSmrg		if (r)
920101e04c3fSmrg			return r;
920201e04c3fSmrg	}
920301e04c3fSmrg	return 0;
920401e04c3fSmrg}
920501e04c3fSmrg
920601e04c3fSmrgstatic int tgsi_store(struct r600_shader_ctx *ctx)
920701e04c3fSmrg{
920801e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
920901e04c3fSmrg	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
921001e04c3fSmrg		return tgsi_store_buffer_rat(ctx);
921101e04c3fSmrg	else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
921201e04c3fSmrg		return tgsi_store_lds(ctx);
921301e04c3fSmrg	else
921401e04c3fSmrg		return tgsi_store_rat(ctx);
921501e04c3fSmrg}
921601e04c3fSmrg
921701e04c3fSmrgstatic int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
921801e04c3fSmrg{
921901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
922001e04c3fSmrg	/* have to work out the offset into the RAT immediate return buffer */
922101e04c3fSmrg	struct r600_bytecode_alu alu;
922201e04c3fSmrg	struct r600_bytecode_vtx vtx;
922301e04c3fSmrg	struct r600_bytecode_cf *cf;
922401e04c3fSmrg	int r;
922501e04c3fSmrg	int idx_gpr;
922601e04c3fSmrg	unsigned format, num_format, format_comp, endian;
922701e04c3fSmrg	const struct util_format_description *desc;
922801e04c3fSmrg	unsigned rat_index_mode;
922901e04c3fSmrg	unsigned immed_base;
923001e04c3fSmrg	unsigned rat_base;
923101e04c3fSmrg
923201e04c3fSmrg	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
923301e04c3fSmrg	rat_base = ctx->shader->rat_base;
923401e04c3fSmrg
923501e04c3fSmrg        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
923601e04c3fSmrg		immed_base += ctx->info.file_count[TGSI_FILE_IMAGE];
923701e04c3fSmrg		rat_base += ctx->info.file_count[TGSI_FILE_IMAGE];
923801e04c3fSmrg
923901e04c3fSmrg		r = load_buffer_coord(ctx, 1, ctx->temp_reg);
924001e04c3fSmrg		if (r)
924101e04c3fSmrg			return r;
924201e04c3fSmrg		idx_gpr = ctx->temp_reg;
924301e04c3fSmrg	} else {
924401e04c3fSmrg		r = load_index_src(ctx, 1, &idx_gpr);
924501e04c3fSmrg		if (r)
924601e04c3fSmrg			return r;
924701e04c3fSmrg	}
924801e04c3fSmrg
924901e04c3fSmrg	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
925001e04c3fSmrg
925101e04c3fSmrg	if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
9252af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
925301e04c3fSmrg		alu.op = ALU_OP1_MOV;
925401e04c3fSmrg		alu.dst.sel = ctx->thread_id_gpr;
925501e04c3fSmrg		alu.dst.chan = 0;
9256af69d88dSmrg		alu.dst.write = 1;
925701e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
9258af69d88dSmrg		alu.last = 1;
9259af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9260af69d88dSmrg		if (r)
9261af69d88dSmrg			return r;
9262af69d88dSmrg
9263af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
926401e04c3fSmrg		alu.op = ALU_OP1_MOV;
926501e04c3fSmrg		alu.dst.sel = ctx->thread_id_gpr;
926601e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN)
926701e04c3fSmrg			alu.dst.chan = 2;
926801e04c3fSmrg		else
926901e04c3fSmrg			alu.dst.chan = 3;
9270af69d88dSmrg		alu.dst.write = 1;
927101e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
9272af69d88dSmrg		alu.last = 1;
9273af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
9274af69d88dSmrg		if (r)
9275af69d88dSmrg			return r;
927601e04c3fSmrg	} else {
927701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
927801e04c3fSmrg		alu.op = ALU_OP1_MOV;
927901e04c3fSmrg		alu.dst.sel = ctx->thread_id_gpr;
928001e04c3fSmrg		alu.dst.chan = 0;
928101e04c3fSmrg		alu.dst.write = 1;
928201e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
928301e04c3fSmrg		alu.last = 1;
928401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
928501e04c3fSmrg		if (r)
928601e04c3fSmrg			return r;
928701e04c3fSmrg	}
928801e04c3fSmrg
928901e04c3fSmrg	if (rat_index_mode)
929001e04c3fSmrg		egcm_load_index_reg(ctx->bc, 1, false);
929101e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
929201e04c3fSmrg	cf = ctx->bc->cf_last;
929301e04c3fSmrg
929401e04c3fSmrg	cf->rat.id = rat_base + inst->Src[0].Register.Index;
929501e04c3fSmrg	cf->rat.inst = ctx->inst_info->op;
929601e04c3fSmrg	cf->rat.index_mode = rat_index_mode;
929701e04c3fSmrg	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
929801e04c3fSmrg	cf->output.gpr = ctx->thread_id_gpr;
929901e04c3fSmrg	cf->output.index_gpr = idx_gpr;
930001e04c3fSmrg	cf->output.comp_mask = 0xf;
930101e04c3fSmrg	cf->output.burst_count = 1;
930201e04c3fSmrg	cf->vpm = 1;
930301e04c3fSmrg	cf->barrier = 1;
930401e04c3fSmrg	cf->mark = 1;
930501e04c3fSmrg	cf->output.elem_size = 0;
930601e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
930701e04c3fSmrg	cf = ctx->bc->cf_last;
930801e04c3fSmrg	cf->barrier = 1;
930901e04c3fSmrg	cf->cf_addr = 1;
931001e04c3fSmrg
931101e04c3fSmrg	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
931201e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
931301e04c3fSmrg		desc = util_format_description(inst->Memory.Format);
931401e04c3fSmrg		r600_vertex_data_type(inst->Memory.Format,
931501e04c3fSmrg				      &format, &num_format, &format_comp, &endian);
931601e04c3fSmrg		vtx.dst_sel_x = desc->swizzle[0];
931701e04c3fSmrg	} else {
931801e04c3fSmrg		format = FMT_32;
931901e04c3fSmrg		num_format = 1;
932001e04c3fSmrg		format_comp = 0;
932101e04c3fSmrg		endian = 0;
932201e04c3fSmrg		vtx.dst_sel_x = 0;
932301e04c3fSmrg	}
932401e04c3fSmrg	vtx.op = FETCH_OP_VFETCH;
932501e04c3fSmrg	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
932601e04c3fSmrg	vtx.buffer_index_mode = rat_index_mode;
932701e04c3fSmrg	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
932801e04c3fSmrg	vtx.src_gpr = ctx->thread_id_gpr;
932901e04c3fSmrg	vtx.src_sel_x = 1;
933001e04c3fSmrg	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
933101e04c3fSmrg	vtx.dst_sel_y = 7;
933201e04c3fSmrg	vtx.dst_sel_z = 7;
933301e04c3fSmrg	vtx.dst_sel_w = 7;
933401e04c3fSmrg	vtx.use_const_fields = 0;
933501e04c3fSmrg	vtx.srf_mode_all = 1;
933601e04c3fSmrg	vtx.data_format = format;
933701e04c3fSmrg	vtx.num_format_all = num_format;
933801e04c3fSmrg	vtx.format_comp_all = format_comp;
933901e04c3fSmrg	vtx.endian = endian;
934001e04c3fSmrg	vtx.offset = 0;
934101e04c3fSmrg	vtx.mega_fetch_count = 0xf;
934201e04c3fSmrg	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
934301e04c3fSmrg	if (r)
934401e04c3fSmrg		return r;
934501e04c3fSmrg	cf = ctx->bc->cf_last;
934601e04c3fSmrg	cf->vpm = 1;
934701e04c3fSmrg	cf->barrier = 1;
934801e04c3fSmrg	return 0;
934901e04c3fSmrg}
935001e04c3fSmrg
935101e04c3fSmrgstatic int get_gds_op(int opcode)
935201e04c3fSmrg{
935301e04c3fSmrg	switch (opcode) {
935401e04c3fSmrg	case TGSI_OPCODE_ATOMUADD:
935501e04c3fSmrg		return FETCH_OP_GDS_ADD_RET;
935601e04c3fSmrg	case TGSI_OPCODE_ATOMAND:
935701e04c3fSmrg		return FETCH_OP_GDS_AND_RET;
935801e04c3fSmrg	case TGSI_OPCODE_ATOMOR:
935901e04c3fSmrg		return FETCH_OP_GDS_OR_RET;
936001e04c3fSmrg	case TGSI_OPCODE_ATOMXOR:
936101e04c3fSmrg		return FETCH_OP_GDS_XOR_RET;
936201e04c3fSmrg	case TGSI_OPCODE_ATOMUMIN:
936301e04c3fSmrg		return FETCH_OP_GDS_MIN_UINT_RET;
936401e04c3fSmrg	case TGSI_OPCODE_ATOMUMAX:
936501e04c3fSmrg		return FETCH_OP_GDS_MAX_UINT_RET;
936601e04c3fSmrg	case TGSI_OPCODE_ATOMXCHG:
936701e04c3fSmrg		return FETCH_OP_GDS_XCHG_RET;
936801e04c3fSmrg	case TGSI_OPCODE_ATOMCAS:
936901e04c3fSmrg		return FETCH_OP_GDS_CMP_XCHG_RET;
937001e04c3fSmrg	default:
937101e04c3fSmrg		return -1;
937201e04c3fSmrg	}
937301e04c3fSmrg}
937401e04c3fSmrg
937501e04c3fSmrgstatic int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
937601e04c3fSmrg{
937701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
937801e04c3fSmrg	struct r600_bytecode_gds gds;
937901e04c3fSmrg	struct r600_bytecode_alu alu;
938001e04c3fSmrg	int gds_op = get_gds_op(inst->Instruction.Opcode);
938101e04c3fSmrg	int r;
938201e04c3fSmrg	int uav_id = 0;
938301e04c3fSmrg	int uav_index_mode = 0;
938401e04c3fSmrg	bool is_cm = (ctx->bc->chip_class == CAYMAN);
938501e04c3fSmrg
938601e04c3fSmrg	if (gds_op == -1) {
938701e04c3fSmrg		fprintf(stderr, "unknown GDS op for opcode %d\n", inst->Instruction.Opcode);
938801e04c3fSmrg		return -1;
938901e04c3fSmrg	}
939001e04c3fSmrg
939101e04c3fSmrg	r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode);
939201e04c3fSmrg	if (r)
939301e04c3fSmrg		return r;
939401e04c3fSmrg
939501e04c3fSmrg	if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) {
939601e04c3fSmrg		if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) {
939701e04c3fSmrg			int value = (ctx->literals[4 * inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]);
9398af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
939901e04c3fSmrg			alu.op = ALU_OP1_MOV;
940001e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
940101e04c3fSmrg			alu.dst.chan = is_cm ? 2 : 1;
940201e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
940301e04c3fSmrg			alu.src[0].value = value;
940401e04c3fSmrg			alu.last = 1;
9405af69d88dSmrg			alu.dst.write = 1;
940601e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
940701e04c3fSmrg			if (r)
940801e04c3fSmrg				return r;
940901e04c3fSmrg		} else {
941001e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
941101e04c3fSmrg			alu.op = ALU_OP1_MOV;
941201e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
941301e04c3fSmrg			alu.dst.chan = is_cm ? 2 : 1;
941401e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
9415af69d88dSmrg			alu.last = 1;
941601e04c3fSmrg			alu.dst.write = 1;
9417af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
9418af69d88dSmrg			if (r)
9419af69d88dSmrg				return r;
9420af69d88dSmrg		}
9421af69d88dSmrg	}
942201e04c3fSmrg	if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) {
942301e04c3fSmrg		int value = (ctx->literals[4 * inst->Src[2].Register.Index + inst->Src[2].Register.SwizzleX]);
942401e04c3fSmrg		int abs_value = abs(value);
942501e04c3fSmrg		if (abs_value != value && gds_op == FETCH_OP_GDS_ADD_RET)
942601e04c3fSmrg			gds_op = FETCH_OP_GDS_SUB_RET;
9427af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9428af69d88dSmrg		alu.op = ALU_OP1_MOV;
942901e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
943001e04c3fSmrg		alu.dst.chan = is_cm ? 1 : 0;
943101e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
943201e04c3fSmrg		alu.src[0].value = abs_value;
94333464ebd5Sriastradh		alu.last = 1;
943401e04c3fSmrg		alu.dst.write = 1;
943501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
943601e04c3fSmrg		if (r)
943701e04c3fSmrg			return r;
943801e04c3fSmrg	} else {
943901e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
944001e04c3fSmrg		alu.op = ALU_OP1_MOV;
944101e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
944201e04c3fSmrg		alu.dst.chan = is_cm ? 1 : 0;
944301e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
944401e04c3fSmrg		alu.last = 1;
944501e04c3fSmrg		alu.dst.write = 1;
9446af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
94473464ebd5Sriastradh		if (r)
94483464ebd5Sriastradh			return r;
94493464ebd5Sriastradh	}
94503464ebd5Sriastradh
9451af69d88dSmrg
945201e04c3fSmrg	memset(&gds, 0, sizeof(struct r600_bytecode_gds));
945301e04c3fSmrg	gds.op = gds_op;
945401e04c3fSmrg	gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
945501e04c3fSmrg	gds.uav_id = is_cm ? 0 : uav_id;
945601e04c3fSmrg	gds.uav_index_mode = is_cm ? 0 : uav_index_mode;
945701e04c3fSmrg	gds.src_gpr = ctx->temp_reg;
945801e04c3fSmrg	gds.src_gpr2 = 0;
945901e04c3fSmrg	gds.src_sel_x = is_cm ? 0 : 4;
946001e04c3fSmrg	gds.src_sel_y = is_cm ? 1 : 0;
946101e04c3fSmrg	if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET)
946201e04c3fSmrg		gds.src_sel_z = is_cm ? 2 : 1;
946301e04c3fSmrg	else
946401e04c3fSmrg		gds.src_sel_z = 7;
946501e04c3fSmrg	gds.dst_sel_x = 0;
946601e04c3fSmrg	gds.dst_sel_y = 7;
946701e04c3fSmrg	gds.dst_sel_z = 7;
946801e04c3fSmrg	gds.dst_sel_w = 7;
946901e04c3fSmrg	gds.alloc_consume = !is_cm;
947001e04c3fSmrg
947101e04c3fSmrg	r = r600_bytecode_add_gds(ctx->bc, &gds);
947201e04c3fSmrg	if (r)
947301e04c3fSmrg		return r;
947401e04c3fSmrg	ctx->bc->cf_last->vpm = 1;
947501e04c3fSmrg	return 0;
947601e04c3fSmrg}
9477af69d88dSmrg
947801e04c3fSmrgstatic int get_lds_op(int opcode)
947901e04c3fSmrg{
948001e04c3fSmrg	switch (opcode) {
948101e04c3fSmrg	case TGSI_OPCODE_ATOMUADD:
948201e04c3fSmrg		return LDS_OP2_LDS_ADD_RET;
948301e04c3fSmrg	case TGSI_OPCODE_ATOMAND:
948401e04c3fSmrg		return LDS_OP2_LDS_AND_RET;
948501e04c3fSmrg	case TGSI_OPCODE_ATOMOR:
948601e04c3fSmrg		return LDS_OP2_LDS_OR_RET;
948701e04c3fSmrg	case TGSI_OPCODE_ATOMXOR:
948801e04c3fSmrg		return LDS_OP2_LDS_XOR_RET;
948901e04c3fSmrg	case TGSI_OPCODE_ATOMUMIN:
949001e04c3fSmrg		return LDS_OP2_LDS_MIN_UINT_RET;
949101e04c3fSmrg	case TGSI_OPCODE_ATOMUMAX:
949201e04c3fSmrg		return LDS_OP2_LDS_MAX_UINT_RET;
949301e04c3fSmrg	case TGSI_OPCODE_ATOMIMIN:
949401e04c3fSmrg		return LDS_OP2_LDS_MIN_INT_RET;
949501e04c3fSmrg	case TGSI_OPCODE_ATOMIMAX:
949601e04c3fSmrg		return LDS_OP2_LDS_MAX_INT_RET;
949701e04c3fSmrg	case TGSI_OPCODE_ATOMXCHG:
949801e04c3fSmrg		return LDS_OP2_LDS_XCHG_RET;
949901e04c3fSmrg	case TGSI_OPCODE_ATOMCAS:
950001e04c3fSmrg		return LDS_OP3_LDS_CMP_XCHG_RET;
950101e04c3fSmrg	default:
950201e04c3fSmrg		return -1;
95033464ebd5Sriastradh	}
950401e04c3fSmrg}
95053464ebd5Sriastradh
950601e04c3fSmrgstatic int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
950701e04c3fSmrg{
950801e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
950901e04c3fSmrg	int lds_op = get_lds_op(inst->Instruction.Opcode);
951001e04c3fSmrg	int r;
95113464ebd5Sriastradh
951201e04c3fSmrg	struct r600_bytecode_alu alu;
951301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
951401e04c3fSmrg	alu.op = lds_op;
951501e04c3fSmrg	alu.is_lds_idx_op = true;
951601e04c3fSmrg	alu.last = 1;
951701e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
951801e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[2], 0);
951901e04c3fSmrg	if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET)
952001e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[3], 0);
952101e04c3fSmrg	else
952201e04c3fSmrg		alu.src[2].sel = V_SQ_ALU_SRC_0;
952301e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
952401e04c3fSmrg	if (r)
952501e04c3fSmrg		return r;
95263464ebd5Sriastradh
952701e04c3fSmrg	/* then read from LDS_OQ_A_POP */
952801e04c3fSmrg	memset(&alu, 0, sizeof(alu));
9529af69d88dSmrg
953001e04c3fSmrg	alu.op = ALU_OP1_MOV;
953101e04c3fSmrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
953201e04c3fSmrg	alu.src[0].chan = 0;
953301e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
953401e04c3fSmrg	alu.dst.write = 1;
953501e04c3fSmrg	alu.last = 1;
953601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
953701e04c3fSmrg	if (r)
953801e04c3fSmrg		return r;
9539af69d88dSmrg
954001e04c3fSmrg	return 0;
954101e04c3fSmrg}
9542af69d88dSmrg
954301e04c3fSmrgstatic int tgsi_atomic_op(struct r600_shader_ctx *ctx)
954401e04c3fSmrg{
954501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
954601e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
954701e04c3fSmrg		return tgsi_atomic_op_rat(ctx);
954801e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
954901e04c3fSmrg		return tgsi_atomic_op_gds(ctx);
955001e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
955101e04c3fSmrg		return tgsi_atomic_op_rat(ctx);
955201e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
955301e04c3fSmrg		return tgsi_atomic_op_lds(ctx);
955401e04c3fSmrg	return 0;
955501e04c3fSmrg}
9556af69d88dSmrg
955701e04c3fSmrgstatic int tgsi_resq(struct r600_shader_ctx *ctx)
955801e04c3fSmrg{
955901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
956001e04c3fSmrg	unsigned sampler_index_mode;
956101e04c3fSmrg	struct r600_bytecode_tex tex;
956201e04c3fSmrg	int r;
956301e04c3fSmrg	boolean has_txq_cube_array_z = false;
9564af69d88dSmrg
956501e04c3fSmrg	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
956601e04c3fSmrg	    (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
956701e04c3fSmrg		if (ctx->bc->chip_class < EVERGREEN)
956801e04c3fSmrg			ctx->shader->uses_tex_buffers = true;
956901e04c3fSmrg		unsigned eg_buffer_base = 0;
957001e04c3fSmrg		eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET;
957101e04c3fSmrg		if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
957201e04c3fSmrg			eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE];
957301e04c3fSmrg		return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset, eg_buffer_base);
95743464ebd5Sriastradh	}
95753464ebd5Sriastradh
957601e04c3fSmrg	if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY &&
957701e04c3fSmrg	    inst->Dst[0].Register.WriteMask & 4) {
957801e04c3fSmrg		ctx->shader->has_txq_cube_array_z_comp = true;
957901e04c3fSmrg		has_txq_cube_array_z = true;
95803464ebd5Sriastradh	}
95813464ebd5Sriastradh
958201e04c3fSmrg	sampler_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
958301e04c3fSmrg	if (sampler_index_mode)
958401e04c3fSmrg		egcm_load_index_reg(ctx->bc, 1, false);
9585af69d88dSmrg
95863464ebd5Sriastradh
958701e04c3fSmrg	/* does this shader want a num layers from TXQ for a cube array? */
958801e04c3fSmrg	if (has_txq_cube_array_z) {
958901e04c3fSmrg		int id = tgsi_tex_get_src_gpr(ctx, 0) + ctx->shader->image_size_const_offset;
959001e04c3fSmrg		struct r600_bytecode_alu alu;
9591af69d88dSmrg
959201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
959301e04c3fSmrg		alu.op = ALU_OP1_MOV;
95943464ebd5Sriastradh
959501e04c3fSmrg		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
959601e04c3fSmrg		/* with eg each dword is either number of cubes */
959701e04c3fSmrg		alu.src[0].sel += id / 4;
959801e04c3fSmrg		alu.src[0].chan = id % 4;
959901e04c3fSmrg		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
960001e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
960101e04c3fSmrg		alu.last = 1;
960201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
960301e04c3fSmrg		if (r)
960401e04c3fSmrg			return r;
960501e04c3fSmrg		/* disable writemask from texture instruction */
960601e04c3fSmrg		inst->Dst[0].Register.WriteMask &= ~4;
9607af69d88dSmrg	}
960801e04c3fSmrg	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
960901e04c3fSmrg	tex.op = ctx->inst_info->op;
961001e04c3fSmrg	tex.sampler_id = R600_IMAGE_REAL_RESOURCE_OFFSET + inst->Src[0].Register.Index;
961101e04c3fSmrg	tex.sampler_index_mode = sampler_index_mode;
961201e04c3fSmrg	tex.resource_id = tex.sampler_id;
961301e04c3fSmrg	tex.resource_index_mode = sampler_index_mode;
961401e04c3fSmrg	tex.src_sel_x = 4;
961501e04c3fSmrg	tex.src_sel_y = 4;
961601e04c3fSmrg	tex.src_sel_z = 4;
961701e04c3fSmrg	tex.src_sel_w = 4;
961801e04c3fSmrg	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
961901e04c3fSmrg	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
962001e04c3fSmrg	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
962101e04c3fSmrg	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
962201e04c3fSmrg	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
9623af69d88dSmrg	r = r600_bytecode_add_tex(ctx->bc, &tex);
96243464ebd5Sriastradh	if (r)
96253464ebd5Sriastradh		return r;
96263464ebd5Sriastradh
96273464ebd5Sriastradh	return 0;
96283464ebd5Sriastradh}
96293464ebd5Sriastradh
96303464ebd5Sriastradhstatic int tgsi_lrp(struct r600_shader_ctx *ctx)
96313464ebd5Sriastradh{
96323464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9633af69d88dSmrg	struct r600_bytecode_alu alu;
963401e04c3fSmrg	unsigned lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
963501e04c3fSmrg	struct r600_bytecode_alu_src srcs[2][4];
96363464ebd5Sriastradh	unsigned i;
96373464ebd5Sriastradh	int r;
96383464ebd5Sriastradh
96393464ebd5Sriastradh	/* optimize if it's just an equal balance */
96403464ebd5Sriastradh	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
96413464ebd5Sriastradh		for (i = 0; i < lasti + 1; i++) {
96423464ebd5Sriastradh			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
96433464ebd5Sriastradh				continue;
96443464ebd5Sriastradh
9645af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9646af69d88dSmrg			alu.op = ALU_OP2_ADD;
9647af69d88dSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
9648af69d88dSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
96493464ebd5Sriastradh			alu.omod = 3;
96503464ebd5Sriastradh			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
96513464ebd5Sriastradh			alu.dst.chan = i;
96523464ebd5Sriastradh			if (i == lasti) {
96533464ebd5Sriastradh				alu.last = 1;
96543464ebd5Sriastradh			}
9655af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
96563464ebd5Sriastradh			if (r)
96573464ebd5Sriastradh				return r;
96583464ebd5Sriastradh		}
96593464ebd5Sriastradh		return 0;
96603464ebd5Sriastradh	}
96613464ebd5Sriastradh
96623464ebd5Sriastradh	/* 1 - src0 */
96633464ebd5Sriastradh	for (i = 0; i < lasti + 1; i++) {
96643464ebd5Sriastradh		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
96653464ebd5Sriastradh			continue;
96663464ebd5Sriastradh
9667af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9668af69d88dSmrg		alu.op = ALU_OP2_ADD;
96693464ebd5Sriastradh		alu.src[0].sel = V_SQ_ALU_SRC_1;
96703464ebd5Sriastradh		alu.src[0].chan = 0;
9671af69d88dSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
9672af69d88dSmrg		r600_bytecode_src_toggle_neg(&alu.src[1]);
96733464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
96743464ebd5Sriastradh		alu.dst.chan = i;
96753464ebd5Sriastradh		if (i == lasti) {
96763464ebd5Sriastradh			alu.last = 1;
96773464ebd5Sriastradh		}
96783464ebd5Sriastradh		alu.dst.write = 1;
9679af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
96803464ebd5Sriastradh		if (r)
96813464ebd5Sriastradh			return r;
96823464ebd5Sriastradh	}
96833464ebd5Sriastradh
96843464ebd5Sriastradh	/* (1 - src0) * src2 */
96853464ebd5Sriastradh	for (i = 0; i < lasti + 1; i++) {
96863464ebd5Sriastradh		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
96873464ebd5Sriastradh			continue;
96883464ebd5Sriastradh
9689af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
9690af69d88dSmrg		alu.op = ALU_OP2_MUL;
96913464ebd5Sriastradh		alu.src[0].sel = ctx->temp_reg;
96923464ebd5Sriastradh		alu.src[0].chan = i;
9693af69d88dSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
96943464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
96953464ebd5Sriastradh		alu.dst.chan = i;
96963464ebd5Sriastradh		if (i == lasti) {
96973464ebd5Sriastradh			alu.last = 1;
96983464ebd5Sriastradh		}
96993464ebd5Sriastradh		alu.dst.write = 1;
9700af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
97013464ebd5Sriastradh		if (r)
97023464ebd5Sriastradh			return r;
97033464ebd5Sriastradh	}
97043464ebd5Sriastradh
97053464ebd5Sriastradh	/* src0 * src1 + (1 - src0) * src2 */
970601e04c3fSmrg
970701e04c3fSmrg	for (i = 0; i < 2; i++) {
970801e04c3fSmrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
970901e04c3fSmrg					  srcs[i], &ctx->src[i]);
971001e04c3fSmrg		if (r)
971101e04c3fSmrg			return r;
971201e04c3fSmrg	}
971301e04c3fSmrg
971401e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
971501e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
971601e04c3fSmrg			continue;
971701e04c3fSmrg
971801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
971901e04c3fSmrg		alu.op = ALU_OP3_MULADD;
972001e04c3fSmrg		alu.is_op3 = 1;
972101e04c3fSmrg		alu.src[0] = srcs[0][i];
972201e04c3fSmrg		alu.src[1] = srcs[1][i];
972301e04c3fSmrg		alu.src[2].sel = ctx->temp_reg;
972401e04c3fSmrg		alu.src[2].chan = i;
972501e04c3fSmrg
972601e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
972701e04c3fSmrg		alu.dst.chan = i;
972801e04c3fSmrg		if (i == lasti) {
972901e04c3fSmrg			alu.last = 1;
973001e04c3fSmrg		}
973101e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
973201e04c3fSmrg		if (r)
973301e04c3fSmrg			return r;
973401e04c3fSmrg	}
973501e04c3fSmrg	return 0;
973601e04c3fSmrg}
973701e04c3fSmrg
973801e04c3fSmrgstatic int tgsi_cmp(struct r600_shader_ctx *ctx)
973901e04c3fSmrg{
974001e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
974101e04c3fSmrg	struct r600_bytecode_alu alu;
974201e04c3fSmrg	int i, r, j;
974301e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
974401e04c3fSmrg	struct r600_bytecode_alu_src srcs[3][4];
974501e04c3fSmrg
974601e04c3fSmrg	unsigned op;
974701e04c3fSmrg
974801e04c3fSmrg	if (ctx->src[0].abs && ctx->src[0].neg) {
974901e04c3fSmrg		op = ALU_OP3_CNDE;
975001e04c3fSmrg		ctx->src[0].abs = 0;
975101e04c3fSmrg		ctx->src[0].neg = 0;
975201e04c3fSmrg	} else {
975301e04c3fSmrg		op = ALU_OP3_CNDGE;
975401e04c3fSmrg	}
975501e04c3fSmrg
975601e04c3fSmrg	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
975701e04c3fSmrg		r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
975801e04c3fSmrg					  srcs[j], &ctx->src[j]);
975901e04c3fSmrg		if (r)
976001e04c3fSmrg			return r;
976101e04c3fSmrg	}
976201e04c3fSmrg
976301e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
976401e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
976501e04c3fSmrg			continue;
976601e04c3fSmrg
976701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
976801e04c3fSmrg		alu.op = op;
976901e04c3fSmrg		alu.src[0] = srcs[0][i];
977001e04c3fSmrg		alu.src[1] = srcs[2][i];
977101e04c3fSmrg		alu.src[2] = srcs[1][i];
977201e04c3fSmrg
977301e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
977401e04c3fSmrg		alu.dst.chan = i;
977501e04c3fSmrg		alu.dst.write = 1;
977601e04c3fSmrg		alu.is_op3 = 1;
977701e04c3fSmrg		if (i == lasti)
977801e04c3fSmrg			alu.last = 1;
977901e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
978001e04c3fSmrg		if (r)
978101e04c3fSmrg			return r;
978201e04c3fSmrg	}
978301e04c3fSmrg	return 0;
978401e04c3fSmrg}
978501e04c3fSmrg
978601e04c3fSmrgstatic int tgsi_ucmp(struct r600_shader_ctx *ctx)
978701e04c3fSmrg{
978801e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
978901e04c3fSmrg	struct r600_bytecode_alu alu;
979001e04c3fSmrg	int i, r;
979101e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
979201e04c3fSmrg
97933464ebd5Sriastradh	for (i = 0; i < lasti + 1; i++) {
97943464ebd5Sriastradh		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
97953464ebd5Sriastradh			continue;
97963464ebd5Sriastradh
979701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
979801e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
979901e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
980001e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
980101e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
980201e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
980301e04c3fSmrg		alu.dst.chan = i;
980401e04c3fSmrg		alu.dst.write = 1;
980501e04c3fSmrg		alu.is_op3 = 1;
980601e04c3fSmrg		if (i == lasti)
980701e04c3fSmrg			alu.last = 1;
980801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
980901e04c3fSmrg		if (r)
981001e04c3fSmrg			return r;
981101e04c3fSmrg	}
981201e04c3fSmrg	return 0;
981301e04c3fSmrg}
981401e04c3fSmrg
981501e04c3fSmrgstatic int tgsi_exp(struct r600_shader_ctx *ctx)
981601e04c3fSmrg{
981701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
981801e04c3fSmrg	struct r600_bytecode_alu alu;
981901e04c3fSmrg	int r;
982001e04c3fSmrg	unsigned i;
982101e04c3fSmrg
982201e04c3fSmrg	/* result.x = 2^floor(src); */
982301e04c3fSmrg	if (inst->Dst[0].Register.WriteMask & 1) {
982401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
982501e04c3fSmrg
982601e04c3fSmrg		alu.op = ALU_OP1_FLOOR;
982701e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
982801e04c3fSmrg
982901e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
983001e04c3fSmrg		alu.dst.chan = 0;
983101e04c3fSmrg		alu.dst.write = 1;
983201e04c3fSmrg		alu.last = 1;
983301e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
983401e04c3fSmrg		if (r)
983501e04c3fSmrg			return r;
983601e04c3fSmrg
983701e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
983801e04c3fSmrg			for (i = 0; i < 3; i++) {
983901e04c3fSmrg				alu.op = ALU_OP1_EXP_IEEE;
984001e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
984101e04c3fSmrg				alu.src[0].chan = 0;
984201e04c3fSmrg
984301e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
984401e04c3fSmrg				alu.dst.chan = i;
984501e04c3fSmrg				alu.dst.write = i == 0;
984601e04c3fSmrg				alu.last = i == 2;
984701e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
984801e04c3fSmrg				if (r)
984901e04c3fSmrg					return r;
985001e04c3fSmrg			}
985101e04c3fSmrg		} else {
985201e04c3fSmrg			alu.op = ALU_OP1_EXP_IEEE;
985301e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
985401e04c3fSmrg			alu.src[0].chan = 0;
985501e04c3fSmrg
985601e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
985701e04c3fSmrg			alu.dst.chan = 0;
985801e04c3fSmrg			alu.dst.write = 1;
985901e04c3fSmrg			alu.last = 1;
986001e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
986101e04c3fSmrg			if (r)
986201e04c3fSmrg				return r;
986301e04c3fSmrg		}
986401e04c3fSmrg	}
986501e04c3fSmrg
986601e04c3fSmrg	/* result.y = tmp - floor(tmp); */
986701e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
986801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
986901e04c3fSmrg
987001e04c3fSmrg		alu.op = ALU_OP1_FRACT;
987101e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
987201e04c3fSmrg
987301e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
987401e04c3fSmrg#if 0
987501e04c3fSmrg		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
987601e04c3fSmrg		if (r)
987701e04c3fSmrg			return r;
987801e04c3fSmrg#endif
987901e04c3fSmrg		alu.dst.write = 1;
988001e04c3fSmrg		alu.dst.chan = 1;
988101e04c3fSmrg
988201e04c3fSmrg		alu.last = 1;
98833464ebd5Sriastradh
9884af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
98853464ebd5Sriastradh		if (r)
98863464ebd5Sriastradh			return r;
98873464ebd5Sriastradh	}
98883464ebd5Sriastradh
988901e04c3fSmrg	/* result.z = RoughApprox2ToX(tmp);*/
989001e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
989101e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
989201e04c3fSmrg			for (i = 0; i < 3; i++) {
989301e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
989401e04c3fSmrg				alu.op = ALU_OP1_EXP_IEEE;
989501e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
9896af69d88dSmrg
989701e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
989801e04c3fSmrg				alu.dst.chan = i;
989901e04c3fSmrg				if (i == 2) {
990001e04c3fSmrg					alu.dst.write = 1;
990101e04c3fSmrg					alu.last = 1;
990201e04c3fSmrg				}
990301e04c3fSmrg
990401e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
990501e04c3fSmrg				if (r)
990601e04c3fSmrg					return r;
990701e04c3fSmrg			}
990801e04c3fSmrg		} else {
990901e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
991001e04c3fSmrg			alu.op = ALU_OP1_EXP_IEEE;
991101e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
991201e04c3fSmrg
991301e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
991401e04c3fSmrg			alu.dst.write = 1;
991501e04c3fSmrg			alu.dst.chan = 2;
9916af69d88dSmrg
9917af69d88dSmrg			alu.last = 1;
991801e04c3fSmrg
991901e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
992001e04c3fSmrg			if (r)
992101e04c3fSmrg				return r;
992201e04c3fSmrg		}
9923af69d88dSmrg	}
9924af69d88dSmrg
992501e04c3fSmrg	/* result.w = 1.0;*/
992601e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
992701e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
99283464ebd5Sriastradh
992901e04c3fSmrg		alu.op = ALU_OP1_MOV;
993001e04c3fSmrg		alu.src[0].sel = V_SQ_ALU_SRC_1;
993101e04c3fSmrg		alu.src[0].chan = 0;
99323464ebd5Sriastradh
993301e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
993401e04c3fSmrg		alu.dst.chan = 3;
99353464ebd5Sriastradh		alu.dst.write = 1;
993601e04c3fSmrg		alu.last = 1;
9937af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
99383464ebd5Sriastradh		if (r)
99393464ebd5Sriastradh			return r;
99403464ebd5Sriastradh	}
994101e04c3fSmrg	return tgsi_helper_copy(ctx, inst);
99423464ebd5Sriastradh}
99433464ebd5Sriastradh
994401e04c3fSmrgstatic int tgsi_log(struct r600_shader_ctx *ctx)
99453464ebd5Sriastradh{
99463464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
9947af69d88dSmrg	struct r600_bytecode_alu alu;
994801e04c3fSmrg	int r;
994901e04c3fSmrg	unsigned i;
99503464ebd5Sriastradh
995101e04c3fSmrg	/* result.x = floor(log2(|src|)); */
995201e04c3fSmrg	if (inst->Dst[0].Register.WriteMask & 1) {
995301e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
995401e04c3fSmrg			for (i = 0; i < 3; i++) {
995501e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
995601e04c3fSmrg
995701e04c3fSmrg				alu.op = ALU_OP1_LOG_IEEE;
995801e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
995901e04c3fSmrg				r600_bytecode_src_set_abs(&alu.src[0]);
996001e04c3fSmrg
996101e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
996201e04c3fSmrg				alu.dst.chan = i;
996301e04c3fSmrg				if (i == 0)
996401e04c3fSmrg					alu.dst.write = 1;
996501e04c3fSmrg				if (i == 2)
996601e04c3fSmrg					alu.last = 1;
996701e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
996801e04c3fSmrg				if (r)
996901e04c3fSmrg					return r;
997001e04c3fSmrg			}
99713464ebd5Sriastradh
99723464ebd5Sriastradh		} else {
997301e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
997401e04c3fSmrg
997501e04c3fSmrg			alu.op = ALU_OP1_LOG_IEEE;
997601e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
997701e04c3fSmrg			r600_bytecode_src_set_abs(&alu.src[0]);
997801e04c3fSmrg
997901e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
998001e04c3fSmrg			alu.dst.chan = 0;
998101e04c3fSmrg			alu.dst.write = 1;
998201e04c3fSmrg			alu.last = 1;
998301e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
998401e04c3fSmrg			if (r)
998501e04c3fSmrg				return r;
99863464ebd5Sriastradh		}
99873464ebd5Sriastradh
998801e04c3fSmrg		alu.op = ALU_OP1_FLOOR;
998901e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
999001e04c3fSmrg		alu.src[0].chan = 0;
999101e04c3fSmrg
99923464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
999301e04c3fSmrg		alu.dst.chan = 0;
99943464ebd5Sriastradh		alu.dst.write = 1;
999501e04c3fSmrg		alu.last = 1;
99963464ebd5Sriastradh
9997af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
99983464ebd5Sriastradh		if (r)
99993464ebd5Sriastradh			return r;
100003464ebd5Sriastradh	}
100013464ebd5Sriastradh
1000201e04c3fSmrg	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
1000301e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
100043464ebd5Sriastradh
1000501e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
1000601e04c3fSmrg			for (i = 0; i < 3; i++) {
1000701e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1000801e04c3fSmrg
1000901e04c3fSmrg				alu.op = ALU_OP1_LOG_IEEE;
1001001e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1001101e04c3fSmrg				r600_bytecode_src_set_abs(&alu.src[0]);
1001201e04c3fSmrg
1001301e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
1001401e04c3fSmrg				alu.dst.chan = i;
1001501e04c3fSmrg				if (i == 1)
1001601e04c3fSmrg					alu.dst.write = 1;
1001701e04c3fSmrg				if (i == 2)
1001801e04c3fSmrg					alu.last = 1;
1001901e04c3fSmrg
1002001e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
1002101e04c3fSmrg				if (r)
1002201e04c3fSmrg					return r;
1002301e04c3fSmrg			}
100243464ebd5Sriastradh		} else {
1002501e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
100263464ebd5Sriastradh
1002701e04c3fSmrg			alu.op = ALU_OP1_LOG_IEEE;
1002801e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1002901e04c3fSmrg			r600_bytecode_src_set_abs(&alu.src[0]);
100303464ebd5Sriastradh
100313464ebd5Sriastradh			alu.dst.sel = ctx->temp_reg;
1003201e04c3fSmrg			alu.dst.chan = 1;
1003301e04c3fSmrg			alu.dst.write = 1;
100343464ebd5Sriastradh			alu.last = 1;
100353464ebd5Sriastradh
1003601e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
1003701e04c3fSmrg			if (r)
1003801e04c3fSmrg				return r;
1003901e04c3fSmrg		}
100403464ebd5Sriastradh
10041af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
100423464ebd5Sriastradh
10043af69d88dSmrg		alu.op = ALU_OP1_FLOOR;
1004401e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
1004501e04c3fSmrg		alu.src[0].chan = 1;
100463464ebd5Sriastradh
100473464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
1004801e04c3fSmrg		alu.dst.chan = 1;
100493464ebd5Sriastradh		alu.dst.write = 1;
100503464ebd5Sriastradh		alu.last = 1;
1005101e04c3fSmrg
10052af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
100533464ebd5Sriastradh		if (r)
100543464ebd5Sriastradh			return r;
100553464ebd5Sriastradh
10056af69d88dSmrg		if (ctx->bc->chip_class == CAYMAN) {
100573464ebd5Sriastradh			for (i = 0; i < 3; i++) {
1005801e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1005901e04c3fSmrg				alu.op = ALU_OP1_EXP_IEEE;
1006001e04c3fSmrg				alu.src[0].sel = ctx->temp_reg;
1006101e04c3fSmrg				alu.src[0].chan = 1;
1006201e04c3fSmrg
1006301e04c3fSmrg				alu.dst.sel = ctx->temp_reg;
1006401e04c3fSmrg				alu.dst.chan = i;
1006501e04c3fSmrg				if (i == 1)
1006601e04c3fSmrg					alu.dst.write = 1;
1006701e04c3fSmrg				if (i == 2)
1006801e04c3fSmrg					alu.last = 1;
1006901e04c3fSmrg
1007001e04c3fSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
1007101e04c3fSmrg				if (r)
1007201e04c3fSmrg					return r;
1007301e04c3fSmrg			}
1007401e04c3fSmrg		} else {
1007501e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1007601e04c3fSmrg			alu.op = ALU_OP1_EXP_IEEE;
1007701e04c3fSmrg			alu.src[0].sel = ctx->temp_reg;
1007801e04c3fSmrg			alu.src[0].chan = 1;
1007901e04c3fSmrg
1008001e04c3fSmrg			alu.dst.sel = ctx->temp_reg;
1008101e04c3fSmrg			alu.dst.chan = 1;
1008201e04c3fSmrg			alu.dst.write = 1;
1008301e04c3fSmrg			alu.last = 1;
1008401e04c3fSmrg
1008501e04c3fSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
1008601e04c3fSmrg			if (r)
1008701e04c3fSmrg				return r;
1008801e04c3fSmrg		}
1008901e04c3fSmrg
1009001e04c3fSmrg		if (ctx->bc->chip_class == CAYMAN) {
1009101e04c3fSmrg			for (i = 0; i < 3; i++) {
1009201e04c3fSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1009301e04c3fSmrg				alu.op = ALU_OP1_RECIP_IEEE;
100943464ebd5Sriastradh				alu.src[0].sel = ctx->temp_reg;
1009501e04c3fSmrg				alu.src[0].chan = 1;
100963464ebd5Sriastradh
100973464ebd5Sriastradh				alu.dst.sel = ctx->temp_reg;
100983464ebd5Sriastradh				alu.dst.chan = i;
1009901e04c3fSmrg				if (i == 1)
1010001e04c3fSmrg					alu.dst.write = 1;
1010101e04c3fSmrg				if (i == 2)
1010201e04c3fSmrg					alu.last = 1;
1010301e04c3fSmrg
10104af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
101053464ebd5Sriastradh				if (r)
101063464ebd5Sriastradh					return r;
101073464ebd5Sriastradh			}
101083464ebd5Sriastradh		} else {
1010901e04c3fSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1011001e04c3fSmrg			alu.op = ALU_OP1_RECIP_IEEE;
101113464ebd5Sriastradh			alu.src[0].sel = ctx->temp_reg;
1011201e04c3fSmrg			alu.src[0].chan = 1;
101133464ebd5Sriastradh
101143464ebd5Sriastradh			alu.dst.sel = ctx->temp_reg;
1011501e04c3fSmrg			alu.dst.chan = 1;
101163464ebd5Sriastradh			alu.dst.write = 1;
101173464ebd5Sriastradh			alu.last = 1;
1011801e04c3fSmrg
10119af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
101203464ebd5Sriastradh			if (r)
101213464ebd5Sriastradh				return r;
101223464ebd5Sriastradh		}
101233464ebd5Sriastradh
10124af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
101253464ebd5Sriastradh
1012601e04c3fSmrg		alu.op = ALU_OP2_MUL;
1012701e04c3fSmrg
10128af69d88dSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1012901e04c3fSmrg		r600_bytecode_src_set_abs(&alu.src[0]);
1013001e04c3fSmrg
1013101e04c3fSmrg		alu.src[1].sel = ctx->temp_reg;
1013201e04c3fSmrg		alu.src[1].chan = 1;
101333464ebd5Sriastradh
101343464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
101353464ebd5Sriastradh		alu.dst.chan = 1;
1013601e04c3fSmrg		alu.dst.write = 1;
101373464ebd5Sriastradh		alu.last = 1;
101383464ebd5Sriastradh
10139af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
101403464ebd5Sriastradh		if (r)
101413464ebd5Sriastradh			return r;
101423464ebd5Sriastradh	}
101433464ebd5Sriastradh
1014401e04c3fSmrg	/* result.z = log2(|src|);*/
1014501e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
10146af69d88dSmrg		if (ctx->bc->chip_class == CAYMAN) {
101473464ebd5Sriastradh			for (i = 0; i < 3; i++) {
10148af69d88dSmrg				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1014901e04c3fSmrg
1015001e04c3fSmrg				alu.op = ALU_OP1_LOG_IEEE;
10151af69d88dSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1015201e04c3fSmrg				r600_bytecode_src_set_abs(&alu.src[0]);
101533464ebd5Sriastradh
101543464ebd5Sriastradh				alu.dst.sel = ctx->temp_reg;
1015501e04c3fSmrg				if (i == 2)
101563464ebd5Sriastradh					alu.dst.write = 1;
1015701e04c3fSmrg				alu.dst.chan = i;
1015801e04c3fSmrg				if (i == 2)
101593464ebd5Sriastradh					alu.last = 1;
101603464ebd5Sriastradh
10161af69d88dSmrg				r = r600_bytecode_add_alu(ctx->bc, &alu);
101623464ebd5Sriastradh				if (r)
101633464ebd5Sriastradh					return r;
101643464ebd5Sriastradh			}
101653464ebd5Sriastradh		} else {
10166af69d88dSmrg			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1016701e04c3fSmrg
1016801e04c3fSmrg			alu.op = ALU_OP1_LOG_IEEE;
10169af69d88dSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1017001e04c3fSmrg			r600_bytecode_src_set_abs(&alu.src[0]);
101713464ebd5Sriastradh
101723464ebd5Sriastradh			alu.dst.sel = ctx->temp_reg;
101733464ebd5Sriastradh			alu.dst.write = 1;
101743464ebd5Sriastradh			alu.dst.chan = 2;
101753464ebd5Sriastradh			alu.last = 1;
101763464ebd5Sriastradh
10177af69d88dSmrg			r = r600_bytecode_add_alu(ctx->bc, &alu);
101783464ebd5Sriastradh			if (r)
101793464ebd5Sriastradh				return r;
101803464ebd5Sriastradh		}
101813464ebd5Sriastradh	}
101823464ebd5Sriastradh
1018301e04c3fSmrg	/* result.w = 1.0; */
1018401e04c3fSmrg	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
10185af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
101863464ebd5Sriastradh
10187af69d88dSmrg		alu.op = ALU_OP1_MOV;
101883464ebd5Sriastradh		alu.src[0].sel = V_SQ_ALU_SRC_1;
101893464ebd5Sriastradh		alu.src[0].chan = 0;
101903464ebd5Sriastradh
101913464ebd5Sriastradh		alu.dst.sel = ctx->temp_reg;
101923464ebd5Sriastradh		alu.dst.chan = 3;
101933464ebd5Sriastradh		alu.dst.write = 1;
101943464ebd5Sriastradh		alu.last = 1;
1019501e04c3fSmrg
10196af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
101973464ebd5Sriastradh		if (r)
101983464ebd5Sriastradh			return r;
101993464ebd5Sriastradh	}
1020001e04c3fSmrg
102013464ebd5Sriastradh	return tgsi_helper_copy(ctx, inst);
102023464ebd5Sriastradh}
102033464ebd5Sriastradh
1020401e04c3fSmrgstatic int tgsi_eg_arl(struct r600_shader_ctx *ctx)
102053464ebd5Sriastradh{
102063464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10207af69d88dSmrg	struct r600_bytecode_alu alu;
102083464ebd5Sriastradh	int r;
1020901e04c3fSmrg	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1021001e04c3fSmrg	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
102113464ebd5Sriastradh
1021201e04c3fSmrg	assert(inst->Dst[0].Register.Index < 3);
1021301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
102143464ebd5Sriastradh
1021501e04c3fSmrg	switch (inst->Instruction.Opcode) {
1021601e04c3fSmrg	case TGSI_OPCODE_ARL:
1021701e04c3fSmrg		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
1021801e04c3fSmrg		break;
1021901e04c3fSmrg	case TGSI_OPCODE_ARR:
1022001e04c3fSmrg		alu.op = ALU_OP1_FLT_TO_INT;
1022101e04c3fSmrg		break;
1022201e04c3fSmrg	case TGSI_OPCODE_UARL:
1022301e04c3fSmrg		alu.op = ALU_OP1_MOV;
1022401e04c3fSmrg		break;
1022501e04c3fSmrg	default:
1022601e04c3fSmrg		assert(0);
1022701e04c3fSmrg		return -1;
1022801e04c3fSmrg	}
1022901e04c3fSmrg
1023001e04c3fSmrg	for (i = 0; i <= lasti; ++i) {
1023101e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1023201e04c3fSmrg			continue;
1023301e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1023401e04c3fSmrg		alu.last = i == lasti;
1023501e04c3fSmrg		alu.dst.sel = reg;
1023601e04c3fSmrg	        alu.dst.chan = i;
1023701e04c3fSmrg		alu.dst.write = 1;
1023801e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1023901e04c3fSmrg		if (r)
1024001e04c3fSmrg			return r;
1024101e04c3fSmrg	}
1024201e04c3fSmrg
1024301e04c3fSmrg	if (inst->Dst[0].Register.Index > 0)
1024401e04c3fSmrg		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
1024501e04c3fSmrg	else
1024601e04c3fSmrg		ctx->bc->ar_loaded = 0;
1024701e04c3fSmrg
1024801e04c3fSmrg	return 0;
1024901e04c3fSmrg}
1025001e04c3fSmrgstatic int tgsi_r600_arl(struct r600_shader_ctx *ctx)
1025101e04c3fSmrg{
1025201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1025301e04c3fSmrg	struct r600_bytecode_alu alu;
1025401e04c3fSmrg	int r;
1025501e04c3fSmrg	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1025601e04c3fSmrg
1025701e04c3fSmrg	switch (inst->Instruction.Opcode) {
1025801e04c3fSmrg	case TGSI_OPCODE_ARL:
1025901e04c3fSmrg		memset(&alu, 0, sizeof(alu));
1026001e04c3fSmrg		alu.op = ALU_OP1_FLOOR;
1026101e04c3fSmrg		alu.dst.sel = ctx->bc->ar_reg;
1026201e04c3fSmrg		alu.dst.write = 1;
1026301e04c3fSmrg		for (i = 0; i <= lasti; ++i) {
1026401e04c3fSmrg			if (inst->Dst[0].Register.WriteMask & (1 << i))  {
102653464ebd5Sriastradh				alu.dst.chan = i;
1026601e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1026701e04c3fSmrg				alu.last = i == lasti;
1026801e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1026901e04c3fSmrg					return r;
1027001e04c3fSmrg			}
1027101e04c3fSmrg		}
1027201e04c3fSmrg
1027301e04c3fSmrg		memset(&alu, 0, sizeof(alu));
1027401e04c3fSmrg		alu.op = ALU_OP1_FLT_TO_INT;
1027501e04c3fSmrg		alu.src[0].sel = ctx->bc->ar_reg;
1027601e04c3fSmrg		alu.dst.sel = ctx->bc->ar_reg;
1027701e04c3fSmrg		alu.dst.write = 1;
1027801e04c3fSmrg		/* FLT_TO_INT is trans-only on r600/r700 */
1027901e04c3fSmrg		alu.last = TRUE;
1028001e04c3fSmrg		for (i = 0; i <= lasti; ++i) {
1028101e04c3fSmrg			alu.dst.chan = i;
1028201e04c3fSmrg			alu.src[0].chan = i;
1028301e04c3fSmrg			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1028401e04c3fSmrg				return r;
1028501e04c3fSmrg		}
1028601e04c3fSmrg		break;
1028701e04c3fSmrg	case TGSI_OPCODE_ARR:
1028801e04c3fSmrg		memset(&alu, 0, sizeof(alu));
1028901e04c3fSmrg		alu.op = ALU_OP1_FLT_TO_INT;
1029001e04c3fSmrg		alu.dst.sel = ctx->bc->ar_reg;
1029101e04c3fSmrg		alu.dst.write = 1;
1029201e04c3fSmrg		/* FLT_TO_INT is trans-only on r600/r700 */
1029301e04c3fSmrg		alu.last = TRUE;
1029401e04c3fSmrg		for (i = 0; i <= lasti; ++i) {
1029501e04c3fSmrg			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1029601e04c3fSmrg				alu.dst.chan = i;
1029701e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1029801e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
102993464ebd5Sriastradh					return r;
103003464ebd5Sriastradh			}
1030101e04c3fSmrg		}
1030201e04c3fSmrg		break;
1030301e04c3fSmrg	case TGSI_OPCODE_UARL:
1030401e04c3fSmrg		memset(&alu, 0, sizeof(alu));
1030501e04c3fSmrg		alu.op = ALU_OP1_MOV;
1030601e04c3fSmrg		alu.dst.sel = ctx->bc->ar_reg;
1030701e04c3fSmrg		alu.dst.write = 1;
1030801e04c3fSmrg		for (i = 0; i <= lasti; ++i) {
1030901e04c3fSmrg			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1031001e04c3fSmrg				alu.dst.chan = i;
1031101e04c3fSmrg				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1031201e04c3fSmrg				alu.last = i == lasti;
1031301e04c3fSmrg				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1031401e04c3fSmrg					return r;
1031501e04c3fSmrg			}
1031601e04c3fSmrg		}
1031701e04c3fSmrg		break;
1031801e04c3fSmrg	default:
1031901e04c3fSmrg		assert(0);
1032001e04c3fSmrg		return -1;
1032101e04c3fSmrg	}
1032201e04c3fSmrg
1032301e04c3fSmrg	ctx->bc->ar_loaded = 0;
1032401e04c3fSmrg	return 0;
1032501e04c3fSmrg}
1032601e04c3fSmrg
1032701e04c3fSmrgstatic int tgsi_opdst(struct r600_shader_ctx *ctx)
1032801e04c3fSmrg{
1032901e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1033001e04c3fSmrg	struct r600_bytecode_alu alu;
1033101e04c3fSmrg	int i, r = 0;
1033201e04c3fSmrg
1033301e04c3fSmrg	for (i = 0; i < 4; i++) {
1033401e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1033501e04c3fSmrg
1033601e04c3fSmrg		alu.op = ALU_OP2_MUL;
1033701e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1033801e04c3fSmrg
1033901e04c3fSmrg		if (i == 0 || i == 3) {
1034001e04c3fSmrg			alu.src[0].sel = V_SQ_ALU_SRC_1;
1034101e04c3fSmrg		} else {
1034201e04c3fSmrg			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1034301e04c3fSmrg		}
1034401e04c3fSmrg
1034501e04c3fSmrg		if (i == 0 || i == 2) {
1034601e04c3fSmrg			alu.src[1].sel = V_SQ_ALU_SRC_1;
1034701e04c3fSmrg		} else {
1034801e04c3fSmrg			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
1034901e04c3fSmrg		}
1035001e04c3fSmrg		if (i == 3)
1035101e04c3fSmrg			alu.last = 1;
1035201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1035301e04c3fSmrg		if (r)
1035401e04c3fSmrg			return r;
1035501e04c3fSmrg	}
1035601e04c3fSmrg	return 0;
1035701e04c3fSmrg}
1035801e04c3fSmrg
1035901e04c3fSmrgstatic int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type,
1036001e04c3fSmrg			   struct r600_bytecode_alu_src *src)
1036101e04c3fSmrg{
1036201e04c3fSmrg	struct r600_bytecode_alu alu;
1036301e04c3fSmrg	int r;
1036401e04c3fSmrg
1036501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1036601e04c3fSmrg	alu.op = opcode;
1036701e04c3fSmrg	alu.execute_mask = 1;
1036801e04c3fSmrg	alu.update_pred = 1;
1036901e04c3fSmrg
1037001e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
1037101e04c3fSmrg	alu.dst.write = 1;
1037201e04c3fSmrg	alu.dst.chan = 0;
1037301e04c3fSmrg
1037401e04c3fSmrg	alu.src[0] = *src;
1037501e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_0;
1037601e04c3fSmrg	alu.src[1].chan = 0;
1037701e04c3fSmrg
1037801e04c3fSmrg	alu.last = 1;
1037901e04c3fSmrg
1038001e04c3fSmrg	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
1038101e04c3fSmrg	if (r)
1038201e04c3fSmrg		return r;
1038301e04c3fSmrg	return 0;
1038401e04c3fSmrg}
1038501e04c3fSmrg
1038601e04c3fSmrgstatic int pops(struct r600_shader_ctx *ctx, int pops)
1038701e04c3fSmrg{
1038801e04c3fSmrg	unsigned force_pop = ctx->bc->force_add_cf;
103893464ebd5Sriastradh
1039001e04c3fSmrg	if (!force_pop) {
1039101e04c3fSmrg		int alu_pop = 3;
1039201e04c3fSmrg		if (ctx->bc->cf_last) {
1039301e04c3fSmrg			if (ctx->bc->cf_last->op == CF_OP_ALU)
1039401e04c3fSmrg				alu_pop = 0;
1039501e04c3fSmrg			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
1039601e04c3fSmrg				alu_pop = 1;
1039701e04c3fSmrg		}
1039801e04c3fSmrg		alu_pop += pops;
1039901e04c3fSmrg		if (alu_pop == 1) {
1040001e04c3fSmrg			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
1040101e04c3fSmrg			ctx->bc->force_add_cf = 1;
1040201e04c3fSmrg		} else if (alu_pop == 2) {
1040301e04c3fSmrg			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
1040401e04c3fSmrg			ctx->bc->force_add_cf = 1;
104053464ebd5Sriastradh		} else {
1040601e04c3fSmrg			force_pop = 1;
104073464ebd5Sriastradh		}
1040801e04c3fSmrg	}
104093464ebd5Sriastradh
1041001e04c3fSmrg	if (force_pop) {
1041101e04c3fSmrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
1041201e04c3fSmrg		ctx->bc->cf_last->pop_count = pops;
1041301e04c3fSmrg		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
104143464ebd5Sriastradh	}
104153464ebd5Sriastradh
1041601e04c3fSmrg	return 0;
1041701e04c3fSmrg}
104183464ebd5Sriastradh
1041901e04c3fSmrgstatic inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
1042001e04c3fSmrg                                              unsigned reason)
1042101e04c3fSmrg{
1042201e04c3fSmrg	struct r600_stack_info *stack = &ctx->bc->stack;
1042301e04c3fSmrg	unsigned elements;
1042401e04c3fSmrg	int entries;
104253464ebd5Sriastradh
1042601e04c3fSmrg	unsigned entry_size = stack->entry_size;
104273464ebd5Sriastradh
1042801e04c3fSmrg	elements = (stack->loop + stack->push_wqm ) * entry_size;
1042901e04c3fSmrg	elements += stack->push;
104303464ebd5Sriastradh
1043101e04c3fSmrg	switch (ctx->bc->chip_class) {
1043201e04c3fSmrg	case R600:
1043301e04c3fSmrg	case R700:
1043401e04c3fSmrg		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
1043501e04c3fSmrg		 * the stack must be reserved to hold the current active/continue
1043601e04c3fSmrg		 * masks */
1043701e04c3fSmrg		if (reason == FC_PUSH_VPM || stack->push > 0) {
1043801e04c3fSmrg			elements += 2;
1043901e04c3fSmrg		}
1044001e04c3fSmrg		break;
104413464ebd5Sriastradh
1044201e04c3fSmrg	case CAYMAN:
1044301e04c3fSmrg		/* r9xx: any stack operation on empty stack consumes 2 additional
1044401e04c3fSmrg		 * elements */
1044501e04c3fSmrg		elements += 2;
104463464ebd5Sriastradh
104477ec681f3Smrg		FALLTHROUGH;
1044801e04c3fSmrg		/* FIXME: do the two elements added above cover the cases for the
1044901e04c3fSmrg		 * r8xx+ below? */
1045001e04c3fSmrg
1045101e04c3fSmrg	case EVERGREEN:
1045201e04c3fSmrg		/* r8xx+: 2 extra elements are not always required, but one extra
1045301e04c3fSmrg		 * element must be added for each of the following cases:
1045401e04c3fSmrg		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
1045501e04c3fSmrg		 *    stack usage.
1045601e04c3fSmrg		 *    (Currently we don't use ALU_ELSE_AFTER.)
1045701e04c3fSmrg		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
1045801e04c3fSmrg		 *    PUSH instruction executed.
1045901e04c3fSmrg		 *
1046001e04c3fSmrg		 *    NOTE: it seems we also need to reserve additional element in some
1046101e04c3fSmrg		 *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
1046201e04c3fSmrg		 *    then STACK_SIZE should be 2 instead of 1 */
1046301e04c3fSmrg		if (reason == FC_PUSH_VPM || stack->push > 0) {
1046401e04c3fSmrg			elements += 1;
104653464ebd5Sriastradh		}
1046601e04c3fSmrg		break;
104673464ebd5Sriastradh
1046801e04c3fSmrg	default:
1046901e04c3fSmrg		assert(0);
1047001e04c3fSmrg		break;
1047101e04c3fSmrg	}
104723464ebd5Sriastradh
1047301e04c3fSmrg	/* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
1047401e04c3fSmrg	 * for all chips, so we use 4 in the final formula, not the real entry_size
1047501e04c3fSmrg	 * for the chip */
1047601e04c3fSmrg	entry_size = 4;
104773464ebd5Sriastradh
1047801e04c3fSmrg	entries = (elements + (entry_size - 1)) / entry_size;
104793464ebd5Sriastradh
1048001e04c3fSmrg	if (entries > stack->max_entries)
1048101e04c3fSmrg		stack->max_entries = entries;
1048201e04c3fSmrg	return elements;
1048301e04c3fSmrg}
104843464ebd5Sriastradh
1048501e04c3fSmrgstatic inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
1048601e04c3fSmrg{
1048701e04c3fSmrg	switch(reason) {
1048801e04c3fSmrg	case FC_PUSH_VPM:
1048901e04c3fSmrg		--ctx->bc->stack.push;
1049001e04c3fSmrg		assert(ctx->bc->stack.push >= 0);
1049101e04c3fSmrg		break;
1049201e04c3fSmrg	case FC_PUSH_WQM:
1049301e04c3fSmrg		--ctx->bc->stack.push_wqm;
1049401e04c3fSmrg		assert(ctx->bc->stack.push_wqm >= 0);
1049501e04c3fSmrg		break;
1049601e04c3fSmrg	case FC_LOOP:
1049701e04c3fSmrg		--ctx->bc->stack.loop;
1049801e04c3fSmrg		assert(ctx->bc->stack.loop >= 0);
1049901e04c3fSmrg		break;
1050001e04c3fSmrg	default:
1050101e04c3fSmrg		assert(0);
1050201e04c3fSmrg		break;
1050301e04c3fSmrg	}
1050401e04c3fSmrg}
105053464ebd5Sriastradh
1050601e04c3fSmrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
1050701e04c3fSmrg{
1050801e04c3fSmrg	switch (reason) {
1050901e04c3fSmrg	case FC_PUSH_VPM:
1051001e04c3fSmrg		++ctx->bc->stack.push;
1051101e04c3fSmrg		break;
1051201e04c3fSmrg	case FC_PUSH_WQM:
1051301e04c3fSmrg		++ctx->bc->stack.push_wqm;
1051401e04c3fSmrg		break;
1051501e04c3fSmrg	case FC_LOOP:
1051601e04c3fSmrg		++ctx->bc->stack.loop;
1051701e04c3fSmrg		break;
1051801e04c3fSmrg	default:
1051901e04c3fSmrg		assert(0);
1052001e04c3fSmrg	}
105213464ebd5Sriastradh
1052201e04c3fSmrg	return callstack_update_max_depth(ctx, reason);
1052301e04c3fSmrg}
105243464ebd5Sriastradh
1052501e04c3fSmrgstatic void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
1052601e04c3fSmrg{
1052701e04c3fSmrg	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
105283464ebd5Sriastradh
1052901e04c3fSmrg	sp->mid = realloc((void *)sp->mid,
1053001e04c3fSmrg						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
1053101e04c3fSmrg	sp->mid[sp->num_mid] = ctx->bc->cf_last;
1053201e04c3fSmrg	sp->num_mid++;
1053301e04c3fSmrg}
105343464ebd5Sriastradh
1053501e04c3fSmrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
1053601e04c3fSmrg{
1053701e04c3fSmrg	assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
1053801e04c3fSmrg	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
1053901e04c3fSmrg	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
1054001e04c3fSmrg	ctx->bc->fc_sp++;
1054101e04c3fSmrg}
105423464ebd5Sriastradh
1054301e04c3fSmrgstatic void fc_poplevel(struct r600_shader_ctx *ctx)
1054401e04c3fSmrg{
1054501e04c3fSmrg	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
1054601e04c3fSmrg	free(sp->mid);
1054701e04c3fSmrg	sp->mid = NULL;
1054801e04c3fSmrg	sp->num_mid = 0;
1054901e04c3fSmrg	sp->start = NULL;
1055001e04c3fSmrg	sp->type = 0;
1055101e04c3fSmrg	ctx->bc->fc_sp--;
1055201e04c3fSmrg}
105533464ebd5Sriastradh
#if 0
/*
 * Disabled, unfinished helpers for RETURN / return-inside-loop handling.
 * They are excluded from the build by the surrounding #if 0; several of them
 * are empty stubs.
 */

/* Emit a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN);
	return 0;
}

/* Emit a CF JUMP with the given pop count; the jump target is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

/* Stub: does nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: does nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Sequence: test flag, jump, reset the flag, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, emit the current instruction's CF op with pop_count 1,
 * record it as a mid point of level fc_sp, then pop one stack level. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
106013464ebd5Sriastradh
1060201e04c3fSmrgstatic int emit_if(struct r600_shader_ctx *ctx, int opcode,
1060301e04c3fSmrg		   struct r600_bytecode_alu_src *src)
1060401e04c3fSmrg{
1060501e04c3fSmrg	int alu_type = CF_OP_ALU_PUSH_BEFORE;
1060601e04c3fSmrg	bool needs_workaround = false;
1060701e04c3fSmrg	int elems = callstack_push(ctx, FC_PUSH_VPM);
106083464ebd5Sriastradh
1060901e04c3fSmrg	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
1061001e04c3fSmrg		needs_workaround = true;
106113464ebd5Sriastradh
1061201e04c3fSmrg	if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
1061301e04c3fSmrg		unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
1061401e04c3fSmrg		unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
106153464ebd5Sriastradh
1061601e04c3fSmrg		if (elems && (!dmod1 || !dmod2))
1061701e04c3fSmrg			needs_workaround = true;
1061801e04c3fSmrg	}
106193464ebd5Sriastradh
1062001e04c3fSmrg	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
1062101e04c3fSmrg	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
1062201e04c3fSmrg	 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
1062301e04c3fSmrg	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
1062401e04c3fSmrg	if (needs_workaround) {
1062501e04c3fSmrg		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
1062601e04c3fSmrg		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
1062701e04c3fSmrg		alu_type = CF_OP_ALU;
106283464ebd5Sriastradh	}
106293464ebd5Sriastradh
1063001e04c3fSmrg	emit_logic_pred(ctx, opcode, alu_type, src);
106313464ebd5Sriastradh
1063201e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
106333464ebd5Sriastradh
1063401e04c3fSmrg	fc_pushlevel(ctx, FC_IF);
106353464ebd5Sriastradh
1063601e04c3fSmrg	return 0;
1063701e04c3fSmrg}
106383464ebd5Sriastradh
1063901e04c3fSmrgstatic int tgsi_if(struct r600_shader_ctx *ctx)
1064001e04c3fSmrg{
1064101e04c3fSmrg	struct r600_bytecode_alu_src alu_src;
1064201e04c3fSmrg	r600_bytecode_src(&alu_src, &ctx->src[0], 0);
1064301e04c3fSmrg
1064401e04c3fSmrg	return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src);
106453464ebd5Sriastradh}
106463464ebd5Sriastradh
1064701e04c3fSmrgstatic int tgsi_uif(struct r600_shader_ctx *ctx)
106483464ebd5Sriastradh{
1064901e04c3fSmrg	struct r600_bytecode_alu_src alu_src;
1065001e04c3fSmrg	r600_bytecode_src(&alu_src, &ctx->src[0], 0);
1065101e04c3fSmrg	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1065201e04c3fSmrg}
106533464ebd5Sriastradh
1065401e04c3fSmrgstatic int tgsi_else(struct r600_shader_ctx *ctx)
1065501e04c3fSmrg{
1065601e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
1065701e04c3fSmrg	ctx->bc->cf_last->pop_count = 1;
106583464ebd5Sriastradh
1065901e04c3fSmrg	fc_set_mid(ctx, ctx->bc->fc_sp - 1);
1066001e04c3fSmrg	ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id;
1066101e04c3fSmrg	return 0;
1066201e04c3fSmrg}
1066301e04c3fSmrg
1066401e04c3fSmrgstatic int tgsi_endif(struct r600_shader_ctx *ctx)
1066501e04c3fSmrg{
1066601e04c3fSmrg	int offset = 2;
1066701e04c3fSmrg	pops(ctx, 1);
1066801e04c3fSmrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) {
1066901e04c3fSmrg		R600_ERR("if/endif unbalanced in shader\n");
106703464ebd5Sriastradh		return -1;
106713464ebd5Sriastradh	}
106723464ebd5Sriastradh
1067301e04c3fSmrg	/* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */
1067401e04c3fSmrg	if (ctx->bc->cf_last->eg_alu_extended)
1067501e04c3fSmrg			offset += 2;
1067601e04c3fSmrg
1067701e04c3fSmrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) {
1067801e04c3fSmrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + offset;
1067901e04c3fSmrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1;
1068001e04c3fSmrg	} else {
1068101e04c3fSmrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset;
10682af69d88dSmrg	}
1068301e04c3fSmrg	fc_poplevel(ctx);
106843464ebd5Sriastradh
1068501e04c3fSmrg	callstack_pop(ctx, FC_PUSH_VPM);
106863464ebd5Sriastradh	return 0;
106873464ebd5Sriastradh}
1068801e04c3fSmrg
1068901e04c3fSmrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx)
106903464ebd5Sriastradh{
1069101e04c3fSmrg	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
1069201e04c3fSmrg	 * limited to 4096 iterations, like the other LOOP_* instructions. */
1069301e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
106943464ebd5Sriastradh
1069501e04c3fSmrg	fc_pushlevel(ctx, FC_LOOP);
106963464ebd5Sriastradh
1069701e04c3fSmrg	/* check stack depth */
1069801e04c3fSmrg	callstack_push(ctx, FC_LOOP);
1069901e04c3fSmrg	return 0;
1070001e04c3fSmrg}
1070101e04c3fSmrg
1070201e04c3fSmrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx)
1070301e04c3fSmrg{
1070401e04c3fSmrg	int i;
1070501e04c3fSmrg
1070601e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
1070701e04c3fSmrg
1070801e04c3fSmrg	if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) {
1070901e04c3fSmrg		R600_ERR("loop/endloop in shader code are not paired.\n");
1071001e04c3fSmrg		return -EINVAL;
1071101e04c3fSmrg	}
1071201e04c3fSmrg
1071301e04c3fSmrg	/* fixup loop pointers - from r600isa
1071401e04c3fSmrg	   LOOP END points to CF after LOOP START,
1071501e04c3fSmrg	   LOOP START point to CF after LOOP END
1071601e04c3fSmrg	   BRK/CONT point to LOOP END CF
1071701e04c3fSmrg	*/
1071801e04c3fSmrg	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
1071901e04c3fSmrg
1072001e04c3fSmrg	ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
1072101e04c3fSmrg
1072201e04c3fSmrg	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) {
1072301e04c3fSmrg		ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id;
1072401e04c3fSmrg	}
1072501e04c3fSmrg	/* XXX add LOOPRET support */
1072601e04c3fSmrg	fc_poplevel(ctx);
1072701e04c3fSmrg	callstack_pop(ctx, FC_LOOP);
1072801e04c3fSmrg	return 0;
1072901e04c3fSmrg}
1073001e04c3fSmrg
1073101e04c3fSmrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
1073201e04c3fSmrg{
1073301e04c3fSmrg	unsigned int fscp;
1073401e04c3fSmrg
1073501e04c3fSmrg	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
1073601e04c3fSmrg	{
1073701e04c3fSmrg		if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
1073801e04c3fSmrg			break;
107393464ebd5Sriastradh	}
107403464ebd5Sriastradh
1074101e04c3fSmrg	if (fscp == 0) {
1074201e04c3fSmrg		R600_ERR("Break not inside loop/endloop pair\n");
1074301e04c3fSmrg		return -EINVAL;
1074401e04c3fSmrg	}
1074501e04c3fSmrg
1074601e04c3fSmrg	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
1074701e04c3fSmrg
1074801e04c3fSmrg	fc_set_mid(ctx, fscp - 1);
1074901e04c3fSmrg
107503464ebd5Sriastradh	return 0;
107513464ebd5Sriastradh}
107523464ebd5Sriastradh
1075301e04c3fSmrgstatic int tgsi_gs_emit(struct r600_shader_ctx *ctx)
1075401e04c3fSmrg{
1075501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1075601e04c3fSmrg	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
1075701e04c3fSmrg	int r;
1075801e04c3fSmrg
1075901e04c3fSmrg	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
1076001e04c3fSmrg		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
1076101e04c3fSmrg
1076201e04c3fSmrg	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
1076301e04c3fSmrg	if (!r) {
1076401e04c3fSmrg		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
1076501e04c3fSmrg		if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
1076601e04c3fSmrg			return emit_inc_ring_offset(ctx, stream, TRUE);
1076701e04c3fSmrg	}
1076801e04c3fSmrg	return r;
1076901e04c3fSmrg}
1077001e04c3fSmrg
1077101e04c3fSmrgstatic int tgsi_umad(struct r600_shader_ctx *ctx)
107723464ebd5Sriastradh{
107733464ebd5Sriastradh	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10774af69d88dSmrg	struct r600_bytecode_alu alu;
1077501e04c3fSmrg	int i, j, r;
1077601e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1077701e04c3fSmrg
1077801e04c3fSmrg	/* src0 * src1 */
1077901e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1078001e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1078101e04c3fSmrg			continue;
107823464ebd5Sriastradh
10783af69d88dSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
107843464ebd5Sriastradh
1078501e04c3fSmrg		alu.dst.chan = i;
1078601e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
1078701e04c3fSmrg		alu.dst.write = 1;
107883464ebd5Sriastradh
1078901e04c3fSmrg		alu.op = ALU_OP2_MULLO_UINT;
1079001e04c3fSmrg		for (j = 0; j < 2; j++) {
1079101e04c3fSmrg			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
107923464ebd5Sriastradh		}
107933464ebd5Sriastradh
1079401e04c3fSmrg		alu.last = 1;
1079501e04c3fSmrg		r = emit_mul_int_op(ctx->bc, &alu);
1079601e04c3fSmrg		if (r)
1079701e04c3fSmrg			return r;
1079801e04c3fSmrg	}
1079901e04c3fSmrg
1080001e04c3fSmrg
1080101e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1080201e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1080301e04c3fSmrg			continue;
1080401e04c3fSmrg
1080501e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1080601e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1080701e04c3fSmrg
1080801e04c3fSmrg		alu.op = ALU_OP2_ADD_INT;
1080901e04c3fSmrg
1081001e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
1081101e04c3fSmrg		alu.src[0].chan = i;
1081201e04c3fSmrg
1081301e04c3fSmrg		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
1081401e04c3fSmrg		if (i == lasti) {
108153464ebd5Sriastradh			alu.last = 1;
1081601e04c3fSmrg		}
10817af69d88dSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
108183464ebd5Sriastradh		if (r)
108193464ebd5Sriastradh			return r;
108203464ebd5Sriastradh	}
108213464ebd5Sriastradh	return 0;
108223464ebd5Sriastradh}
108233464ebd5Sriastradh
1082401e04c3fSmrgstatic int tgsi_pk2h(struct r600_shader_ctx *ctx)
108253464ebd5Sriastradh{
1082601e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
10827af69d88dSmrg	struct r600_bytecode_alu alu;
1082801e04c3fSmrg	int r, i;
1082901e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
108303464ebd5Sriastradh
1083101e04c3fSmrg	/* temp.xy = f32_to_f16(src) */
10832af69d88dSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1083301e04c3fSmrg	alu.op = ALU_OP1_FLT32_TO_FLT16;
1083401e04c3fSmrg	alu.dst.chan = 0;
108353464ebd5Sriastradh	alu.dst.sel = ctx->temp_reg;
108363464ebd5Sriastradh	alu.dst.write = 1;
10837af69d88dSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1083801e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1083901e04c3fSmrg	if (r)
1084001e04c3fSmrg		return r;
1084101e04c3fSmrg	alu.dst.chan = 1;
1084201e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
108433464ebd5Sriastradh	alu.last = 1;
1084401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
108453464ebd5Sriastradh	if (r)
108463464ebd5Sriastradh		return r;
108473464ebd5Sriastradh
1084801e04c3fSmrg	/* dst.x = temp.y * 0x10000 + temp.x */
1084901e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1085001e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1085101e04c3fSmrg			continue;
108523464ebd5Sriastradh
1085301e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1085401e04c3fSmrg		alu.op = ALU_OP3_MULADD_UINT24;
1085501e04c3fSmrg		alu.is_op3 = 1;
1085601e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1085701e04c3fSmrg		alu.last = i == lasti;
1085801e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
1085901e04c3fSmrg		alu.src[0].chan = 1;
1086001e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1086101e04c3fSmrg		alu.src[1].value = 0x10000;
1086201e04c3fSmrg		alu.src[2].sel = ctx->temp_reg;
1086301e04c3fSmrg		alu.src[2].chan = 0;
1086401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1086501e04c3fSmrg		if (r)
1086601e04c3fSmrg			return r;
108673464ebd5Sriastradh	}
108683464ebd5Sriastradh
108693464ebd5Sriastradh	return 0;
108703464ebd5Sriastradh}
108713464ebd5Sriastradh
1087201e04c3fSmrgstatic int tgsi_up2h(struct r600_shader_ctx *ctx)
10873af69d88dSmrg{
1087401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1087501e04c3fSmrg	struct r600_bytecode_alu alu;
1087601e04c3fSmrg	int r, i;
1087701e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
10878af69d88dSmrg
1087901e04c3fSmrg	/* temp.x = src.x */
1088001e04c3fSmrg	/* note: no need to mask out the high bits */
1088101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1088201e04c3fSmrg	alu.op = ALU_OP1_MOV;
1088301e04c3fSmrg	alu.dst.chan = 0;
1088401e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
1088501e04c3fSmrg	alu.dst.write = 1;
1088601e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1088701e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1088801e04c3fSmrg	if (r)
1088901e04c3fSmrg		return r;
10890af69d88dSmrg
1089101e04c3fSmrg	/* temp.y = src.x >> 16 */
1089201e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1089301e04c3fSmrg	alu.op = ALU_OP2_LSHR_INT;
1089401e04c3fSmrg	alu.dst.chan = 1;
1089501e04c3fSmrg	alu.dst.sel = ctx->temp_reg;
1089601e04c3fSmrg	alu.dst.write = 1;
1089701e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1089801e04c3fSmrg	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1089901e04c3fSmrg	alu.src[1].value = 16;
1090001e04c3fSmrg	alu.last = 1;
1090101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1090201e04c3fSmrg	if (r)
1090301e04c3fSmrg		return r;
109043464ebd5Sriastradh
1090501e04c3fSmrg	/* dst.wz = dst.xy = f16_to_f32(temp.xy) */
1090601e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1090701e04c3fSmrg		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1090801e04c3fSmrg			continue;
1090901e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1091001e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1091101e04c3fSmrg		alu.op = ALU_OP1_FLT16_TO_FLT32;
1091201e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
1091301e04c3fSmrg		alu.src[0].chan = i % 2;
1091401e04c3fSmrg		alu.last = i == lasti;
1091501e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1091601e04c3fSmrg		if (r)
1091701e04c3fSmrg			return r;
109183464ebd5Sriastradh	}
109193464ebd5Sriastradh
1092001e04c3fSmrg	return 0;
109213464ebd5Sriastradh}
109223464ebd5Sriastradh
1092301e04c3fSmrgstatic int tgsi_bfe(struct r600_shader_ctx *ctx)
109243464ebd5Sriastradh{
1092501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1092601e04c3fSmrg	struct r600_bytecode_alu alu;
1092701e04c3fSmrg	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1092801e04c3fSmrg	int r, i;
1092901e04c3fSmrg	int dst = -1;
109303464ebd5Sriastradh
1093101e04c3fSmrg	if ((inst->Src[0].Register.File == inst->Dst[0].Register.File &&
1093201e04c3fSmrg	     inst->Src[0].Register.Index == inst->Dst[0].Register.Index) ||
1093301e04c3fSmrg	    (inst->Src[2].Register.File == inst->Dst[0].Register.File &&
1093401e04c3fSmrg	     inst->Src[2].Register.Index == inst->Dst[0].Register.Index))
1093501e04c3fSmrg		dst = r600_get_temp(ctx);
109363464ebd5Sriastradh
1093701e04c3fSmrg	r = tgsi_op3_dst(ctx, dst);
1093801e04c3fSmrg	if (r)
1093901e04c3fSmrg		return r;
109403464ebd5Sriastradh
1094101e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1094201e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1094301e04c3fSmrg		alu.op = ALU_OP2_SETGE_INT;
1094401e04c3fSmrg		r600_bytecode_src(&alu.src[0], &ctx->src[2], i);
1094501e04c3fSmrg		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1094601e04c3fSmrg		alu.src[1].value = 32;
1094701e04c3fSmrg		alu.dst.sel = ctx->temp_reg;
1094801e04c3fSmrg		alu.dst.chan = i;
1094901e04c3fSmrg		alu.dst.write = 1;
1095001e04c3fSmrg		if (i == lasti)
1095101e04c3fSmrg			alu.last = 1;
1095201e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1095301e04c3fSmrg		if (r)
1095401e04c3fSmrg			return r;
1095501e04c3fSmrg	}
109563464ebd5Sriastradh
1095701e04c3fSmrg	for (i = 0; i < lasti + 1; i++) {
1095801e04c3fSmrg		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1095901e04c3fSmrg		alu.op = ALU_OP3_CNDE_INT;
1096001e04c3fSmrg		alu.is_op3 = 1;
1096101e04c3fSmrg		alu.src[0].sel = ctx->temp_reg;
1096201e04c3fSmrg		alu.src[0].chan = i;
109633464ebd5Sriastradh
1096401e04c3fSmrg		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1096501e04c3fSmrg		if (dst != -1)
1096601e04c3fSmrg			alu.src[1].sel = dst;
1096701e04c3fSmrg		else
1096801e04c3fSmrg			alu.src[1].sel = alu.dst.sel;
1096901e04c3fSmrg		alu.src[1].chan = i;
1097001e04c3fSmrg		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1097101e04c3fSmrg		alu.dst.write = 1;
1097201e04c3fSmrg		if (i == lasti)
1097301e04c3fSmrg			alu.last = 1;
1097401e04c3fSmrg		r = r600_bytecode_add_alu(ctx->bc, &alu);
1097501e04c3fSmrg		if (r)
1097601e04c3fSmrg			return r;
1097701e04c3fSmrg	}
109783464ebd5Sriastradh
109793464ebd5Sriastradh	return 0;
109803464ebd5Sriastradh}
109813464ebd5Sriastradh
1098201e04c3fSmrgstatic int tgsi_clock(struct r600_shader_ctx *ctx)
109833464ebd5Sriastradh{
1098401e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1098501e04c3fSmrg	struct r600_bytecode_alu alu;
1098601e04c3fSmrg	int r;
1098701e04c3fSmrg
1098801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1098901e04c3fSmrg	alu.op = ALU_OP1_MOV;
1099001e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1099101e04c3fSmrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO;
1099201e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1099301e04c3fSmrg	if (r)
1099401e04c3fSmrg		return r;
1099501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1099601e04c3fSmrg	alu.op = ALU_OP1_MOV;
1099701e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1099801e04c3fSmrg	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI;
1099901e04c3fSmrg	alu.last = 1;
1100001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1100101e04c3fSmrg	if (r)
1100201e04c3fSmrg		return r;
110033464ebd5Sriastradh	return 0;
110043464ebd5Sriastradh}
110053464ebd5Sriastradh
1100601e04c3fSmrgstatic int emit_u64add(struct r600_shader_ctx *ctx, int op,
1100701e04c3fSmrg		       int treg,
1100801e04c3fSmrg		       int src0_sel, int src0_chan,
1100901e04c3fSmrg		       int src1_sel, int src1_chan)
110103464ebd5Sriastradh{
1101101e04c3fSmrg	struct r600_bytecode_alu alu;
1101201e04c3fSmrg	int r;
1101301e04c3fSmrg	int opc;
110143464ebd5Sriastradh
1101501e04c3fSmrg	if (op == ALU_OP2_ADD_INT)
1101601e04c3fSmrg		opc = ALU_OP2_ADDC_UINT;
1101701e04c3fSmrg	else
1101801e04c3fSmrg		opc = ALU_OP2_SUBB_UINT;
110193464ebd5Sriastradh
1102001e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1102101e04c3fSmrg	alu.op = op;            ;
1102201e04c3fSmrg	alu.dst.sel = treg;
1102301e04c3fSmrg	alu.dst.chan = 0;
1102401e04c3fSmrg	alu.dst.write = 1;
1102501e04c3fSmrg	alu.src[0].sel = src0_sel;
1102601e04c3fSmrg	alu.src[0].chan = src0_chan + 0;
1102701e04c3fSmrg	alu.src[1].sel = src1_sel;
1102801e04c3fSmrg	alu.src[1].chan = src1_chan + 0;
1102901e04c3fSmrg	alu.src[1].neg = 0;
1103001e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1103101e04c3fSmrg	if (r)
1103201e04c3fSmrg		return r;
110333464ebd5Sriastradh
1103401e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1103501e04c3fSmrg	alu.op = op;
1103601e04c3fSmrg	alu.dst.sel = treg;
1103701e04c3fSmrg	alu.dst.chan = 1;
1103801e04c3fSmrg	alu.dst.write = 1;
1103901e04c3fSmrg	alu.src[0].sel = src0_sel;
1104001e04c3fSmrg	alu.src[0].chan = src0_chan + 1;
1104101e04c3fSmrg	alu.src[1].sel = src1_sel;
1104201e04c3fSmrg	alu.src[1].chan = src1_chan + 1;
1104301e04c3fSmrg	alu.src[1].neg = 0;
1104401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1104501e04c3fSmrg	if (r)
1104601e04c3fSmrg		return r;
110473464ebd5Sriastradh
1104801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1104901e04c3fSmrg	alu.op = opc;
1105001e04c3fSmrg	alu.dst.sel = treg;
1105101e04c3fSmrg	alu.dst.chan = 2;
1105201e04c3fSmrg	alu.dst.write = 1;
1105301e04c3fSmrg	alu.last = 1;
1105401e04c3fSmrg	alu.src[0].sel = src0_sel;
1105501e04c3fSmrg	alu.src[0].chan = src0_chan + 0;
1105601e04c3fSmrg	alu.src[1].sel = src1_sel;
1105701e04c3fSmrg	alu.src[1].chan = src1_chan + 0;
1105801e04c3fSmrg	alu.src[1].neg = 0;
1105901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1106001e04c3fSmrg	if (r)
1106101e04c3fSmrg		return r;
110623464ebd5Sriastradh
1106301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1106401e04c3fSmrg	alu.op = op;
1106501e04c3fSmrg	alu.dst.sel = treg;
1106601e04c3fSmrg	alu.dst.chan = 1;
1106701e04c3fSmrg	alu.dst.write = 1;
1106801e04c3fSmrg	alu.src[0].sel = treg;
1106901e04c3fSmrg	alu.src[0].chan = 1;
1107001e04c3fSmrg	alu.src[1].sel = treg;
1107101e04c3fSmrg	alu.src[1].chan = 2;
1107201e04c3fSmrg	alu.last = 1;
1107301e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1107401e04c3fSmrg	if (r)
1107501e04c3fSmrg		return r;
1107601e04c3fSmrg	return 0;
110773464ebd5Sriastradh}
110783464ebd5Sriastradh
1107901e04c3fSmrgstatic int egcm_u64add(struct r600_shader_ctx *ctx)
110803464ebd5Sriastradh{
1108101e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1108201e04c3fSmrg	struct r600_bytecode_alu alu;
1108301e04c3fSmrg	int r;
1108401e04c3fSmrg	int treg = ctx->temp_reg;
1108501e04c3fSmrg	int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT;
11086af69d88dSmrg
1108701e04c3fSmrg	if (ctx->src[1].neg) {
1108801e04c3fSmrg		op = ALU_OP2_SUB_INT;
1108901e04c3fSmrg		opc = ALU_OP2_SUBB_UINT;
11090af69d88dSmrg	}
1109101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1109201e04c3fSmrg	alu.op = op;            ;
1109301e04c3fSmrg	alu.dst.sel = treg;
1109401e04c3fSmrg	alu.dst.chan = 0;
1109501e04c3fSmrg	alu.dst.write = 1;
1109601e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1109701e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1109801e04c3fSmrg	alu.src[1].neg = 0;
1109901e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1110001e04c3fSmrg	if (r)
1110101e04c3fSmrg		return r;
11102af69d88dSmrg
1110301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1110401e04c3fSmrg	alu.op = op;
1110501e04c3fSmrg	alu.dst.sel = treg;
1110601e04c3fSmrg	alu.dst.chan = 1;
1110701e04c3fSmrg	alu.dst.write = 1;
1110801e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1110901e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
1111001e04c3fSmrg	alu.src[1].neg = 0;
1111101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1111201e04c3fSmrg	if (r)
1111301e04c3fSmrg		return r;
111143464ebd5Sriastradh
1111501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1111601e04c3fSmrg	alu.op = opc              ;
1111701e04c3fSmrg	alu.dst.sel = treg;
1111801e04c3fSmrg	alu.dst.chan = 2;
1111901e04c3fSmrg	alu.dst.write = 1;
1112001e04c3fSmrg	alu.last = 1;
1112101e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1112201e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1112301e04c3fSmrg	alu.src[1].neg = 0;
1112401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1112501e04c3fSmrg	if (r)
1112601e04c3fSmrg		return r;
111273464ebd5Sriastradh
1112801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1112901e04c3fSmrg	alu.op = op;
1113001e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1113101e04c3fSmrg	alu.src[0].sel = treg;
1113201e04c3fSmrg	alu.src[0].chan = 1;
1113301e04c3fSmrg	alu.src[1].sel = treg;
1113401e04c3fSmrg	alu.src[1].chan = 2;
1113501e04c3fSmrg	alu.last = 1;
1113601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1113701e04c3fSmrg	if (r)
1113801e04c3fSmrg		return r;
1113901e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1114001e04c3fSmrg	alu.op = ALU_OP1_MOV;
1114101e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1114201e04c3fSmrg	alu.src[0].sel = treg;
1114301e04c3fSmrg	alu.src[0].chan = 0;
1114401e04c3fSmrg	alu.last = 1;
1114501e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1114601e04c3fSmrg	if (r)
1114701e04c3fSmrg		return r;
111483464ebd5Sriastradh	return 0;
111493464ebd5Sriastradh}
111503464ebd5Sriastradh
111517ec681f3Smrg
111527ec681f3Smrgstatic int egcm_i64neg(struct r600_shader_ctx *ctx)
111537ec681f3Smrg{
111547ec681f3Smrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
111557ec681f3Smrg	struct r600_bytecode_alu alu;
111567ec681f3Smrg	int r;
111577ec681f3Smrg	int treg = ctx->temp_reg;
111587ec681f3Smrg	const int op = ALU_OP2_SUB_INT;
111597ec681f3Smrg	const int opc = ALU_OP2_SUBB_UINT;
111607ec681f3Smrg
111617ec681f3Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
111627ec681f3Smrg	alu.op = op;            ;
111637ec681f3Smrg	alu.dst.sel = treg;
111647ec681f3Smrg	alu.dst.chan = 0;
111657ec681f3Smrg	alu.dst.write = 1;
111667ec681f3Smrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
111677ec681f3Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
111687ec681f3Smrg	alu.src[1].neg = 0;
111697ec681f3Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
111707ec681f3Smrg	if (r)
111717ec681f3Smrg		return r;
111727ec681f3Smrg
111737ec681f3Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
111747ec681f3Smrg	alu.op = op;
111757ec681f3Smrg	alu.dst.sel = treg;
111767ec681f3Smrg	alu.dst.chan = 1;
111777ec681f3Smrg	alu.dst.write = 1;
111787ec681f3Smrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
111797ec681f3Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[0], 1);
111807ec681f3Smrg	alu.src[1].neg = 0;
111817ec681f3Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
111827ec681f3Smrg	if (r)
111837ec681f3Smrg		return r;
111847ec681f3Smrg
111857ec681f3Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
111867ec681f3Smrg	alu.op = opc              ;
111877ec681f3Smrg	alu.dst.sel = treg;
111887ec681f3Smrg	alu.dst.chan = 2;
111897ec681f3Smrg	alu.dst.write = 1;
111907ec681f3Smrg	alu.last = 1;
111917ec681f3Smrg	alu.src[0].sel = V_SQ_ALU_SRC_0;
111927ec681f3Smrg	r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
111937ec681f3Smrg	alu.src[1].neg = 0;
111947ec681f3Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
111957ec681f3Smrg	if (r)
111967ec681f3Smrg		return r;
111977ec681f3Smrg
111987ec681f3Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
111997ec681f3Smrg	alu.op = op;
112007ec681f3Smrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
112017ec681f3Smrg	alu.src[0].sel = treg;
112027ec681f3Smrg	alu.src[0].chan = 1;
112037ec681f3Smrg	alu.src[1].sel = treg;
112047ec681f3Smrg	alu.src[1].chan = 2;
112057ec681f3Smrg	alu.last = 1;
112067ec681f3Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
112077ec681f3Smrg	if (r)
112087ec681f3Smrg		return r;
112097ec681f3Smrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
112107ec681f3Smrg	alu.op = ALU_OP1_MOV;
112117ec681f3Smrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
112127ec681f3Smrg	alu.src[0].sel = treg;
112137ec681f3Smrg	alu.src[0].chan = 0;
112147ec681f3Smrg	alu.last = 1;
112157ec681f3Smrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
112167ec681f3Smrg	if (r)
112177ec681f3Smrg		return r;
112187ec681f3Smrg	return 0;
112197ec681f3Smrg}
112207ec681f3Smrg
1122101e04c3fSmrg/* result.y = mul_high a, b
1122201e04c3fSmrg   result.x = mul a,b
1122301e04c3fSmrg   result.y += a.x * b.y + a.y * b.x;
1122401e04c3fSmrg*/
1122501e04c3fSmrgstatic int egcm_u64mul(struct r600_shader_ctx *ctx)
11226af69d88dSmrg{
1122701e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1122801e04c3fSmrg	struct r600_bytecode_alu alu;
1122901e04c3fSmrg	int r;
1123001e04c3fSmrg	int treg = ctx->temp_reg;
11231af69d88dSmrg
1123201e04c3fSmrg	/* temp.x = mul_lo a.x, b.x */
1123301e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1123401e04c3fSmrg	alu.op = ALU_OP2_MULLO_UINT;
1123501e04c3fSmrg	alu.dst.sel = treg;
1123601e04c3fSmrg	alu.dst.chan = 0;
1123701e04c3fSmrg	alu.dst.write = 1;
1123801e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1123901e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1124001e04c3fSmrg	r = emit_mul_int_op(ctx->bc, &alu);
1124101e04c3fSmrg	if (r)
1124201e04c3fSmrg		return r;
11243af69d88dSmrg
1124401e04c3fSmrg	/* temp.y = mul_hi a.x, b.x */
1124501e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1124601e04c3fSmrg	alu.op = ALU_OP2_MULHI_UINT;
1124701e04c3fSmrg	alu.dst.sel = treg;
1124801e04c3fSmrg	alu.dst.chan = 1;
1124901e04c3fSmrg	alu.dst.write = 1;
1125001e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1125101e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1125201e04c3fSmrg	r = emit_mul_int_op(ctx->bc, &alu);
1125301e04c3fSmrg	if (r)
1125401e04c3fSmrg		return r;
112553464ebd5Sriastradh
1125601e04c3fSmrg	/* temp.z = mul a.x, b.y */
1125701e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1125801e04c3fSmrg	alu.op = ALU_OP2_MULLO_UINT;
1125901e04c3fSmrg	alu.dst.sel = treg;
1126001e04c3fSmrg	alu.dst.chan = 2;
1126101e04c3fSmrg	alu.dst.write = 1;
1126201e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1126301e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
1126401e04c3fSmrg	r = emit_mul_int_op(ctx->bc, &alu);
1126501e04c3fSmrg	if (r)
1126601e04c3fSmrg		return r;
112673464ebd5Sriastradh
1126801e04c3fSmrg	/* temp.w = mul a.y, b.x */
1126901e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1127001e04c3fSmrg	alu.op = ALU_OP2_MULLO_UINT;
1127101e04c3fSmrg	alu.dst.sel = treg;
1127201e04c3fSmrg	alu.dst.chan = 3;
1127301e04c3fSmrg	alu.dst.write = 1;
1127401e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1127501e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1127601e04c3fSmrg	r = emit_mul_int_op(ctx->bc, &alu);
1127701e04c3fSmrg	if (r)
1127801e04c3fSmrg		return r;
112793464ebd5Sriastradh
1128001e04c3fSmrg	/* temp.z = temp.z + temp.w */
1128101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1128201e04c3fSmrg	alu.op = ALU_OP2_ADD_INT;
1128301e04c3fSmrg	alu.dst.sel = treg;
1128401e04c3fSmrg	alu.dst.chan = 2;
1128501e04c3fSmrg	alu.dst.write = 1;
1128601e04c3fSmrg	alu.src[0].sel = treg;
1128701e04c3fSmrg	alu.src[0].chan = 2;
1128801e04c3fSmrg	alu.src[1].sel = treg;
1128901e04c3fSmrg	alu.src[1].chan = 3;
1129001e04c3fSmrg	alu.last = 1;
1129101e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1129201e04c3fSmrg	if (r)
1129301e04c3fSmrg		return r;
1129401e04c3fSmrg
1129501e04c3fSmrg	/* temp.y = temp.y + temp.z */
1129601e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1129701e04c3fSmrg	alu.op = ALU_OP2_ADD_INT;
1129801e04c3fSmrg	alu.dst.sel = treg;
1129901e04c3fSmrg	alu.dst.chan = 1;
1130001e04c3fSmrg	alu.dst.write = 1;
1130101e04c3fSmrg	alu.src[0].sel = treg;
1130201e04c3fSmrg	alu.src[0].chan = 1;
1130301e04c3fSmrg	alu.src[1].sel = treg;
1130401e04c3fSmrg	alu.src[1].chan = 2;
1130501e04c3fSmrg	alu.last = 1;
1130601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1130701e04c3fSmrg	if (r)
1130801e04c3fSmrg		return r;
1130901e04c3fSmrg
1131001e04c3fSmrg	/* dst.x = temp.x */
1131101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1131201e04c3fSmrg	alu.op = ALU_OP1_MOV;
1131301e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1131401e04c3fSmrg	alu.src[0].sel = treg;
1131501e04c3fSmrg	alu.src[0].chan = 0;
1131601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1131701e04c3fSmrg	if (r)
1131801e04c3fSmrg		return r;
1131901e04c3fSmrg
1132001e04c3fSmrg	/* dst.y = temp.y */
1132101e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1132201e04c3fSmrg	alu.op = ALU_OP1_MOV;
1132301e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1132401e04c3fSmrg	alu.src[0].sel = treg;
1132501e04c3fSmrg	alu.src[0].chan = 1;
1132601e04c3fSmrg	alu.last = 1;
1132701e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1132801e04c3fSmrg	if (r)
1132901e04c3fSmrg		return r;
113303464ebd5Sriastradh
113313464ebd5Sriastradh	return 0;
113323464ebd5Sriastradh}
113333464ebd5Sriastradh
1133401e04c3fSmrgstatic int emit_u64sge(struct r600_shader_ctx *ctx,
1133501e04c3fSmrg		       int treg,
1133601e04c3fSmrg		       int src0_sel, int src0_base_chan,
1133701e04c3fSmrg		       int src1_sel, int src1_base_chan)
113383464ebd5Sriastradh{
1133901e04c3fSmrg	int r;
1134001e04c3fSmrg	/* for 64-bit sge */
1134101e04c3fSmrg	/* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */
1134201e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT,
1134301e04c3fSmrg			   treg, 1,
1134401e04c3fSmrg			   src0_sel, src0_base_chan + 1,
1134501e04c3fSmrg			   src1_sel, src1_base_chan + 1);
1134601e04c3fSmrg	if (r)
1134701e04c3fSmrg		return r;
113483464ebd5Sriastradh
1134901e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
1135001e04c3fSmrg			   treg, 0,
1135101e04c3fSmrg			   src0_sel, src0_base_chan,
1135201e04c3fSmrg			   src1_sel, src1_base_chan);
1135301e04c3fSmrg	if (r)
1135401e04c3fSmrg		return r;
113553464ebd5Sriastradh
1135601e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_SETE_INT,
1135701e04c3fSmrg			   treg, 2,
1135801e04c3fSmrg			   src0_sel, src0_base_chan + 1,
1135901e04c3fSmrg			   src1_sel, src1_base_chan + 1);
1136001e04c3fSmrg	if (r)
1136101e04c3fSmrg		return r;
1136201e04c3fSmrg
1136301e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_AND_INT,
1136401e04c3fSmrg			   treg, 0,
1136501e04c3fSmrg			   treg, 0,
1136601e04c3fSmrg			   treg, 2);
1136701e04c3fSmrg	if (r)
1136801e04c3fSmrg		return r;
1136901e04c3fSmrg
1137001e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
1137101e04c3fSmrg			   treg, 0,
1137201e04c3fSmrg			   treg, 0,
1137301e04c3fSmrg			   treg, 1);
1137401e04c3fSmrg	if (r)
1137501e04c3fSmrg		return r;
113763464ebd5Sriastradh	return 0;
113773464ebd5Sriastradh}
113783464ebd5Sriastradh
1137901e04c3fSmrg/* this isn't a complete div it's just enough for qbo shader to work */
1138001e04c3fSmrgstatic int egcm_u64div(struct r600_shader_ctx *ctx)
113813464ebd5Sriastradh{
1138201e04c3fSmrg	struct r600_bytecode_alu alu;
1138301e04c3fSmrg	struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src;
1138401e04c3fSmrg	int r, i;
1138501e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
113863464ebd5Sriastradh
1138701e04c3fSmrg	/* make sure we are dividing my a const with 0 in the high bits */
1138801e04c3fSmrg	if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL)
1138901e04c3fSmrg		return -1;
1139001e04c3fSmrg	if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0)
1139101e04c3fSmrg		return -1;
1139201e04c3fSmrg	/* make sure we are doing one division */
1139301e04c3fSmrg	if (inst->Dst[0].Register.WriteMask != 0x3)
1139401e04c3fSmrg		return -1;
113953464ebd5Sriastradh
1139601e04c3fSmrg	/* emit_if uses ctx->temp_reg so we can't */
1139701e04c3fSmrg	int treg = r600_get_temp(ctx);
1139801e04c3fSmrg	int tmp_num = r600_get_temp(ctx);
1139901e04c3fSmrg	int sub_tmp = r600_get_temp(ctx);
1140001e04c3fSmrg
1140101e04c3fSmrg	/* tmp quot are tmp_num.zw */
1140201e04c3fSmrg	r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0);
1140301e04c3fSmrg	r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1);
1140401e04c3fSmrg	r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0);
1140501e04c3fSmrg	r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1);
1140601e04c3fSmrg
1140701e04c3fSmrg	/* MOV tmp_num.xy, numerator */
1140801e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1140901e04c3fSmrg			   tmp_num, 0,
1141001e04c3fSmrg			   alu_num_lo.sel, alu_num_lo.chan,
1141101e04c3fSmrg			   0, 0);
1141201e04c3fSmrg	if (r)
1141301e04c3fSmrg		return r;
1141401e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1141501e04c3fSmrg			   tmp_num, 1,
1141601e04c3fSmrg			   alu_num_hi.sel, alu_num_hi.chan,
1141701e04c3fSmrg			   0, 0);
1141801e04c3fSmrg	if (r)
1141901e04c3fSmrg		return r;
114203464ebd5Sriastradh
1142101e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1142201e04c3fSmrg			   tmp_num, 2,
1142301e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 0,
1142401e04c3fSmrg			   0, 0);
1142501e04c3fSmrg	if (r)
1142601e04c3fSmrg		return r;
114273464ebd5Sriastradh
1142801e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1142901e04c3fSmrg			   tmp_num, 3,
1143001e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 0,
1143101e04c3fSmrg			   0, 0);
1143201e04c3fSmrg	if (r)
1143301e04c3fSmrg		return r;
114343464ebd5Sriastradh
1143501e04c3fSmrg	/* treg 0 is log2_denom */
1143601e04c3fSmrg	/* normally this gets the MSB for the denom high value
1143701e04c3fSmrg	   - however we know this will always be 0 here. */
1143801e04c3fSmrg	r = single_alu_op2(ctx,
1143901e04c3fSmrg			   ALU_OP1_MOV,
1144001e04c3fSmrg			   treg, 0,
1144101e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 32,
1144201e04c3fSmrg			   0, 0);
1144301e04c3fSmrg	if (r)
1144401e04c3fSmrg		return r;
11445af69d88dSmrg
1144601e04c3fSmrg	/* normally check demon hi for 0, but we know it is already */
1144701e04c3fSmrg	/* t0.z = num_hi >= denom_lo */
1144801e04c3fSmrg	r = single_alu_op2(ctx,
1144901e04c3fSmrg			   ALU_OP2_SETGE_UINT,
1145001e04c3fSmrg			   treg, 1,
1145101e04c3fSmrg			   alu_num_hi.sel, alu_num_hi.chan,
1145201e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
1145301e04c3fSmrg	if (r)
1145401e04c3fSmrg		return r;
11455af69d88dSmrg
1145601e04c3fSmrg	memset(&alu_src, 0, sizeof(alu_src));
1145701e04c3fSmrg	alu_src.sel = treg;
1145801e04c3fSmrg	alu_src.chan = 1;
1145901e04c3fSmrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1146001e04c3fSmrg	if (r)
1146101e04c3fSmrg		return r;
1146201e04c3fSmrg
1146301e04c3fSmrg	/* for loops in here */
1146401e04c3fSmrg	/* get msb t0.x = msb(src[1].x) first */
1146501e04c3fSmrg	int msb_lo = util_last_bit(alu_denom_lo.value);
1146601e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1146701e04c3fSmrg			   treg, 0,
1146801e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, msb_lo,
1146901e04c3fSmrg			   0, 0);
1147001e04c3fSmrg	if (r)
1147101e04c3fSmrg		return r;
11472af69d88dSmrg
1147301e04c3fSmrg	/* unroll the asm here */
1147401e04c3fSmrg	for (i = 0; i < 31; i++) {
1147501e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
1147601e04c3fSmrg				   treg, 2,
1147701e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, i,
1147801e04c3fSmrg				   treg, 0);
11479af69d88dSmrg		if (r)
11480af69d88dSmrg			return r;
11481af69d88dSmrg
1148201e04c3fSmrg		/* we can do this on the CPU */
1148301e04c3fSmrg		uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i);
1148401e04c3fSmrg		/* t0.z = tmp_num.y >= t0.z */
1148501e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
1148601e04c3fSmrg				   treg, 1,
1148701e04c3fSmrg				   tmp_num, 1,
1148801e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
11489af69d88dSmrg		if (r)
11490af69d88dSmrg			return r;
11491af69d88dSmrg
1149201e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_AND_INT,
1149301e04c3fSmrg				   treg, 1,
1149401e04c3fSmrg				   treg, 1,
1149501e04c3fSmrg				   treg, 2);
11496af69d88dSmrg		if (r)
11497af69d88dSmrg			return r;
11498af69d88dSmrg
1149901e04c3fSmrg		memset(&alu_src, 0, sizeof(alu_src));
1150001e04c3fSmrg		alu_src.sel = treg;
1150101e04c3fSmrg		alu_src.chan = 1;
1150201e04c3fSmrg		r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1150301e04c3fSmrg		if (r)
1150401e04c3fSmrg			return r;
115053464ebd5Sriastradh
1150601e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
1150701e04c3fSmrg				   tmp_num, 1,
1150801e04c3fSmrg				   tmp_num, 1,
1150901e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
1151001e04c3fSmrg		if (r)
1151101e04c3fSmrg			return r;
115123464ebd5Sriastradh
1151301e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_OR_INT,
1151401e04c3fSmrg				   tmp_num, 3,
1151501e04c3fSmrg				   tmp_num, 3,
1151601e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
1151701e04c3fSmrg		if (r)
1151801e04c3fSmrg			return r;
115193464ebd5Sriastradh
1152001e04c3fSmrg		r = tgsi_endif(ctx);
1152101e04c3fSmrg		if (r)
1152201e04c3fSmrg			return r;
115233464ebd5Sriastradh	}
115243464ebd5Sriastradh
1152501e04c3fSmrg	/* log2_denom is always <= 31, so manually peel the last loop
1152601e04c3fSmrg	 * iteration.
1152701e04c3fSmrg	 */
1152801e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
1152901e04c3fSmrg			   treg, 1,
1153001e04c3fSmrg			   tmp_num, 1,
1153101e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
1153201e04c3fSmrg	if (r)
1153301e04c3fSmrg		return r;
115343464ebd5Sriastradh
1153501e04c3fSmrg	memset(&alu_src, 0, sizeof(alu_src));
1153601e04c3fSmrg	alu_src.sel = treg;
1153701e04c3fSmrg	alu_src.chan = 1;
1153801e04c3fSmrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1153901e04c3fSmrg	if (r)
1154001e04c3fSmrg		return r;
115413464ebd5Sriastradh
1154201e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
1154301e04c3fSmrg			   tmp_num, 1,
1154401e04c3fSmrg			   tmp_num, 1,
1154501e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
1154601e04c3fSmrg	if (r)
1154701e04c3fSmrg		return r;
11548af69d88dSmrg
1154901e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
1155001e04c3fSmrg			   tmp_num, 3,
1155101e04c3fSmrg			   tmp_num, 3,
1155201e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 1U);
1155301e04c3fSmrg	if (r)
1155401e04c3fSmrg		return r;
1155501e04c3fSmrg	r = tgsi_endif(ctx);
1155601e04c3fSmrg	if (r)
1155701e04c3fSmrg		return r;
11558af69d88dSmrg
1155901e04c3fSmrg	r = tgsi_endif(ctx);
1156001e04c3fSmrg	if (r)
1156101e04c3fSmrg		return r;
11562af69d88dSmrg
1156301e04c3fSmrg	/* onto the second loop to unroll */
1156401e04c3fSmrg	for (i = 0; i < 31; i++) {
1156501e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
1156601e04c3fSmrg				   treg, 1,
1156701e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)),
1156801e04c3fSmrg				   treg, 0);
1156901e04c3fSmrg		if (r)
1157001e04c3fSmrg			return r;
11571af69d88dSmrg
1157201e04c3fSmrg		uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i);
1157301e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
1157401e04c3fSmrg				   treg, 2,
1157501e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
1157601e04c3fSmrg				   0, 0);
1157701e04c3fSmrg		if (r)
1157801e04c3fSmrg			return r;
11579af69d88dSmrg
1158001e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
1158101e04c3fSmrg				   treg, 3,
1158201e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
1158301e04c3fSmrg				   0, 0);
1158401e04c3fSmrg		if (r)
1158501e04c3fSmrg			return r;
11586af69d88dSmrg
1158701e04c3fSmrg		r = emit_u64sge(ctx, sub_tmp,
1158801e04c3fSmrg				tmp_num, 0,
1158901e04c3fSmrg				treg, 2);
1159001e04c3fSmrg		if (r)
1159101e04c3fSmrg			return r;
11592af69d88dSmrg
1159301e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_AND_INT,
1159401e04c3fSmrg				   treg, 1,
1159501e04c3fSmrg				   treg, 1,
1159601e04c3fSmrg				   sub_tmp, 0);
1159701e04c3fSmrg		if (r)
1159801e04c3fSmrg			return r;
11599af69d88dSmrg
1160001e04c3fSmrg		memset(&alu_src, 0, sizeof(alu_src));
1160101e04c3fSmrg		alu_src.sel = treg;
1160201e04c3fSmrg		alu_src.chan = 1;
1160301e04c3fSmrg		r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1160401e04c3fSmrg		if (r)
1160501e04c3fSmrg			return r;
11606af69d88dSmrg
11607af69d88dSmrg
1160801e04c3fSmrg		r = emit_u64add(ctx, ALU_OP2_SUB_INT,
1160901e04c3fSmrg				sub_tmp,
1161001e04c3fSmrg				tmp_num, 0,
1161101e04c3fSmrg				treg, 2);
1161201e04c3fSmrg		if (r)
1161301e04c3fSmrg			return r;
11614af69d88dSmrg
1161501e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
1161601e04c3fSmrg				   tmp_num, 0,
1161701e04c3fSmrg				   sub_tmp, 0,
1161801e04c3fSmrg				   0, 0);
1161901e04c3fSmrg		if (r)
1162001e04c3fSmrg			return r;
11621af69d88dSmrg
1162201e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP1_MOV,
1162301e04c3fSmrg				   tmp_num, 1,
1162401e04c3fSmrg				   sub_tmp, 1,
1162501e04c3fSmrg				   0, 0);
1162601e04c3fSmrg		if (r)
1162701e04c3fSmrg			return r;
11628af69d88dSmrg
1162901e04c3fSmrg		r = single_alu_op2(ctx, ALU_OP2_OR_INT,
1163001e04c3fSmrg				   tmp_num, 2,
1163101e04c3fSmrg				   tmp_num, 2,
1163201e04c3fSmrg				   V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
1163301e04c3fSmrg		if (r)
1163401e04c3fSmrg			return r;
11635af69d88dSmrg
1163601e04c3fSmrg		r = tgsi_endif(ctx);
11637af69d88dSmrg		if (r)
11638af69d88dSmrg			return r;
11639af69d88dSmrg	}
1164001e04c3fSmrg
1164101e04c3fSmrg	/* log2_denom is always <= 63, so manually peel the last loop
1164201e04c3fSmrg	 * iteration.
1164301e04c3fSmrg	 */
1164401e04c3fSmrg	uint64_t denom_shl = (uint64_t)alu_denom_lo.value;
1164501e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1164601e04c3fSmrg			   treg, 2,
1164701e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
1164801e04c3fSmrg			   0, 0);
1164901e04c3fSmrg	if (r)
1165001e04c3fSmrg		return r;
1165101e04c3fSmrg
1165201e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP1_MOV,
1165301e04c3fSmrg			   treg, 3,
1165401e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
1165501e04c3fSmrg			   0, 0);
1165601e04c3fSmrg	if (r)
1165701e04c3fSmrg		return r;
1165801e04c3fSmrg
1165901e04c3fSmrg	r = emit_u64sge(ctx, sub_tmp,
1166001e04c3fSmrg			tmp_num, 0,
1166101e04c3fSmrg			treg, 2);
1166201e04c3fSmrg	if (r)
1166301e04c3fSmrg		return r;
1166401e04c3fSmrg
1166501e04c3fSmrg	memset(&alu_src, 0, sizeof(alu_src));
1166601e04c3fSmrg	alu_src.sel = sub_tmp;
1166701e04c3fSmrg	alu_src.chan = 0;
1166801e04c3fSmrg	r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
1166901e04c3fSmrg	if (r)
1167001e04c3fSmrg		return r;
1167101e04c3fSmrg
1167201e04c3fSmrg	r = emit_u64add(ctx, ALU_OP2_SUB_INT,
1167301e04c3fSmrg			sub_tmp,
1167401e04c3fSmrg			tmp_num, 0,
1167501e04c3fSmrg			treg, 2);
1167601e04c3fSmrg	if (r)
1167701e04c3fSmrg		return r;
1167801e04c3fSmrg
1167901e04c3fSmrg	r = single_alu_op2(ctx, ALU_OP2_OR_INT,
1168001e04c3fSmrg			   tmp_num, 2,
1168101e04c3fSmrg			   tmp_num, 2,
1168201e04c3fSmrg			   V_SQ_ALU_SRC_LITERAL, 1U);
1168301e04c3fSmrg	if (r)
1168401e04c3fSmrg		return r;
1168501e04c3fSmrg	r = tgsi_endif(ctx);
1168601e04c3fSmrg	if (r)
1168701e04c3fSmrg		return r;
1168801e04c3fSmrg
1168901e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1169001e04c3fSmrg	alu.op = ALU_OP1_MOV;
1169101e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1169201e04c3fSmrg	alu.src[0].sel = tmp_num;
1169301e04c3fSmrg	alu.src[0].chan = 2;
1169401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1169501e04c3fSmrg	if (r)
1169601e04c3fSmrg		return r;
1169701e04c3fSmrg
1169801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1169901e04c3fSmrg	alu.op = ALU_OP1_MOV;
1170001e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1170101e04c3fSmrg	alu.src[0].sel = tmp_num;
1170201e04c3fSmrg	alu.src[0].chan = 3;
1170301e04c3fSmrg	alu.last = 1;
1170401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1170501e04c3fSmrg	if (r)
1170601e04c3fSmrg		return r;
117073464ebd5Sriastradh	return 0;
117083464ebd5Sriastradh}
117093464ebd5Sriastradh
/*
 * Emit the ALU sequence for the U64SNE opcode (this function is the handler
 * registered for TGSI_OPCODE_U64SNE in eg_shader_tgsi_instruction).
 *
 * Three instructions are appended to ctx->bc:
 *   1. SETNE_INT  temp_reg chan 0 <- src[0] chan 0, src[1] chan 0
 *   2. SETNE_INT  temp_reg chan 1 <- src[0] chan 1, src[1] chan 1
 *   3. OR_INT     Dst[0] chan 0   <- temp_reg chan 0, temp_reg chan 1
 * Presumably channels 0 and 1 carry the low and high dwords of each 64-bit
 * operand, so the OR result is non-zero when either half differs -- TODO
 * confirm against the TGSI 64-bit register layout.
 *
 * Returns 0 on success, otherwise the non-zero code handed back by
 * r600_bytecode_add_alu().
 */
1171001e04c3fSmrgstatic int egcm_u64sne(struct r600_shader_ctx *ctx)
1171101e04c3fSmrg{
1171201e04c3fSmrg	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1171301e04c3fSmrg	struct r600_bytecode_alu alu;
1171401e04c3fSmrg	int r;
1171501e04c3fSmrg	int treg = ctx->temp_reg;
1171601e04c3fSmrg
	/* Compare channel 0 of both sources; result goes to temp_reg chan 0. */
1171701e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1171801e04c3fSmrg	alu.op = ALU_OP2_SETNE_INT;
1171901e04c3fSmrg	alu.dst.sel = treg;
1172001e04c3fSmrg	alu.dst.chan = 0;
1172101e04c3fSmrg	alu.dst.write = 1;
1172201e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1172301e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
1172401e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1172501e04c3fSmrg	if (r)
1172601e04c3fSmrg		return r;
117273464ebd5Sriastradh
	/*
	 * Compare channel 1 of both sources; result goes to temp_reg chan 1.
	 * The last flag is set here but not on the first compare; presumably
	 * that lets both compares share one instruction group -- confirm.
	 */
1172801e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1172901e04c3fSmrg	alu.op = ALU_OP2_SETNE_INT;
1173001e04c3fSmrg	alu.dst.sel = treg;
1173101e04c3fSmrg	alu.dst.chan = 1;
1173201e04c3fSmrg	alu.dst.write = 1;
1173301e04c3fSmrg	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1173401e04c3fSmrg	r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
1173501e04c3fSmrg	alu.last = 1;
1173601e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1173701e04c3fSmrg	if (r)
1173801e04c3fSmrg		return r;
1173901e04c3fSmrg
	/* OR the two per-channel results into channel 0 of the destination. */
1174001e04c3fSmrg	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1174101e04c3fSmrg	alu.op = ALU_OP2_OR_INT;
1174201e04c3fSmrg	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1174301e04c3fSmrg	alu.src[0].sel = treg;
1174401e04c3fSmrg	alu.src[0].chan = 0;
1174501e04c3fSmrg	alu.src[1].sel = treg;
1174601e04c3fSmrg	alu.src[1].chan = 1;
1174701e04c3fSmrg	alu.last = 1;
1174801e04c3fSmrg	r = r600_bytecode_add_alu(ctx->bc, &alu);
1174901e04c3fSmrg	if (r)
1175001e04c3fSmrg		return r;
1175101e04c3fSmrg	return 0;
1175201e04c3fSmrg}
1175301e04c3fSmrg
/*
 * TGSI opcode dispatch table.  The r600_ prefix suggests it serves the
 * original R600-class chips -- confirm against the code that selects between
 * this table and its eg_ and cm_ siblings.
 *
 * The array is indexed by TGSI opcode through designated initializers.  Each
 * entry pairs an opcode constant (ALU_OP*, FETCH_OP*, CF_OP* or plain 0) with
 * a handler function.  Entries whose handler is tgsi_unsupported presumably
 * mark opcodes that this table does not implement.  Bare numeric designators
 * such as [21] fill slots for which no TGSI_OPCODE_* name is used here.
 *
 * NOTE(review): the eg_ table lists further named opcodes (F2D through
 * I64NEG) that have no initializer here; if their values fall below
 * TGSI_OPCODE_LAST those slots are zero-initialized in this table -- confirm
 * they can never be dispatched through it.
 */
1175401e04c3fSmrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1175501e04c3fSmrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_r600_arl},
1175601e04c3fSmrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
1175701e04c3fSmrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
1175801e04c3fSmrg
1175901e04c3fSmrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
1176001e04c3fSmrg
1176101e04c3fSmrg	[TGSI_OPCODE_RSQ]	= { ALU_OP0_NOP, tgsi_rsq},
1176201e04c3fSmrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
1176301e04c3fSmrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
1176401e04c3fSmrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
1176501e04c3fSmrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
1176601e04c3fSmrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1176701e04c3fSmrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1176801e04c3fSmrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
1176901e04c3fSmrg	/* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
1177001e04c3fSmrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
1177101e04c3fSmrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
	/* SLT is expressed as SETGT through the tgsi_op2_swap handler. */
1177201e04c3fSmrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
1177301e04c3fSmrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
1177401e04c3fSmrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
1177501e04c3fSmrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
1177601e04c3fSmrg	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
1177701e04c3fSmrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
1177801e04c3fSmrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
1177901e04c3fSmrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
1178001e04c3fSmrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
1178101e04c3fSmrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
1178201e04c3fSmrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
1178301e04c3fSmrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
1178401e04c3fSmrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
1178501e04c3fSmrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
1178601e04c3fSmrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
1178701e04c3fSmrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
1178801e04c3fSmrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
1178901e04c3fSmrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
1179001e04c3fSmrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_unsupported},
1179101e04c3fSmrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
1179201e04c3fSmrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
1179301e04c3fSmrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
1179401e04c3fSmrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
1179501e04c3fSmrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
1179601e04c3fSmrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
1179701e04c3fSmrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
1179801e04c3fSmrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1179901e04c3fSmrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1180001e04c3fSmrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1180101e04c3fSmrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
1180201e04c3fSmrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
1180301e04c3fSmrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
1180401e04c3fSmrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
1180501e04c3fSmrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
1180601e04c3fSmrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
1180701e04c3fSmrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
1180801e04c3fSmrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
1180901e04c3fSmrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
1181001e04c3fSmrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
1181101e04c3fSmrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
1181201e04c3fSmrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_unsupported},
1181301e04c3fSmrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1181401e04c3fSmrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1181501e04c3fSmrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1181601e04c3fSmrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
1181701e04c3fSmrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
1181801e04c3fSmrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_r600_arl},
1181901e04c3fSmrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
1182001e04c3fSmrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
1182101e04c3fSmrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
1182201e04c3fSmrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
1182301e04c3fSmrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
1182401e04c3fSmrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
1182501e04c3fSmrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1182601e04c3fSmrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
1182701e04c3fSmrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
1182801e04c3fSmrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1182901e04c3fSmrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1183001e04c3fSmrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
1183101e04c3fSmrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
1183201e04c3fSmrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
1183301e04c3fSmrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
1183401e04c3fSmrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
1183501e04c3fSmrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
1183601e04c3fSmrg	[TGSI_OPCODE_DDX_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
1183701e04c3fSmrg	[TGSI_OPCODE_DDY_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
1183801e04c3fSmrg	[81]			= { ALU_OP0_NOP, tgsi_unsupported},
1183901e04c3fSmrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
1184001e04c3fSmrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
1184101e04c3fSmrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
1184201e04c3fSmrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
1184301e04c3fSmrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
1184401e04c3fSmrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2_trans},
1184501e04c3fSmrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
1184601e04c3fSmrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
1184701e04c3fSmrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
1184801e04c3fSmrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
1184901e04c3fSmrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
1185001e04c3fSmrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
1185101e04c3fSmrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
1185201e04c3fSmrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1185301e04c3fSmrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
1185401e04c3fSmrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
1185501e04c3fSmrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
1185601e04c3fSmrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
1185701e04c3fSmrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
1185801e04c3fSmrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
1185901e04c3fSmrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	/*
	 * NOTE(review): unlike the other bare numeric slots, 103 is routed to
	 * tgsi_tex with the same fetch op as TXQ -- presumably a retired
	 * texture-query opcode kept working by number; confirm.
	 */
1186001e04c3fSmrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1186101e04c3fSmrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
1186201e04c3fSmrg	[TGSI_OPCODE_RESQ]	= { ALU_OP0_NOP, tgsi_unsupported},
1186301e04c3fSmrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
1186401e04c3fSmrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
1186501e04c3fSmrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
1186601e04c3fSmrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
1186701e04c3fSmrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
1186801e04c3fSmrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
1186901e04c3fSmrg	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
1187001e04c3fSmrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
1187101e04c3fSmrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
1187201e04c3fSmrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
1187301e04c3fSmrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
1187401e04c3fSmrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
1187501e04c3fSmrg	[TGSI_OPCODE_DFMA]	= { ALU_OP0_NOP, tgsi_unsupported},
1187601e04c3fSmrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
1187701e04c3fSmrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
1187801e04c3fSmrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
1187901e04c3fSmrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
1188001e04c3fSmrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
1188101e04c3fSmrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
1188201e04c3fSmrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2_trans},
1188301e04c3fSmrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
1188401e04c3fSmrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
1188501e04c3fSmrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
1188601e04c3fSmrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
1188701e04c3fSmrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
1188801e04c3fSmrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
1188901e04c3fSmrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
1189001e04c3fSmrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
1189101e04c3fSmrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
1189201e04c3fSmrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
1189301e04c3fSmrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
1189401e04c3fSmrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
1189501e04c3fSmrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2_trans},
1189601e04c3fSmrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
1189701e04c3fSmrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2_swap},
1189801e04c3fSmrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
1189901e04c3fSmrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
1190001e04c3fSmrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
1190101e04c3fSmrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
1190201e04c3fSmrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
1190301e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
1190401e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
1190501e04c3fSmrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
1190601e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
1190701e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
1190801e04c3fSmrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
1190901e04c3fSmrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
1191001e04c3fSmrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
1191101e04c3fSmrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
1191201e04c3fSmrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
1191301e04c3fSmrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
1191401e04c3fSmrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_r600_arl},
1191501e04c3fSmrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
1191601e04c3fSmrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
1191701e04c3fSmrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
1191801e04c3fSmrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
1191901e04c3fSmrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
1192001e04c3fSmrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
1192101e04c3fSmrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
1192201e04c3fSmrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
1192301e04c3fSmrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_NOP, tgsi_unsupported},
1192401e04c3fSmrg	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
1192501e04c3fSmrg	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
1192601e04c3fSmrg	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
1192701e04c3fSmrg	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
1192801e04c3fSmrg	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
1192901e04c3fSmrg	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
1193001e04c3fSmrg	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
1193101e04c3fSmrg	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
1193201e04c3fSmrg	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
1193301e04c3fSmrg	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
1193401e04c3fSmrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
1193501e04c3fSmrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1193601e04c3fSmrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1193701e04c3fSmrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
1193801e04c3fSmrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	/*
	 * The entries from TG4 through UMSB name a hardware op but are still
	 * routed to tgsi_unsupported in this table.
	 */
1193901e04c3fSmrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_unsupported},
1194001e04c3fSmrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_unsupported},
1194101e04c3fSmrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_unsupported},
1194201e04c3fSmrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_unsupported},
1194301e04c3fSmrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_unsupported},
1194401e04c3fSmrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_unsupported},
1194501e04c3fSmrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_unsupported},
1194601e04c3fSmrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_unsupported},
1194701e04c3fSmrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_unsupported},
1194801e04c3fSmrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_unsupported},
1194901e04c3fSmrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_unsupported},
1195001e04c3fSmrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_unsupported},
1195101e04c3fSmrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_unsupported},
	/* The LAST designator fixes the array length at TGSI_OPCODE_LAST + 1. */
1195201e04c3fSmrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
119533464ebd5Sriastradh};
119543464ebd5Sriastradh
/*
 * TGSI opcode dispatch table.  The eg_ prefix suggests it serves
 * Evergreen-class chips -- confirm against the code that selects between this
 * table and its r600_ and cm_ siblings.
 *
 * The array is indexed by TGSI opcode through designated initializers.  Each
 * entry pairs an opcode constant (ALU_OP*, FETCH_OP*, CF_OP*, V_RAT_INST* or
 * plain 0) with a handler function; egcm_u64sne, defined earlier in this
 * file, is one such handler and takes a struct r600_shader_ctx pointer.
 * Entries whose handler is tgsi_unsupported presumably mark opcodes that this
 * table does not implement.
 *
 * Compared with r600_shader_tgsi_instruction, this table wires real handlers
 * for FMA, CLOCK, PK2H, UP2H, the *_FINE derivatives, RESQ, MEMBAR, LOAD,
 * STORE, BARRIER, the ATOM* opcodes, TG4, LODQ, the bitfield opcodes and the
 * INTERP_* opcodes, and adds the double-precision and 64-bit integer entries
 * at the end.
 */
1195501e04c3fSmrgstatic const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
1195601e04c3fSmrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
1195701e04c3fSmrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
1195801e04c3fSmrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
1195901e04c3fSmrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
1196001e04c3fSmrg	[TGSI_OPCODE_RSQ]	= { ALU_OP0_NOP, tgsi_rsq},
1196101e04c3fSmrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
1196201e04c3fSmrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
1196301e04c3fSmrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
1196401e04c3fSmrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
1196501e04c3fSmrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1196601e04c3fSmrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1196701e04c3fSmrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
1196801e04c3fSmrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
1196901e04c3fSmrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
1197001e04c3fSmrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
1197101e04c3fSmrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
1197201e04c3fSmrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
1197301e04c3fSmrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
1197401e04c3fSmrg	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
1197501e04c3fSmrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
1197601e04c3fSmrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
1197701e04c3fSmrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
1197801e04c3fSmrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
1197901e04c3fSmrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
1198001e04c3fSmrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
1198101e04c3fSmrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
1198201e04c3fSmrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
1198301e04c3fSmrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
1198401e04c3fSmrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
1198501e04c3fSmrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
1198601e04c3fSmrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
1198701e04c3fSmrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
1198801e04c3fSmrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
1198901e04c3fSmrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
1199001e04c3fSmrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
1199101e04c3fSmrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
1199201e04c3fSmrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
1199301e04c3fSmrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
1199401e04c3fSmrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
1199501e04c3fSmrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
1199601e04c3fSmrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1199701e04c3fSmrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1199801e04c3fSmrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1199901e04c3fSmrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
1200001e04c3fSmrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
1200101e04c3fSmrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
1200201e04c3fSmrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
1200301e04c3fSmrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
1200401e04c3fSmrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
1200501e04c3fSmrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
1200601e04c3fSmrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
1200701e04c3fSmrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
1200801e04c3fSmrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
1200901e04c3fSmrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
1201001e04c3fSmrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
1201101e04c3fSmrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1201201e04c3fSmrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1201301e04c3fSmrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1201401e04c3fSmrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
1201501e04c3fSmrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
1201601e04c3fSmrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
1201701e04c3fSmrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
1201801e04c3fSmrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
1201901e04c3fSmrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
1202001e04c3fSmrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
1202101e04c3fSmrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
1202201e04c3fSmrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
1202301e04c3fSmrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1202401e04c3fSmrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
1202501e04c3fSmrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
1202601e04c3fSmrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1202701e04c3fSmrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1202801e04c3fSmrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
1202901e04c3fSmrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
1203001e04c3fSmrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
1203101e04c3fSmrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
1203201e04c3fSmrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
1203301e04c3fSmrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	/* The fine derivatives reuse the same fetch ops as DDX and DDY above. */
1203401e04c3fSmrg	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
1203501e04c3fSmrg	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	/*
	 * NOTE(review): slot 81 appears to have no initializer in this table
	 * (r600_shader_tgsi_instruction lists [81] explicitly between DDY_FINE
	 * and [82]).  An omitted element is zero-initialized, which would mean
	 * a null handler -- confirm opcode 81 can never be dispatched.
	 */
1203601e04c3fSmrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
1203701e04c3fSmrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
1203801e04c3fSmrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
1203901e04c3fSmrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
1204001e04c3fSmrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
1204101e04c3fSmrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
1204201e04c3fSmrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
1204301e04c3fSmrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
1204401e04c3fSmrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
1204501e04c3fSmrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
1204601e04c3fSmrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
1204701e04c3fSmrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
1204801e04c3fSmrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
1204901e04c3fSmrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1205001e04c3fSmrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
1205101e04c3fSmrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
1205201e04c3fSmrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
1205301e04c3fSmrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
1205401e04c3fSmrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
1205501e04c3fSmrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
1205601e04c3fSmrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	/*
	 * NOTE(review): unlike the other bare numeric slots, 103 is routed to
	 * tgsi_tex with the same fetch op as TXQ -- presumably a retired
	 * texture-query opcode kept working by number; confirm.
	 */
1205701e04c3fSmrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1205801e04c3fSmrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
1205901e04c3fSmrg	[TGSI_OPCODE_RESQ]     	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq},
1206001e04c3fSmrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
1206101e04c3fSmrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
1206201e04c3fSmrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
1206301e04c3fSmrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
1206401e04c3fSmrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
1206501e04c3fSmrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	/* MEMBAR shares its op and handler with the BARRIER entry further down. */
1206601e04c3fSmrg	[TGSI_OPCODE_MEMBAR]    = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
1206701e04c3fSmrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
1206801e04c3fSmrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
1206901e04c3fSmrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
1207001e04c3fSmrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
1207101e04c3fSmrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
1207201e04c3fSmrg	/* Refer below for TGSI_OPCODE_DFMA */
1207301e04c3fSmrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_f2i},
1207401e04c3fSmrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
1207501e04c3fSmrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
1207601e04c3fSmrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
1207701e04c3fSmrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
1207801e04c3fSmrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
1207901e04c3fSmrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
1208001e04c3fSmrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
1208101e04c3fSmrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
1208201e04c3fSmrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
1208301e04c3fSmrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
1208401e04c3fSmrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
1208501e04c3fSmrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
1208601e04c3fSmrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
1208701e04c3fSmrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
1208801e04c3fSmrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
1208901e04c3fSmrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
1209001e04c3fSmrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
1209101e04c3fSmrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
1209201e04c3fSmrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
1209301e04c3fSmrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
1209401e04c3fSmrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
1209501e04c3fSmrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
1209601e04c3fSmrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
1209701e04c3fSmrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
1209801e04c3fSmrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
1209901e04c3fSmrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
1210001e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
1210101e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
1210201e04c3fSmrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
1210301e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
1210401e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
1210501e04c3fSmrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
1210601e04c3fSmrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
1210701e04c3fSmrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
1210801e04c3fSmrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
1210901e04c3fSmrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
1211001e04c3fSmrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
1211101e04c3fSmrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
1211201e04c3fSmrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
1211301e04c3fSmrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
1211401e04c3fSmrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
1211501e04c3fSmrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
1211601e04c3fSmrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
1211701e04c3fSmrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
1211801e04c3fSmrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
1211901e04c3fSmrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
1212001e04c3fSmrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
	/* Atomics: the first member is a V_RAT_INST_*_RTN constant rather than an ALU op. */
1212101e04c3fSmrg	[TGSI_OPCODE_ATOMUADD]	= { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
1212201e04c3fSmrg	[TGSI_OPCODE_ATOMXCHG]	= { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
1212301e04c3fSmrg	[TGSI_OPCODE_ATOMCAS]	= { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
1212401e04c3fSmrg	[TGSI_OPCODE_ATOMAND]	= { V_RAT_INST_AND_RTN, tgsi_atomic_op},
1212501e04c3fSmrg	[TGSI_OPCODE_ATOMOR]	= { V_RAT_INST_OR_RTN, tgsi_atomic_op},
1212601e04c3fSmrg	[TGSI_OPCODE_ATOMXOR]	= { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
1212701e04c3fSmrg	[TGSI_OPCODE_ATOMUMIN]	= { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
1212801e04c3fSmrg	[TGSI_OPCODE_ATOMUMAX]	= { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
1212901e04c3fSmrg	[TGSI_OPCODE_ATOMIMIN]	= { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
1213001e04c3fSmrg	[TGSI_OPCODE_ATOMIMAX]	= { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
1213101e04c3fSmrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
1213201e04c3fSmrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1213301e04c3fSmrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1213401e04c3fSmrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
1213501e04c3fSmrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
1213601e04c3fSmrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
1213701e04c3fSmrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
1213801e04c3fSmrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_bfe},
1213901e04c3fSmrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_bfe},
1214001e04c3fSmrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
1214101e04c3fSmrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
1214201e04c3fSmrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
1214301e04c3fSmrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
1214401e04c3fSmrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
1214501e04c3fSmrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
1214601e04c3fSmrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
1214701e04c3fSmrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
1214801e04c3fSmrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	/* Double-precision opcodes; several are routed to cayman_* handlers. */
1214901e04c3fSmrg	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
1215001e04c3fSmrg	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
1215101e04c3fSmrg	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
1215201e04c3fSmrg	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
1215301e04c3fSmrg	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
1215401e04c3fSmrg	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
1215501e04c3fSmrg	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
1215601e04c3fSmrg	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
1215701e04c3fSmrg	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
1215801e04c3fSmrg	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
1215901e04c3fSmrg	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
1216001e04c3fSmrg	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
1216101e04c3fSmrg	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
1216201e04c3fSmrg	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
1216301e04c3fSmrg	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
	/* DMAD and DFMA map to the same FMA_64 op and handler. */
1216401e04c3fSmrg	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
1216501e04c3fSmrg	[TGSI_OPCODE_DFMA]	= { ALU_OP3_FMA_64, tgsi_op3_64},
1216601e04c3fSmrg	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
1216701e04c3fSmrg	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
1216801e04c3fSmrg	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
1216901e04c3fSmrg	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
1217001e04c3fSmrg	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
1217101e04c3fSmrg	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
1217201e04c3fSmrg	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
1217301e04c3fSmrg	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
	/* 64-bit integer opcodes, each lowered by a dedicated egcm_* handler. */
1217401e04c3fSmrg	[TGSI_OPCODE_U64SNE]    = { ALU_OP0_NOP, egcm_u64sne },
1217501e04c3fSmrg	[TGSI_OPCODE_U64ADD]    = { ALU_OP0_NOP, egcm_u64add },
1217601e04c3fSmrg	[TGSI_OPCODE_U64MUL]    = { ALU_OP0_NOP, egcm_u64mul },
1217701e04c3fSmrg	[TGSI_OPCODE_U64DIV]    = { ALU_OP0_NOP, egcm_u64div },
121787ec681f3Smrg	[TGSI_OPCODE_I64NEG]    = { ALU_OP0_NOP, egcm_i64neg },
	/* The LAST designator fixes the array length at TGSI_OPCODE_LAST + 1. */
1217901e04c3fSmrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
121803464ebd5Sriastradh};
121813464ebd5Sriastradh
/*
 * TGSI opcode dispatch table for the "cm" variant (presumably Cayman: several
 * entries below use cayman_* handlers -- confirm against the code that selects
 * this table).
 *
 * The array is indexed by TGSI opcode through designated initializers.  Each
 * entry pairs a constant (ALU_OP*, FETCH_OP*, CF_OP*, V_RAT_INST_*, or a
 * plain 0) with what appears to be the function that handles the instruction.
 * The member names, and how a handler consumes the constant, are defined by
 * struct r600_shader_tgsi_instruction and by the handlers themselves, neither
 * of which is visible here.
 *
 * Slots designated by a raw number instead of a TGSI_OPCODE_* name are routed
 * to tgsi_unsupported, with the single exception of [103].  Any index that has
 * no designator at all is zero-initialized by the compiler.
 *
 * The "less than" style compares (SLT, SLE, ISLT, USLT, FSLT) are expressed
 * with the SETGT/SETGE family of constants plus tgsi_op2_swap -- presumably the
 * handler exchanges the two sources; confirm in tgsi_op2_swap.
 */
1218201e04c3fSmrgstatic const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
1218301e04c3fSmrg	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
1218401e04c3fSmrg	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
1218501e04c3fSmrg	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
1218601e04c3fSmrg	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
1218701e04c3fSmrg	[TGSI_OPCODE_RSQ]	= { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
1218801e04c3fSmrg	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
1218901e04c3fSmrg	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
1219001e04c3fSmrg	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL_IEEE, tgsi_op2},
1219101e04c3fSmrg	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
	/* DP2, DP3 and DP4 all use the same DOT4 constant and the tgsi_dp handler. */
1219201e04c3fSmrg	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1219301e04c3fSmrg	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1219401e04c3fSmrg	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
1219501e04c3fSmrg	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
1219601e04c3fSmrg	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
1219701e04c3fSmrg	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
1219801e04c3fSmrg	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
1219901e04c3fSmrg	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
1220001e04c3fSmrg	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
1220101e04c3fSmrg	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
1220201e04c3fSmrg	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
1220301e04c3fSmrg	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
1220401e04c3fSmrg	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
1220501e04c3fSmrg	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
1220601e04c3fSmrg	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
1220701e04c3fSmrg	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
1220801e04c3fSmrg	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
1220901e04c3fSmrg	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
1221001e04c3fSmrg	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
1221101e04c3fSmrg	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
1221201e04c3fSmrg	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
1221301e04c3fSmrg	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
1221401e04c3fSmrg	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
1221501e04c3fSmrg	[TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
1221601e04c3fSmrg	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
1221701e04c3fSmrg	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
1221801e04c3fSmrg	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
1221901e04c3fSmrg	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
1222001e04c3fSmrg	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
1222101e04c3fSmrg	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
1222201e04c3fSmrg	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
1222301e04c3fSmrg	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1222401e04c3fSmrg	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1222501e04c3fSmrg	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1222601e04c3fSmrg	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
1222701e04c3fSmrg	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
1222801e04c3fSmrg	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
1222901e04c3fSmrg	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
1223001e04c3fSmrg	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, cayman_trig},
1223101e04c3fSmrg	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
1223201e04c3fSmrg	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
1223301e04c3fSmrg	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
1223401e04c3fSmrg	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
1223501e04c3fSmrg	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
1223601e04c3fSmrg	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
1223701e04c3fSmrg	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
1223801e04c3fSmrg	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
1223901e04c3fSmrg	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
1224001e04c3fSmrg	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
1224101e04c3fSmrg	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
1224201e04c3fSmrg	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
1224301e04c3fSmrg	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
1224401e04c3fSmrg	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
1224501e04c3fSmrg	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
1224601e04c3fSmrg	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
1224701e04c3fSmrg	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
1224801e04c3fSmrg	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
1224901e04c3fSmrg	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
1225001e04c3fSmrg	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1225101e04c3fSmrg	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
1225201e04c3fSmrg	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
1225301e04c3fSmrg	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
1225401e04c3fSmrg	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1225501e04c3fSmrg	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
1225601e04c3fSmrg	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
1225701e04c3fSmrg	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
1225801e04c3fSmrg	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
1225901e04c3fSmrg	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
1226001e04c3fSmrg	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	/* The *_FINE derivative opcodes use the same entries as plain DDX/DDY. */
1226101e04c3fSmrg	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
1226201e04c3fSmrg	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
1226301e04c3fSmrg	[82]			= { ALU_OP0_NOP, tgsi_unsupported},
1226401e04c3fSmrg	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
1226501e04c3fSmrg	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2},
1226601e04c3fSmrg	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
1226701e04c3fSmrg	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
1226801e04c3fSmrg	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
1226901e04c3fSmrg	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
1227001e04c3fSmrg	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
1227101e04c3fSmrg	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
1227201e04c3fSmrg	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
1227301e04c3fSmrg	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
1227401e04c3fSmrg	[93]			= { ALU_OP0_NOP, tgsi_unsupported},
1227501e04c3fSmrg	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
1227601e04c3fSmrg	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1227701e04c3fSmrg	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
1227801e04c3fSmrg	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
1227901e04c3fSmrg	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
1228001e04c3fSmrg	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
1228101e04c3fSmrg	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
1228201e04c3fSmrg	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
1228301e04c3fSmrg	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	/*
	 * Slot 103 is the only raw-numbered entry with a real handler; it
	 * carries the same pair as TGSI_OPCODE_TXQ above.
	 */
1228401e04c3fSmrg	[103]			= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
1228501e04c3fSmrg	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
1228601e04c3fSmrg	[TGSI_OPCODE_RESQ]     	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq},
1228701e04c3fSmrg	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
1228801e04c3fSmrg	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
1228901e04c3fSmrg	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
1229001e04c3fSmrg	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
1229101e04c3fSmrg	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	/*
	 * NOTE(review): FSNE goes through tgsi_op2_swap while SNE, USNE and
	 * FSEQ use tgsi_op2.  Likely harmless if the swap only exchanges the
	 * two sources of a symmetric compare -- confirm in tgsi_op2_swap.
	 */
1229201e04c3fSmrg	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	/* MEMBAR shares its entry with TGSI_OPCODE_BARRIER further down. */
1229301e04c3fSmrg	[TGSI_OPCODE_MEMBAR]    = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
1229401e04c3fSmrg	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
1229501e04c3fSmrg	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
1229601e04c3fSmrg	[115]			= { ALU_OP0_NOP, tgsi_unsupported},
1229701e04c3fSmrg	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
1229801e04c3fSmrg	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
1229901e04c3fSmrg	/* Refer below for TGSI_OPCODE_DFMA */
1230001e04c3fSmrg	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2},
1230101e04c3fSmrg	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
1230201e04c3fSmrg	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
1230301e04c3fSmrg	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
1230401e04c3fSmrg	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
1230501e04c3fSmrg	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
1230601e04c3fSmrg	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
1230701e04c3fSmrg	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
1230801e04c3fSmrg	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2},
1230901e04c3fSmrg	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2},
1231001e04c3fSmrg	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
1231101e04c3fSmrg	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
1231201e04c3fSmrg	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
1231301e04c3fSmrg	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
1231401e04c3fSmrg	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
1231501e04c3fSmrg	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
1231601e04c3fSmrg	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_INT, cayman_mul_int_instr},
1231701e04c3fSmrg	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
1231801e04c3fSmrg	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
1231901e04c3fSmrg	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
1232001e04c3fSmrg	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
1232101e04c3fSmrg	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
1232201e04c3fSmrg	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
1232301e04c3fSmrg	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
1232401e04c3fSmrg	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
1232501e04c3fSmrg	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	/*
	 * SAMPLE through SAMPLE_INFO (including GATHER4 and SVIEWINFO) are all
	 * routed to tgsi_unsupported, with a literal 0 as the constant.
	 */
1232601e04c3fSmrg	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
1232701e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
1232801e04c3fSmrg	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
1232901e04c3fSmrg	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
1233001e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
1233101e04c3fSmrg	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
1233201e04c3fSmrg	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
1233301e04c3fSmrg	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
1233401e04c3fSmrg	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
1233501e04c3fSmrg	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
1233601e04c3fSmrg	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
1233701e04c3fSmrg	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
1233801e04c3fSmrg	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
1233901e04c3fSmrg	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
1234001e04c3fSmrg	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
1234101e04c3fSmrg	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
1234201e04c3fSmrg	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
1234301e04c3fSmrg	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
1234401e04c3fSmrg	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
1234501e04c3fSmrg	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
1234601e04c3fSmrg	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
1234701e04c3fSmrg	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
	/* Atomics: the constant is a V_RAT_INST_*_RTN value, all via tgsi_atomic_op. */
1234801e04c3fSmrg	[TGSI_OPCODE_ATOMUADD]	= { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
1234901e04c3fSmrg	[TGSI_OPCODE_ATOMXCHG]	= { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
1235001e04c3fSmrg	[TGSI_OPCODE_ATOMCAS]	= { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
1235101e04c3fSmrg	[TGSI_OPCODE_ATOMAND]	= { V_RAT_INST_AND_RTN, tgsi_atomic_op},
1235201e04c3fSmrg	[TGSI_OPCODE_ATOMOR]	= { V_RAT_INST_OR_RTN, tgsi_atomic_op},
1235301e04c3fSmrg	[TGSI_OPCODE_ATOMXOR]	= { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
1235401e04c3fSmrg	[TGSI_OPCODE_ATOMUMIN]	= { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
1235501e04c3fSmrg	[TGSI_OPCODE_ATOMUMAX]	= { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
1235601e04c3fSmrg	[TGSI_OPCODE_ATOMIMIN]	= { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
1235701e04c3fSmrg	[TGSI_OPCODE_ATOMIMAX]	= { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
1235801e04c3fSmrg	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
1235901e04c3fSmrg	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
1236001e04c3fSmrg	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
1236101e04c3fSmrg	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, cayman_mul_int_instr},
1236201e04c3fSmrg	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
1236301e04c3fSmrg	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
1236401e04c3fSmrg	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
1236501e04c3fSmrg	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_bfe},
1236601e04c3fSmrg	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_bfe},
1236701e04c3fSmrg	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
1236801e04c3fSmrg	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
1236901e04c3fSmrg	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
1237001e04c3fSmrg	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
1237101e04c3fSmrg	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
1237201e04c3fSmrg	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
1237301e04c3fSmrg	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
1237401e04c3fSmrg	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
1237501e04c3fSmrg	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	/* Double-precision opcodes: the *_64 constants with 64-bit handler variants. */
1237601e04c3fSmrg	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
1237701e04c3fSmrg	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
1237801e04c3fSmrg	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
1237901e04c3fSmrg	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
1238001e04c3fSmrg	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
1238101e04c3fSmrg	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
1238201e04c3fSmrg	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
1238301e04c3fSmrg	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
1238401e04c3fSmrg	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
1238501e04c3fSmrg	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
1238601e04c3fSmrg	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
1238701e04c3fSmrg	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
1238801e04c3fSmrg	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
1238901e04c3fSmrg	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
1239001e04c3fSmrg	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
	/* DMAD is given the same FMA_64 entry as DFMA. */
1239101e04c3fSmrg	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
1239201e04c3fSmrg	[TGSI_OPCODE_DFMA]	= { ALU_OP3_FMA_64, tgsi_op3_64},
1239301e04c3fSmrg	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
1239401e04c3fSmrg	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
1239501e04c3fSmrg	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
1239601e04c3fSmrg	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
1239701e04c3fSmrg	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
1239801e04c3fSmrg	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
1239901e04c3fSmrg	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
1240001e04c3fSmrg	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
1240101e04c3fSmrg	[TGSI_OPCODE_U64SNE]    = { ALU_OP0_NOP, egcm_u64sne },
1240201e04c3fSmrg	[TGSI_OPCODE_U64ADD]    = { ALU_OP0_NOP, egcm_u64add },
1240301e04c3fSmrg	[TGSI_OPCODE_U64MUL]    = { ALU_OP0_NOP, egcm_u64mul },
1240401e04c3fSmrg	[TGSI_OPCODE_U64DIV]    = { ALU_OP0_NOP, egcm_u64div },
1240501e04c3fSmrg	[TGSI_OPCODE_I64NEG]    = { ALU_OP0_NOP, egcm_i64neg },
	/*
	 * Final designator.  Assuming TGSI_OPCODE_LAST is the largest index
	 * used above, it fixes the array length at TGSI_OPCODE_LAST + 1.
	 */
1240601e04c3fSmrg	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
124073464ebd5Sriastradh};
12408