/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
223464ebd5Sriastradh */ 23af69d88dSmrg#include "r600_sq.h" 24af69d88dSmrg#include "r600_formats.h" 25af69d88dSmrg#include "r600_opcodes.h" 26af69d88dSmrg#include "r600_shader.h" 277ec681f3Smrg#include "r600_dump.h" 28af69d88dSmrg#include "r600d.h" 297ec681f3Smrg#include "sfn/sfn_nir.h" 30af69d88dSmrg 31af69d88dSmrg#include "sb/sb_public.h" 32af69d88dSmrg 333464ebd5Sriastradh#include "pipe/p_shader_tokens.h" 343464ebd5Sriastradh#include "tgsi/tgsi_info.h" 353464ebd5Sriastradh#include "tgsi/tgsi_parse.h" 363464ebd5Sriastradh#include "tgsi/tgsi_scan.h" 373464ebd5Sriastradh#include "tgsi/tgsi_dump.h" 387ec681f3Smrg#include "tgsi/tgsi_from_mesa.h" 397ec681f3Smrg#include "nir/tgsi_to_nir.h" 407ec681f3Smrg#include "nir/nir_to_tgsi_info.h" 417ec681f3Smrg#include "compiler/nir/nir.h" 4201e04c3fSmrg#include "util/u_bitcast.h" 43af69d88dSmrg#include "util/u_memory.h" 44af69d88dSmrg#include "util/u_math.h" 453464ebd5Sriastradh#include <stdio.h> 463464ebd5Sriastradh#include <errno.h> 473464ebd5Sriastradh 4801e04c3fSmrg/* CAYMAN notes 493464ebd5SriastradhWhy CAYMAN got loops for lots of instructions is explained here. 503464ebd5Sriastradh 513464ebd5Sriastradh-These 8xx t-slot only ops are implemented in all vector slots. 523464ebd5SriastradhMUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 5301e04c3fSmrgThese 8xx t-slot only opcodes become vector ops, with all four 5401e04c3fSmrgslots expecting the arguments on sources a and b. Result is 553464ebd5Sriastradhbroadcast to all channels. 5601e04c3fSmrgMULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 5701e04c3fSmrgThese 8xx t-slot only opcodes become vector ops in the z, y, and 583464ebd5Sriastradhx slots. 
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
SQRT_IEEE/_64
SIN/COS
The w slot may have an independent co-issued operation, or if the
result is required to be in the w slot, the opcode above may be
issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/

/* Contents of r0 on entry to various shaders

 VS - .x = VertexID
      .y = RelVertexID (??)
      .w = InstanceID

 GS - r0.xyw, r1.xyz = per-vertex offsets
      r0.z = PrimitiveID

 TCS - .x = PatchID
       .y = RelPatchID (??)
       .z = InvocationID
       .w = tess factor base.

 TES - .x = TessCoord.x
     - .y = TessCoord.y
     - .z = RelPatchID (??)
8601e04c3fSmrg - .w = PrimitiveID 8701e04c3fSmrg 8801e04c3fSmrg PS - face_gpr.z = SampleMask 8901e04c3fSmrg face_gpr.w = SampleID 9001e04c3fSmrg*/ 9101e04c3fSmrg#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 92af69d88dSmrgstatic int r600_shader_from_tgsi(struct r600_context *rctx, 93af69d88dSmrg struct r600_pipe_shader *pipeshader, 9401e04c3fSmrg union r600_shader_key key); 95af69d88dSmrg 96af69d88dSmrgstatic void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 97af69d88dSmrg int size, unsigned comp_mask) { 98af69d88dSmrg 99af69d88dSmrg if (!size) 100af69d88dSmrg return; 101af69d88dSmrg 102af69d88dSmrg if (ps->num_arrays == ps->max_arrays) { 103af69d88dSmrg ps->max_arrays += 64; 104af69d88dSmrg ps->arrays = realloc(ps->arrays, ps->max_arrays * 105af69d88dSmrg sizeof(struct r600_shader_array)); 106af69d88dSmrg } 107af69d88dSmrg 108af69d88dSmrg int n = ps->num_arrays; 109af69d88dSmrg ++ps->num_arrays; 1103464ebd5Sriastradh 111af69d88dSmrg ps->arrays[n].comp_mask = comp_mask; 112af69d88dSmrg ps->arrays[n].gpr_start = start_gpr; 113af69d88dSmrg ps->arrays[n].gpr_count = size; 114af69d88dSmrg} 115af69d88dSmrg 116af69d88dSmrgstatic void r600_dump_streamout(struct pipe_stream_output_info *so) 1173464ebd5Sriastradh{ 118af69d88dSmrg unsigned i; 1193464ebd5Sriastradh 120af69d88dSmrg fprintf(stderr, "STREAMOUT\n"); 121af69d88dSmrg for (i = 0; i < so->num_outputs; i++) { 122af69d88dSmrg unsigned mask = ((1 << so->output[i].num_components) - 1) << 123af69d88dSmrg so->output[i].start_component; 12401e04c3fSmrg fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 12501e04c3fSmrg i, 12601e04c3fSmrg so->output[i].stream, 12701e04c3fSmrg so->output[i].output_buffer, 128af69d88dSmrg so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 129af69d88dSmrg so->output[i].register_index, 130af69d88dSmrg mask & 1 ? "x" : "", 131af69d88dSmrg mask & 2 ? "y" : "", 132af69d88dSmrg mask & 4 ? 
"z" : "", 133af69d88dSmrg mask & 8 ? "w" : "", 134af69d88dSmrg so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); 1353464ebd5Sriastradh } 1363464ebd5Sriastradh} 1373464ebd5Sriastradh 138af69d88dSmrgstatic int store_shader(struct pipe_context *ctx, 139af69d88dSmrg struct r600_pipe_shader *shader) 1403464ebd5Sriastradh{ 141af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 142af69d88dSmrg uint32_t *ptr, i; 1433464ebd5Sriastradh 1443464ebd5Sriastradh if (shader->bo == NULL) { 145af69d88dSmrg shader->bo = (struct r600_resource*) 14601e04c3fSmrg pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 1473464ebd5Sriastradh if (shader->bo == NULL) { 1483464ebd5Sriastradh return -ENOMEM; 1493464ebd5Sriastradh } 1509f464c52Smaya ptr = r600_buffer_map_sync_with_rings( 1519f464c52Smaya &rctx->b, shader->bo, 1527ec681f3Smrg PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); 1533464ebd5Sriastradh if (R600_BIG_ENDIAN) { 154af69d88dSmrg for (i = 0; i < shader->shader.bc.ndw; ++i) { 155af69d88dSmrg ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 1563464ebd5Sriastradh } 1573464ebd5Sriastradh } else { 158af69d88dSmrg memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 1593464ebd5Sriastradh } 1607ec681f3Smrg rctx->b.ws->buffer_unmap(rctx->b.ws, shader->bo->buf); 1613464ebd5Sriastradh } 162af69d88dSmrg 1633464ebd5Sriastradh return 0; 1643464ebd5Sriastradh} 1653464ebd5Sriastradh 1667ec681f3Smrgextern const struct nir_shader_compiler_options r600_nir_options; 1677ec681f3Smrgstatic int nshader = 0; 168af69d88dSmrgint r600_pipe_shader_create(struct pipe_context *ctx, 169af69d88dSmrg struct r600_pipe_shader *shader, 17001e04c3fSmrg union r600_shader_key key) 1713464ebd5Sriastradh{ 172af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 173af69d88dSmrg struct r600_pipe_shader_selector *sel = shader->selector; 1743464ebd5Sriastradh int r; 1757ec681f3Smrg struct r600_screen 
*rscreen = (struct r600_screen *)ctx->screen; 1767ec681f3Smrg 1777ec681f3Smrg int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ? 1787ec681f3Smrg tgsi_get_processor_type(sel->tokens): 1797ec681f3Smrg pipe_shader_type_from_mesa(sel->nir->info.stage); 1807ec681f3Smrg 1817ec681f3Smrg bool dump = r600_can_dump_shader(&rctx->screen->b, processor); 1827ec681f3Smrg unsigned use_sb = !(rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR)) || 1837ec681f3Smrg (rctx->screen->b.debug_flags & DBG_NIR_SB); 18401e04c3fSmrg unsigned sb_disasm; 18501e04c3fSmrg unsigned export_shader; 1867ec681f3Smrg 187af69d88dSmrg shader->shader.bc.isa = rctx->isa; 1887ec681f3Smrg 1897ec681f3Smrg if (!(rscreen->b.debug_flags & DBG_NIR_PREFERRED)) { 1907ec681f3Smrg assert(sel->ir_type == PIPE_SHADER_IR_TGSI); 1917ec681f3Smrg r = r600_shader_from_tgsi(rctx, shader, key); 1927ec681f3Smrg if (r) { 1937ec681f3Smrg R600_ERR("translation from TGSI failed !\n"); 1947ec681f3Smrg goto error; 1957ec681f3Smrg } 1967ec681f3Smrg } else { 1977ec681f3Smrg if (sel->ir_type == PIPE_SHADER_IR_TGSI) { 1987ec681f3Smrg sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true); 1997ec681f3Smrg const nir_shader_compiler_options *nir_options = 2007ec681f3Smrg (const nir_shader_compiler_options *) 2017ec681f3Smrg ctx->screen->get_compiler_options(ctx->screen, 2027ec681f3Smrg PIPE_SHADER_IR_NIR, 2037ec681f3Smrg shader->shader.processor_type); 2047ec681f3Smrg /* Lower int64 ops because we have some r600 build-in shaders that use it */ 2057ec681f3Smrg if (nir_options->lower_int64_options) { 2067ec681f3Smrg NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); 2077ec681f3Smrg NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL); 2087ec681f3Smrg NIR_PASS_V(sel->nir, nir_lower_int64); 2097ec681f3Smrg NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL); 2107ec681f3Smrg } 2117ec681f3Smrg NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false); 2127ec681f3Smrg } 2137ec681f3Smrg nir_tgsi_scan_shader(sel->nir, &sel->info, true); 
2143464ebd5Sriastradh 2157ec681f3Smrg r = r600_shader_from_nir(rctx, shader, &key); 2167ec681f3Smrg if (r) { 2177ec681f3Smrg fprintf(stderr, "--Failed shader--------------------------------------------------\n"); 2187ec681f3Smrg 2197ec681f3Smrg if (sel->ir_type == PIPE_SHADER_IR_TGSI) { 2207ec681f3Smrg fprintf(stderr, "--TGSI--------------------------------------------------------\n"); 2217ec681f3Smrg tgsi_dump(sel->tokens, 0); 2227ec681f3Smrg } 2237ec681f3Smrg 2247ec681f3Smrg if (rscreen->b.debug_flags & (DBG_NIR_PREFERRED)) { 2257ec681f3Smrg fprintf(stderr, "--NIR --------------------------------------------------------\n"); 2267ec681f3Smrg nir_print_shader(sel->nir, stderr); 2277ec681f3Smrg } 2287ec681f3Smrg 2297ec681f3Smrg R600_ERR("translation from NIR failed !\n"); 2307ec681f3Smrg goto error; 2317ec681f3Smrg } 2327ec681f3Smrg } 2337ec681f3Smrg 234af69d88dSmrg if (dump) { 2357ec681f3Smrg if (sel->ir_type == PIPE_SHADER_IR_TGSI) { 2367ec681f3Smrg fprintf(stderr, "--TGSI--------------------------------------------------------\n"); 2377ec681f3Smrg tgsi_dump(sel->tokens, 0); 2387ec681f3Smrg } 2397ec681f3Smrg 240af69d88dSmrg if (sel->so.num_outputs) { 241af69d88dSmrg r600_dump_streamout(&sel->so); 242af69d88dSmrg } 2433464ebd5Sriastradh } 2447ec681f3Smrg 24501e04c3fSmrg if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { 24601e04c3fSmrg /* only disable for vertex shaders in tess paths */ 24701e04c3fSmrg if (key.vs.as_ls) 24801e04c3fSmrg use_sb = 0; 24901e04c3fSmrg } 25001e04c3fSmrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL); 25101e04c3fSmrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL); 25201e04c3fSmrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_COMPUTE); 253af69d88dSmrg 25401e04c3fSmrg /* disable SB for shaders using doubles */ 25501e04c3fSmrg use_sb &= !shader->shader.uses_doubles; 256af69d88dSmrg 25701e04c3fSmrg use_sb &= !shader->shader.uses_atomics; 25801e04c3fSmrg use_sb &= 
!shader->shader.uses_images; 25901e04c3fSmrg use_sb &= !shader->shader.uses_helper_invocation; 26001e04c3fSmrg 26101e04c3fSmrg /* Check if the bytecode has already been built. */ 262af69d88dSmrg if (!shader->shader.bc.bytecode) { 263af69d88dSmrg r = r600_bytecode_build(&shader->shader.bc); 264af69d88dSmrg if (r) { 265af69d88dSmrg R600_ERR("building bytecode failed !\n"); 266af69d88dSmrg goto error; 267af69d88dSmrg } 2683464ebd5Sriastradh } 269af69d88dSmrg 27001e04c3fSmrg sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 271af69d88dSmrg if (dump && !sb_disasm) { 272af69d88dSmrg fprintf(stderr, "--------------------------------------------------------------\n"); 273af69d88dSmrg r600_bytecode_disasm(&shader->shader.bc); 2743464ebd5Sriastradh fprintf(stderr, "______________________________________________________________\n"); 275af69d88dSmrg } else if ((dump && sb_disasm) || use_sb) { 2767ec681f3Smrg r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 277af69d88dSmrg dump, use_sb); 278af69d88dSmrg if (r) { 279af69d88dSmrg R600_ERR("r600_sb_bytecode_process failed !\n"); 280af69d88dSmrg goto error; 281af69d88dSmrg } 282af69d88dSmrg } 283af69d88dSmrg 2847ec681f3Smrg if (dump) { 2857ec681f3Smrg FILE *f; 2867ec681f3Smrg char fname[1024]; 2877ec681f3Smrg snprintf(fname, 1024, "shader_from_%s_%d.cpp", 2887ec681f3Smrg (sel->ir_type == PIPE_SHADER_IR_TGSI ? 2897ec681f3Smrg (rscreen->b.debug_flags & DBG_NIR_PREFERRED ? 
"tgsi-nir" : "tgsi") 2907ec681f3Smrg : "nir"), nshader); 2917ec681f3Smrg f = fopen(fname, "w"); 2927ec681f3Smrg print_shader_info(f, nshader++, &shader->shader); 2937ec681f3Smrg print_shader_info(stderr, nshader++, &shader->shader); 2947ec681f3Smrg print_pipe_info(stderr, &sel->info); 2957ec681f3Smrg if (sel->ir_type == PIPE_SHADER_IR_TGSI) { 2967ec681f3Smrg fprintf(f, "/****TGSI**********************************\n"); 2977ec681f3Smrg tgsi_dump_to_file(sel->tokens, 0, f); 2987ec681f3Smrg } 2997ec681f3Smrg 3007ec681f3Smrg if (rscreen->b.debug_flags & DBG_NIR_PREFERRED){ 3017ec681f3Smrg fprintf(f, "/****NIR **********************************\n"); 3027ec681f3Smrg nir_print_shader(sel->nir, f); 3037ec681f3Smrg } 3047ec681f3Smrg fprintf(f, "******************************************/\n"); 3057ec681f3Smrg fclose(f); 3067ec681f3Smrg } 3077ec681f3Smrg 308af69d88dSmrg if (shader->gs_copy_shader) { 309af69d88dSmrg if (dump) { 310af69d88dSmrg // dump copy shader 311af69d88dSmrg r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 312af69d88dSmrg &shader->gs_copy_shader->shader, dump, 0); 313af69d88dSmrg if (r) 314af69d88dSmrg goto error; 315af69d88dSmrg } 316af69d88dSmrg 317af69d88dSmrg if ((r = store_shader(ctx, shader->gs_copy_shader))) 318af69d88dSmrg goto error; 319af69d88dSmrg } 320af69d88dSmrg 321af69d88dSmrg /* Store the shader in a buffer. */ 322af69d88dSmrg if ((r = store_shader(ctx, shader))) 323af69d88dSmrg goto error; 324af69d88dSmrg 325af69d88dSmrg /* Build state. 
*/ 326af69d88dSmrg switch (shader->shader.processor_type) { 32701e04c3fSmrg case PIPE_SHADER_TESS_CTRL: 32801e04c3fSmrg evergreen_update_hs_state(ctx, shader); 32901e04c3fSmrg break; 33001e04c3fSmrg case PIPE_SHADER_TESS_EVAL: 33101e04c3fSmrg if (key.tes.as_es) 33201e04c3fSmrg evergreen_update_es_state(ctx, shader); 33301e04c3fSmrg else 33401e04c3fSmrg evergreen_update_vs_state(ctx, shader); 33501e04c3fSmrg break; 33601e04c3fSmrg case PIPE_SHADER_GEOMETRY: 337af69d88dSmrg if (rctx->b.chip_class >= EVERGREEN) { 338af69d88dSmrg evergreen_update_gs_state(ctx, shader); 339af69d88dSmrg evergreen_update_vs_state(ctx, shader->gs_copy_shader); 340af69d88dSmrg } else { 341af69d88dSmrg r600_update_gs_state(ctx, shader); 342af69d88dSmrg r600_update_vs_state(ctx, shader->gs_copy_shader); 343af69d88dSmrg } 344af69d88dSmrg break; 34501e04c3fSmrg case PIPE_SHADER_VERTEX: 34601e04c3fSmrg export_shader = key.vs.as_es; 347af69d88dSmrg if (rctx->b.chip_class >= EVERGREEN) { 34801e04c3fSmrg if (key.vs.as_ls) 34901e04c3fSmrg evergreen_update_ls_state(ctx, shader); 35001e04c3fSmrg else if (key.vs.as_es) 351af69d88dSmrg evergreen_update_es_state(ctx, shader); 352af69d88dSmrg else 353af69d88dSmrg evergreen_update_vs_state(ctx, shader); 354af69d88dSmrg } else { 355af69d88dSmrg if (export_shader) 356af69d88dSmrg r600_update_es_state(ctx, shader); 357af69d88dSmrg else 358af69d88dSmrg r600_update_vs_state(ctx, shader); 359af69d88dSmrg } 360af69d88dSmrg break; 36101e04c3fSmrg case PIPE_SHADER_FRAGMENT: 362af69d88dSmrg if (rctx->b.chip_class >= EVERGREEN) { 363af69d88dSmrg evergreen_update_ps_state(ctx, shader); 364af69d88dSmrg } else { 365af69d88dSmrg r600_update_ps_state(ctx, shader); 366af69d88dSmrg } 367af69d88dSmrg break; 36801e04c3fSmrg case PIPE_SHADER_COMPUTE: 36901e04c3fSmrg evergreen_update_ls_state(ctx, shader); 37001e04c3fSmrg break; 371af69d88dSmrg default: 372af69d88dSmrg r = -EINVAL; 373af69d88dSmrg goto error; 3743464ebd5Sriastradh } 375af69d88dSmrg return 0; 376af69d88dSmrg 
377af69d88dSmrgerror: 378af69d88dSmrg r600_pipe_shader_destroy(ctx, shader); 379af69d88dSmrg return r; 3803464ebd5Sriastradh} 3813464ebd5Sriastradh 38201e04c3fSmrgvoid r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader) 3833464ebd5Sriastradh{ 38401e04c3fSmrg r600_resource_reference(&shader->bo, NULL); 3857ec681f3Smrg if (list_is_linked(&shader->shader.bc.cf)) 3867ec681f3Smrg r600_bytecode_clear(&shader->shader.bc); 387af69d88dSmrg r600_release_command_buffer(&shader->command_buffer); 3883464ebd5Sriastradh} 3893464ebd5Sriastradh 3903464ebd5Sriastradh/* 3913464ebd5Sriastradh * tgsi -> r600 shader 3923464ebd5Sriastradh */ 3933464ebd5Sriastradhstruct r600_shader_tgsi_instruction; 3943464ebd5Sriastradh 3953464ebd5Sriastradhstruct r600_shader_src { 3963464ebd5Sriastradh unsigned sel; 3973464ebd5Sriastradh unsigned swizzle[4]; 3983464ebd5Sriastradh unsigned neg; 3993464ebd5Sriastradh unsigned abs; 4003464ebd5Sriastradh unsigned rel; 401af69d88dSmrg unsigned kc_bank; 40201e04c3fSmrg boolean kc_rel; /* true if cache bank is indexed */ 4033464ebd5Sriastradh uint32_t value[4]; 4043464ebd5Sriastradh}; 4053464ebd5Sriastradh 40601e04c3fSmrgstruct eg_interp { 40701e04c3fSmrg boolean enabled; 40801e04c3fSmrg unsigned ij_index; 40901e04c3fSmrg}; 41001e04c3fSmrg 4113464ebd5Sriastradhstruct r600_shader_ctx { 4123464ebd5Sriastradh struct tgsi_shader_info info; 41301e04c3fSmrg struct tgsi_array_info *array_infos; 41401e04c3fSmrg /* flag for each tgsi temp array if its been spilled or not */ 41501e04c3fSmrg bool *spilled_arrays; 4163464ebd5Sriastradh struct tgsi_parse_context parse; 4173464ebd5Sriastradh const struct tgsi_token *tokens; 4183464ebd5Sriastradh unsigned type; 4193464ebd5Sriastradh unsigned file_offset[TGSI_FILE_COUNT]; 4203464ebd5Sriastradh unsigned temp_reg; 42101e04c3fSmrg const struct r600_shader_tgsi_instruction *inst_info; 422af69d88dSmrg struct r600_bytecode *bc; 4233464ebd5Sriastradh struct r600_shader *shader; 
4243464ebd5Sriastradh struct r600_shader_src src[4]; 425af69d88dSmrg uint32_t *literals; 426af69d88dSmrg uint32_t nliterals; 427af69d88dSmrg uint32_t max_driver_temp_used; 4283464ebd5Sriastradh /* needed for evergreen interpolation */ 42901e04c3fSmrg struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid 430af69d88dSmrg /* evergreen/cayman also store sample mask in face register */ 431af69d88dSmrg int face_gpr; 43201e04c3fSmrg /* sample id is .w component stored in fixed point position register */ 43301e04c3fSmrg int fixed_pt_position_gpr; 434af69d88dSmrg int colors_used; 435af69d88dSmrg boolean clip_vertex_write; 436af69d88dSmrg unsigned cv_output; 437af69d88dSmrg unsigned edgeflag_output; 43801e04c3fSmrg int helper_invoc_reg; 43901e04c3fSmrg int cs_block_size_reg; 44001e04c3fSmrg int cs_grid_size_reg; 44101e04c3fSmrg bool cs_block_size_loaded, cs_grid_size_loaded; 442af69d88dSmrg int fragcoord_input; 443af69d88dSmrg int next_ring_offset; 444af69d88dSmrg int gs_out_ring_offset; 445af69d88dSmrg int gs_next_vertex; 446af69d88dSmrg struct r600_shader *gs_for_vs; 44701e04c3fSmrg int gs_export_gpr_tregs[4]; 44801e04c3fSmrg int gs_rotated_input[2]; 44901e04c3fSmrg const struct pipe_stream_output_info *gs_stream_output_info; 45001e04c3fSmrg unsigned enabled_stream_buffers_mask; 45101e04c3fSmrg unsigned tess_input_info; /* temp with tess input offsets */ 45201e04c3fSmrg unsigned tess_output_info; /* temp with tess input offsets */ 45301e04c3fSmrg unsigned thread_id_gpr; /* temp with thread id calculated for images */ 4543464ebd5Sriastradh}; 4553464ebd5Sriastradh 4563464ebd5Sriastradhstruct r600_shader_tgsi_instruction { 457af69d88dSmrg unsigned op; 4583464ebd5Sriastradh int (*process)(struct r600_shader_ctx *ctx); 4593464ebd5Sriastradh}; 4603464ebd5Sriastradh 46101e04c3fSmrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); 46201e04c3fSmrgstatic const struct 
r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 4633464ebd5Sriastradhstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 46401e04c3fSmrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 465af69d88dSmrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 466af69d88dSmrgstatic int tgsi_else(struct r600_shader_ctx *ctx); 467af69d88dSmrgstatic int tgsi_endif(struct r600_shader_ctx *ctx); 468af69d88dSmrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx); 469af69d88dSmrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx); 470af69d88dSmrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 47101e04c3fSmrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 47201e04c3fSmrg unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 47301e04c3fSmrg unsigned int dst_reg); 47401e04c3fSmrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 47501e04c3fSmrg const struct r600_shader_src *shader_src, 47601e04c3fSmrg unsigned chan); 47701e04c3fSmrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 47801e04c3fSmrg unsigned dst_reg, unsigned mask); 47901e04c3fSmrg 48001e04c3fSmrgstatic bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) 48101e04c3fSmrg{ 48201e04c3fSmrg if (ctx->bc->family == CHIP_HEMLOCK || 48301e04c3fSmrg ctx->bc->family == CHIP_CYPRESS || 48401e04c3fSmrg ctx->bc->family == CHIP_JUNIPER) 48501e04c3fSmrg return false; 48601e04c3fSmrg return true; 48701e04c3fSmrg} 48801e04c3fSmrg 48901e04c3fSmrgstatic int tgsi_last_instruction(unsigned writemask) 49001e04c3fSmrg{ 49101e04c3fSmrg int i, lasti = 0; 49201e04c3fSmrg 49301e04c3fSmrg for (i = 0; i < 4; i++) { 49401e04c3fSmrg if (writemask & (1 << i)) { 49501e04c3fSmrg lasti = i; 49601e04c3fSmrg } 49701e04c3fSmrg } 49801e04c3fSmrg return lasti; 49901e04c3fSmrg} 5003464ebd5Sriastradh 
5013464ebd5Sriastradhstatic int tgsi_is_supported(struct r600_shader_ctx *ctx) 5023464ebd5Sriastradh{ 5033464ebd5Sriastradh struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 50401e04c3fSmrg unsigned j; 5053464ebd5Sriastradh 50601e04c3fSmrg if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) { 5073464ebd5Sriastradh R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 5083464ebd5Sriastradh return -EINVAL; 5093464ebd5Sriastradh } 5103464ebd5Sriastradh#if 0 5113464ebd5Sriastradh if (i->Instruction.Label) { 5123464ebd5Sriastradh R600_ERR("label unsupported\n"); 5133464ebd5Sriastradh return -EINVAL; 5143464ebd5Sriastradh } 5153464ebd5Sriastradh#endif 5163464ebd5Sriastradh for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 5173464ebd5Sriastradh if (i->Src[j].Register.Dimension) { 5187ec681f3Smrg switch (i->Src[j].Register.File) { 5197ec681f3Smrg case TGSI_FILE_CONSTANT: 5207ec681f3Smrg case TGSI_FILE_HW_ATOMIC: 5217ec681f3Smrg break; 5227ec681f3Smrg case TGSI_FILE_INPUT: 5237ec681f3Smrg if (ctx->type == PIPE_SHADER_GEOMETRY || 5247ec681f3Smrg ctx->type == PIPE_SHADER_TESS_CTRL || 5257ec681f3Smrg ctx->type == PIPE_SHADER_TESS_EVAL) 5267ec681f3Smrg break; 5277ec681f3Smrg FALLTHROUGH; 5287ec681f3Smrg case TGSI_FILE_OUTPUT: 5297ec681f3Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL) 5307ec681f3Smrg break; 5317ec681f3Smrg FALLTHROUGH; 5327ec681f3Smrg default: 5337ec681f3Smrg R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, 5347ec681f3Smrg i->Src[j].Register.File, 5357ec681f3Smrg i->Src[j].Register.Dimension); 5367ec681f3Smrg return -EINVAL; 5377ec681f3Smrg } 5383464ebd5Sriastradh } 5393464ebd5Sriastradh } 5403464ebd5Sriastradh for (j = 0; j < i->Instruction.NumDstRegs; j++) { 5413464ebd5Sriastradh if (i->Dst[j].Register.Dimension) { 54201e04c3fSmrg if (ctx->type == PIPE_SHADER_TESS_CTRL) 54301e04c3fSmrg continue; 5443464ebd5Sriastradh R600_ERR("unsupported dst (dimension)\n"); 5453464ebd5Sriastradh return 
-EINVAL; 5463464ebd5Sriastradh } 5473464ebd5Sriastradh } 5483464ebd5Sriastradh return 0; 5493464ebd5Sriastradh} 5503464ebd5Sriastradh 55101e04c3fSmrgint eg_get_interpolator_index(unsigned interpolate, unsigned location) 5523464ebd5Sriastradh{ 55301e04c3fSmrg if (interpolate == TGSI_INTERPOLATE_COLOR || 55401e04c3fSmrg interpolate == TGSI_INTERPOLATE_LINEAR || 55501e04c3fSmrg interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 55601e04c3fSmrg { 55701e04c3fSmrg int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; 55801e04c3fSmrg int loc; 5593464ebd5Sriastradh 56001e04c3fSmrg switch(location) { 56101e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTER: 56201e04c3fSmrg loc = 1; 56301e04c3fSmrg break; 56401e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTROID: 56501e04c3fSmrg loc = 2; 56601e04c3fSmrg break; 56701e04c3fSmrg case TGSI_INTERPOLATE_LOC_SAMPLE: 56801e04c3fSmrg default: 56901e04c3fSmrg loc = 0; break; 5703464ebd5Sriastradh } 57101e04c3fSmrg 57201e04c3fSmrg return is_linear * 3 + loc; 5733464ebd5Sriastradh } 5743464ebd5Sriastradh 57501e04c3fSmrg return -1; 57601e04c3fSmrg} 57701e04c3fSmrg 57801e04c3fSmrgstatic void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 57901e04c3fSmrg int input) 58001e04c3fSmrg{ 58101e04c3fSmrg int i = eg_get_interpolator_index( 58201e04c3fSmrg ctx->shader->input[input].interpolate, 58301e04c3fSmrg ctx->shader->input[input].interpolate_location); 58401e04c3fSmrg assert(i >= 0); 58501e04c3fSmrg ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; 586af69d88dSmrg} 587af69d88dSmrg 588af69d88dSmrgstatic int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 589af69d88dSmrg{ 590af69d88dSmrg int i, r; 591af69d88dSmrg struct r600_bytecode_alu alu; 592af69d88dSmrg int gpr = 0, base_chan = 0; 593af69d88dSmrg int ij_index = ctx->shader->input[input].ij_index; 594af69d88dSmrg 5953464ebd5Sriastradh /* work out gpr and base_chan from index */ 5963464ebd5Sriastradh gpr = ij_index / 2; 5973464ebd5Sriastradh base_chan = (2 * 
(ij_index % 2)) + 1; 5983464ebd5Sriastradh 5993464ebd5Sriastradh for (i = 0; i < 8; i++) { 600af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6013464ebd5Sriastradh 6023464ebd5Sriastradh if (i < 4) 603af69d88dSmrg alu.op = ALU_OP2_INTERP_ZW; 6043464ebd5Sriastradh else 605af69d88dSmrg alu.op = ALU_OP2_INTERP_XY; 6063464ebd5Sriastradh 6073464ebd5Sriastradh if ((i > 1) && (i < 6)) { 6083464ebd5Sriastradh alu.dst.sel = ctx->shader->input[input].gpr; 6093464ebd5Sriastradh alu.dst.write = 1; 6103464ebd5Sriastradh } 6113464ebd5Sriastradh 6123464ebd5Sriastradh alu.dst.chan = i % 4; 6133464ebd5Sriastradh 6143464ebd5Sriastradh alu.src[0].sel = gpr; 6153464ebd5Sriastradh alu.src[0].chan = (base_chan - (i % 2)); 6163464ebd5Sriastradh 6173464ebd5Sriastradh alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 6183464ebd5Sriastradh 6193464ebd5Sriastradh alu.bank_swizzle_force = SQ_ALU_VEC_210; 6203464ebd5Sriastradh if ((i % 4) == 3) 6213464ebd5Sriastradh alu.last = 1; 622af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 623af69d88dSmrg if (r) 624af69d88dSmrg return r; 625af69d88dSmrg } 626af69d88dSmrg return 0; 627af69d88dSmrg} 628af69d88dSmrg 629af69d88dSmrgstatic int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 630af69d88dSmrg{ 631af69d88dSmrg int i, r; 632af69d88dSmrg struct r600_bytecode_alu alu; 633af69d88dSmrg 634af69d88dSmrg for (i = 0; i < 4; i++) { 635af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 636af69d88dSmrg 637af69d88dSmrg alu.op = ALU_OP1_INTERP_LOAD_P0; 638af69d88dSmrg 639af69d88dSmrg alu.dst.sel = ctx->shader->input[input].gpr; 640af69d88dSmrg alu.dst.write = 1; 641af69d88dSmrg 642af69d88dSmrg alu.dst.chan = i; 643af69d88dSmrg 644af69d88dSmrg alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 645af69d88dSmrg alu.src[0].chan = i; 646af69d88dSmrg 647af69d88dSmrg if (i == 3) 648af69d88dSmrg alu.last = 1; 649af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 
6503464ebd5Sriastradh if (r) 6513464ebd5Sriastradh return r; 6523464ebd5Sriastradh } 6533464ebd5Sriastradh return 0; 6543464ebd5Sriastradh} 6553464ebd5Sriastradh 656af69d88dSmrg/* 657af69d88dSmrg * Special export handling in shaders 658af69d88dSmrg * 659af69d88dSmrg * shader export ARRAY_BASE for EXPORT_POS: 660af69d88dSmrg * 60 is position 661af69d88dSmrg * 61 is misc vector 662af69d88dSmrg * 62, 63 are clip distance vectors 663af69d88dSmrg * 664af69d88dSmrg * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 665af69d88dSmrg * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 666af69d88dSmrg * USE_VTX_POINT_SIZE - point size in the X channel of export 61 667af69d88dSmrg * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 668af69d88dSmrg * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 669af69d88dSmrg * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 670af69d88dSmrg * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 671af69d88dSmrg * exclusive from render target index) 672af69d88dSmrg * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 673af69d88dSmrg * 674af69d88dSmrg * 675af69d88dSmrg * shader export ARRAY_BASE for EXPORT_PIXEL: 676af69d88dSmrg * 0-7 CB targets 677af69d88dSmrg * 61 computed Z vector 678af69d88dSmrg * 679af69d88dSmrg * The use of the values exported in the computed Z vector are controlled 680af69d88dSmrg * by DB_SHADER_CONTROL: 681af69d88dSmrg * Z_EXPORT_ENABLE - Z as a float in RED 682af69d88dSmrg * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 683af69d88dSmrg * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 684af69d88dSmrg * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 685af69d88dSmrg * DB_SOURCE_FORMAT - export control restrictions 686af69d88dSmrg * 687af69d88dSmrg */ 688af69d88dSmrg 689af69d88dSmrg 690af69d88dSmrg/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI 
setup */ 691af69d88dSmrgstatic int r600_spi_sid(struct r600_shader_io * io) 692af69d88dSmrg{ 693af69d88dSmrg int index, name = io->name; 694af69d88dSmrg 695af69d88dSmrg /* These params are handled differently, they don't need 696af69d88dSmrg * semantic indices, so we'll use 0 for them. 697af69d88dSmrg */ 698af69d88dSmrg if (name == TGSI_SEMANTIC_POSITION || 699af69d88dSmrg name == TGSI_SEMANTIC_PSIZE || 700af69d88dSmrg name == TGSI_SEMANTIC_EDGEFLAG || 701af69d88dSmrg name == TGSI_SEMANTIC_FACE || 702af69d88dSmrg name == TGSI_SEMANTIC_SAMPLEMASK) 703af69d88dSmrg index = 0; 704af69d88dSmrg else { 705af69d88dSmrg if (name == TGSI_SEMANTIC_GENERIC) { 706af69d88dSmrg /* For generic params simply use sid from tgsi */ 7077ec681f3Smrg index = 9 + io->sid; 7087ec681f3Smrg } else if (name == TGSI_SEMANTIC_TEXCOORD) { 709af69d88dSmrg index = io->sid; 710af69d88dSmrg } else { 711af69d88dSmrg /* For non-generic params - pack name and sid into 8 bits */ 712af69d88dSmrg index = 0x80 | (name<<3) | (io->sid); 713af69d88dSmrg } 714af69d88dSmrg 715af69d88dSmrg /* Make sure that all really used indices have nonzero value, so 716af69d88dSmrg * we can just compare it to 0 later instead of comparing the name 717af69d88dSmrg * with different values to detect special cases. 
*/ 718af69d88dSmrg index++; 719af69d88dSmrg } 720af69d88dSmrg 721af69d88dSmrg return index; 722af69d88dSmrg}; 723af69d88dSmrg 72401e04c3fSmrg/* we need this to get a common lds index for vs/tcs/tes input/outputs */ 72501e04c3fSmrgint r600_get_lds_unique_index(unsigned semantic_name, unsigned index) 72601e04c3fSmrg{ 72701e04c3fSmrg switch (semantic_name) { 72801e04c3fSmrg case TGSI_SEMANTIC_POSITION: 72901e04c3fSmrg return 0; 73001e04c3fSmrg case TGSI_SEMANTIC_PSIZE: 73101e04c3fSmrg return 1; 73201e04c3fSmrg case TGSI_SEMANTIC_CLIPDIST: 73301e04c3fSmrg assert(index <= 1); 73401e04c3fSmrg return 2 + index; 7357ec681f3Smrg case TGSI_SEMANTIC_TEXCOORD: 7367ec681f3Smrg return 4 + index; 73701e04c3fSmrg case TGSI_SEMANTIC_GENERIC: 73801e04c3fSmrg if (index <= 63-4) 7397ec681f3Smrg return 4 + index; 74001e04c3fSmrg else 74101e04c3fSmrg /* same explanation as in the default statement, 74201e04c3fSmrg * the only user hitting this is st/nine. 74301e04c3fSmrg */ 74401e04c3fSmrg return 0; 74501e04c3fSmrg 74601e04c3fSmrg /* patch indices are completely separate and thus start from 0 */ 74701e04c3fSmrg case TGSI_SEMANTIC_TESSOUTER: 74801e04c3fSmrg return 0; 74901e04c3fSmrg case TGSI_SEMANTIC_TESSINNER: 75001e04c3fSmrg return 1; 75101e04c3fSmrg case TGSI_SEMANTIC_PATCH: 75201e04c3fSmrg return 2 + index; 75301e04c3fSmrg 75401e04c3fSmrg default: 75501e04c3fSmrg /* Don't fail here. The result of this function is only used 75601e04c3fSmrg * for LS, TCS, TES, and GS, where legacy GL semantics can't 75701e04c3fSmrg * occur, but this function is called for all vertex shaders 75801e04c3fSmrg * before it's known whether LS will be compiled or not. 
75901e04c3fSmrg */ 76001e04c3fSmrg return 0; 76101e04c3fSmrg } 76201e04c3fSmrg} 76301e04c3fSmrg 764af69d88dSmrg/* turn input into interpolate on EG */ 765af69d88dSmrgstatic int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 766af69d88dSmrg{ 767af69d88dSmrg int r = 0; 768af69d88dSmrg 769af69d88dSmrg if (ctx->shader->input[index].spi_sid) { 770af69d88dSmrg ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 771af69d88dSmrg if (ctx->shader->input[index].interpolate > 0) { 772af69d88dSmrg evergreen_interp_assign_ij_index(ctx, index); 77301e04c3fSmrg r = evergreen_interp_alu(ctx, index); 774af69d88dSmrg } else { 77501e04c3fSmrg r = evergreen_interp_flat(ctx, index); 776af69d88dSmrg } 777af69d88dSmrg } 778af69d88dSmrg return r; 779af69d88dSmrg} 780af69d88dSmrg 781af69d88dSmrgstatic int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 782af69d88dSmrg{ 783af69d88dSmrg struct r600_bytecode_alu alu; 784af69d88dSmrg int i, r; 785af69d88dSmrg int gpr_front = ctx->shader->input[front].gpr; 786af69d88dSmrg int gpr_back = ctx->shader->input[back].gpr; 787af69d88dSmrg 788af69d88dSmrg for (i = 0; i < 4; i++) { 789af69d88dSmrg memset(&alu, 0, sizeof(alu)); 790af69d88dSmrg alu.op = ALU_OP3_CNDGT; 791af69d88dSmrg alu.is_op3 = 1; 792af69d88dSmrg alu.dst.write = 1; 793af69d88dSmrg alu.dst.sel = gpr_front; 794af69d88dSmrg alu.src[0].sel = ctx->face_gpr; 795af69d88dSmrg alu.src[1].sel = gpr_front; 796af69d88dSmrg alu.src[2].sel = gpr_back; 797af69d88dSmrg 798af69d88dSmrg alu.dst.chan = i; 799af69d88dSmrg alu.src[1].chan = i; 800af69d88dSmrg alu.src[2].chan = i; 801af69d88dSmrg alu.last = (i==3); 802af69d88dSmrg 803af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 804af69d88dSmrg return r; 805af69d88dSmrg } 806af69d88dSmrg 807af69d88dSmrg return 0; 808af69d88dSmrg} 8093464ebd5Sriastradh 81001e04c3fSmrg/* execute a single slot ALU calculation */ 81101e04c3fSmrgstatic int single_alu_op2(struct r600_shader_ctx *ctx, int op, 81201e04c3fSmrg 
int dst_sel, int dst_chan, 81301e04c3fSmrg int src0_sel, unsigned src0_chan_val, 81401e04c3fSmrg int src1_sel, unsigned src1_chan_val) 81501e04c3fSmrg{ 81601e04c3fSmrg struct r600_bytecode_alu alu; 81701e04c3fSmrg int r, i; 81801e04c3fSmrg 81901e04c3fSmrg if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) { 82001e04c3fSmrg for (i = 0; i < 4; i++) { 82101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 82201e04c3fSmrg alu.op = op; 82301e04c3fSmrg alu.src[0].sel = src0_sel; 82401e04c3fSmrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 82501e04c3fSmrg alu.src[0].value = src0_chan_val; 82601e04c3fSmrg else 82701e04c3fSmrg alu.src[0].chan = src0_chan_val; 82801e04c3fSmrg alu.src[1].sel = src1_sel; 82901e04c3fSmrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 83001e04c3fSmrg alu.src[1].value = src1_chan_val; 83101e04c3fSmrg else 83201e04c3fSmrg alu.src[1].chan = src1_chan_val; 83301e04c3fSmrg alu.dst.sel = dst_sel; 83401e04c3fSmrg alu.dst.chan = i; 83501e04c3fSmrg alu.dst.write = i == dst_chan; 83601e04c3fSmrg alu.last = (i == 3); 83701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 83801e04c3fSmrg if (r) 83901e04c3fSmrg return r; 84001e04c3fSmrg } 84101e04c3fSmrg return 0; 84201e04c3fSmrg } 84301e04c3fSmrg 84401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 84501e04c3fSmrg alu.op = op; 84601e04c3fSmrg alu.src[0].sel = src0_sel; 84701e04c3fSmrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 84801e04c3fSmrg alu.src[0].value = src0_chan_val; 84901e04c3fSmrg else 85001e04c3fSmrg alu.src[0].chan = src0_chan_val; 85101e04c3fSmrg alu.src[1].sel = src1_sel; 85201e04c3fSmrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 85301e04c3fSmrg alu.src[1].value = src1_chan_val; 85401e04c3fSmrg else 85501e04c3fSmrg alu.src[1].chan = src1_chan_val; 85601e04c3fSmrg alu.dst.sel = dst_sel; 85701e04c3fSmrg alu.dst.chan = dst_chan; 85801e04c3fSmrg alu.dst.write = 1; 85901e04c3fSmrg alu.last = 1; 86001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 86101e04c3fSmrg if (r) 
86201e04c3fSmrg return r; 86301e04c3fSmrg return 0; 86401e04c3fSmrg} 86501e04c3fSmrg 86601e04c3fSmrg/* execute a single slot ALU calculation */ 86701e04c3fSmrgstatic int single_alu_op3(struct r600_shader_ctx *ctx, int op, 86801e04c3fSmrg int dst_sel, int dst_chan, 86901e04c3fSmrg int src0_sel, unsigned src0_chan_val, 87001e04c3fSmrg int src1_sel, unsigned src1_chan_val, 87101e04c3fSmrg int src2_sel, unsigned src2_chan_val) 87201e04c3fSmrg{ 87301e04c3fSmrg struct r600_bytecode_alu alu; 87401e04c3fSmrg int r; 87501e04c3fSmrg 87601e04c3fSmrg /* validate this for other ops */ 87701e04c3fSmrg assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT); 87801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 87901e04c3fSmrg alu.op = op; 88001e04c3fSmrg alu.src[0].sel = src0_sel; 88101e04c3fSmrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 88201e04c3fSmrg alu.src[0].value = src0_chan_val; 88301e04c3fSmrg else 88401e04c3fSmrg alu.src[0].chan = src0_chan_val; 88501e04c3fSmrg alu.src[1].sel = src1_sel; 88601e04c3fSmrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 88701e04c3fSmrg alu.src[1].value = src1_chan_val; 88801e04c3fSmrg else 88901e04c3fSmrg alu.src[1].chan = src1_chan_val; 89001e04c3fSmrg alu.src[2].sel = src2_sel; 89101e04c3fSmrg if (src2_sel == V_SQ_ALU_SRC_LITERAL) 89201e04c3fSmrg alu.src[2].value = src2_chan_val; 89301e04c3fSmrg else 89401e04c3fSmrg alu.src[2].chan = src2_chan_val; 89501e04c3fSmrg alu.dst.sel = dst_sel; 89601e04c3fSmrg alu.dst.chan = dst_chan; 89701e04c3fSmrg alu.is_op3 = 1; 89801e04c3fSmrg alu.last = 1; 89901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 90001e04c3fSmrg if (r) 90101e04c3fSmrg return r; 90201e04c3fSmrg return 0; 90301e04c3fSmrg} 90401e04c3fSmrg 90501e04c3fSmrg/* put it in temp_reg.x */ 90601e04c3fSmrgstatic int get_lds_offset0(struct r600_shader_ctx *ctx, 90701e04c3fSmrg int rel_patch_chan, 90801e04c3fSmrg int temp_reg, bool is_patch_var) 90901e04c3fSmrg{ 91001e04c3fSmrg int r; 91101e04c3fSmrg 
91201e04c3fSmrg /* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */ 91301e04c3fSmrg /* ADD 91401e04c3fSmrg Dimension - patch0_offset (input_vals.z), 91501e04c3fSmrg Non-dim - patch0_data_offset (input_vals.w) 91601e04c3fSmrg */ 91701e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 91801e04c3fSmrg temp_reg, 0, 91901e04c3fSmrg ctx->tess_output_info, 0, 92001e04c3fSmrg 0, rel_patch_chan, 92101e04c3fSmrg ctx->tess_output_info, is_patch_var ? 3 : 2); 92201e04c3fSmrg if (r) 92301e04c3fSmrg return r; 92401e04c3fSmrg return 0; 92501e04c3fSmrg} 92601e04c3fSmrg 92701e04c3fSmrgstatic inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index) 92801e04c3fSmrg{ 92901e04c3fSmrg return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 93001e04c3fSmrg} 93101e04c3fSmrg 93201e04c3fSmrgstatic int r600_get_temp(struct r600_shader_ctx *ctx) 93301e04c3fSmrg{ 93401e04c3fSmrg return ctx->temp_reg + ctx->max_driver_temp_used++; 93501e04c3fSmrg} 93601e04c3fSmrg 93701e04c3fSmrgstatic int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 93801e04c3fSmrg{ 93901e04c3fSmrg int i; 94001e04c3fSmrg i = ctx->shader->noutput++; 94101e04c3fSmrg ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 94201e04c3fSmrg ctx->shader->output[i].sid = 0; 94301e04c3fSmrg ctx->shader->output[i].gpr = 0; 94401e04c3fSmrg ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 94501e04c3fSmrg ctx->shader->output[i].write_mask = 0x4; 94601e04c3fSmrg ctx->shader->output[i].spi_sid = prim_id_sid; 94701e04c3fSmrg 94801e04c3fSmrg return 0; 94901e04c3fSmrg} 95001e04c3fSmrg 95101e04c3fSmrgstatic int tgsi_barrier(struct r600_shader_ctx *ctx) 95201e04c3fSmrg{ 95301e04c3fSmrg struct r600_bytecode_alu alu; 95401e04c3fSmrg int r; 95501e04c3fSmrg 95601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 95701e04c3fSmrg alu.op = ctx->inst_info->op; 95801e04c3fSmrg alu.last = 1; 95901e04c3fSmrg 96001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, 
&alu); 96101e04c3fSmrg if (r) 96201e04c3fSmrg return r; 96301e04c3fSmrg return 0; 96401e04c3fSmrg} 96501e04c3fSmrg 96601e04c3fSmrgstatic void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, unsigned *scratch_space_needed) 96701e04c3fSmrg{ 96801e04c3fSmrg // pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays 96901e04c3fSmrg unsigned n = ctx->info.array_max[TGSI_FILE_TEMPORARY]; 97001e04c3fSmrg unsigned narrays_left = n; 97101e04c3fSmrg bool *spilled = ctx->spilled_arrays; // assumed calloc:ed 97201e04c3fSmrg 97301e04c3fSmrg *scratch_space_needed = 0; 97401e04c3fSmrg while (*regno > 124 && narrays_left) { 97501e04c3fSmrg unsigned i; 97601e04c3fSmrg unsigned largest = 0; 97701e04c3fSmrg unsigned largest_index = 0; 97801e04c3fSmrg 97901e04c3fSmrg for (i = 0; i < n; i++) { 98001e04c3fSmrg unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 98101e04c3fSmrg if (!spilled[i] && size > largest) { 98201e04c3fSmrg largest = size; 98301e04c3fSmrg largest_index = i; 98401e04c3fSmrg } 98501e04c3fSmrg } 98601e04c3fSmrg 98701e04c3fSmrg spilled[largest_index] = true; 98801e04c3fSmrg *regno -= largest; 98901e04c3fSmrg *scratch_space_needed += largest; 99001e04c3fSmrg 99101e04c3fSmrg narrays_left --; 99201e04c3fSmrg } 99301e04c3fSmrg 99401e04c3fSmrg if (narrays_left == 0) { 99501e04c3fSmrg ctx->info.indirect_files &= ~(1 << TGSI_FILE_TEMPORARY); 99601e04c3fSmrg } 99701e04c3fSmrg} 99801e04c3fSmrg 99901e04c3fSmrg/* Take spilled temp arrays into account when translating tgsi register 100001e04c3fSmrg * indexes into r600 gprs if spilled is false, or scratch array offset if 100101e04c3fSmrg * spilled is true */ 100201e04c3fSmrgstatic int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, bool *spilled) 100301e04c3fSmrg{ 100401e04c3fSmrg unsigned i; 100501e04c3fSmrg unsigned spilled_size = 0; 100601e04c3fSmrg 100701e04c3fSmrg for (i = 0; i < 
ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) { 100801e04c3fSmrg if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) { 100901e04c3fSmrg if (ctx->spilled_arrays[i]) { 101001e04c3fSmrg /* vec4 index into spilled scratch memory */ 101101e04c3fSmrg *spilled = true; 101201e04c3fSmrg return tgsi_reg_index - ctx->array_infos[i].range.First + spilled_size; 101301e04c3fSmrg } 101401e04c3fSmrg else { 101501e04c3fSmrg /* regular GPR array */ 101601e04c3fSmrg *spilled = false; 101701e04c3fSmrg return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY]; 101801e04c3fSmrg } 101901e04c3fSmrg } 102001e04c3fSmrg 102101e04c3fSmrg if (tgsi_reg_index < ctx->array_infos[i].range.First) 102201e04c3fSmrg break; 102301e04c3fSmrg if (ctx->spilled_arrays[i]) { 102401e04c3fSmrg spilled_size += ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 102501e04c3fSmrg } 102601e04c3fSmrg } 102701e04c3fSmrg 102801e04c3fSmrg /* regular GPR index, minus the holes from spilled arrays */ 102901e04c3fSmrg *spilled = false; 103001e04c3fSmrg 103101e04c3fSmrg return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY]; 103201e04c3fSmrg} 103301e04c3fSmrg 103401e04c3fSmrg/* look up spill area base offset and array size for a spilled temp array */ 103501e04c3fSmrgstatic void get_spilled_array_base_and_size(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, 103601e04c3fSmrg unsigned *array_base, unsigned *array_size) 103701e04c3fSmrg{ 103801e04c3fSmrg unsigned i; 103901e04c3fSmrg unsigned offset = 0; 104001e04c3fSmrg 104101e04c3fSmrg for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) { 104201e04c3fSmrg if (ctx->spilled_arrays[i]) { 104301e04c3fSmrg unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 104401e04c3fSmrg 104501e04c3fSmrg if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) { 104601e04c3fSmrg 
*array_base = offset; 104701e04c3fSmrg *array_size = size - 1; /* hw counts from 1 */ 104801e04c3fSmrg 104901e04c3fSmrg return; 105001e04c3fSmrg } 105101e04c3fSmrg 105201e04c3fSmrg offset += size; 105301e04c3fSmrg } 105401e04c3fSmrg } 105501e04c3fSmrg} 105601e04c3fSmrg 10573464ebd5Sriastradhstatic int tgsi_declaration(struct r600_shader_ctx *ctx) 10583464ebd5Sriastradh{ 10593464ebd5Sriastradh struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 1060af69d88dSmrg int r, i, j, count = d->Range.Last - d->Range.First + 1; 10613464ebd5Sriastradh 10623464ebd5Sriastradh switch (d->Declaration.File) { 10633464ebd5Sriastradh case TGSI_FILE_INPUT: 106401e04c3fSmrg for (j = 0; j < count; j++) { 106501e04c3fSmrg i = ctx->shader->ninput + j; 106601e04c3fSmrg assert(i < ARRAY_SIZE(ctx->shader->input)); 106701e04c3fSmrg ctx->shader->input[i].name = d->Semantic.Name; 106801e04c3fSmrg ctx->shader->input[i].sid = d->Semantic.Index + j; 106901e04c3fSmrg ctx->shader->input[i].interpolate = d->Interp.Interpolate; 107001e04c3fSmrg ctx->shader->input[i].interpolate_location = d->Interp.Location; 107101e04c3fSmrg ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 107201e04c3fSmrg if (ctx->type == PIPE_SHADER_FRAGMENT) { 107301e04c3fSmrg ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 107401e04c3fSmrg switch (ctx->shader->input[i].name) { 107501e04c3fSmrg case TGSI_SEMANTIC_FACE: 107601e04c3fSmrg if (ctx->face_gpr != -1) 107701e04c3fSmrg ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 107801e04c3fSmrg else 107901e04c3fSmrg ctx->face_gpr = ctx->shader->input[i].gpr; 108001e04c3fSmrg break; 108101e04c3fSmrg case TGSI_SEMANTIC_COLOR: 108201e04c3fSmrg ctx->colors_used++; 108301e04c3fSmrg break; 108401e04c3fSmrg case TGSI_SEMANTIC_POSITION: 108501e04c3fSmrg ctx->fragcoord_input = i; 108601e04c3fSmrg break; 108701e04c3fSmrg case TGSI_SEMANTIC_PRIMID: 108801e04c3fSmrg /* 
set this for now */ 108901e04c3fSmrg ctx->shader->gs_prim_id_input = true; 109001e04c3fSmrg ctx->shader->ps_prim_id_input = i; 109101e04c3fSmrg break; 109201e04c3fSmrg } 109301e04c3fSmrg if (ctx->bc->chip_class >= EVERGREEN) { 109401e04c3fSmrg if ((r = evergreen_interp_input(ctx, i))) 109501e04c3fSmrg return r; 109601e04c3fSmrg } 109701e04c3fSmrg } else if (ctx->type == PIPE_SHADER_GEOMETRY) { 109801e04c3fSmrg /* FIXME probably skip inputs if they aren't passed in the ring */ 109901e04c3fSmrg ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 110001e04c3fSmrg ctx->next_ring_offset += 16; 110101e04c3fSmrg if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 110201e04c3fSmrg ctx->shader->gs_prim_id_input = true; 11033464ebd5Sriastradh } 11043464ebd5Sriastradh } 110501e04c3fSmrg ctx->shader->ninput += count; 11063464ebd5Sriastradh break; 11073464ebd5Sriastradh case TGSI_FILE_OUTPUT: 110801e04c3fSmrg for (j = 0; j < count; j++) { 110901e04c3fSmrg i = ctx->shader->noutput + j; 111001e04c3fSmrg assert(i < ARRAY_SIZE(ctx->shader->output)); 111101e04c3fSmrg ctx->shader->output[i].name = d->Semantic.Name; 111201e04c3fSmrg ctx->shader->output[i].sid = d->Semantic.Index + j; 111301e04c3fSmrg ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 111401e04c3fSmrg ctx->shader->output[i].interpolate = d->Interp.Interpolate; 111501e04c3fSmrg ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 111601e04c3fSmrg if (ctx->type == PIPE_SHADER_VERTEX || 111701e04c3fSmrg ctx->type == PIPE_SHADER_GEOMETRY || 111801e04c3fSmrg ctx->type == PIPE_SHADER_TESS_EVAL) { 111901e04c3fSmrg ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 112001e04c3fSmrg switch (d->Semantic.Name) { 112101e04c3fSmrg case TGSI_SEMANTIC_CLIPDIST: 112201e04c3fSmrg break; 112301e04c3fSmrg case TGSI_SEMANTIC_PSIZE: 112401e04c3fSmrg ctx->shader->vs_out_misc_write = 1; 112501e04c3fSmrg ctx->shader->vs_out_point_size = 1; 112601e04c3fSmrg break; 
112701e04c3fSmrg case TGSI_SEMANTIC_EDGEFLAG: 112801e04c3fSmrg ctx->shader->vs_out_misc_write = 1; 112901e04c3fSmrg ctx->shader->vs_out_edgeflag = 1; 113001e04c3fSmrg ctx->edgeflag_output = i; 113101e04c3fSmrg break; 113201e04c3fSmrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 113301e04c3fSmrg ctx->shader->vs_out_misc_write = 1; 113401e04c3fSmrg ctx->shader->vs_out_viewport = 1; 113501e04c3fSmrg break; 113601e04c3fSmrg case TGSI_SEMANTIC_LAYER: 113701e04c3fSmrg ctx->shader->vs_out_misc_write = 1; 113801e04c3fSmrg ctx->shader->vs_out_layer = 1; 113901e04c3fSmrg break; 114001e04c3fSmrg case TGSI_SEMANTIC_CLIPVERTEX: 114101e04c3fSmrg ctx->clip_vertex_write = TRUE; 114201e04c3fSmrg ctx->cv_output = i; 114301e04c3fSmrg break; 114401e04c3fSmrg } 114501e04c3fSmrg if (ctx->type == PIPE_SHADER_GEOMETRY) { 114601e04c3fSmrg ctx->gs_out_ring_offset += 16; 114701e04c3fSmrg } 114801e04c3fSmrg } else if (ctx->type == PIPE_SHADER_FRAGMENT) { 114901e04c3fSmrg switch (d->Semantic.Name) { 115001e04c3fSmrg case TGSI_SEMANTIC_COLOR: 115101e04c3fSmrg ctx->shader->nr_ps_max_color_exports++; 115201e04c3fSmrg break; 115301e04c3fSmrg } 1154af69d88dSmrg } 11553464ebd5Sriastradh } 115601e04c3fSmrg ctx->shader->noutput += count; 11573464ebd5Sriastradh break; 11583464ebd5Sriastradh case TGSI_FILE_TEMPORARY: 1159af69d88dSmrg if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1160af69d88dSmrg if (d->Array.ArrayID) { 116101e04c3fSmrg bool spilled; 116201e04c3fSmrg unsigned idx = map_tgsi_reg_index_to_r600_gpr(ctx, 116301e04c3fSmrg d->Range.First, 116401e04c3fSmrg &spilled); 116501e04c3fSmrg 116601e04c3fSmrg if (!spilled) { 116701e04c3fSmrg r600_add_gpr_array(ctx->shader, idx, 116801e04c3fSmrg d->Range.Last - d->Range.First + 1, 0x0F); 116901e04c3fSmrg } 1170af69d88dSmrg } 1171af69d88dSmrg } 1172af69d88dSmrg break; 1173af69d88dSmrg 1174af69d88dSmrg case TGSI_FILE_CONSTANT: 11753464ebd5Sriastradh case TGSI_FILE_SAMPLER: 117601e04c3fSmrg case TGSI_FILE_SAMPLER_VIEW: 11773464ebd5Sriastradh case 
TGSI_FILE_ADDRESS: 117801e04c3fSmrg case TGSI_FILE_BUFFER: 117901e04c3fSmrg case TGSI_FILE_IMAGE: 118001e04c3fSmrg case TGSI_FILE_MEMORY: 118101e04c3fSmrg break; 118201e04c3fSmrg 118301e04c3fSmrg case TGSI_FILE_HW_ATOMIC: 118401e04c3fSmrg i = ctx->shader->nhwatomic_ranges; 118501e04c3fSmrg ctx->shader->atomics[i].start = d->Range.First; 118601e04c3fSmrg ctx->shader->atomics[i].end = d->Range.Last; 118701e04c3fSmrg ctx->shader->atomics[i].hw_idx = ctx->shader->atomic_base + ctx->shader->nhwatomic; 118801e04c3fSmrg ctx->shader->atomics[i].array_id = d->Array.ArrayID; 118901e04c3fSmrg ctx->shader->atomics[i].buffer_id = d->Dim.Index2D; 119001e04c3fSmrg ctx->shader->nhwatomic_ranges++; 119101e04c3fSmrg ctx->shader->nhwatomic += count; 11923464ebd5Sriastradh break; 11933464ebd5Sriastradh 11943464ebd5Sriastradh case TGSI_FILE_SYSTEM_VALUE: 119501e04c3fSmrg if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 119601e04c3fSmrg d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 119701e04c3fSmrg d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 119801e04c3fSmrg break; /* Already handled from allocate_system_value_inputs */ 119901e04c3fSmrg } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 1200af69d88dSmrg break; 120101e04c3fSmrg } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 120201e04c3fSmrg break; 120301e04c3fSmrg else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 120401e04c3fSmrg break; 120501e04c3fSmrg else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER || 120601e04c3fSmrg d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) { 120701e04c3fSmrg int param = r600_get_lds_unique_index(d->Semantic.Name, 0); 120801e04c3fSmrg int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 
3 : 2; 120901e04c3fSmrg unsigned temp_reg = r600_get_temp(ctx); 121001e04c3fSmrg 121101e04c3fSmrg r = get_lds_offset0(ctx, 2, temp_reg, true); 121201e04c3fSmrg if (r) 121301e04c3fSmrg return r; 121401e04c3fSmrg 121501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 121601e04c3fSmrg temp_reg, 0, 121701e04c3fSmrg temp_reg, 0, 121801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, param * 16); 121901e04c3fSmrg if (r) 122001e04c3fSmrg return r; 122101e04c3fSmrg 122201e04c3fSmrg do_lds_fetch_values(ctx, temp_reg, dreg, 0xf); 1223af69d88dSmrg } 122401e04c3fSmrg else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) { 122501e04c3fSmrg /* MOV r1.x, r0.x; 122601e04c3fSmrg MOV r1.y, r0.y; 122701e04c3fSmrg */ 122801e04c3fSmrg for (i = 0; i < 2; i++) { 1229af69d88dSmrg struct r600_bytecode_alu alu; 1230af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 123101e04c3fSmrg alu.op = ALU_OP1_MOV; 1232af69d88dSmrg alu.src[0].sel = 0; 123301e04c3fSmrg alu.src[0].chan = 0 + i; 123401e04c3fSmrg alu.dst.sel = 1; 123501e04c3fSmrg alu.dst.chan = 0 + i; 1236af69d88dSmrg alu.dst.write = 1; 123701e04c3fSmrg alu.last = (i == 1) ? 
1 : 0; 1238af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1239af69d88dSmrg return r; 1240af69d88dSmrg } 124101e04c3fSmrg /* ADD r1.z, 1.0f, -r0.x */ 124201e04c3fSmrg struct r600_bytecode_alu alu; 124301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 124401e04c3fSmrg alu.op = ALU_OP2_ADD; 124501e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; 124601e04c3fSmrg alu.src[1].sel = 1; 124701e04c3fSmrg alu.src[1].chan = 0; 124801e04c3fSmrg alu.src[1].neg = 1; 124901e04c3fSmrg alu.dst.sel = 1; 125001e04c3fSmrg alu.dst.chan = 2; 125101e04c3fSmrg alu.dst.write = 1; 125201e04c3fSmrg alu.last = 1; 125301e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 125401e04c3fSmrg return r; 12553464ebd5Sriastradh 125601e04c3fSmrg /* ADD r1.z, r1.z, -r1.y */ 125701e04c3fSmrg alu.op = ALU_OP2_ADD; 125801e04c3fSmrg alu.src[0].sel = 1; 125901e04c3fSmrg alu.src[0].chan = 2; 126001e04c3fSmrg alu.src[1].sel = 1; 126101e04c3fSmrg alu.src[1].chan = 1; 126201e04c3fSmrg alu.src[1].neg = 1; 126301e04c3fSmrg alu.dst.sel = 1; 126401e04c3fSmrg alu.dst.chan = 2; 126501e04c3fSmrg alu.dst.write = 1; 126601e04c3fSmrg alu.last = 1; 126701e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 126801e04c3fSmrg return r; 126901e04c3fSmrg break; 127001e04c3fSmrg } 127101e04c3fSmrg break; 127201e04c3fSmrg default: 127301e04c3fSmrg R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 127401e04c3fSmrg return -EINVAL; 127501e04c3fSmrg } 127601e04c3fSmrg return 0; 127701e04c3fSmrg} 127801e04c3fSmrg 127901e04c3fSmrgstatic int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 128001e04c3fSmrg{ 128101e04c3fSmrg struct tgsi_parse_context parse; 128201e04c3fSmrg struct { 128301e04c3fSmrg boolean enabled; 128401e04c3fSmrg int *reg; 128501e04c3fSmrg unsigned name, alternate_name; 128601e04c3fSmrg } inputs[2] = { 128701e04c3fSmrg { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 128801e04c3fSmrg 128901e04c3fSmrg 
{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 129001e04c3fSmrg }; 129101e04c3fSmrg int num_regs = 0; 129201e04c3fSmrg unsigned k, i; 129301e04c3fSmrg 129401e04c3fSmrg if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 129501e04c3fSmrg return 0; 129601e04c3fSmrg } 129701e04c3fSmrg 129801e04c3fSmrg /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 129901e04c3fSmrg while (!tgsi_parse_end_of_tokens(&parse)) { 130001e04c3fSmrg tgsi_parse_token(&parse); 130101e04c3fSmrg 130201e04c3fSmrg if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 130301e04c3fSmrg const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 130401e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 130501e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 130601e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 130701e04c3fSmrg { 130801e04c3fSmrg int interpolate, location, k; 130901e04c3fSmrg 131001e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 131101e04c3fSmrg location = TGSI_INTERPOLATE_LOC_CENTER; 131201e04c3fSmrg } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 131301e04c3fSmrg location = TGSI_INTERPOLATE_LOC_CENTER; 131401e04c3fSmrg /* Needs sample positions, currently those are always available */ 131501e04c3fSmrg } else { 131601e04c3fSmrg location = TGSI_INTERPOLATE_LOC_CENTROID; 131701e04c3fSmrg } 131801e04c3fSmrg 131901e04c3fSmrg interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 132001e04c3fSmrg k = eg_get_interpolator_index(interpolate, location); 132101e04c3fSmrg if (k >= 0) 132201e04c3fSmrg ctx->eg_interpolators[k].enabled = true; 132301e04c3fSmrg } 132401e04c3fSmrg } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 132501e04c3fSmrg struct tgsi_full_declaration *d = 
&parse.FullToken.FullDeclaration; 132601e04c3fSmrg if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 132701e04c3fSmrg for (k = 0; k < ARRAY_SIZE(inputs); k++) { 132801e04c3fSmrg if (d->Semantic.Name == inputs[k].name || 132901e04c3fSmrg d->Semantic.Name == inputs[k].alternate_name) { 133001e04c3fSmrg inputs[k].enabled = true; 133101e04c3fSmrg } 133201e04c3fSmrg } 133301e04c3fSmrg } 133401e04c3fSmrg } 133501e04c3fSmrg } 133601e04c3fSmrg 133701e04c3fSmrg tgsi_parse_free(&parse); 133801e04c3fSmrg 133901e04c3fSmrg if (ctx->info.reads_samplemask && 134001e04c3fSmrg (ctx->info.uses_linear_sample || ctx->info.uses_persp_sample)) { 134101e04c3fSmrg inputs[1].enabled = true; 134201e04c3fSmrg } 134301e04c3fSmrg 134401e04c3fSmrg if (ctx->bc->chip_class >= EVERGREEN) { 134501e04c3fSmrg int num_baryc = 0; 134601e04c3fSmrg /* assign gpr to each interpolator according to priority */ 134701e04c3fSmrg for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) { 134801e04c3fSmrg if (ctx->eg_interpolators[i].enabled) { 134901e04c3fSmrg ctx->eg_interpolators[i].ij_index = num_baryc; 135001e04c3fSmrg num_baryc++; 135101e04c3fSmrg } 135201e04c3fSmrg } 135301e04c3fSmrg num_baryc = (num_baryc + 1) >> 1; 135401e04c3fSmrg gpr_offset += num_baryc; 135501e04c3fSmrg } 135601e04c3fSmrg 135701e04c3fSmrg for (i = 0; i < ARRAY_SIZE(inputs); i++) { 135801e04c3fSmrg boolean enabled = inputs[i].enabled; 135901e04c3fSmrg int *reg = inputs[i].reg; 136001e04c3fSmrg unsigned name = inputs[i].name; 136101e04c3fSmrg 136201e04c3fSmrg if (enabled) { 136301e04c3fSmrg int gpr = gpr_offset + num_regs++; 136401e04c3fSmrg ctx->shader->nsys_inputs++; 136501e04c3fSmrg 136601e04c3fSmrg // add to inputs, allocate a gpr 136701e04c3fSmrg k = ctx->shader->ninput++; 136801e04c3fSmrg ctx->shader->input[k].name = name; 136901e04c3fSmrg ctx->shader->input[k].sid = 0; 137001e04c3fSmrg ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 137101e04c3fSmrg ctx->shader->input[k].interpolate_location = 
TGSI_INTERPOLATE_LOC_CENTER; 137201e04c3fSmrg *reg = ctx->shader->input[k].gpr = gpr; 137301e04c3fSmrg } 137401e04c3fSmrg } 137501e04c3fSmrg 137601e04c3fSmrg return gpr_offset + num_regs; 13773464ebd5Sriastradh} 13783464ebd5Sriastradh 13793464ebd5Sriastradh/* 13803464ebd5Sriastradh * for evergreen we need to scan the shader to find the number of GPRs we need to 138101e04c3fSmrg * reserve for interpolation and system values 13823464ebd5Sriastradh * 138301e04c3fSmrg * we need to know if we are going to emit any sample or centroid inputs 13843464ebd5Sriastradh * if perspective and linear are required 13853464ebd5Sriastradh*/ 13863464ebd5Sriastradhstatic int evergreen_gpr_count(struct r600_shader_ctx *ctx) 13873464ebd5Sriastradh{ 138801e04c3fSmrg unsigned i; 13893464ebd5Sriastradh 139001e04c3fSmrg memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 13913464ebd5Sriastradh 139201e04c3fSmrg /* 139301e04c3fSmrg * Could get this information from the shader info. But right now 139401e04c3fSmrg * we interpolate all declared inputs, whereas the shader info will 139501e04c3fSmrg * only contain the bits if the inputs are actually used, so it might 139601e04c3fSmrg * not be safe... 
139701e04c3fSmrg */ 13983464ebd5Sriastradh for (i = 0; i < ctx->info.num_inputs; i++) { 139901e04c3fSmrg int k; 140001e04c3fSmrg /* skip position/face/mask/sampleid */ 14013464ebd5Sriastradh if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1402af69d88dSmrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 140301e04c3fSmrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 140401e04c3fSmrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 14053464ebd5Sriastradh continue; 140601e04c3fSmrg 140701e04c3fSmrg k = eg_get_interpolator_index( 140801e04c3fSmrg ctx->info.input_interpolate[i], 140901e04c3fSmrg ctx->info.input_interpolate_loc[i]); 141001e04c3fSmrg if (k >= 0) 141101e04c3fSmrg ctx->eg_interpolators[k].enabled = TRUE; 141201e04c3fSmrg } 141301e04c3fSmrg 141401e04c3fSmrg /* XXX PULL MODEL and LINE STIPPLE */ 141501e04c3fSmrg 141601e04c3fSmrg return allocate_system_value_inputs(ctx, 0); 141701e04c3fSmrg} 141801e04c3fSmrg 141901e04c3fSmrg/* sample_id_sel == NULL means fetch for current sample */ 142001e04c3fSmrgstatic int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 142101e04c3fSmrg{ 142201e04c3fSmrg struct r600_bytecode_vtx vtx; 142301e04c3fSmrg int r, t1; 142401e04c3fSmrg 142501e04c3fSmrg t1 = r600_get_temp(ctx); 142601e04c3fSmrg 142701e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 142801e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 142901e04c3fSmrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 143001e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 143101e04c3fSmrg if (sample_id == NULL) { 143201e04c3fSmrg assert(ctx->fixed_pt_position_gpr != -1); 143301e04c3fSmrg 143401e04c3fSmrg vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 143501e04c3fSmrg vtx.src_sel_x = 3; 143601e04c3fSmrg } 143701e04c3fSmrg else { 143801e04c3fSmrg struct r600_bytecode_alu alu; 143901e04c3fSmrg 144001e04c3fSmrg memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 144101e04c3fSmrg alu.op = ALU_OP1_MOV; 144201e04c3fSmrg r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 144301e04c3fSmrg alu.dst.sel = t1; 144401e04c3fSmrg alu.dst.write = 1; 144501e04c3fSmrg alu.last = 1; 144601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 144701e04c3fSmrg if (r) 144801e04c3fSmrg return r; 144901e04c3fSmrg 145001e04c3fSmrg vtx.src_gpr = t1; 145101e04c3fSmrg vtx.src_sel_x = 0; 14523464ebd5Sriastradh } 145301e04c3fSmrg vtx.mega_fetch_count = 16; 145401e04c3fSmrg vtx.dst_gpr = t1; 145501e04c3fSmrg vtx.dst_sel_x = 0; 145601e04c3fSmrg vtx.dst_sel_y = 1; 145701e04c3fSmrg vtx.dst_sel_z = 2; 145801e04c3fSmrg vtx.dst_sel_w = 3; 145901e04c3fSmrg vtx.data_format = FMT_32_32_32_32_FLOAT; 146001e04c3fSmrg vtx.num_format_all = 2; 146101e04c3fSmrg vtx.format_comp_all = 1; 146201e04c3fSmrg vtx.use_const_fields = 0; 146301e04c3fSmrg vtx.offset = 0; 146401e04c3fSmrg vtx.endian = r600_endian_swap(32); 146501e04c3fSmrg vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 146601e04c3fSmrg 146701e04c3fSmrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 146801e04c3fSmrg if (r) 146901e04c3fSmrg return r; 147001e04c3fSmrg 147101e04c3fSmrg return t1; 147201e04c3fSmrg} 147301e04c3fSmrg 147401e04c3fSmrgstatic int eg_load_helper_invocation(struct r600_shader_ctx *ctx) 147501e04c3fSmrg{ 147601e04c3fSmrg int r; 147701e04c3fSmrg struct r600_bytecode_alu alu; 147801e04c3fSmrg 147901e04c3fSmrg /* do a vtx fetch with wqm set on the vtx fetch */ 148001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 148101e04c3fSmrg alu.op = ALU_OP1_MOV; 148201e04c3fSmrg alu.dst.sel = ctx->helper_invoc_reg; 148301e04c3fSmrg alu.dst.chan = 0; 148401e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 148501e04c3fSmrg alu.src[0].value = 0xffffffff; 148601e04c3fSmrg alu.dst.write = 1; 148701e04c3fSmrg alu.last = 1; 148801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 148901e04c3fSmrg if (r) 149001e04c3fSmrg return r; 149101e04c3fSmrg 149201e04c3fSmrg /* do a vtx 
fetch in VPM mode */ 149301e04c3fSmrg struct r600_bytecode_vtx vtx; 149401e04c3fSmrg memset(&vtx, 0, sizeof(vtx)); 149501e04c3fSmrg vtx.op = FETCH_OP_GET_BUFFER_RESINFO; 149601e04c3fSmrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 149701e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 149801e04c3fSmrg vtx.src_gpr = 0; 149901e04c3fSmrg vtx.mega_fetch_count = 16; /* no idea here really... */ 150001e04c3fSmrg vtx.dst_gpr = ctx->helper_invoc_reg; 150101e04c3fSmrg vtx.dst_sel_x = 4; 150201e04c3fSmrg vtx.dst_sel_y = 7; /* SEL_Y */ 150301e04c3fSmrg vtx.dst_sel_z = 7; /* SEL_Z */ 150401e04c3fSmrg vtx.dst_sel_w = 7; /* SEL_W */ 150501e04c3fSmrg vtx.data_format = FMT_32; 150601e04c3fSmrg if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) 150701e04c3fSmrg return r; 150801e04c3fSmrg ctx->bc->cf_last->vpm = 1; 150901e04c3fSmrg return 0; 151001e04c3fSmrg} 151101e04c3fSmrg 151201e04c3fSmrgstatic int cm_load_helper_invocation(struct r600_shader_ctx *ctx) 151301e04c3fSmrg{ 151401e04c3fSmrg int r; 151501e04c3fSmrg struct r600_bytecode_alu alu; 151601e04c3fSmrg 151701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 151801e04c3fSmrg alu.op = ALU_OP1_MOV; 151901e04c3fSmrg alu.dst.sel = ctx->helper_invoc_reg; 152001e04c3fSmrg alu.dst.chan = 0; 152101e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 152201e04c3fSmrg alu.src[0].value = 0xffffffff; 152301e04c3fSmrg alu.dst.write = 1; 152401e04c3fSmrg alu.last = 1; 152501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 152601e04c3fSmrg if (r) 152701e04c3fSmrg return r; 152801e04c3fSmrg 152901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 153001e04c3fSmrg alu.op = ALU_OP1_MOV; 153101e04c3fSmrg alu.dst.sel = ctx->helper_invoc_reg; 153201e04c3fSmrg alu.dst.chan = 0; 153301e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 153401e04c3fSmrg alu.dst.write = 1; 153501e04c3fSmrg alu.last = 1; 153601e04c3fSmrg r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE); 153701e04c3fSmrg if (r) 
153801e04c3fSmrg return r; 153901e04c3fSmrg 154001e04c3fSmrg return ctx->helper_invoc_reg; 154101e04c3fSmrg} 154201e04c3fSmrg 154301e04c3fSmrgstatic int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block) 154401e04c3fSmrg{ 154501e04c3fSmrg struct r600_bytecode_vtx vtx; 154601e04c3fSmrg int r, t1; 154701e04c3fSmrg 154801e04c3fSmrg if (ctx->cs_block_size_loaded) 154901e04c3fSmrg return ctx->cs_block_size_reg; 155001e04c3fSmrg if (ctx->cs_grid_size_loaded) 155101e04c3fSmrg return ctx->cs_grid_size_reg; 155201e04c3fSmrg 155301e04c3fSmrg t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg; 155401e04c3fSmrg struct r600_bytecode_alu alu; 155501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 155601e04c3fSmrg alu.op = ALU_OP1_MOV; 155701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 155801e04c3fSmrg alu.dst.sel = t1; 155901e04c3fSmrg alu.dst.write = 1; 156001e04c3fSmrg alu.last = 1; 156101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 156201e04c3fSmrg if (r) 156301e04c3fSmrg return r; 156401e04c3fSmrg 156501e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 156601e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 156701e04c3fSmrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 156801e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 156901e04c3fSmrg vtx.src_gpr = t1; 157001e04c3fSmrg vtx.src_sel_x = 0; 15713464ebd5Sriastradh 157201e04c3fSmrg vtx.mega_fetch_count = 16; 157301e04c3fSmrg vtx.dst_gpr = t1; 157401e04c3fSmrg vtx.dst_sel_x = 0; 157501e04c3fSmrg vtx.dst_sel_y = 1; 157601e04c3fSmrg vtx.dst_sel_z = 2; 157701e04c3fSmrg vtx.dst_sel_w = 7; 157801e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 157901e04c3fSmrg vtx.num_format_all = 1; 158001e04c3fSmrg vtx.format_comp_all = 0; 158101e04c3fSmrg vtx.use_const_fields = 0; 158201e04c3fSmrg vtx.offset = load_block ? 
0 : 16; // first element is size of buffer 158301e04c3fSmrg vtx.endian = r600_endian_swap(32); 158401e04c3fSmrg vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 15853464ebd5Sriastradh 158601e04c3fSmrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 158701e04c3fSmrg if (r) 158801e04c3fSmrg return r; 15893464ebd5Sriastradh 159001e04c3fSmrg if (load_block) 159101e04c3fSmrg ctx->cs_block_size_loaded = true; 159201e04c3fSmrg else 159301e04c3fSmrg ctx->cs_grid_size_loaded = true; 159401e04c3fSmrg return t1; 15953464ebd5Sriastradh} 15963464ebd5Sriastradh 15973464ebd5Sriastradhstatic void tgsi_src(struct r600_shader_ctx *ctx, 15983464ebd5Sriastradh const struct tgsi_full_src_register *tgsi_src, 15993464ebd5Sriastradh struct r600_shader_src *r600_src) 16003464ebd5Sriastradh{ 16013464ebd5Sriastradh memset(r600_src, 0, sizeof(*r600_src)); 16023464ebd5Sriastradh r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 16033464ebd5Sriastradh r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 16043464ebd5Sriastradh r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 16053464ebd5Sriastradh r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 16063464ebd5Sriastradh r600_src->neg = tgsi_src->Register.Negate; 16073464ebd5Sriastradh r600_src->abs = tgsi_src->Register.Absolute; 16083464ebd5Sriastradh 160901e04c3fSmrg if (tgsi_src->Register.File == TGSI_FILE_TEMPORARY) { 161001e04c3fSmrg bool spilled; 161101e04c3fSmrg unsigned idx; 161201e04c3fSmrg 161301e04c3fSmrg idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_src->Register.Index, &spilled); 161401e04c3fSmrg 161501e04c3fSmrg if (spilled) { 161601e04c3fSmrg int reg = r600_get_temp(ctx); 161701e04c3fSmrg int r; 161801e04c3fSmrg 161901e04c3fSmrg r600_src->sel = reg; 162001e04c3fSmrg 162101e04c3fSmrg if (ctx->bc->chip_class < R700) { 162201e04c3fSmrg struct r600_bytecode_output cf; 162301e04c3fSmrg 162401e04c3fSmrg memset(&cf, 0, sizeof(struct r600_bytecode_output)); 162501e04c3fSmrg cf.op = CF_OP_MEM_SCRATCH; 162601e04c3fSmrg cf.elem_size = 3; 
162701e04c3fSmrg cf.gpr = reg; 162801e04c3fSmrg cf.comp_mask = 0xF; 162901e04c3fSmrg cf.swizzle_x = 0; 163001e04c3fSmrg cf.swizzle_y = 1; 163101e04c3fSmrg cf.swizzle_z = 2; 163201e04c3fSmrg cf.swizzle_w = 3; 163301e04c3fSmrg cf.burst_count = 1; 163401e04c3fSmrg 163501e04c3fSmrg get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index, 163601e04c3fSmrg &cf.array_base, &cf.array_size); 163701e04c3fSmrg 163801e04c3fSmrg if (tgsi_src->Register.Indirect) { 163901e04c3fSmrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 164001e04c3fSmrg cf.index_gpr = ctx->bc->ar_reg; 164101e04c3fSmrg } 164201e04c3fSmrg else { 164301e04c3fSmrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ; 164401e04c3fSmrg cf.array_base += idx; 164501e04c3fSmrg cf.array_size = 0; 164601e04c3fSmrg } 164701e04c3fSmrg 164801e04c3fSmrg r = r600_bytecode_add_output(ctx->bc, &cf); 164901e04c3fSmrg } 165001e04c3fSmrg else { 165101e04c3fSmrg struct r600_bytecode_vtx vtx; 165201e04c3fSmrg 165301e04c3fSmrg if (r600_bytecode_get_need_wait_ack(ctx->bc)) { 165401e04c3fSmrg r600_bytecode_need_wait_ack(ctx->bc, false); 165501e04c3fSmrg r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 165601e04c3fSmrg } 165701e04c3fSmrg 165801e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 165901e04c3fSmrg vtx.op = FETCH_OP_READ_SCRATCH; 166001e04c3fSmrg vtx.dst_gpr = reg; 166101e04c3fSmrg vtx.uncached = 1; // Must bypass cache since prior spill written in same invocation 166201e04c3fSmrg vtx.elem_size = 3; 166301e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 166401e04c3fSmrg vtx.num_format_all = V_038010_SQ_NUM_FORMAT_INT; 166501e04c3fSmrg vtx.dst_sel_x = tgsi_src->Register.SwizzleX; 166601e04c3fSmrg vtx.dst_sel_y = tgsi_src->Register.SwizzleY; 166701e04c3fSmrg vtx.dst_sel_z = tgsi_src->Register.SwizzleZ; 166801e04c3fSmrg vtx.dst_sel_w = tgsi_src->Register.SwizzleW; 166901e04c3fSmrg 167001e04c3fSmrg get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index, 167101e04c3fSmrg &vtx.array_base, 
&vtx.array_size); 167201e04c3fSmrg 167301e04c3fSmrg if (tgsi_src->Register.Indirect) { 167401e04c3fSmrg vtx.indexed = 1; 167501e04c3fSmrg vtx.src_gpr = ctx->bc->ar_reg; 167601e04c3fSmrg } 167701e04c3fSmrg else { 167801e04c3fSmrg vtx.array_base += idx; 167901e04c3fSmrg vtx.array_size = 0; 168001e04c3fSmrg } 168101e04c3fSmrg 168201e04c3fSmrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 168301e04c3fSmrg } 168401e04c3fSmrg 168501e04c3fSmrg if (r) 168601e04c3fSmrg return; 168701e04c3fSmrg } 168801e04c3fSmrg else { 168901e04c3fSmrg if (tgsi_src->Register.Indirect) 169001e04c3fSmrg r600_src->rel = V_SQ_REL_RELATIVE; 169101e04c3fSmrg 169201e04c3fSmrg r600_src->sel = idx; 169301e04c3fSmrg } 169401e04c3fSmrg 169501e04c3fSmrg return; 169601e04c3fSmrg } 169701e04c3fSmrg 16983464ebd5Sriastradh if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 16993464ebd5Sriastradh int index; 17003464ebd5Sriastradh if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 17013464ebd5Sriastradh (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 17023464ebd5Sriastradh (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 17033464ebd5Sriastradh 17043464ebd5Sriastradh index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 17057ec681f3Smrg r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel); 17063464ebd5Sriastradh if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 17073464ebd5Sriastradh return; 17083464ebd5Sriastradh } 17093464ebd5Sriastradh index = tgsi_src->Register.Index; 17103464ebd5Sriastradh r600_src->sel = V_SQ_ALU_SRC_LITERAL; 17113464ebd5Sriastradh memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 17123464ebd5Sriastradh } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1713af69d88dSmrg if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1714af69d88dSmrg r600_src->swizzle[0] = 2; // Z value 171501e04c3fSmrg r600_src->swizzle[1] = 2; 171601e04c3fSmrg 
r600_src->swizzle[2] = 2; 171701e04c3fSmrg r600_src->swizzle[3] = 2; 1718af69d88dSmrg r600_src->sel = ctx->face_gpr; 171901e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 172001e04c3fSmrg r600_src->swizzle[0] = 3; // W value 172101e04c3fSmrg r600_src->swizzle[1] = 3; 172201e04c3fSmrg r600_src->swizzle[2] = 3; 172301e04c3fSmrg r600_src->swizzle[3] = 3; 172401e04c3fSmrg r600_src->sel = ctx->fixed_pt_position_gpr; 172501e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 172601e04c3fSmrg r600_src->swizzle[0] = 0; 172701e04c3fSmrg r600_src->swizzle[1] = 1; 172801e04c3fSmrg r600_src->swizzle[2] = 4; 172901e04c3fSmrg r600_src->swizzle[3] = 4; 173001e04c3fSmrg r600_src->sel = load_sample_position(ctx, NULL, -1); 1731af69d88dSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1732af69d88dSmrg r600_src->swizzle[0] = 3; 1733af69d88dSmrg r600_src->swizzle[1] = 3; 1734af69d88dSmrg r600_src->swizzle[2] = 3; 1735af69d88dSmrg r600_src->swizzle[3] = 3; 1736af69d88dSmrg r600_src->sel = 0; 1737af69d88dSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1738af69d88dSmrg r600_src->swizzle[0] = 0; 1739af69d88dSmrg r600_src->swizzle[1] = 0; 1740af69d88dSmrg r600_src->swizzle[2] = 0; 1741af69d88dSmrg r600_src->swizzle[3] = 0; 1742af69d88dSmrg r600_src->sel = 0; 174301e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_THREAD_ID) { 174401e04c3fSmrg r600_src->sel = 0; 174501e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_ID) { 174601e04c3fSmrg r600_src->sel = 1; 174701e04c3fSmrg } else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 174801e04c3fSmrg 
r600_src->swizzle[0] = 3; 174901e04c3fSmrg r600_src->swizzle[1] = 3; 175001e04c3fSmrg r600_src->swizzle[2] = 3; 175101e04c3fSmrg r600_src->swizzle[3] = 3; 175201e04c3fSmrg r600_src->sel = 1; 175301e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 175401e04c3fSmrg r600_src->swizzle[0] = 2; 175501e04c3fSmrg r600_src->swizzle[1] = 2; 175601e04c3fSmrg r600_src->swizzle[2] = 2; 175701e04c3fSmrg r600_src->swizzle[3] = 2; 175801e04c3fSmrg r600_src->sel = 0; 175901e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) { 176001e04c3fSmrg r600_src->sel = 1; 176101e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) { 176201e04c3fSmrg r600_src->sel = 3; 176301e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { 176401e04c3fSmrg r600_src->sel = 2; 176501e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { 176601e04c3fSmrg r600_src->sel = ctx->tess_input_info; 176701e04c3fSmrg r600_src->swizzle[0] = 2; 176801e04c3fSmrg r600_src->swizzle[1] = 2; 176901e04c3fSmrg r600_src->swizzle[2] = 2; 177001e04c3fSmrg r600_src->swizzle[3] = 2; 177101e04c3fSmrg } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 177201e04c3fSmrg r600_src->sel = 0; 177301e04c3fSmrg r600_src->swizzle[0] = 0; 177401e04c3fSmrg r600_src->swizzle[1] = 0; 177501e04c3fSmrg r600_src->swizzle[2] = 0; 177601e04c3fSmrg r600_src->swizzle[3] = 0; 177701e04c3fSmrg } else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 177801e04c3fSmrg r600_src->sel = 0; 177901e04c3fSmrg r600_src->swizzle[0] = 3; 178001e04c3fSmrg r600_src->swizzle[1] = 3; 
178101e04c3fSmrg r600_src->swizzle[2] = 3; 178201e04c3fSmrg r600_src->swizzle[3] = 3; 178301e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) { 178401e04c3fSmrg r600_src->sel = load_block_grid_size(ctx, false); 178501e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) { 178601e04c3fSmrg r600_src->sel = load_block_grid_size(ctx, true); 178701e04c3fSmrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) { 178801e04c3fSmrg r600_src->sel = ctx->helper_invoc_reg; 178901e04c3fSmrg r600_src->swizzle[0] = 0; 179001e04c3fSmrg r600_src->swizzle[1] = 0; 179101e04c3fSmrg r600_src->swizzle[2] = 0; 179201e04c3fSmrg r600_src->swizzle[3] = 0; 1793af69d88dSmrg } 17943464ebd5Sriastradh } else { 17953464ebd5Sriastradh if (tgsi_src->Register.Indirect) 17963464ebd5Sriastradh r600_src->rel = V_SQ_REL_RELATIVE; 17973464ebd5Sriastradh r600_src->sel = tgsi_src->Register.Index; 17983464ebd5Sriastradh r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 17993464ebd5Sriastradh } 1800af69d88dSmrg if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1801af69d88dSmrg if (tgsi_src->Register.Dimension) { 1802af69d88dSmrg r600_src->kc_bank = tgsi_src->Dimension.Index; 180301e04c3fSmrg if (tgsi_src->Dimension.Indirect) { 180401e04c3fSmrg r600_src->kc_rel = 1; 180501e04c3fSmrg } 1806af69d88dSmrg } 1807af69d88dSmrg } 18083464ebd5Sriastradh} 18093464ebd5Sriastradh 1810af69d88dSmrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 181101e04c3fSmrg unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1812af69d88dSmrg unsigned int dst_reg) 18133464ebd5Sriastradh{ 1814af69d88dSmrg struct r600_bytecode_vtx vtx; 18153464ebd5Sriastradh unsigned int ar_reg; 18163464ebd5Sriastradh int r; 18173464ebd5Sriastradh 18183464ebd5Sriastradh if (offset) { 1819af69d88dSmrg struct r600_bytecode_alu 
alu; 18203464ebd5Sriastradh 18213464ebd5Sriastradh memset(&alu, 0, sizeof(alu)); 18223464ebd5Sriastradh 1823af69d88dSmrg alu.op = ALU_OP2_ADD_INT; 1824af69d88dSmrg alu.src[0].sel = ctx->bc->ar_reg; 1825af69d88dSmrg alu.src[0].chan = ar_chan; 18263464ebd5Sriastradh 18273464ebd5Sriastradh alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 18283464ebd5Sriastradh alu.src[1].value = offset; 18293464ebd5Sriastradh 18303464ebd5Sriastradh alu.dst.sel = dst_reg; 1831af69d88dSmrg alu.dst.chan = ar_chan; 18323464ebd5Sriastradh alu.dst.write = 1; 18333464ebd5Sriastradh alu.last = 1; 18343464ebd5Sriastradh 1835af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 18363464ebd5Sriastradh return r; 18373464ebd5Sriastradh 18383464ebd5Sriastradh ar_reg = dst_reg; 18393464ebd5Sriastradh } else { 1840af69d88dSmrg ar_reg = ctx->bc->ar_reg; 18413464ebd5Sriastradh } 18423464ebd5Sriastradh 18433464ebd5Sriastradh memset(&vtx, 0, sizeof(vtx)); 1844af69d88dSmrg vtx.buffer_id = cb_idx; 184501e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 18463464ebd5Sriastradh vtx.src_gpr = ar_reg; 1847af69d88dSmrg vtx.src_sel_x = ar_chan; 18483464ebd5Sriastradh vtx.mega_fetch_count = 16; 18493464ebd5Sriastradh vtx.dst_gpr = dst_reg; 18503464ebd5Sriastradh vtx.dst_sel_x = 0; /* SEL_X */ 18513464ebd5Sriastradh vtx.dst_sel_y = 1; /* SEL_Y */ 18523464ebd5Sriastradh vtx.dst_sel_z = 2; /* SEL_Z */ 18533464ebd5Sriastradh vtx.dst_sel_w = 3; /* SEL_W */ 18543464ebd5Sriastradh vtx.data_format = FMT_32_32_32_32_FLOAT; 18553464ebd5Sriastradh vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 18563464ebd5Sriastradh vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 18573464ebd5Sriastradh vtx.endian = r600_endian_swap(32); 185801e04c3fSmrg vtx.buffer_index_mode = cb_rel; // cb_rel ? 
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 18593464ebd5Sriastradh 1860af69d88dSmrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1861af69d88dSmrg return r; 1862af69d88dSmrg 1863af69d88dSmrg return 0; 1864af69d88dSmrg} 1865af69d88dSmrg 1866af69d88dSmrgstatic int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1867af69d88dSmrg{ 1868af69d88dSmrg struct r600_bytecode_vtx vtx; 1869af69d88dSmrg int r; 1870af69d88dSmrg unsigned index = src->Register.Index; 1871af69d88dSmrg unsigned vtx_id = src->Dimension.Index; 187201e04c3fSmrg int offset_reg = ctx->gs_rotated_input[vtx_id / 3]; 1873af69d88dSmrg int offset_chan = vtx_id % 3; 187401e04c3fSmrg int t2 = 0; 1875af69d88dSmrg 1876af69d88dSmrg /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1877af69d88dSmrg * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1878af69d88dSmrg 187901e04c3fSmrg if (offset_reg == ctx->gs_rotated_input[0] && offset_chan == 2) 1880af69d88dSmrg offset_chan = 3; 1881af69d88dSmrg 188201e04c3fSmrg if (src->Dimension.Indirect || src->Register.Indirect) 188301e04c3fSmrg t2 = r600_get_temp(ctx); 188401e04c3fSmrg 1885af69d88dSmrg if (src->Dimension.Indirect) { 1886af69d88dSmrg int treg[3]; 1887af69d88dSmrg struct r600_bytecode_alu alu; 1888af69d88dSmrg int r, i; 188901e04c3fSmrg unsigned addr_reg; 189001e04c3fSmrg addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index); 189101e04c3fSmrg if (src->DimIndirect.Index > 0) { 189201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 189301e04c3fSmrg ctx->bc->ar_reg, 0, 189401e04c3fSmrg addr_reg, 0, 189501e04c3fSmrg 0, 0); 189601e04c3fSmrg if (r) 189701e04c3fSmrg return r; 189801e04c3fSmrg } 189901e04c3fSmrg /* 1900af69d88dSmrg we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1901af69d88dSmrg at least this is what fglrx seems to do. 
*/ 1902af69d88dSmrg for (i = 0; i < 3; i++) { 1903af69d88dSmrg treg[i] = r600_get_temp(ctx); 1904af69d88dSmrg } 190501e04c3fSmrg r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 190601e04c3fSmrg 1907af69d88dSmrg for (i = 0; i < 3; i++) { 1908af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1909af69d88dSmrg alu.op = ALU_OP1_MOV; 191001e04c3fSmrg alu.src[0].sel = ctx->gs_rotated_input[0]; 1911af69d88dSmrg alu.src[0].chan = i == 2 ? 3 : i; 1912af69d88dSmrg alu.dst.sel = treg[i]; 1913af69d88dSmrg alu.dst.chan = 0; 1914af69d88dSmrg alu.dst.write = 1; 1915af69d88dSmrg alu.last = 1; 1916af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1917af69d88dSmrg if (r) 1918af69d88dSmrg return r; 1919af69d88dSmrg } 1920af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1921af69d88dSmrg alu.op = ALU_OP1_MOV; 1922af69d88dSmrg alu.src[0].sel = treg[0]; 1923af69d88dSmrg alu.src[0].rel = 1; 1924af69d88dSmrg alu.dst.sel = t2; 1925af69d88dSmrg alu.dst.write = 1; 1926af69d88dSmrg alu.last = 1; 1927af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1928af69d88dSmrg if (r) 1929af69d88dSmrg return r; 1930af69d88dSmrg offset_reg = t2; 193101e04c3fSmrg offset_chan = 0; 1932af69d88dSmrg } 1933af69d88dSmrg 193401e04c3fSmrg if (src->Register.Indirect) { 193501e04c3fSmrg int addr_reg; 193601e04c3fSmrg unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID]; 193701e04c3fSmrg 193801e04c3fSmrg addr_reg = get_address_file_reg(ctx, src->Indirect.Index); 193901e04c3fSmrg 194001e04c3fSmrg /* pull the value from index_reg */ 194101e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 194201e04c3fSmrg t2, 1, 194301e04c3fSmrg addr_reg, 0, 194401e04c3fSmrg V_SQ_ALU_SRC_LITERAL, first); 194501e04c3fSmrg if (r) 194601e04c3fSmrg return r; 194701e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 194801e04c3fSmrg t2, 0, 194901e04c3fSmrg t2, 1, 195001e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 4, 195101e04c3fSmrg offset_reg, offset_chan); 195201e04c3fSmrg if (r) 
195301e04c3fSmrg return r; 195401e04c3fSmrg offset_reg = t2; 195501e04c3fSmrg offset_chan = 0; 195601e04c3fSmrg index = src->Register.Index - first; 195701e04c3fSmrg } 1958af69d88dSmrg 1959af69d88dSmrg memset(&vtx, 0, sizeof(vtx)); 1960af69d88dSmrg vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 196101e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1962af69d88dSmrg vtx.src_gpr = offset_reg; 1963af69d88dSmrg vtx.src_sel_x = offset_chan; 1964af69d88dSmrg vtx.offset = index * 16; /*bytes*/ 1965af69d88dSmrg vtx.mega_fetch_count = 16; 1966af69d88dSmrg vtx.dst_gpr = dst_reg; 1967af69d88dSmrg vtx.dst_sel_x = 0; /* SEL_X */ 1968af69d88dSmrg vtx.dst_sel_y = 1; /* SEL_Y */ 1969af69d88dSmrg vtx.dst_sel_z = 2; /* SEL_Z */ 1970af69d88dSmrg vtx.dst_sel_w = 3; /* SEL_W */ 1971af69d88dSmrg if (ctx->bc->chip_class >= EVERGREEN) { 1972af69d88dSmrg vtx.use_const_fields = 1; 1973af69d88dSmrg } else { 1974af69d88dSmrg vtx.data_format = FMT_32_32_32_32_FLOAT; 1975af69d88dSmrg } 1976af69d88dSmrg 1977af69d88dSmrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 19783464ebd5Sriastradh return r; 19793464ebd5Sriastradh 19803464ebd5Sriastradh return 0; 19813464ebd5Sriastradh} 19823464ebd5Sriastradh 1983af69d88dSmrgstatic int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1984af69d88dSmrg{ 1985af69d88dSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 198601e04c3fSmrg unsigned i; 1987af69d88dSmrg 1988af69d88dSmrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1989af69d88dSmrg struct tgsi_full_src_register *src = &inst->Src[i]; 1990af69d88dSmrg 1991af69d88dSmrg if (src->Register.File == TGSI_FILE_INPUT) { 1992af69d88dSmrg if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1993af69d88dSmrg /* primitive id is in R0.z */ 1994af69d88dSmrg ctx->src[i].sel = 0; 1995af69d88dSmrg ctx->src[i].swizzle[0] = 2; 1996af69d88dSmrg } 1997af69d88dSmrg } 1998af69d88dSmrg if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 
1999af69d88dSmrg int treg = r600_get_temp(ctx); 2000af69d88dSmrg 2001af69d88dSmrg fetch_gs_input(ctx, src, treg); 2002af69d88dSmrg ctx->src[i].sel = treg; 200301e04c3fSmrg ctx->src[i].rel = 0; 2004af69d88dSmrg } 2005af69d88dSmrg } 2006af69d88dSmrg return 0; 2007af69d88dSmrg} 2008af69d88dSmrg 20093464ebd5Sriastradh 201001e04c3fSmrg/* Tessellation shaders pass outputs to the next shader using LDS. 201101e04c3fSmrg * 201201e04c3fSmrg * LS outputs = TCS(HS) inputs 201301e04c3fSmrg * TCS(HS) outputs = TES(DS) inputs 201401e04c3fSmrg * 201501e04c3fSmrg * The LDS layout is: 201601e04c3fSmrg * - TCS inputs for patch 0 201701e04c3fSmrg * - TCS inputs for patch 1 201801e04c3fSmrg * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 201901e04c3fSmrg * - ... 202001e04c3fSmrg * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 202101e04c3fSmrg * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 202201e04c3fSmrg * - TCS outputs for patch 1 202301e04c3fSmrg * - Per-patch TCS outputs for patch 1 202401e04c3fSmrg * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 202501e04c3fSmrg * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 202601e04c3fSmrg * - ... 202701e04c3fSmrg * 202801e04c3fSmrg * All three shaders VS(LS), TCS, TES share the same LDS space. 
202901e04c3fSmrg */ 203001e04c3fSmrg/* this will return with the dw address in temp_reg.x */ 203101e04c3fSmrgstatic int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, 203201e04c3fSmrg const struct tgsi_full_dst_register *dst, 203301e04c3fSmrg const struct tgsi_full_src_register *src, 203401e04c3fSmrg int stride_bytes_reg, int stride_bytes_chan) 203501e04c3fSmrg{ 203601e04c3fSmrg struct tgsi_full_dst_register reg; 203701e04c3fSmrg ubyte *name, *index, *array_first; 203801e04c3fSmrg int r; 203901e04c3fSmrg int param; 204001e04c3fSmrg struct tgsi_shader_info *info = &ctx->info; 204101e04c3fSmrg /* Set the register description. The address computation is the same 204201e04c3fSmrg * for sources and destinations. */ 204301e04c3fSmrg if (src) { 204401e04c3fSmrg reg.Register.File = src->Register.File; 204501e04c3fSmrg reg.Register.Index = src->Register.Index; 204601e04c3fSmrg reg.Register.Indirect = src->Register.Indirect; 204701e04c3fSmrg reg.Register.Dimension = src->Register.Dimension; 204801e04c3fSmrg reg.Indirect = src->Indirect; 204901e04c3fSmrg reg.Dimension = src->Dimension; 205001e04c3fSmrg reg.DimIndirect = src->DimIndirect; 205101e04c3fSmrg } else 205201e04c3fSmrg reg = *dst; 205301e04c3fSmrg 205401e04c3fSmrg /* If the register is 2-dimensional (e.g. an array of vertices 205501e04c3fSmrg * in a primitive), calculate the base address of the vertex. 
*/ 205601e04c3fSmrg if (reg.Register.Dimension) { 205701e04c3fSmrg int sel, chan; 205801e04c3fSmrg if (reg.Dimension.Indirect) { 205901e04c3fSmrg unsigned addr_reg; 206001e04c3fSmrg assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS); 206101e04c3fSmrg 206201e04c3fSmrg addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index); 206301e04c3fSmrg /* pull the value from index_reg */ 206401e04c3fSmrg sel = addr_reg; 206501e04c3fSmrg chan = 0; 206601e04c3fSmrg } else { 206701e04c3fSmrg sel = V_SQ_ALU_SRC_LITERAL; 206801e04c3fSmrg chan = reg.Dimension.Index; 20693464ebd5Sriastradh } 20703464ebd5Sriastradh 207101e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 207201e04c3fSmrg temp_reg, 0, 207301e04c3fSmrg stride_bytes_reg, stride_bytes_chan, 207401e04c3fSmrg sel, chan, 207501e04c3fSmrg temp_reg, 0); 207601e04c3fSmrg if (r) 207701e04c3fSmrg return r; 207801e04c3fSmrg } 20793464ebd5Sriastradh 208001e04c3fSmrg if (reg.Register.File == TGSI_FILE_INPUT) { 208101e04c3fSmrg name = info->input_semantic_name; 208201e04c3fSmrg index = info->input_semantic_index; 208301e04c3fSmrg array_first = info->input_array_first; 208401e04c3fSmrg } else if (reg.Register.File == TGSI_FILE_OUTPUT) { 208501e04c3fSmrg name = info->output_semantic_name; 208601e04c3fSmrg index = info->output_semantic_index; 208701e04c3fSmrg array_first = info->output_array_first; 208801e04c3fSmrg } else { 208901e04c3fSmrg assert(0); 209001e04c3fSmrg return -1; 209101e04c3fSmrg } 209201e04c3fSmrg if (reg.Register.Indirect) { 209301e04c3fSmrg int addr_reg; 209401e04c3fSmrg int first; 209501e04c3fSmrg /* Add the relative address of the element. 
*/ 209601e04c3fSmrg if (reg.Indirect.ArrayID) 209701e04c3fSmrg first = array_first[reg.Indirect.ArrayID]; 209801e04c3fSmrg else 209901e04c3fSmrg first = reg.Register.Index; 210001e04c3fSmrg 210101e04c3fSmrg addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 210201e04c3fSmrg 210301e04c3fSmrg /* pull the value from index_reg */ 210401e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 210501e04c3fSmrg temp_reg, 0, 210601e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 16, 210701e04c3fSmrg addr_reg, 0, 210801e04c3fSmrg temp_reg, 0); 210901e04c3fSmrg if (r) 211001e04c3fSmrg return r; 211101e04c3fSmrg 211201e04c3fSmrg param = r600_get_lds_unique_index(name[first], 211301e04c3fSmrg index[first]); 211401e04c3fSmrg 211501e04c3fSmrg } else { 211601e04c3fSmrg param = r600_get_lds_unique_index(name[reg.Register.Index], 211701e04c3fSmrg index[reg.Register.Index]); 211801e04c3fSmrg } 211901e04c3fSmrg 212001e04c3fSmrg /* add to base_addr - passed in temp_reg.x */ 212101e04c3fSmrg if (param) { 212201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 212301e04c3fSmrg temp_reg, 0, 212401e04c3fSmrg temp_reg, 0, 212501e04c3fSmrg V_SQ_ALU_SRC_LITERAL, param * 16); 212601e04c3fSmrg if (r) 212701e04c3fSmrg return r; 212801e04c3fSmrg 212901e04c3fSmrg } 213001e04c3fSmrg return 0; 213101e04c3fSmrg} 213201e04c3fSmrg 213301e04c3fSmrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 213401e04c3fSmrg unsigned dst_reg, unsigned mask) 213501e04c3fSmrg{ 213601e04c3fSmrg struct r600_bytecode_alu alu; 213701e04c3fSmrg int r, i, lasti; 213801e04c3fSmrg 213901e04c3fSmrg if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 214001e04c3fSmrg ctx->bc->force_add_cf = 1; 214101e04c3fSmrg 214201e04c3fSmrg lasti = tgsi_last_instruction(mask); 214301e04c3fSmrg for (i = 1; i <= lasti; i++) { 214401e04c3fSmrg if (!(mask & (1 << i))) 214501e04c3fSmrg continue; 214601e04c3fSmrg 214701e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 214801e04c3fSmrg temp_reg, i, 214901e04c3fSmrg temp_reg, 0, 
215001e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 4 * i); 215101e04c3fSmrg if (r) 215201e04c3fSmrg return r; 215301e04c3fSmrg } 215401e04c3fSmrg for (i = 0; i <= lasti; i++) { 215501e04c3fSmrg if (!(mask & (1 << i))) 215601e04c3fSmrg continue; 215701e04c3fSmrg 215801e04c3fSmrg /* emit an LDS_READ_RET */ 215901e04c3fSmrg memset(&alu, 0, sizeof(alu)); 216001e04c3fSmrg alu.op = LDS_OP1_LDS_READ_RET; 216101e04c3fSmrg alu.src[0].sel = temp_reg; 216201e04c3fSmrg alu.src[0].chan = i; 216301e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0; 216401e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_0; 216501e04c3fSmrg alu.dst.chan = 0; 216601e04c3fSmrg alu.is_lds_idx_op = true; 216701e04c3fSmrg alu.last = 1; 216801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 216901e04c3fSmrg if (r) 217001e04c3fSmrg return r; 217101e04c3fSmrg } 217201e04c3fSmrg for (i = 0; i <= lasti; i++) { 217301e04c3fSmrg if (!(mask & (1 << i))) 217401e04c3fSmrg continue; 217501e04c3fSmrg 217601e04c3fSmrg /* then read from LDS_OQ_A_POP */ 217701e04c3fSmrg memset(&alu, 0, sizeof(alu)); 217801e04c3fSmrg 217901e04c3fSmrg alu.op = ALU_OP1_MOV; 218001e04c3fSmrg alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 218101e04c3fSmrg alu.src[0].chan = 0; 218201e04c3fSmrg alu.dst.sel = dst_reg; 218301e04c3fSmrg alu.dst.chan = i; 218401e04c3fSmrg alu.dst.write = 1; 218501e04c3fSmrg alu.last = 1; 218601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 218701e04c3fSmrg if (r) 218801e04c3fSmrg return r; 218901e04c3fSmrg } 219001e04c3fSmrg return 0; 219101e04c3fSmrg} 219201e04c3fSmrg 219301e04c3fSmrgstatic int fetch_mask(struct tgsi_src_register *reg) 219401e04c3fSmrg{ 219501e04c3fSmrg int mask = 0; 219601e04c3fSmrg mask |= 1 << reg->SwizzleX; 219701e04c3fSmrg mask |= 1 << reg->SwizzleY; 219801e04c3fSmrg mask |= 1 << reg->SwizzleZ; 219901e04c3fSmrg mask |= 1 << reg->SwizzleW; 220001e04c3fSmrg return mask; 220101e04c3fSmrg} 220201e04c3fSmrg 220301e04c3fSmrgstatic int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register 
*src, unsigned int dst_reg) 220401e04c3fSmrg{ 220501e04c3fSmrg int r; 220601e04c3fSmrg unsigned temp_reg = r600_get_temp(ctx); 220701e04c3fSmrg 220801e04c3fSmrg r = get_lds_offset0(ctx, 2, temp_reg, 220901e04c3fSmrg src->Register.Dimension ? false : true); 221001e04c3fSmrg if (r) 221101e04c3fSmrg return r; 221201e04c3fSmrg 221301e04c3fSmrg /* the base address is now in temp.x */ 221401e04c3fSmrg r = r600_get_byte_address(ctx, temp_reg, 221501e04c3fSmrg NULL, src, ctx->tess_output_info, 1); 221601e04c3fSmrg if (r) 221701e04c3fSmrg return r; 221801e04c3fSmrg 221901e04c3fSmrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 222001e04c3fSmrg if (r) 222101e04c3fSmrg return r; 222201e04c3fSmrg return 0; 222301e04c3fSmrg} 222401e04c3fSmrg 222501e04c3fSmrgstatic int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 222601e04c3fSmrg{ 222701e04c3fSmrg int r; 222801e04c3fSmrg unsigned temp_reg = r600_get_temp(ctx); 222901e04c3fSmrg 223001e04c3fSmrg /* t.x = ips * r0.y */ 223101e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 223201e04c3fSmrg temp_reg, 0, 223301e04c3fSmrg ctx->tess_input_info, 0, 223401e04c3fSmrg 0, 1); 223501e04c3fSmrg 223601e04c3fSmrg if (r) 223701e04c3fSmrg return r; 223801e04c3fSmrg 223901e04c3fSmrg /* the base address is now in temp.x */ 224001e04c3fSmrg r = r600_get_byte_address(ctx, temp_reg, 224101e04c3fSmrg NULL, src, ctx->tess_input_info, 1); 224201e04c3fSmrg if (r) 224301e04c3fSmrg return r; 224401e04c3fSmrg 224501e04c3fSmrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 224601e04c3fSmrg if (r) 224701e04c3fSmrg return r; 224801e04c3fSmrg return 0; 224901e04c3fSmrg} 225001e04c3fSmrg 225101e04c3fSmrgstatic int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 225201e04c3fSmrg{ 225301e04c3fSmrg int r; 225401e04c3fSmrg unsigned temp_reg = r600_get_temp(ctx); 225501e04c3fSmrg 
225601e04c3fSmrg r = get_lds_offset0(ctx, 1, temp_reg, 225701e04c3fSmrg src->Register.Dimension ? false : true); 225801e04c3fSmrg if (r) 225901e04c3fSmrg return r; 226001e04c3fSmrg /* the base address is now in temp.x */ 226101e04c3fSmrg r = r600_get_byte_address(ctx, temp_reg, 226201e04c3fSmrg NULL, src, 226301e04c3fSmrg ctx->tess_output_info, 1); 226401e04c3fSmrg if (r) 226501e04c3fSmrg return r; 226601e04c3fSmrg 226701e04c3fSmrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 226801e04c3fSmrg if (r) 226901e04c3fSmrg return r; 227001e04c3fSmrg return 0; 227101e04c3fSmrg} 227201e04c3fSmrg 227301e04c3fSmrgstatic int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 227401e04c3fSmrg{ 227501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 227601e04c3fSmrg unsigned i; 227701e04c3fSmrg 227801e04c3fSmrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 227901e04c3fSmrg struct tgsi_full_src_register *src = &inst->Src[i]; 228001e04c3fSmrg 228101e04c3fSmrg if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 228201e04c3fSmrg int treg = r600_get_temp(ctx); 228301e04c3fSmrg fetch_tes_input(ctx, src, treg); 228401e04c3fSmrg ctx->src[i].sel = treg; 228501e04c3fSmrg ctx->src[i].rel = 0; 228601e04c3fSmrg } 228701e04c3fSmrg if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 228801e04c3fSmrg int treg = r600_get_temp(ctx); 228901e04c3fSmrg fetch_tcs_input(ctx, src, treg); 229001e04c3fSmrg ctx->src[i].sel = treg; 229101e04c3fSmrg ctx->src[i].rel = 0; 229201e04c3fSmrg } 229301e04c3fSmrg if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 229401e04c3fSmrg int treg = r600_get_temp(ctx); 229501e04c3fSmrg fetch_tcs_output(ctx, src, treg); 229601e04c3fSmrg ctx->src[i].sel = treg; 229701e04c3fSmrg ctx->src[i].rel = 0; 229801e04c3fSmrg } 229901e04c3fSmrg } 230001e04c3fSmrg return 0; 230101e04c3fSmrg} 230201e04c3fSmrg 
230301e04c3fSmrgstatic int tgsi_split_constant(struct r600_shader_ctx *ctx) 230401e04c3fSmrg{ 230501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 230601e04c3fSmrg struct r600_bytecode_alu alu; 230701e04c3fSmrg int i, j, k, nconst, r; 230801e04c3fSmrg 230901e04c3fSmrg for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 231001e04c3fSmrg if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 231101e04c3fSmrg nconst++; 231201e04c3fSmrg } 231301e04c3fSmrg tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 231401e04c3fSmrg } 231501e04c3fSmrg for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 231601e04c3fSmrg if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 231701e04c3fSmrg continue; 231801e04c3fSmrg } 231901e04c3fSmrg 232001e04c3fSmrg if (ctx->src[i].rel) { 232101e04c3fSmrg int chan = inst->Src[i].Indirect.Swizzle; 232201e04c3fSmrg int treg = r600_get_temp(ctx); 232301e04c3fSmrg if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 232401e04c3fSmrg return r; 232501e04c3fSmrg 232601e04c3fSmrg ctx->src[i].kc_bank = 0; 232701e04c3fSmrg ctx->src[i].kc_rel = 0; 232801e04c3fSmrg ctx->src[i].sel = treg; 232901e04c3fSmrg ctx->src[i].rel = 0; 23303464ebd5Sriastradh j--; 23313464ebd5Sriastradh } else if (j > 0) { 23323464ebd5Sriastradh int treg = r600_get_temp(ctx); 23333464ebd5Sriastradh for (k = 0; k < 4; k++) { 2334af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2335af69d88dSmrg alu.op = ALU_OP1_MOV; 23363464ebd5Sriastradh alu.src[0].sel = ctx->src[i].sel; 23373464ebd5Sriastradh alu.src[0].chan = k; 23383464ebd5Sriastradh alu.src[0].rel = ctx->src[i].rel; 2339af69d88dSmrg alu.src[0].kc_bank = ctx->src[i].kc_bank; 234001e04c3fSmrg alu.src[0].kc_rel = ctx->src[i].kc_rel; 23413464ebd5Sriastradh alu.dst.sel = treg; 23423464ebd5Sriastradh alu.dst.chan = k; 23433464ebd5Sriastradh alu.dst.write = 1; 23443464ebd5Sriastradh if (k == 3) 
23453464ebd5Sriastradh alu.last = 1; 2346af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 23473464ebd5Sriastradh if (r) 23483464ebd5Sriastradh return r; 23493464ebd5Sriastradh } 23503464ebd5Sriastradh ctx->src[i].sel = treg; 23513464ebd5Sriastradh ctx->src[i].rel =0; 23523464ebd5Sriastradh j--; 23533464ebd5Sriastradh } 23543464ebd5Sriastradh } 23553464ebd5Sriastradh return 0; 23563464ebd5Sriastradh} 23573464ebd5Sriastradh 23583464ebd5Sriastradh/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 23593464ebd5Sriastradhstatic int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 23603464ebd5Sriastradh{ 23613464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2362af69d88dSmrg struct r600_bytecode_alu alu; 23633464ebd5Sriastradh int i, j, k, nliteral, r; 23643464ebd5Sriastradh 23653464ebd5Sriastradh for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 23663464ebd5Sriastradh if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 23673464ebd5Sriastradh nliteral++; 23683464ebd5Sriastradh } 23693464ebd5Sriastradh } 23703464ebd5Sriastradh for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 23713464ebd5Sriastradh if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 23723464ebd5Sriastradh int treg = r600_get_temp(ctx); 23733464ebd5Sriastradh for (k = 0; k < 4; k++) { 2374af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2375af69d88dSmrg alu.op = ALU_OP1_MOV; 23763464ebd5Sriastradh alu.src[0].sel = ctx->src[i].sel; 23773464ebd5Sriastradh alu.src[0].chan = k; 23783464ebd5Sriastradh alu.src[0].value = ctx->src[i].value[k]; 23793464ebd5Sriastradh alu.dst.sel = treg; 23803464ebd5Sriastradh alu.dst.chan = k; 23813464ebd5Sriastradh alu.dst.write = 1; 23823464ebd5Sriastradh if (k == 3) 23833464ebd5Sriastradh alu.last = 1; 2384af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 23853464ebd5Sriastradh if (r) 23863464ebd5Sriastradh return r; 
23873464ebd5Sriastradh } 23883464ebd5Sriastradh ctx->src[i].sel = treg; 23893464ebd5Sriastradh j--; 23903464ebd5Sriastradh } 23913464ebd5Sriastradh } 23923464ebd5Sriastradh return 0; 23933464ebd5Sriastradh} 23943464ebd5Sriastradh 2395af69d88dSmrgstatic int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 23963464ebd5Sriastradh{ 2397af69d88dSmrg int i, r, count = ctx->shader->ninput; 23983464ebd5Sriastradh 2399af69d88dSmrg for (i = 0; i < count; i++) { 2400af69d88dSmrg if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 2401af69d88dSmrg r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 2402af69d88dSmrg if (r) 2403af69d88dSmrg return r; 2404af69d88dSmrg } 2405af69d88dSmrg } 2406af69d88dSmrg return 0; 2407af69d88dSmrg} 2408af69d88dSmrg 240901e04c3fSmrgstatic int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 241001e04c3fSmrg int stream, unsigned *stream_item_size UNUSED) 2411af69d88dSmrg{ 2412af69d88dSmrg unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 241301e04c3fSmrg unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 241401e04c3fSmrg int j, r; 241501e04c3fSmrg unsigned i; 2416af69d88dSmrg 2417af69d88dSmrg /* Sanity checking. */ 241801e04c3fSmrg if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 2419af69d88dSmrg R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 2420af69d88dSmrg r = -EINVAL; 2421af69d88dSmrg goto out_err; 2422af69d88dSmrg } 2423af69d88dSmrg for (i = 0; i < so->num_outputs; i++) { 2424af69d88dSmrg if (so->output[i].output_buffer >= 4) { 2425af69d88dSmrg R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 2426af69d88dSmrg so->output[i].output_buffer); 2427af69d88dSmrg r = -EINVAL; 2428af69d88dSmrg goto out_err; 2429af69d88dSmrg } 2430af69d88dSmrg } 2431af69d88dSmrg 2432af69d88dSmrg /* Initialize locations where the outputs are stored. 
*/ 2433af69d88dSmrg for (i = 0; i < so->num_outputs; i++) { 2434af69d88dSmrg 243501e04c3fSmrg so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 243601e04c3fSmrg start_comp[i] = so->output[i].start_component; 2437af69d88dSmrg /* Lower outputs with dst_offset < start_component. 2438af69d88dSmrg * 2439af69d88dSmrg * We can only output 4D vectors with a write mask, e.g. we can 2440af69d88dSmrg * only output the W component at offset 3, etc. If we want 2441af69d88dSmrg * to store Y, Z, or W at buffer offset 0, we need to use MOV 2442af69d88dSmrg * to move it to X and output X. */ 2443af69d88dSmrg if (so->output[i].dst_offset < so->output[i].start_component) { 2444af69d88dSmrg unsigned tmp = r600_get_temp(ctx); 2445af69d88dSmrg 2446af69d88dSmrg for (j = 0; j < so->output[i].num_components; j++) { 2447af69d88dSmrg struct r600_bytecode_alu alu; 2448af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2449af69d88dSmrg alu.op = ALU_OP1_MOV; 2450af69d88dSmrg alu.src[0].sel = so_gpr[i]; 2451af69d88dSmrg alu.src[0].chan = so->output[i].start_component + j; 2452af69d88dSmrg 2453af69d88dSmrg alu.dst.sel = tmp; 2454af69d88dSmrg alu.dst.chan = j; 2455af69d88dSmrg alu.dst.write = 1; 2456af69d88dSmrg if (j == so->output[i].num_components - 1) 2457af69d88dSmrg alu.last = 1; 2458af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2459af69d88dSmrg if (r) 2460af69d88dSmrg return r; 2461af69d88dSmrg } 246201e04c3fSmrg start_comp[i] = 0; 2463af69d88dSmrg so_gpr[i] = tmp; 2464af69d88dSmrg } 2465af69d88dSmrg } 2466af69d88dSmrg 2467af69d88dSmrg /* Write outputs to buffers. 
*/ 2468af69d88dSmrg for (i = 0; i < so->num_outputs; i++) { 2469af69d88dSmrg struct r600_bytecode_output output; 2470af69d88dSmrg 247101e04c3fSmrg if (stream != -1 && stream != so->output[i].stream) 247201e04c3fSmrg continue; 247301e04c3fSmrg 2474af69d88dSmrg memset(&output, 0, sizeof(struct r600_bytecode_output)); 2475af69d88dSmrg output.gpr = so_gpr[i]; 247601e04c3fSmrg output.elem_size = so->output[i].num_components - 1; 247701e04c3fSmrg if (output.elem_size == 2) 247801e04c3fSmrg output.elem_size = 3; // 3 not supported, write 4 with junk at end 247901e04c3fSmrg output.array_base = so->output[i].dst_offset - start_comp[i]; 2480af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2481af69d88dSmrg output.burst_count = 1; 2482af69d88dSmrg /* array_size is an upper limit for the burst_count 2483af69d88dSmrg * with MEM_STREAM instructions */ 2484af69d88dSmrg output.array_size = 0xFFF; 248501e04c3fSmrg output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 248601e04c3fSmrg 2487af69d88dSmrg if (ctx->bc->chip_class >= EVERGREEN) { 2488af69d88dSmrg switch (so->output[i].output_buffer) { 2489af69d88dSmrg case 0: 2490af69d88dSmrg output.op = CF_OP_MEM_STREAM0_BUF0; 2491af69d88dSmrg break; 2492af69d88dSmrg case 1: 2493af69d88dSmrg output.op = CF_OP_MEM_STREAM0_BUF1; 2494af69d88dSmrg break; 2495af69d88dSmrg case 2: 2496af69d88dSmrg output.op = CF_OP_MEM_STREAM0_BUF2; 2497af69d88dSmrg break; 2498af69d88dSmrg case 3: 2499af69d88dSmrg output.op = CF_OP_MEM_STREAM0_BUF3; 2500af69d88dSmrg break; 2501af69d88dSmrg } 250201e04c3fSmrg output.op += so->output[i].stream * 4; 250301e04c3fSmrg assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); 250401e04c3fSmrg ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 2505af69d88dSmrg } else { 2506af69d88dSmrg switch (so->output[i].output_buffer) { 2507af69d88dSmrg case 0: 2508af69d88dSmrg output.op = CF_OP_MEM_STREAM0; 
2509af69d88dSmrg break; 2510af69d88dSmrg case 1: 2511af69d88dSmrg output.op = CF_OP_MEM_STREAM1; 2512af69d88dSmrg break; 2513af69d88dSmrg case 2: 2514af69d88dSmrg output.op = CF_OP_MEM_STREAM2; 2515af69d88dSmrg break; 2516af69d88dSmrg case 3: 2517af69d88dSmrg output.op = CF_OP_MEM_STREAM3; 2518af69d88dSmrg break; 2519af69d88dSmrg } 252001e04c3fSmrg ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 2521af69d88dSmrg } 2522af69d88dSmrg r = r600_bytecode_add_output(ctx->bc, &output); 2523af69d88dSmrg if (r) 2524af69d88dSmrg goto out_err; 2525af69d88dSmrg } 2526af69d88dSmrg return 0; 2527af69d88dSmrgout_err: 2528af69d88dSmrg return r; 2529af69d88dSmrg} 2530af69d88dSmrg 2531af69d88dSmrgstatic void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 2532af69d88dSmrg{ 2533af69d88dSmrg struct r600_bytecode_alu alu; 2534af69d88dSmrg unsigned reg; 2535af69d88dSmrg 2536af69d88dSmrg if (!ctx->shader->vs_out_edgeflag) 2537af69d88dSmrg return; 2538af69d88dSmrg 2539af69d88dSmrg reg = ctx->shader->output[ctx->edgeflag_output].gpr; 2540af69d88dSmrg 2541af69d88dSmrg /* clamp(x, 0, 1) */ 2542af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2543af69d88dSmrg alu.op = ALU_OP1_MOV; 2544af69d88dSmrg alu.src[0].sel = reg; 2545af69d88dSmrg alu.dst.sel = reg; 2546af69d88dSmrg alu.dst.write = 1; 2547af69d88dSmrg alu.dst.clamp = 1; 2548af69d88dSmrg alu.last = 1; 2549af69d88dSmrg r600_bytecode_add_alu(ctx->bc, &alu); 2550af69d88dSmrg 2551af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2552af69d88dSmrg alu.op = ALU_OP1_FLT_TO_INT; 2553af69d88dSmrg alu.src[0].sel = reg; 2554af69d88dSmrg alu.dst.sel = reg; 2555af69d88dSmrg alu.dst.write = 1; 2556af69d88dSmrg alu.last = 1; 2557af69d88dSmrg r600_bytecode_add_alu(ctx->bc, &alu); 2558af69d88dSmrg} 2559af69d88dSmrg 25607ec681f3Smrgint generate_gs_copy_shader(struct r600_context *rctx, 25617ec681f3Smrg struct r600_pipe_shader *gs, 25627ec681f3Smrg struct pipe_stream_output_info *so) 2563af69d88dSmrg{ 2564af69d88dSmrg struct r600_shader_ctx ctx 
= {}; 2565af69d88dSmrg struct r600_shader *gs_shader = &gs->shader; 2566af69d88dSmrg struct r600_pipe_shader *cshader; 256701e04c3fSmrg unsigned ocnt = gs_shader->noutput; 2568af69d88dSmrg struct r600_bytecode_alu alu; 2569af69d88dSmrg struct r600_bytecode_vtx vtx; 2570af69d88dSmrg struct r600_bytecode_output output; 2571af69d88dSmrg struct r600_bytecode_cf *cf_jump, *cf_pop, 2572af69d88dSmrg *last_exp_pos = NULL, *last_exp_param = NULL; 257301e04c3fSmrg int next_clip_pos = 61, next_param = 0; 257401e04c3fSmrg unsigned i, j; 257501e04c3fSmrg int ring; 257601e04c3fSmrg bool only_ring_0 = true; 2577af69d88dSmrg cshader = calloc(1, sizeof(struct r600_pipe_shader)); 2578af69d88dSmrg if (!cshader) 2579af69d88dSmrg return 0; 2580af69d88dSmrg 2581af69d88dSmrg memcpy(cshader->shader.output, gs_shader->output, ocnt * 2582af69d88dSmrg sizeof(struct r600_shader_io)); 2583af69d88dSmrg 2584af69d88dSmrg cshader->shader.noutput = ocnt; 2585af69d88dSmrg 2586af69d88dSmrg ctx.shader = &cshader->shader; 2587af69d88dSmrg ctx.bc = &ctx.shader->bc; 258801e04c3fSmrg ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX; 2589af69d88dSmrg 2590af69d88dSmrg r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 2591af69d88dSmrg rctx->screen->has_compressed_msaa_texturing); 2592af69d88dSmrg 2593af69d88dSmrg ctx.bc->isa = rctx->isa; 2594af69d88dSmrg 259501e04c3fSmrg cf_jump = NULL; 259601e04c3fSmrg memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 259701e04c3fSmrg 2598af69d88dSmrg /* R0.x = R0.x & 0x3fffffff */ 2599af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2600af69d88dSmrg alu.op = ALU_OP2_AND_INT; 2601af69d88dSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2602af69d88dSmrg alu.src[1].value = 0x3fffffff; 2603af69d88dSmrg alu.dst.write = 1; 2604af69d88dSmrg r600_bytecode_add_alu(ctx.bc, &alu); 2605af69d88dSmrg 2606af69d88dSmrg /* R0.y = R0.x >> 30 */ 2607af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2608af69d88dSmrg alu.op = ALU_OP2_LSHR_INT; 2609af69d88dSmrg 
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2610af69d88dSmrg alu.src[1].value = 0x1e; 2611af69d88dSmrg alu.dst.chan = 1; 2612af69d88dSmrg alu.dst.write = 1; 2613af69d88dSmrg alu.last = 1; 2614af69d88dSmrg r600_bytecode_add_alu(ctx.bc, &alu); 2615af69d88dSmrg 2616af69d88dSmrg /* fetch vertex data from GSVS ring */ 2617af69d88dSmrg for (i = 0; i < ocnt; ++i) { 2618af69d88dSmrg struct r600_shader_io *out = &ctx.shader->output[i]; 261901e04c3fSmrg 2620af69d88dSmrg out->gpr = i + 1; 2621af69d88dSmrg out->ring_offset = i * 16; 2622af69d88dSmrg 2623af69d88dSmrg memset(&vtx, 0, sizeof(vtx)); 2624af69d88dSmrg vtx.op = FETCH_OP_VFETCH; 2625af69d88dSmrg vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 262601e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 262701e04c3fSmrg vtx.mega_fetch_count = 16; 2628af69d88dSmrg vtx.offset = out->ring_offset; 2629af69d88dSmrg vtx.dst_gpr = out->gpr; 263001e04c3fSmrg vtx.src_gpr = 0; 2631af69d88dSmrg vtx.dst_sel_x = 0; 2632af69d88dSmrg vtx.dst_sel_y = 1; 2633af69d88dSmrg vtx.dst_sel_z = 2; 2634af69d88dSmrg vtx.dst_sel_w = 3; 2635af69d88dSmrg if (rctx->b.chip_class >= EVERGREEN) { 2636af69d88dSmrg vtx.use_const_fields = 1; 2637af69d88dSmrg } else { 2638af69d88dSmrg vtx.data_format = FMT_32_32_32_32_FLOAT; 2639af69d88dSmrg } 2640af69d88dSmrg 2641af69d88dSmrg r600_bytecode_add_vtx(ctx.bc, &vtx); 2642af69d88dSmrg } 264301e04c3fSmrg ctx.temp_reg = i + 1; 264401e04c3fSmrg for (ring = 3; ring >= 0; --ring) { 264501e04c3fSmrg bool enabled = false; 264601e04c3fSmrg for (i = 0; i < so->num_outputs; i++) { 264701e04c3fSmrg if (so->output[i].stream == ring) { 264801e04c3fSmrg enabled = true; 264901e04c3fSmrg if (ring > 0) 265001e04c3fSmrg only_ring_0 = false; 265101e04c3fSmrg break; 265201e04c3fSmrg } 265301e04c3fSmrg } 265401e04c3fSmrg if (ring != 0 && !enabled) { 265501e04c3fSmrg cshader->shader.ring_item_sizes[ring] = 0; 265601e04c3fSmrg continue; 265701e04c3fSmrg } 265801e04c3fSmrg 265901e04c3fSmrg if (cf_jump) { 266001e04c3fSmrg // Patch up jump 
label 266101e04c3fSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 266201e04c3fSmrg cf_pop = ctx.bc->cf_last; 266301e04c3fSmrg 266401e04c3fSmrg cf_jump->cf_addr = cf_pop->id + 2; 266501e04c3fSmrg cf_jump->pop_count = 1; 266601e04c3fSmrg cf_pop->cf_addr = cf_pop->id + 2; 266701e04c3fSmrg cf_pop->pop_count = 1; 266801e04c3fSmrg } 266901e04c3fSmrg 267001e04c3fSmrg /* PRED_SETE_INT __, R0.y, ring */ 267101e04c3fSmrg memset(&alu, 0, sizeof(alu)); 267201e04c3fSmrg alu.op = ALU_OP2_PRED_SETE_INT; 267301e04c3fSmrg alu.src[0].chan = 1; 267401e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 267501e04c3fSmrg alu.src[1].value = ring; 267601e04c3fSmrg alu.execute_mask = 1; 267701e04c3fSmrg alu.update_pred = 1; 267801e04c3fSmrg alu.last = 1; 267901e04c3fSmrg r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 268001e04c3fSmrg 268101e04c3fSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 268201e04c3fSmrg cf_jump = ctx.bc->cf_last; 268301e04c3fSmrg 268401e04c3fSmrg if (enabled) 268501e04c3fSmrg emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]); 268601e04c3fSmrg cshader->shader.ring_item_sizes[ring] = ocnt * 16; 268701e04c3fSmrg } 268801e04c3fSmrg 268901e04c3fSmrg /* bc adds nops - copy it */ 269001e04c3fSmrg if (ctx.bc->chip_class == R600) { 269101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 269201e04c3fSmrg alu.op = ALU_OP0_NOP; 269301e04c3fSmrg alu.last = 1; 269401e04c3fSmrg r600_bytecode_add_alu(ctx.bc, &alu); 2695af69d88dSmrg 269601e04c3fSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 269701e04c3fSmrg } 2698af69d88dSmrg 2699af69d88dSmrg /* export vertex data */ 2700af69d88dSmrg /* XXX factor out common code with r600_shader_from_tgsi ? 
*/ 2701af69d88dSmrg for (i = 0; i < ocnt; ++i) { 2702af69d88dSmrg struct r600_shader_io *out = &ctx.shader->output[i]; 270301e04c3fSmrg bool instream0 = true; 2704af69d88dSmrg if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 2705af69d88dSmrg continue; 2706af69d88dSmrg 270701e04c3fSmrg for (j = 0; j < so->num_outputs; j++) { 270801e04c3fSmrg if (so->output[j].register_index == i) { 270901e04c3fSmrg if (so->output[j].stream == 0) 271001e04c3fSmrg break; 271101e04c3fSmrg if (so->output[j].stream > 0) 271201e04c3fSmrg instream0 = false; 271301e04c3fSmrg } 271401e04c3fSmrg } 271501e04c3fSmrg if (!instream0) 271601e04c3fSmrg continue; 2717af69d88dSmrg memset(&output, 0, sizeof(output)); 2718af69d88dSmrg output.gpr = out->gpr; 2719af69d88dSmrg output.elem_size = 3; 2720af69d88dSmrg output.swizzle_x = 0; 2721af69d88dSmrg output.swizzle_y = 1; 2722af69d88dSmrg output.swizzle_z = 2; 2723af69d88dSmrg output.swizzle_w = 3; 2724af69d88dSmrg output.burst_count = 1; 2725af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2726af69d88dSmrg output.op = CF_OP_EXPORT; 2727af69d88dSmrg switch (out->name) { 2728af69d88dSmrg case TGSI_SEMANTIC_POSITION: 2729af69d88dSmrg output.array_base = 60; 2730af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2731af69d88dSmrg break; 2732af69d88dSmrg 2733af69d88dSmrg case TGSI_SEMANTIC_PSIZE: 2734af69d88dSmrg output.array_base = 61; 2735af69d88dSmrg if (next_clip_pos == 61) 2736af69d88dSmrg next_clip_pos = 62; 2737af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2738af69d88dSmrg output.swizzle_y = 7; 2739af69d88dSmrg output.swizzle_z = 7; 2740af69d88dSmrg output.swizzle_w = 7; 2741af69d88dSmrg ctx.shader->vs_out_misc_write = 1; 2742af69d88dSmrg ctx.shader->vs_out_point_size = 1; 2743af69d88dSmrg break; 2744af69d88dSmrg case TGSI_SEMANTIC_LAYER: 2745af69d88dSmrg if (out->spi_sid) { 2746af69d88dSmrg /* duplicate it as PARAM to pass to the pixel shader */ 2747af69d88dSmrg output.array_base = 
next_param++; 2748af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2749af69d88dSmrg last_exp_param = ctx.bc->cf_last; 2750af69d88dSmrg } 2751af69d88dSmrg output.array_base = 61; 2752af69d88dSmrg if (next_clip_pos == 61) 2753af69d88dSmrg next_clip_pos = 62; 2754af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2755af69d88dSmrg output.swizzle_x = 7; 2756af69d88dSmrg output.swizzle_y = 7; 2757af69d88dSmrg output.swizzle_z = 0; 2758af69d88dSmrg output.swizzle_w = 7; 2759af69d88dSmrg ctx.shader->vs_out_misc_write = 1; 2760af69d88dSmrg ctx.shader->vs_out_layer = 1; 2761af69d88dSmrg break; 2762af69d88dSmrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 2763af69d88dSmrg if (out->spi_sid) { 2764af69d88dSmrg /* duplicate it as PARAM to pass to the pixel shader */ 2765af69d88dSmrg output.array_base = next_param++; 2766af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2767af69d88dSmrg last_exp_param = ctx.bc->cf_last; 2768af69d88dSmrg } 2769af69d88dSmrg output.array_base = 61; 2770af69d88dSmrg if (next_clip_pos == 61) 2771af69d88dSmrg next_clip_pos = 62; 2772af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2773af69d88dSmrg ctx.shader->vs_out_misc_write = 1; 2774af69d88dSmrg ctx.shader->vs_out_viewport = 1; 2775af69d88dSmrg output.swizzle_x = 7; 2776af69d88dSmrg output.swizzle_y = 7; 2777af69d88dSmrg output.swizzle_z = 7; 2778af69d88dSmrg output.swizzle_w = 0; 2779af69d88dSmrg break; 2780af69d88dSmrg case TGSI_SEMANTIC_CLIPDIST: 2781af69d88dSmrg /* spi_sid is 0 for clipdistance outputs that were generated 2782af69d88dSmrg * for clipvertex - we don't need to pass them to PS */ 2783af69d88dSmrg ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 278401e04c3fSmrg ctx.shader->cull_dist_write = gs->shader.cull_dist_write; 278501e04c3fSmrg ctx.shader->cc_dist_mask = gs->shader.cc_dist_mask; 2786af69d88dSmrg if (out->spi_sid) { 2787af69d88dSmrg /* duplicate it as PARAM to pass to the pixel shader */ 2788af69d88dSmrg output.array_base = 
next_param++; 2789af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2790af69d88dSmrg last_exp_param = ctx.bc->cf_last; 2791af69d88dSmrg } 2792af69d88dSmrg output.array_base = next_clip_pos++; 2793af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2794af69d88dSmrg break; 2795af69d88dSmrg case TGSI_SEMANTIC_FOG: 2796af69d88dSmrg output.swizzle_y = 4; /* 0 */ 2797af69d88dSmrg output.swizzle_z = 4; /* 0 */ 2798af69d88dSmrg output.swizzle_w = 5; /* 1 */ 2799af69d88dSmrg break; 2800af69d88dSmrg default: 2801af69d88dSmrg output.array_base = next_param++; 2802af69d88dSmrg break; 2803af69d88dSmrg } 2804af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2805af69d88dSmrg if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 2806af69d88dSmrg last_exp_param = ctx.bc->cf_last; 2807af69d88dSmrg else 2808af69d88dSmrg last_exp_pos = ctx.bc->cf_last; 2809af69d88dSmrg } 2810af69d88dSmrg 2811af69d88dSmrg if (!last_exp_pos) { 2812af69d88dSmrg memset(&output, 0, sizeof(output)); 2813af69d88dSmrg output.gpr = 0; 2814af69d88dSmrg output.elem_size = 3; 2815af69d88dSmrg output.swizzle_x = 7; 2816af69d88dSmrg output.swizzle_y = 7; 2817af69d88dSmrg output.swizzle_z = 7; 2818af69d88dSmrg output.swizzle_w = 7; 2819af69d88dSmrg output.burst_count = 1; 2820af69d88dSmrg output.type = 2; 2821af69d88dSmrg output.op = CF_OP_EXPORT; 2822af69d88dSmrg output.array_base = 60; 2823af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2824af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2825af69d88dSmrg last_exp_pos = ctx.bc->cf_last; 2826af69d88dSmrg } 2827af69d88dSmrg 2828af69d88dSmrg if (!last_exp_param) { 2829af69d88dSmrg memset(&output, 0, sizeof(output)); 2830af69d88dSmrg output.gpr = 0; 2831af69d88dSmrg output.elem_size = 3; 2832af69d88dSmrg output.swizzle_x = 7; 2833af69d88dSmrg output.swizzle_y = 7; 2834af69d88dSmrg output.swizzle_z = 7; 2835af69d88dSmrg output.swizzle_w = 7; 2836af69d88dSmrg output.burst_count = 1; 2837af69d88dSmrg 
output.type = 2; 2838af69d88dSmrg output.op = CF_OP_EXPORT; 2839af69d88dSmrg output.array_base = next_param++; 2840af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2841af69d88dSmrg r600_bytecode_add_output(ctx.bc, &output); 2842af69d88dSmrg last_exp_param = ctx.bc->cf_last; 2843af69d88dSmrg } 2844af69d88dSmrg 2845af69d88dSmrg last_exp_pos->op = CF_OP_EXPORT_DONE; 2846af69d88dSmrg last_exp_param->op = CF_OP_EXPORT_DONE; 2847af69d88dSmrg 2848af69d88dSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2849af69d88dSmrg cf_pop = ctx.bc->cf_last; 2850af69d88dSmrg 2851af69d88dSmrg cf_jump->cf_addr = cf_pop->id + 2; 2852af69d88dSmrg cf_jump->pop_count = 1; 2853af69d88dSmrg cf_pop->cf_addr = cf_pop->id + 2; 2854af69d88dSmrg cf_pop->pop_count = 1; 2855af69d88dSmrg 2856af69d88dSmrg if (ctx.bc->chip_class == CAYMAN) 2857af69d88dSmrg cm_bytecode_add_cf_end(ctx.bc); 2858af69d88dSmrg else { 2859af69d88dSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2860af69d88dSmrg ctx.bc->cf_last->end_of_program = 1; 2861af69d88dSmrg } 2862af69d88dSmrg 2863af69d88dSmrg gs->gs_copy_shader = cshader; 286401e04c3fSmrg cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2865af69d88dSmrg 2866af69d88dSmrg ctx.bc->nstack = 1; 2867af69d88dSmrg 2868af69d88dSmrg return r600_bytecode_build(ctx.bc); 2869af69d88dSmrg} 2870af69d88dSmrg 287101e04c3fSmrgstatic int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 287201e04c3fSmrg{ 287301e04c3fSmrg if (ind) { 287401e04c3fSmrg struct r600_bytecode_alu alu; 287501e04c3fSmrg int r; 287601e04c3fSmrg 287701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 287801e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 287901e04c3fSmrg alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 288001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 288101e04c3fSmrg alu.src[1].value = ctx->gs_out_ring_offset >> 4; 288201e04c3fSmrg alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 288301e04c3fSmrg alu.dst.write = 1; 288401e04c3fSmrg 
alu.last = 1; 288501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 288601e04c3fSmrg if (r) 288701e04c3fSmrg return r; 288801e04c3fSmrg } 288901e04c3fSmrg return 0; 289001e04c3fSmrg} 289101e04c3fSmrg 289201e04c3fSmrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so UNUSED, int stream, bool ind) 2893af69d88dSmrg{ 2894af69d88dSmrg struct r600_bytecode_output output; 289501e04c3fSmrg int ring_offset; 289601e04c3fSmrg unsigned i, k; 289701e04c3fSmrg int effective_stream = stream == -1 ? 0 : stream; 289801e04c3fSmrg int idx = 0; 2899af69d88dSmrg 2900af69d88dSmrg for (i = 0; i < ctx->shader->noutput; i++) { 2901af69d88dSmrg if (ctx->gs_for_vs) { 2902af69d88dSmrg /* for ES we need to lookup corresponding ring offset expected by GS 2903af69d88dSmrg * (map this output to GS input by name and sid) */ 2904af69d88dSmrg /* FIXME precompute offsets */ 2905af69d88dSmrg ring_offset = -1; 2906af69d88dSmrg for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 2907af69d88dSmrg struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 2908af69d88dSmrg struct r600_shader_io *out = &ctx->shader->output[i]; 2909af69d88dSmrg if (in->name == out->name && in->sid == out->sid) 2910af69d88dSmrg ring_offset = in->ring_offset; 2911af69d88dSmrg } 2912af69d88dSmrg 2913af69d88dSmrg if (ring_offset == -1) 2914af69d88dSmrg continue; 291501e04c3fSmrg } else { 291601e04c3fSmrg ring_offset = idx * 16; 291701e04c3fSmrg idx++; 291801e04c3fSmrg } 2919af69d88dSmrg 292001e04c3fSmrg if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) 292101e04c3fSmrg continue; 2922af69d88dSmrg /* next_ring_offset after parsing input decls contains total size of 2923af69d88dSmrg * single vertex data, gs_next_vertex - current vertex index */ 2924af69d88dSmrg if (!ind) 2925af69d88dSmrg ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 2926af69d88dSmrg 2927af69d88dSmrg memset(&output, 0, sizeof(struct r600_bytecode_output)); 2928af69d88dSmrg output.gpr 
= ctx->shader->output[i].gpr; 2929af69d88dSmrg output.elem_size = 3; 2930af69d88dSmrg output.comp_mask = 0xF; 2931af69d88dSmrg output.burst_count = 1; 2932af69d88dSmrg 2933af69d88dSmrg if (ind) 2934af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 2935af69d88dSmrg else 2936af69d88dSmrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2937af69d88dSmrg 293801e04c3fSmrg switch (stream) { 293901e04c3fSmrg default: 294001e04c3fSmrg case 0: 294101e04c3fSmrg output.op = CF_OP_MEM_RING; break; 294201e04c3fSmrg case 1: 294301e04c3fSmrg output.op = CF_OP_MEM_RING1; break; 294401e04c3fSmrg case 2: 294501e04c3fSmrg output.op = CF_OP_MEM_RING2; break; 294601e04c3fSmrg case 3: 294701e04c3fSmrg output.op = CF_OP_MEM_RING3; break; 294801e04c3fSmrg } 2949af69d88dSmrg 2950af69d88dSmrg if (ind) { 2951af69d88dSmrg output.array_base = ring_offset >> 2; /* in dwords */ 2952af69d88dSmrg output.array_size = 0xfff; 295301e04c3fSmrg output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream]; 2954af69d88dSmrg } else 2955af69d88dSmrg output.array_base = ring_offset >> 2; /* in dwords */ 2956af69d88dSmrg r600_bytecode_add_output(ctx->bc, &output); 2957af69d88dSmrg } 2958af69d88dSmrg 2959af69d88dSmrg ++ctx->gs_next_vertex; 2960af69d88dSmrg return 0; 2961af69d88dSmrg} 2962af69d88dSmrg 296301e04c3fSmrg 296401e04c3fSmrgstatic int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx) 2965af69d88dSmrg{ 296601e04c3fSmrg int r; 296701e04c3fSmrg struct r600_bytecode_vtx vtx; 296801e04c3fSmrg int temp_val = ctx->temp_reg; 296901e04c3fSmrg /* need to store the TCS output somewhere */ 297001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 297101e04c3fSmrg temp_val, 0, 297201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 0, 297301e04c3fSmrg 0, 0); 297401e04c3fSmrg if (r) 297501e04c3fSmrg return r; 2976af69d88dSmrg 297701e04c3fSmrg /* used by VS/TCS */ 297801e04c3fSmrg if (ctx->tess_input_info) { 297901e04c3fSmrg /* fetch tcs input values into resv space */ 298001e04c3fSmrg memset(&vtx, 
0, sizeof(struct r600_bytecode_vtx)); 298101e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 298201e04c3fSmrg vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 298301e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 298401e04c3fSmrg vtx.mega_fetch_count = 16; 298501e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 298601e04c3fSmrg vtx.num_format_all = 2; 298701e04c3fSmrg vtx.format_comp_all = 1; 298801e04c3fSmrg vtx.use_const_fields = 0; 298901e04c3fSmrg vtx.endian = r600_endian_swap(32); 299001e04c3fSmrg vtx.srf_mode_all = 1; 299101e04c3fSmrg vtx.offset = 0; 299201e04c3fSmrg vtx.dst_gpr = ctx->tess_input_info; 299301e04c3fSmrg vtx.dst_sel_x = 0; 299401e04c3fSmrg vtx.dst_sel_y = 1; 299501e04c3fSmrg vtx.dst_sel_z = 2; 299601e04c3fSmrg vtx.dst_sel_w = 3; 299701e04c3fSmrg vtx.src_gpr = temp_val; 299801e04c3fSmrg vtx.src_sel_x = 0; 299901e04c3fSmrg 300001e04c3fSmrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 300101e04c3fSmrg if (r) 300201e04c3fSmrg return r; 300301e04c3fSmrg } 300401e04c3fSmrg 300501e04c3fSmrg /* used by TCS/TES */ 300601e04c3fSmrg if (ctx->tess_output_info) { 300701e04c3fSmrg /* fetch tcs output values into resv space */ 300801e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 300901e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 301001e04c3fSmrg vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 301101e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 301201e04c3fSmrg vtx.mega_fetch_count = 16; 301301e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 301401e04c3fSmrg vtx.num_format_all = 2; 301501e04c3fSmrg vtx.format_comp_all = 1; 301601e04c3fSmrg vtx.use_const_fields = 0; 301701e04c3fSmrg vtx.endian = r600_endian_swap(32); 301801e04c3fSmrg vtx.srf_mode_all = 1; 301901e04c3fSmrg vtx.offset = 16; 302001e04c3fSmrg vtx.dst_gpr = ctx->tess_output_info; 302101e04c3fSmrg vtx.dst_sel_x = 0; 302201e04c3fSmrg vtx.dst_sel_y = 1; 302301e04c3fSmrg vtx.dst_sel_z = 2; 302401e04c3fSmrg vtx.dst_sel_w = 3; 302501e04c3fSmrg vtx.src_gpr = temp_val; 302601e04c3fSmrg vtx.src_sel_x = 0; 
302701e04c3fSmrg 302801e04c3fSmrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 302901e04c3fSmrg if (r) 303001e04c3fSmrg return r; 303101e04c3fSmrg } 303201e04c3fSmrg return 0; 303301e04c3fSmrg} 303401e04c3fSmrg 303501e04c3fSmrgstatic int emit_lds_vs_writes(struct r600_shader_ctx *ctx) 303601e04c3fSmrg{ 303701e04c3fSmrg int j, r; 303801e04c3fSmrg int temp_reg; 303901e04c3fSmrg unsigned i; 304001e04c3fSmrg 304101e04c3fSmrg /* fetch tcs input values into input_vals */ 304201e04c3fSmrg ctx->tess_input_info = r600_get_temp(ctx); 304301e04c3fSmrg ctx->tess_output_info = 0; 304401e04c3fSmrg r = r600_fetch_tess_io_info(ctx); 304501e04c3fSmrg if (r) 304601e04c3fSmrg return r; 304701e04c3fSmrg 304801e04c3fSmrg temp_reg = r600_get_temp(ctx); 304901e04c3fSmrg /* dst reg contains LDS address stride * idx */ 305001e04c3fSmrg /* MUL vertexID, vertex_dw_stride */ 305101e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 305201e04c3fSmrg temp_reg, 0, 305301e04c3fSmrg ctx->tess_input_info, 1, 305401e04c3fSmrg 0, 1); /* rel id in r0.y? */ 305501e04c3fSmrg if (r) 305601e04c3fSmrg return r; 305701e04c3fSmrg 305801e04c3fSmrg for (i = 0; i < ctx->shader->noutput; i++) { 305901e04c3fSmrg struct r600_bytecode_alu alu; 30607ec681f3Smrg int param = r600_get_lds_unique_index(ctx->shader->output[i].name, 30617ec681f3Smrg ctx->shader->output[i].sid); 306201e04c3fSmrg 306301e04c3fSmrg if (param) { 306401e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 306501e04c3fSmrg temp_reg, 1, 306601e04c3fSmrg temp_reg, 0, 306701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, param * 16); 306801e04c3fSmrg if (r) 306901e04c3fSmrg return r; 307001e04c3fSmrg } 307101e04c3fSmrg 307201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 307301e04c3fSmrg temp_reg, 2, 307401e04c3fSmrg temp_reg, param ? 1 : 0, 307501e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 8); 307601e04c3fSmrg if (r) 307701e04c3fSmrg return r; 307801e04c3fSmrg 307901e04c3fSmrg 308001e04c3fSmrg for (j = 0; j < 2; j++) { 308101e04c3fSmrg int chan = (j == 1) ? 2 : (param ? 
1 : 0); 308201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 308301e04c3fSmrg alu.op = LDS_OP3_LDS_WRITE_REL; 308401e04c3fSmrg alu.src[0].sel = temp_reg; 308501e04c3fSmrg alu.src[0].chan = chan; 308601e04c3fSmrg alu.src[1].sel = ctx->shader->output[i].gpr; 308701e04c3fSmrg alu.src[1].chan = j * 2; 308801e04c3fSmrg alu.src[2].sel = ctx->shader->output[i].gpr; 308901e04c3fSmrg alu.src[2].chan = (j * 2) + 1; 309001e04c3fSmrg alu.last = 1; 309101e04c3fSmrg alu.dst.chan = 0; 309201e04c3fSmrg alu.lds_idx = 1; 309301e04c3fSmrg alu.is_lds_idx_op = true; 309401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 309501e04c3fSmrg if (r) 309601e04c3fSmrg return r; 309701e04c3fSmrg } 309801e04c3fSmrg } 309901e04c3fSmrg return 0; 310001e04c3fSmrg} 310101e04c3fSmrg 310201e04c3fSmrgstatic int r600_store_tcs_output(struct r600_shader_ctx *ctx) 310301e04c3fSmrg{ 310401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 310501e04c3fSmrg const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 310601e04c3fSmrg int i, r, lasti; 310701e04c3fSmrg int temp_reg = r600_get_temp(ctx); 310801e04c3fSmrg struct r600_bytecode_alu alu; 310901e04c3fSmrg unsigned write_mask = dst->Register.WriteMask; 311001e04c3fSmrg 311101e04c3fSmrg if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 311201e04c3fSmrg return 0; 311301e04c3fSmrg 311401e04c3fSmrg r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? 
false : true); 311501e04c3fSmrg if (r) 311601e04c3fSmrg return r; 311701e04c3fSmrg 311801e04c3fSmrg /* the base address is now in temp.x */ 311901e04c3fSmrg r = r600_get_byte_address(ctx, temp_reg, 312001e04c3fSmrg &inst->Dst[0], NULL, ctx->tess_output_info, 1); 312101e04c3fSmrg if (r) 312201e04c3fSmrg return r; 312301e04c3fSmrg 312401e04c3fSmrg /* LDS write */ 312501e04c3fSmrg lasti = tgsi_last_instruction(write_mask); 312601e04c3fSmrg for (i = 1; i <= lasti; i++) { 312701e04c3fSmrg 312801e04c3fSmrg if (!(write_mask & (1 << i))) 312901e04c3fSmrg continue; 313001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 313101e04c3fSmrg temp_reg, i, 313201e04c3fSmrg temp_reg, 0, 313301e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 4 * i); 313401e04c3fSmrg if (r) 313501e04c3fSmrg return r; 313601e04c3fSmrg } 313701e04c3fSmrg 313801e04c3fSmrg for (i = 0; i <= lasti; i++) { 313901e04c3fSmrg if (!(write_mask & (1 << i))) 314001e04c3fSmrg continue; 314101e04c3fSmrg 314201e04c3fSmrg if ((i == 0 && ((write_mask & 3) == 3)) || 314301e04c3fSmrg (i == 2 && ((write_mask & 0xc) == 0xc))) { 314401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 314501e04c3fSmrg alu.op = LDS_OP3_LDS_WRITE_REL; 314601e04c3fSmrg alu.src[0].sel = temp_reg; 314701e04c3fSmrg alu.src[0].chan = i; 314801e04c3fSmrg 314901e04c3fSmrg alu.src[1].sel = dst->Register.Index; 315001e04c3fSmrg alu.src[1].sel += ctx->file_offset[dst->Register.File]; 315101e04c3fSmrg alu.src[1].chan = i; 315201e04c3fSmrg 315301e04c3fSmrg alu.src[2].sel = dst->Register.Index; 315401e04c3fSmrg alu.src[2].sel += ctx->file_offset[dst->Register.File]; 315501e04c3fSmrg alu.src[2].chan = i + 1; 315601e04c3fSmrg alu.lds_idx = 1; 315701e04c3fSmrg alu.dst.chan = 0; 315801e04c3fSmrg alu.last = 1; 315901e04c3fSmrg alu.is_lds_idx_op = true; 316001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 316101e04c3fSmrg if (r) 316201e04c3fSmrg return r; 316301e04c3fSmrg i += 1; 316401e04c3fSmrg continue; 316501e04c3fSmrg } 316601e04c3fSmrg memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 316701e04c3fSmrg alu.op = LDS_OP2_LDS_WRITE; 316801e04c3fSmrg alu.src[0].sel = temp_reg; 316901e04c3fSmrg alu.src[0].chan = i; 317001e04c3fSmrg 317101e04c3fSmrg alu.src[1].sel = dst->Register.Index; 317201e04c3fSmrg alu.src[1].sel += ctx->file_offset[dst->Register.File]; 317301e04c3fSmrg alu.src[1].chan = i; 317401e04c3fSmrg 317501e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_0; 317601e04c3fSmrg alu.dst.chan = 0; 317701e04c3fSmrg alu.last = 1; 317801e04c3fSmrg alu.is_lds_idx_op = true; 317901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 318001e04c3fSmrg if (r) 318101e04c3fSmrg return r; 318201e04c3fSmrg } 318301e04c3fSmrg return 0; 318401e04c3fSmrg} 318501e04c3fSmrg 318601e04c3fSmrgstatic int r600_tess_factor_read(struct r600_shader_ctx *ctx, 318701e04c3fSmrg int output_idx, int nc) 318801e04c3fSmrg{ 318901e04c3fSmrg int param; 319001e04c3fSmrg unsigned temp_reg = r600_get_temp(ctx); 319101e04c3fSmrg unsigned name = ctx->shader->output[output_idx].name; 319201e04c3fSmrg int dreg = ctx->shader->output[output_idx].gpr; 319301e04c3fSmrg int r; 319401e04c3fSmrg 319501e04c3fSmrg param = r600_get_lds_unique_index(name, 0); 319601e04c3fSmrg r = get_lds_offset0(ctx, 1, temp_reg, true); 319701e04c3fSmrg if (r) 319801e04c3fSmrg return r; 319901e04c3fSmrg 320001e04c3fSmrg if (param) { 320101e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 320201e04c3fSmrg temp_reg, 0, 320301e04c3fSmrg temp_reg, 0, 320401e04c3fSmrg V_SQ_ALU_SRC_LITERAL, param * 16); 320501e04c3fSmrg if (r) 320601e04c3fSmrg return r; 320701e04c3fSmrg } 320801e04c3fSmrg 320901e04c3fSmrg do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1)); 321001e04c3fSmrg return 0; 321101e04c3fSmrg} 321201e04c3fSmrg 321301e04c3fSmrgstatic int r600_emit_tess_factor(struct r600_shader_ctx *ctx) 321401e04c3fSmrg{ 321501e04c3fSmrg int stride, outer_comps, inner_comps; 321601e04c3fSmrg int tessinner_idx = -1, tessouter_idx = -1; 321701e04c3fSmrg int i, r; 321801e04c3fSmrg unsigned j; 
321901e04c3fSmrg int temp_reg = r600_get_temp(ctx); 322001e04c3fSmrg int treg[3] = {-1, -1, -1}; 322101e04c3fSmrg struct r600_bytecode_alu alu; 322201e04c3fSmrg struct r600_bytecode_cf *cf_jump, *cf_pop; 322301e04c3fSmrg 322401e04c3fSmrg /* only execute factor emission for invocation 0 */ 322501e04c3fSmrg /* PRED_SETE_INT __, R0.x, 0 */ 322601e04c3fSmrg memset(&alu, 0, sizeof(alu)); 322701e04c3fSmrg alu.op = ALU_OP2_PRED_SETE_INT; 322801e04c3fSmrg alu.src[0].chan = 2; 322901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 323001e04c3fSmrg alu.execute_mask = 1; 323101e04c3fSmrg alu.update_pred = 1; 323201e04c3fSmrg alu.last = 1; 323301e04c3fSmrg r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE); 323401e04c3fSmrg 323501e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 323601e04c3fSmrg cf_jump = ctx->bc->cf_last; 323701e04c3fSmrg 323801e04c3fSmrg treg[0] = r600_get_temp(ctx); 323901e04c3fSmrg switch (ctx->shader->tcs_prim_mode) { 324001e04c3fSmrg case PIPE_PRIM_LINES: 324101e04c3fSmrg stride = 8; /* 2 dwords, 1 vec2 store */ 324201e04c3fSmrg outer_comps = 2; 324301e04c3fSmrg inner_comps = 0; 324401e04c3fSmrg break; 324501e04c3fSmrg case PIPE_PRIM_TRIANGLES: 324601e04c3fSmrg stride = 16; /* 4 dwords, 1 vec4 store */ 324701e04c3fSmrg outer_comps = 3; 324801e04c3fSmrg inner_comps = 1; 324901e04c3fSmrg treg[1] = r600_get_temp(ctx); 325001e04c3fSmrg break; 325101e04c3fSmrg case PIPE_PRIM_QUADS: 325201e04c3fSmrg stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */ 325301e04c3fSmrg outer_comps = 4; 325401e04c3fSmrg inner_comps = 2; 325501e04c3fSmrg treg[1] = r600_get_temp(ctx); 325601e04c3fSmrg treg[2] = r600_get_temp(ctx); 325701e04c3fSmrg break; 325801e04c3fSmrg default: 325901e04c3fSmrg assert(0); 326001e04c3fSmrg return -1; 326101e04c3fSmrg } 326201e04c3fSmrg 326301e04c3fSmrg /* R0 is InvocationID, RelPatchID, PatchID, tf_base */ 326401e04c3fSmrg /* TF_WRITE takes index in R.x, value in R.y */ 326501e04c3fSmrg for (j = 0; j < ctx->shader->noutput; 
j++) { 326601e04c3fSmrg if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSINNER) 326701e04c3fSmrg tessinner_idx = j; 326801e04c3fSmrg if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSOUTER) 326901e04c3fSmrg tessouter_idx = j; 327001e04c3fSmrg } 327101e04c3fSmrg 327201e04c3fSmrg if (tessouter_idx == -1) 327301e04c3fSmrg return -1; 327401e04c3fSmrg 327501e04c3fSmrg if (tessinner_idx == -1 && inner_comps) 327601e04c3fSmrg return -1; 327701e04c3fSmrg 327801e04c3fSmrg if (tessouter_idx != -1) { 327901e04c3fSmrg r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps); 328001e04c3fSmrg if (r) 328101e04c3fSmrg return r; 328201e04c3fSmrg } 328301e04c3fSmrg 328401e04c3fSmrg if (tessinner_idx != -1) { 328501e04c3fSmrg r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps); 328601e04c3fSmrg if (r) 328701e04c3fSmrg return r; 328801e04c3fSmrg } 328901e04c3fSmrg 329001e04c3fSmrg /* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */ 329101e04c3fSmrg /* r.x = relpatchid(r0.y) * tf_stride */ 329201e04c3fSmrg 329301e04c3fSmrg /* multiply incoming r0.y * stride - t.x = r0.y * stride */ 329401e04c3fSmrg /* add incoming r0.w to it: t.x = t.x + r0.w */ 329501e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 329601e04c3fSmrg temp_reg, 0, 329701e04c3fSmrg 0, 1, 329801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, stride, 329901e04c3fSmrg 0, 3); 330001e04c3fSmrg if (r) 330101e04c3fSmrg return r; 330201e04c3fSmrg 330301e04c3fSmrg for (i = 0; i < outer_comps + inner_comps; i++) { 330401e04c3fSmrg int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx; 330501e04c3fSmrg int out_comp = i >= outer_comps ? 
i - outer_comps : i; 330601e04c3fSmrg 330701e04c3fSmrg if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) { 330801e04c3fSmrg if (out_comp == 1) 330901e04c3fSmrg out_comp = 0; 331001e04c3fSmrg else if (out_comp == 0) 331101e04c3fSmrg out_comp = 1; 331201e04c3fSmrg } 331301e04c3fSmrg 331401e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 331501e04c3fSmrg treg[i / 2], (2 * (i % 2)), 331601e04c3fSmrg temp_reg, 0, 331701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 4 * i); 331801e04c3fSmrg if (r) 331901e04c3fSmrg return r; 332001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 332101e04c3fSmrg treg[i / 2], 1 + (2 * (i%2)), 332201e04c3fSmrg ctx->shader->output[out_idx].gpr, out_comp, 332301e04c3fSmrg 0, 0); 332401e04c3fSmrg if (r) 332501e04c3fSmrg return r; 332601e04c3fSmrg } 332701e04c3fSmrg for (i = 0; i < outer_comps + inner_comps; i++) { 332801e04c3fSmrg struct r600_bytecode_gds gds; 332901e04c3fSmrg 333001e04c3fSmrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 333101e04c3fSmrg gds.src_gpr = treg[i / 2]; 333201e04c3fSmrg gds.src_sel_x = 2 * (i % 2); 333301e04c3fSmrg gds.src_sel_y = 1 + (2 * (i % 2)); 333401e04c3fSmrg gds.src_sel_z = 4; 333501e04c3fSmrg gds.dst_sel_x = 7; 333601e04c3fSmrg gds.dst_sel_y = 7; 333701e04c3fSmrg gds.dst_sel_z = 7; 333801e04c3fSmrg gds.dst_sel_w = 7; 333901e04c3fSmrg gds.op = FETCH_OP_TF_WRITE; 334001e04c3fSmrg r = r600_bytecode_add_gds(ctx->bc, &gds); 334101e04c3fSmrg if (r) 334201e04c3fSmrg return r; 334301e04c3fSmrg } 334401e04c3fSmrg 334501e04c3fSmrg // Patch up jump label 334601e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 334701e04c3fSmrg cf_pop = ctx->bc->cf_last; 334801e04c3fSmrg 334901e04c3fSmrg cf_jump->cf_addr = cf_pop->id + 2; 335001e04c3fSmrg cf_jump->pop_count = 1; 335101e04c3fSmrg cf_pop->cf_addr = cf_pop->id + 2; 335201e04c3fSmrg cf_pop->pop_count = 1; 335301e04c3fSmrg 335401e04c3fSmrg return 0; 335501e04c3fSmrg} 335601e04c3fSmrg 335701e04c3fSmrg/* 335801e04c3fSmrg * We have to work out the thread ID for load and 
atomic 335901e04c3fSmrg * operations, which store the returned value to an index 336001e04c3fSmrg * in an intermediate buffer. 336101e04c3fSmrg * The index is calculated by taking the thread id, 336201e04c3fSmrg * calculated from the MBCNT instructions. 336301e04c3fSmrg * Then the shader engine ID is multiplied by 256, 336401e04c3fSmrg * and the wave id is added. 336501e04c3fSmrg * Then the result is multipled by 64 and thread id is 336601e04c3fSmrg * added. 336701e04c3fSmrg */ 336801e04c3fSmrgstatic int load_thread_id_gpr(struct r600_shader_ctx *ctx) 336901e04c3fSmrg{ 337001e04c3fSmrg struct r600_bytecode_alu alu; 337101e04c3fSmrg int r; 337201e04c3fSmrg 337301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 337401e04c3fSmrg alu.op = ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT; 337501e04c3fSmrg alu.dst.sel = ctx->temp_reg; 337601e04c3fSmrg alu.dst.chan = 0; 337701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 337801e04c3fSmrg alu.src[0].value = 0xffffffff; 337901e04c3fSmrg alu.dst.write = 1; 338001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 338101e04c3fSmrg if (r) 338201e04c3fSmrg return r; 338301e04c3fSmrg 338401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 338501e04c3fSmrg alu.op = ALU_OP1_MBCNT_32HI_INT; 338601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 338701e04c3fSmrg alu.dst.chan = 1; 338801e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 338901e04c3fSmrg alu.src[0].value = 0xffffffff; 339001e04c3fSmrg alu.dst.write = 1; 339101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 339201e04c3fSmrg if (r) 339301e04c3fSmrg return r; 339401e04c3fSmrg 339501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 339601e04c3fSmrg alu.op = ALU_OP3_MULADD_UINT24; 339701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 339801e04c3fSmrg alu.dst.chan = 2; 339901e04c3fSmrg alu.src[0].sel = EG_V_SQ_ALU_SRC_SE_ID; 340001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 340101e04c3fSmrg alu.src[1].value = 256; 340201e04c3fSmrg alu.src[2].sel = 
EG_V_SQ_ALU_SRC_HW_WAVE_ID; 340301e04c3fSmrg alu.dst.write = 1; 340401e04c3fSmrg alu.is_op3 = 1; 340501e04c3fSmrg alu.last = 1; 340601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 340701e04c3fSmrg if (r) 340801e04c3fSmrg return r; 340901e04c3fSmrg 341001e04c3fSmrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 341101e04c3fSmrg ctx->thread_id_gpr, 1, 341201e04c3fSmrg ctx->temp_reg, 2, 341301e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 0x40, 341401e04c3fSmrg ctx->temp_reg, 0); 341501e04c3fSmrg if (r) 341601e04c3fSmrg return r; 341701e04c3fSmrg return 0; 341801e04c3fSmrg} 341901e04c3fSmrg 342001e04c3fSmrgstatic int r600_shader_from_tgsi(struct r600_context *rctx, 342101e04c3fSmrg struct r600_pipe_shader *pipeshader, 342201e04c3fSmrg union r600_shader_key key) 342301e04c3fSmrg{ 342401e04c3fSmrg struct r600_screen *rscreen = rctx->screen; 342501e04c3fSmrg struct r600_shader *shader = &pipeshader->shader; 342601e04c3fSmrg struct tgsi_token *tokens = pipeshader->selector->tokens; 342701e04c3fSmrg struct pipe_stream_output_info so = pipeshader->selector->so; 342801e04c3fSmrg struct tgsi_full_immediate *immediate; 342901e04c3fSmrg struct r600_shader_ctx ctx; 343001e04c3fSmrg struct r600_bytecode_output output[ARRAY_SIZE(shader->output)]; 343101e04c3fSmrg unsigned output_done, noutput; 343201e04c3fSmrg unsigned opcode; 343301e04c3fSmrg int j, k, r = 0; 343401e04c3fSmrg unsigned i; 343501e04c3fSmrg int next_param_base = 0, next_clip_base; 343601e04c3fSmrg int max_color_exports = MAX2(key.ps.nr_cbufs, 1); 343701e04c3fSmrg bool indirect_gprs; 343801e04c3fSmrg bool ring_outputs = false; 343901e04c3fSmrg bool lds_outputs = false; 344001e04c3fSmrg bool lds_inputs = false; 344101e04c3fSmrg bool pos_emitted = false; 3442af69d88dSmrg 344301e04c3fSmrg ctx.bc = &shader->bc; 344401e04c3fSmrg ctx.shader = shader; 3445af69d88dSmrg 3446af69d88dSmrg r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 3447af69d88dSmrg rscreen->has_compressed_msaa_texturing); 3448af69d88dSmrg 
ctx.tokens = tokens; 3449af69d88dSmrg tgsi_scan_shader(tokens, &ctx.info); 3450af69d88dSmrg shader->indirect_files = ctx.info.indirect_files; 345101e04c3fSmrg 345201e04c3fSmrg int narrays = ctx.info.array_max[TGSI_FILE_TEMPORARY]; 345301e04c3fSmrg ctx.array_infos = calloc(narrays, sizeof(*ctx.array_infos)); 345401e04c3fSmrg ctx.spilled_arrays = calloc(narrays, sizeof(bool)); 345501e04c3fSmrg tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, narrays, ctx.array_infos); 345601e04c3fSmrg 345701e04c3fSmrg shader->uses_helper_invocation = false; 345801e04c3fSmrg shader->uses_doubles = ctx.info.uses_doubles; 345901e04c3fSmrg shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC]; 346001e04c3fSmrg shader->nsys_inputs = 0; 346101e04c3fSmrg 346201e04c3fSmrg shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 || 346301e04c3fSmrg ctx.info.file_count[TGSI_FILE_BUFFER] > 0; 346401e04c3fSmrg indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 3465af69d88dSmrg tgsi_parse_init(&ctx.parse, tokens); 346601e04c3fSmrg ctx.type = ctx.info.processor; 34673464ebd5Sriastradh shader->processor_type = ctx.type; 34683464ebd5Sriastradh ctx.bc->type = shader->processor_type; 34693464ebd5Sriastradh 347001e04c3fSmrg switch (ctx.type) { 347101e04c3fSmrg case PIPE_SHADER_VERTEX: 347201e04c3fSmrg shader->vs_as_gs_a = key.vs.as_gs_a; 347301e04c3fSmrg shader->vs_as_es = key.vs.as_es; 347401e04c3fSmrg shader->vs_as_ls = key.vs.as_ls; 347501e04c3fSmrg shader->atomic_base = key.vs.first_atomic_counter; 347601e04c3fSmrg if (shader->vs_as_es) 347701e04c3fSmrg ring_outputs = true; 347801e04c3fSmrg if (shader->vs_as_ls) 347901e04c3fSmrg lds_outputs = true; 348001e04c3fSmrg break; 348101e04c3fSmrg case PIPE_SHADER_GEOMETRY: 348201e04c3fSmrg ring_outputs = true; 348301e04c3fSmrg shader->atomic_base = key.gs.first_atomic_counter; 348401e04c3fSmrg shader->gs_tri_strip_adj_fix = key.gs.tri_strip_adj_fix; 348501e04c3fSmrg break; 348601e04c3fSmrg case 
PIPE_SHADER_TESS_CTRL: 348701e04c3fSmrg shader->tcs_prim_mode = key.tcs.prim_mode; 348801e04c3fSmrg shader->atomic_base = key.tcs.first_atomic_counter; 348901e04c3fSmrg lds_outputs = true; 349001e04c3fSmrg lds_inputs = true; 349101e04c3fSmrg break; 349201e04c3fSmrg case PIPE_SHADER_TESS_EVAL: 349301e04c3fSmrg shader->tes_as_es = key.tes.as_es; 349401e04c3fSmrg shader->atomic_base = key.tes.first_atomic_counter; 349501e04c3fSmrg lds_inputs = true; 349601e04c3fSmrg if (shader->tes_as_es) 349701e04c3fSmrg ring_outputs = true; 349801e04c3fSmrg break; 349901e04c3fSmrg case PIPE_SHADER_FRAGMENT: 350001e04c3fSmrg shader->two_side = key.ps.color_two_side; 350101e04c3fSmrg shader->atomic_base = key.ps.first_atomic_counter; 350201e04c3fSmrg shader->rat_base = key.ps.nr_cbufs; 350301e04c3fSmrg shader->image_size_const_offset = key.ps.image_size_const_offset; 350401e04c3fSmrg break; 350501e04c3fSmrg case PIPE_SHADER_COMPUTE: 350601e04c3fSmrg shader->rat_base = 0; 350701e04c3fSmrg shader->image_size_const_offset = ctx.info.file_count[TGSI_FILE_SAMPLER]; 350801e04c3fSmrg break; 350901e04c3fSmrg default: 351001e04c3fSmrg break; 351101e04c3fSmrg } 3512af69d88dSmrg 351301e04c3fSmrg if (shader->vs_as_es || shader->tes_as_es) { 3514af69d88dSmrg ctx.gs_for_vs = &rctx->gs_shader->current->shader; 3515af69d88dSmrg } else { 3516af69d88dSmrg ctx.gs_for_vs = NULL; 3517af69d88dSmrg } 3518af69d88dSmrg 3519af69d88dSmrg ctx.next_ring_offset = 0; 3520af69d88dSmrg ctx.gs_out_ring_offset = 0; 3521af69d88dSmrg ctx.gs_next_vertex = 0; 352201e04c3fSmrg ctx.gs_stream_output_info = &so; 3523af69d88dSmrg 352401e04c3fSmrg ctx.thread_id_gpr = -1; 3525af69d88dSmrg ctx.face_gpr = -1; 352601e04c3fSmrg ctx.fixed_pt_position_gpr = -1; 3527af69d88dSmrg ctx.fragcoord_input = -1; 3528af69d88dSmrg ctx.colors_used = 0; 3529af69d88dSmrg ctx.clip_vertex_write = 0; 3530af69d88dSmrg 353101e04c3fSmrg ctx.helper_invoc_reg = -1; 353201e04c3fSmrg ctx.cs_block_size_reg = -1; 353301e04c3fSmrg ctx.cs_grid_size_reg = -1; 
353401e04c3fSmrg ctx.cs_block_size_loaded = false; 353501e04c3fSmrg ctx.cs_grid_size_loaded = false; 353601e04c3fSmrg 3537af69d88dSmrg shader->nr_ps_color_exports = 0; 3538af69d88dSmrg shader->nr_ps_max_color_exports = 0; 35393464ebd5Sriastradh 35403464ebd5Sriastradh 35413464ebd5Sriastradh /* register allocations */ 35423464ebd5Sriastradh /* Values [0,127] correspond to GPR[0..127]. 35433464ebd5Sriastradh * Values [128,159] correspond to constant buffer bank 0 35443464ebd5Sriastradh * Values [160,191] correspond to constant buffer bank 1 35453464ebd5Sriastradh * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 35463464ebd5Sriastradh * Values [256,287] correspond to constant buffer bank 2 (EG) 35473464ebd5Sriastradh * Values [288,319] correspond to constant buffer bank 3 (EG) 35483464ebd5Sriastradh * Other special values are shown in the list below. 35493464ebd5Sriastradh * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 35503464ebd5Sriastradh * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 35513464ebd5Sriastradh * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 35523464ebd5Sriastradh * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 35533464ebd5Sriastradh * 248 SQ_ALU_SRC_0: special constant 0.0. 35543464ebd5Sriastradh * 249 SQ_ALU_SRC_1: special constant 1.0 float. 35553464ebd5Sriastradh * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 35563464ebd5Sriastradh * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 35573464ebd5Sriastradh * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 35583464ebd5Sriastradh * 253 SQ_ALU_SRC_LITERAL: literal constant. 35593464ebd5Sriastradh * 254 SQ_ALU_SRC_PV: previous vector result. 35603464ebd5Sriastradh * 255 SQ_ALU_SRC_PS: previous scalar result. 
35613464ebd5Sriastradh */ 35623464ebd5Sriastradh for (i = 0; i < TGSI_FILE_COUNT; i++) { 35633464ebd5Sriastradh ctx.file_offset[i] = 0; 35643464ebd5Sriastradh } 3565af69d88dSmrg 356601e04c3fSmrg if (ctx.type == PIPE_SHADER_VERTEX) { 356701e04c3fSmrg 35683464ebd5Sriastradh ctx.file_offset[TGSI_FILE_INPUT] = 1; 356901e04c3fSmrg if (ctx.info.num_inputs) 3570af69d88dSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 35713464ebd5Sriastradh } 357201e04c3fSmrg if (ctx.type == PIPE_SHADER_FRAGMENT) { 357301e04c3fSmrg if (ctx.bc->chip_class >= EVERGREEN) 357401e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 357501e04c3fSmrg else 357601e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 357701e04c3fSmrg 357801e04c3fSmrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 357901e04c3fSmrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) { 358001e04c3fSmrg ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 358101e04c3fSmrg shader->uses_helper_invocation = true; 358201e04c3fSmrg } 358301e04c3fSmrg } 35843464ebd5Sriastradh } 358501e04c3fSmrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 3586af69d88dSmrg /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 3587af69d88dSmrg ctx.file_offset[TGSI_FILE_INPUT] = 2; 3588af69d88dSmrg } 358901e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 359001e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT] = 1; 359101e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_EVAL) { 359201e04c3fSmrg bool add_tesscoord = false, add_tess_inout = false; 359301e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT] = 1; 359401e04c3fSmrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 359501e04c3fSmrg /* if we have tesscoord save one reg */ 359601e04c3fSmrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD) 359701e04c3fSmrg add_tesscoord = true; 359801e04c3fSmrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER 
|| 359901e04c3fSmrg ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER) 360001e04c3fSmrg add_tess_inout = true; 360101e04c3fSmrg } 360201e04c3fSmrg if (add_tesscoord || add_tess_inout) 360301e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT]++; 360401e04c3fSmrg if (add_tess_inout) 360501e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT]+=2; 360601e04c3fSmrg } 360701e04c3fSmrg if (ctx.type == PIPE_SHADER_COMPUTE) { 360801e04c3fSmrg ctx.file_offset[TGSI_FILE_INPUT] = 2; 360901e04c3fSmrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 361001e04c3fSmrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE) 361101e04c3fSmrg ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 361201e04c3fSmrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE) 361301e04c3fSmrg ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 361401e04c3fSmrg } 361501e04c3fSmrg } 3616af69d88dSmrg 361701e04c3fSmrg ctx.file_offset[TGSI_FILE_OUTPUT] = 3618af69d88dSmrg ctx.file_offset[TGSI_FILE_INPUT] + 3619af69d88dSmrg ctx.info.file_max[TGSI_FILE_INPUT] + 1; 36203464ebd5Sriastradh ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 3621af69d88dSmrg ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 36223464ebd5Sriastradh 36233464ebd5Sriastradh /* Outside the GPR range. This will be translated to one of the 36243464ebd5Sriastradh * kcache banks later. 
*/ 36253464ebd5Sriastradh ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 36263464ebd5Sriastradh ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3627af69d88dSmrg 362801e04c3fSmrg pipeshader->scratch_space_needed = 0; 362901e04c3fSmrg int regno = ctx.file_offset[TGSI_FILE_TEMPORARY] + 363001e04c3fSmrg ctx.info.file_max[TGSI_FILE_TEMPORARY]; 363101e04c3fSmrg if (regno > 124) { 363201e04c3fSmrg choose_spill_arrays(&ctx, ®no, &pipeshader->scratch_space_needed); 363301e04c3fSmrg shader->indirect_files = ctx.info.indirect_files; 363401e04c3fSmrg } 363501e04c3fSmrg shader->needs_scratch_space = pipeshader->scratch_space_needed != 0; 363601e04c3fSmrg 363701e04c3fSmrg ctx.bc->ar_reg = ++regno; 363801e04c3fSmrg ctx.bc->index_reg[0] = ++regno; 363901e04c3fSmrg ctx.bc->index_reg[1] = ++regno; 364001e04c3fSmrg 364101e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_CTRL) { 364201e04c3fSmrg ctx.tess_input_info = ++regno; 364301e04c3fSmrg ctx.tess_output_info = ++regno; 364401e04c3fSmrg } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { 364501e04c3fSmrg ctx.tess_input_info = ++regno; 364601e04c3fSmrg ctx.tess_output_info = ++regno; 364701e04c3fSmrg } else if (ctx.type == PIPE_SHADER_GEOMETRY) { 364801e04c3fSmrg ctx.gs_export_gpr_tregs[0] = ++regno; 364901e04c3fSmrg ctx.gs_export_gpr_tregs[1] = ++regno; 365001e04c3fSmrg ctx.gs_export_gpr_tregs[2] = ++regno; 365101e04c3fSmrg ctx.gs_export_gpr_tregs[3] = ++regno; 365201e04c3fSmrg if (ctx.shader->gs_tri_strip_adj_fix) { 365301e04c3fSmrg ctx.gs_rotated_input[0] = ++regno; 365401e04c3fSmrg ctx.gs_rotated_input[1] = ++regno; 365501e04c3fSmrg } else { 365601e04c3fSmrg ctx.gs_rotated_input[0] = 0; 365701e04c3fSmrg ctx.gs_rotated_input[1] = 1; 365801e04c3fSmrg } 365901e04c3fSmrg } 366001e04c3fSmrg 366101e04c3fSmrg if (shader->uses_images) { 366201e04c3fSmrg ctx.thread_id_gpr = ++regno; 366301e04c3fSmrg } 366401e04c3fSmrg ctx.temp_reg = ++regno; 366501e04c3fSmrg 366601e04c3fSmrg shader->max_arrays = 0; 366701e04c3fSmrg shader->num_arrays = 
0; 3668af69d88dSmrg if (indirect_gprs) { 3669af69d88dSmrg 3670af69d88dSmrg if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3671af69d88dSmrg r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3672af69d88dSmrg ctx.file_offset[TGSI_FILE_OUTPUT] - 3673af69d88dSmrg ctx.file_offset[TGSI_FILE_INPUT], 3674af69d88dSmrg 0x0F); 3675af69d88dSmrg } 3676af69d88dSmrg if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3677af69d88dSmrg r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3678af69d88dSmrg ctx.file_offset[TGSI_FILE_TEMPORARY] - 3679af69d88dSmrg ctx.file_offset[TGSI_FILE_OUTPUT], 3680af69d88dSmrg 0x0F); 3681af69d88dSmrg } 3682af69d88dSmrg } 36833464ebd5Sriastradh 36843464ebd5Sriastradh ctx.nliterals = 0; 36853464ebd5Sriastradh ctx.literals = NULL; 368601e04c3fSmrg ctx.max_driver_temp_used = 0; 368701e04c3fSmrg 368801e04c3fSmrg shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 368901e04c3fSmrg ctx.info.colors_written == 1; 369001e04c3fSmrg shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 369101e04c3fSmrg shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 369201e04c3fSmrg 369301e04c3fSmrg if (ctx.type == PIPE_SHADER_VERTEX || 369401e04c3fSmrg ctx.type == PIPE_SHADER_GEOMETRY || 369501e04c3fSmrg ctx.type == PIPE_SHADER_TESS_EVAL) { 369601e04c3fSmrg shader->cc_dist_mask = (1 << (ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED] + 369701e04c3fSmrg ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED])) - 1; 369801e04c3fSmrg shader->clip_dist_write = (1 << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]) - 1; 369901e04c3fSmrg shader->cull_dist_write = ((1 << ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED]) - 1) << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]; 370001e04c3fSmrg } 370101e04c3fSmrg 370201e04c3fSmrg if (shader->vs_as_gs_a) 370301e04c3fSmrg vs_add_primid_output(&ctx, 
key.vs.prim_id_out); 370401e04c3fSmrg 370501e04c3fSmrg if (ctx.thread_id_gpr != -1) { 370601e04c3fSmrg r = load_thread_id_gpr(&ctx); 370701e04c3fSmrg if (r) 370801e04c3fSmrg return r; 370901e04c3fSmrg } 371001e04c3fSmrg 371101e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_EVAL) 371201e04c3fSmrg r600_fetch_tess_io_info(&ctx); 371301e04c3fSmrg 37143464ebd5Sriastradh while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 37153464ebd5Sriastradh tgsi_parse_token(&ctx.parse); 37163464ebd5Sriastradh switch (ctx.parse.FullToken.Token.Type) { 37173464ebd5Sriastradh case TGSI_TOKEN_TYPE_IMMEDIATE: 37183464ebd5Sriastradh immediate = &ctx.parse.FullToken.FullImmediate; 37193464ebd5Sriastradh ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 37203464ebd5Sriastradh if(ctx.literals == NULL) { 37213464ebd5Sriastradh r = -ENOMEM; 37223464ebd5Sriastradh goto out_err; 37233464ebd5Sriastradh } 37243464ebd5Sriastradh ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 37253464ebd5Sriastradh ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 37263464ebd5Sriastradh ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 37273464ebd5Sriastradh ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 37283464ebd5Sriastradh ctx.nliterals++; 37293464ebd5Sriastradh break; 37303464ebd5Sriastradh case TGSI_TOKEN_TYPE_DECLARATION: 37313464ebd5Sriastradh r = tgsi_declaration(&ctx); 37323464ebd5Sriastradh if (r) 37333464ebd5Sriastradh goto out_err; 37343464ebd5Sriastradh break; 37353464ebd5Sriastradh case TGSI_TOKEN_TYPE_INSTRUCTION: 37363464ebd5Sriastradh case TGSI_TOKEN_TYPE_PROPERTY: 37373464ebd5Sriastradh break; 37383464ebd5Sriastradh default: 37393464ebd5Sriastradh R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 37403464ebd5Sriastradh r = -EINVAL; 37413464ebd5Sriastradh goto out_err; 37423464ebd5Sriastradh } 37433464ebd5Sriastradh } 3744af69d88dSmrg 374501e04c3fSmrg shader->ring_item_sizes[0] = ctx.next_ring_offset; 374601e04c3fSmrg 
shader->ring_item_sizes[1] = 0; 374701e04c3fSmrg shader->ring_item_sizes[2] = 0; 374801e04c3fSmrg shader->ring_item_sizes[3] = 0; 37493464ebd5Sriastradh 3750af69d88dSmrg /* Process two side if needed */ 3751af69d88dSmrg if (shader->two_side && ctx.colors_used) { 3752af69d88dSmrg int i, count = ctx.shader->ninput; 3753af69d88dSmrg unsigned next_lds_loc = ctx.shader->nlds; 3754af69d88dSmrg 3755af69d88dSmrg /* additional inputs will be allocated right after the existing inputs, 3756af69d88dSmrg * we won't need them after the color selection, so we don't need to 3757af69d88dSmrg * reserve these gprs for the rest of the shader code and to adjust 3758af69d88dSmrg * output offsets etc. */ 3759af69d88dSmrg int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3760af69d88dSmrg ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3761af69d88dSmrg 376201e04c3fSmrg /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3763af69d88dSmrg if (ctx.face_gpr == -1) { 3764af69d88dSmrg i = ctx.shader->ninput++; 3765af69d88dSmrg ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3766af69d88dSmrg ctx.shader->input[i].spi_sid = 0; 3767af69d88dSmrg ctx.shader->input[i].gpr = gpr++; 3768af69d88dSmrg ctx.face_gpr = ctx.shader->input[i].gpr; 3769af69d88dSmrg } 37703464ebd5Sriastradh 3771af69d88dSmrg for (i = 0; i < count; i++) { 3772af69d88dSmrg if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3773af69d88dSmrg int ni = ctx.shader->ninput++; 3774af69d88dSmrg memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3775af69d88dSmrg ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3776af69d88dSmrg ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3777af69d88dSmrg ctx.shader->input[ni].gpr = gpr++; 3778af69d88dSmrg // TGSI to LLVM needs to know the lds position of inputs. 
3779af69d88dSmrg // Non LLVM path computes it later (in process_twoside_color) 3780af69d88dSmrg ctx.shader->input[ni].lds_pos = next_lds_loc++; 3781af69d88dSmrg ctx.shader->input[i].back_color_input = ni; 3782af69d88dSmrg if (ctx.bc->chip_class >= EVERGREEN) { 3783af69d88dSmrg if ((r = evergreen_interp_input(&ctx, ni))) 37843464ebd5Sriastradh return r; 37853464ebd5Sriastradh } 37863464ebd5Sriastradh } 37873464ebd5Sriastradh } 37883464ebd5Sriastradh } 37893464ebd5Sriastradh 3790af69d88dSmrg if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3791af69d88dSmrg shader->nr_ps_max_color_exports = 8; 3792af69d88dSmrg 379301e04c3fSmrg if (ctx.shader->uses_helper_invocation) { 379401e04c3fSmrg if (ctx.bc->chip_class == CAYMAN) 379501e04c3fSmrg r = cm_load_helper_invocation(&ctx); 379601e04c3fSmrg else 379701e04c3fSmrg r = eg_load_helper_invocation(&ctx); 379801e04c3fSmrg if (r) 379901e04c3fSmrg return r; 380001e04c3fSmrg } 3801af69d88dSmrg 380201e04c3fSmrg /* 380301e04c3fSmrg * XXX this relies on fixed_pt_position_gpr only being present when 380401e04c3fSmrg * this shader should be executed per sample. Should be the case for now... 380501e04c3fSmrg */ 380601e04c3fSmrg if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) { 380701e04c3fSmrg /* 380801e04c3fSmrg * Fix up sample mask. The hw always gives us coverage mask for 380901e04c3fSmrg * the pixel. However, for per-sample shading, we need the 381001e04c3fSmrg * coverage for the shader invocation only. 381101e04c3fSmrg * Also, with disabled msaa, only the first bit should be set 381201e04c3fSmrg * (luckily the same fixup works for both problems). 381301e04c3fSmrg * For now, we can only do it if we know this shader is always 381401e04c3fSmrg * executed per sample (due to usage of bits in the shader 381501e04c3fSmrg * forcing per-sample execution). 381601e04c3fSmrg * If the fb is not multisampled, we'd do unnecessary work but 381701e04c3fSmrg * it should still be correct. 
381801e04c3fSmrg * It will however do nothing for sample shading according 381901e04c3fSmrg * to MinSampleShading. 382001e04c3fSmrg */ 382101e04c3fSmrg struct r600_bytecode_alu alu; 382201e04c3fSmrg int tmp = r600_get_temp(&ctx); 382301e04c3fSmrg assert(ctx.face_gpr != -1); 382401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 382501e04c3fSmrg 382601e04c3fSmrg alu.op = ALU_OP2_LSHL_INT; 382701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 382801e04c3fSmrg alu.src[0].value = 0x1; 382901e04c3fSmrg alu.src[1].sel = ctx.fixed_pt_position_gpr; 383001e04c3fSmrg alu.src[1].chan = 3; 383101e04c3fSmrg alu.dst.sel = tmp; 383201e04c3fSmrg alu.dst.chan = 0; 383301e04c3fSmrg alu.dst.write = 1; 383401e04c3fSmrg alu.last = 1; 383501e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 383601e04c3fSmrg return r; 383701e04c3fSmrg 383801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 383901e04c3fSmrg alu.op = ALU_OP2_AND_INT; 384001e04c3fSmrg alu.src[0].sel = tmp; 384101e04c3fSmrg alu.src[1].sel = ctx.face_gpr; 384201e04c3fSmrg alu.src[1].chan = 2; 384301e04c3fSmrg alu.dst.sel = ctx.face_gpr; 384401e04c3fSmrg alu.dst.chan = 2; 384501e04c3fSmrg alu.dst.write = 1; 384601e04c3fSmrg alu.last = 1; 384701e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 384801e04c3fSmrg return r; 384901e04c3fSmrg } 385001e04c3fSmrg 385101e04c3fSmrg if (ctx.fragcoord_input >= 0) { 385201e04c3fSmrg if (ctx.bc->chip_class == CAYMAN) { 385301e04c3fSmrg for (j = 0 ; j < 4; j++) { 3854af69d88dSmrg struct r600_bytecode_alu alu; 3855af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3856af69d88dSmrg alu.op = ALU_OP1_RECIP_IEEE; 3857af69d88dSmrg alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3858af69d88dSmrg alu.src[0].chan = 3; 3859af69d88dSmrg 3860af69d88dSmrg alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 386101e04c3fSmrg alu.dst.chan = j; 386201e04c3fSmrg alu.dst.write = (j == 3); 386301e04c3fSmrg alu.last = (j == 3); 3864af69d88dSmrg if 
((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3865af69d88dSmrg return r; 38663464ebd5Sriastradh } 386701e04c3fSmrg } else { 386801e04c3fSmrg struct r600_bytecode_alu alu; 386901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 387001e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 387101e04c3fSmrg alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 387201e04c3fSmrg alu.src[0].chan = 3; 387301e04c3fSmrg 387401e04c3fSmrg alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 387501e04c3fSmrg alu.dst.chan = 3; 387601e04c3fSmrg alu.dst.write = 1; 387701e04c3fSmrg alu.last = 1; 387801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 387901e04c3fSmrg return r; 38803464ebd5Sriastradh } 388101e04c3fSmrg } 3882af69d88dSmrg 388301e04c3fSmrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 388401e04c3fSmrg struct r600_bytecode_alu alu; 388501e04c3fSmrg int r; 3886af69d88dSmrg 388701e04c3fSmrg /* GS thread with no output workaround - emit a cut at start of GS */ 388801e04c3fSmrg if (ctx.bc->chip_class == R600) 388901e04c3fSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 389001e04c3fSmrg 389101e04c3fSmrg for (j = 0; j < 4; j++) { 3892af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3893af69d88dSmrg alu.op = ALU_OP1_MOV; 3894af69d88dSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3895af69d88dSmrg alu.src[0].value = 0; 389601e04c3fSmrg alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3897af69d88dSmrg alu.dst.write = 1; 3898af69d88dSmrg alu.last = 1; 3899af69d88dSmrg r = r600_bytecode_add_alu(ctx.bc, &alu); 3900af69d88dSmrg if (r) 3901af69d88dSmrg return r; 3902af69d88dSmrg } 390301e04c3fSmrg 390401e04c3fSmrg if (ctx.shader->gs_tri_strip_adj_fix) { 390501e04c3fSmrg r = single_alu_op2(&ctx, ALU_OP2_AND_INT, 390601e04c3fSmrg ctx.gs_rotated_input[0], 2, 390701e04c3fSmrg 0, 2, 390801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 1); 390901e04c3fSmrg if (r) 3910af69d88dSmrg return r; 3911af69d88dSmrg 391201e04c3fSmrg for (i = 0; i < 6; i++) { 391301e04c3fSmrg int rotated = (i + 4) % 
6; 391401e04c3fSmrg int offset_reg = i / 3; 391501e04c3fSmrg int offset_chan = i % 3; 391601e04c3fSmrg int rotated_offset_reg = rotated / 3; 391701e04c3fSmrg int rotated_offset_chan = rotated % 3; 391801e04c3fSmrg 391901e04c3fSmrg if (offset_reg == 0 && offset_chan == 2) 392001e04c3fSmrg offset_chan = 3; 392101e04c3fSmrg if (rotated_offset_reg == 0 && rotated_offset_chan == 2) 392201e04c3fSmrg rotated_offset_chan = 3; 392301e04c3fSmrg 392401e04c3fSmrg r = single_alu_op3(&ctx, ALU_OP3_CNDE_INT, 392501e04c3fSmrg ctx.gs_rotated_input[offset_reg], offset_chan, 392601e04c3fSmrg ctx.gs_rotated_input[0], 2, 392701e04c3fSmrg offset_reg, offset_chan, 392801e04c3fSmrg rotated_offset_reg, rotated_offset_chan); 3929af69d88dSmrg if (r) 393001e04c3fSmrg return r; 393101e04c3fSmrg } 393201e04c3fSmrg } 393301e04c3fSmrg } 393401e04c3fSmrg 393501e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 393601e04c3fSmrg r600_fetch_tess_io_info(&ctx); 393701e04c3fSmrg 393801e04c3fSmrg if (shader->two_side && ctx.colors_used) { 393901e04c3fSmrg if ((r = process_twoside_color_inputs(&ctx))) 394001e04c3fSmrg return r; 394101e04c3fSmrg } 394201e04c3fSmrg 394301e04c3fSmrg tgsi_parse_init(&ctx.parse, tokens); 394401e04c3fSmrg while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 394501e04c3fSmrg tgsi_parse_token(&ctx.parse); 394601e04c3fSmrg switch (ctx.parse.FullToken.Token.Type) { 394701e04c3fSmrg case TGSI_TOKEN_TYPE_INSTRUCTION: 394801e04c3fSmrg r = tgsi_is_supported(&ctx); 394901e04c3fSmrg if (r) 395001e04c3fSmrg goto out_err; 395101e04c3fSmrg ctx.max_driver_temp_used = 0; 395201e04c3fSmrg /* reserve first tmp for everyone */ 395301e04c3fSmrg r600_get_temp(&ctx); 3954af69d88dSmrg 395501e04c3fSmrg opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 395601e04c3fSmrg if ((r = tgsi_split_constant(&ctx))) 395701e04c3fSmrg goto out_err; 395801e04c3fSmrg if ((r = tgsi_split_literal_constant(&ctx))) 395901e04c3fSmrg goto out_err; 396001e04c3fSmrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 
396101e04c3fSmrg if ((r = tgsi_split_gs_inputs(&ctx))) 3962af69d88dSmrg goto out_err; 396301e04c3fSmrg } else if (lds_inputs) { 396401e04c3fSmrg if ((r = tgsi_split_lds_inputs(&ctx))) 3965af69d88dSmrg goto out_err; 396601e04c3fSmrg } 396701e04c3fSmrg if (ctx.bc->chip_class == CAYMAN) 396801e04c3fSmrg ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 396901e04c3fSmrg else if (ctx.bc->chip_class >= EVERGREEN) 397001e04c3fSmrg ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 397101e04c3fSmrg else 397201e04c3fSmrg ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 397301e04c3fSmrg 397401e04c3fSmrg ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise; 397501e04c3fSmrg 397601e04c3fSmrg r = ctx.inst_info->process(&ctx); 397701e04c3fSmrg if (r) 397801e04c3fSmrg goto out_err; 397901e04c3fSmrg 398001e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_CTRL) { 398101e04c3fSmrg r = r600_store_tcs_output(&ctx); 3982af69d88dSmrg if (r) 3983af69d88dSmrg goto out_err; 39843464ebd5Sriastradh } 398501e04c3fSmrg break; 398601e04c3fSmrg default: 398701e04c3fSmrg break; 39883464ebd5Sriastradh } 39893464ebd5Sriastradh } 3990af69d88dSmrg 3991af69d88dSmrg /* Reset the temporary register counter. 
*/ 3992af69d88dSmrg ctx.max_driver_temp_used = 0; 3993af69d88dSmrg 3994af69d88dSmrg noutput = shader->noutput; 3995af69d88dSmrg 3996af69d88dSmrg if (!ring_outputs && ctx.clip_vertex_write) { 3997af69d88dSmrg unsigned clipdist_temp[2]; 3998af69d88dSmrg 3999af69d88dSmrg clipdist_temp[0] = r600_get_temp(&ctx); 4000af69d88dSmrg clipdist_temp[1] = r600_get_temp(&ctx); 4001af69d88dSmrg 4002af69d88dSmrg /* need to convert a clipvertex write into clipdistance writes and not export 4003af69d88dSmrg the clip vertex anymore */ 4004af69d88dSmrg 4005af69d88dSmrg memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 4006af69d88dSmrg shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 4007af69d88dSmrg shader->output[noutput].gpr = clipdist_temp[0]; 4008af69d88dSmrg noutput++; 4009af69d88dSmrg shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 4010af69d88dSmrg shader->output[noutput].gpr = clipdist_temp[1]; 4011af69d88dSmrg noutput++; 4012af69d88dSmrg 4013af69d88dSmrg /* reset spi_sid for clipvertex output to avoid confusing spi */ 4014af69d88dSmrg shader->output[ctx.cv_output].spi_sid = 0; 4015af69d88dSmrg 4016af69d88dSmrg shader->clip_dist_write = 0xFF; 401701e04c3fSmrg shader->cc_dist_mask = 0xFF; 4018af69d88dSmrg 4019af69d88dSmrg for (i = 0; i < 8; i++) { 4020af69d88dSmrg int oreg = i >> 2; 4021af69d88dSmrg int ochan = i & 3; 4022af69d88dSmrg 4023af69d88dSmrg for (j = 0; j < 4; j++) { 4024af69d88dSmrg struct r600_bytecode_alu alu; 4025af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4026af69d88dSmrg alu.op = ALU_OP2_DOT4; 4027af69d88dSmrg alu.src[0].sel = shader->output[ctx.cv_output].gpr; 4028af69d88dSmrg alu.src[0].chan = j; 4029af69d88dSmrg 4030af69d88dSmrg alu.src[1].sel = 512 + i; 403101e04c3fSmrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 4032af69d88dSmrg alu.src[1].chan = j; 4033af69d88dSmrg 4034af69d88dSmrg alu.dst.sel = clipdist_temp[oreg]; 4035af69d88dSmrg alu.dst.chan = j; 4036af69d88dSmrg alu.dst.write = (j == ochan); 
4037af69d88dSmrg if (j == 3) 4038af69d88dSmrg alu.last = 1; 403901e04c3fSmrg r = r600_bytecode_add_alu(ctx.bc, &alu); 4040af69d88dSmrg if (r) 4041af69d88dSmrg return r; 40423464ebd5Sriastradh } 40433464ebd5Sriastradh } 40443464ebd5Sriastradh } 40453464ebd5Sriastradh 4046af69d88dSmrg /* Add stream outputs. */ 404701e04c3fSmrg if (so.num_outputs) { 404801e04c3fSmrg bool emit = false; 404901e04c3fSmrg if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX) 405001e04c3fSmrg emit = true; 405101e04c3fSmrg if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL) 405201e04c3fSmrg emit = true; 405301e04c3fSmrg if (emit) 405401e04c3fSmrg emit_streamout(&ctx, &so, -1, NULL); 405501e04c3fSmrg } 405601e04c3fSmrg pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 4057af69d88dSmrg convert_edgeflag_to_int(&ctx); 4058af69d88dSmrg 405901e04c3fSmrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 406001e04c3fSmrg r600_emit_tess_factor(&ctx); 406101e04c3fSmrg 406201e04c3fSmrg if (lds_outputs) { 406301e04c3fSmrg if (ctx.type == PIPE_SHADER_VERTEX) { 406401e04c3fSmrg if (ctx.shader->noutput) 406501e04c3fSmrg emit_lds_vs_writes(&ctx); 406601e04c3fSmrg } 406701e04c3fSmrg } else if (ring_outputs) { 406801e04c3fSmrg if (shader->vs_as_es || shader->tes_as_es) { 406901e04c3fSmrg ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 407001e04c3fSmrg ctx.gs_export_gpr_tregs[1] = -1; 407101e04c3fSmrg ctx.gs_export_gpr_tregs[2] = -1; 407201e04c3fSmrg ctx.gs_export_gpr_tregs[3] = -1; 407301e04c3fSmrg 407401e04c3fSmrg emit_gs_ring_writes(&ctx, &so, -1, FALSE); 407501e04c3fSmrg } 4076af69d88dSmrg } else { 4077af69d88dSmrg /* Export output */ 4078af69d88dSmrg next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 4079af69d88dSmrg 4080af69d88dSmrg for (i = 0, j = 0; i < noutput; i++, j++) { 4081af69d88dSmrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4082af69d88dSmrg output[j].gpr = shader->output[i].gpr; 4083af69d88dSmrg output[j].elem_size = 3; 4084af69d88dSmrg output[j].swizzle_x = 0; 4085af69d88dSmrg output[j].swizzle_y = 1; 4086af69d88dSmrg output[j].swizzle_z = 2; 4087af69d88dSmrg output[j].swizzle_w = 3; 4088af69d88dSmrg output[j].burst_count = 1; 408901e04c3fSmrg output[j].type = 0xffffffff; 4090af69d88dSmrg output[j].op = CF_OP_EXPORT; 4091af69d88dSmrg switch (ctx.type) { 409201e04c3fSmrg case PIPE_SHADER_VERTEX: 409301e04c3fSmrg case PIPE_SHADER_TESS_EVAL: 4094af69d88dSmrg switch (shader->output[i].name) { 4095af69d88dSmrg case TGSI_SEMANTIC_POSITION: 4096af69d88dSmrg output[j].array_base = 60; 4097af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4098af69d88dSmrg pos_emitted = true; 4099af69d88dSmrg break; 4100af69d88dSmrg 4101af69d88dSmrg case TGSI_SEMANTIC_PSIZE: 4102af69d88dSmrg output[j].array_base = 61; 4103af69d88dSmrg output[j].swizzle_y = 7; 4104af69d88dSmrg output[j].swizzle_z = 7; 4105af69d88dSmrg output[j].swizzle_w = 7; 4106af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4107af69d88dSmrg pos_emitted = true; 4108af69d88dSmrg break; 4109af69d88dSmrg case TGSI_SEMANTIC_EDGEFLAG: 4110af69d88dSmrg output[j].array_base = 61; 4111af69d88dSmrg output[j].swizzle_x = 7; 4112af69d88dSmrg output[j].swizzle_y = 0; 4113af69d88dSmrg output[j].swizzle_z = 7; 4114af69d88dSmrg output[j].swizzle_w = 7; 4115af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4116af69d88dSmrg pos_emitted = true; 4117af69d88dSmrg break; 4118af69d88dSmrg case TGSI_SEMANTIC_LAYER: 4119af69d88dSmrg /* spi_sid is 0 for outputs that are 4120af69d88dSmrg * not consumed by PS */ 4121af69d88dSmrg if (shader->output[i].spi_sid) { 4122af69d88dSmrg output[j].array_base = next_param_base++; 4123af69d88dSmrg 
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4124af69d88dSmrg j++; 4125af69d88dSmrg memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 4126af69d88dSmrg } 4127af69d88dSmrg output[j].array_base = 61; 4128af69d88dSmrg output[j].swizzle_x = 7; 4129af69d88dSmrg output[j].swizzle_y = 7; 4130af69d88dSmrg output[j].swizzle_z = 0; 4131af69d88dSmrg output[j].swizzle_w = 7; 4132af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4133af69d88dSmrg pos_emitted = true; 4134af69d88dSmrg break; 4135af69d88dSmrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 4136af69d88dSmrg /* spi_sid is 0 for outputs that are 4137af69d88dSmrg * not consumed by PS */ 4138af69d88dSmrg if (shader->output[i].spi_sid) { 4139af69d88dSmrg output[j].array_base = next_param_base++; 4140af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4141af69d88dSmrg j++; 4142af69d88dSmrg memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 4143af69d88dSmrg } 4144af69d88dSmrg output[j].array_base = 61; 4145af69d88dSmrg output[j].swizzle_x = 7; 4146af69d88dSmrg output[j].swizzle_y = 7; 4147af69d88dSmrg output[j].swizzle_z = 7; 4148af69d88dSmrg output[j].swizzle_w = 0; 4149af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4150af69d88dSmrg pos_emitted = true; 4151af69d88dSmrg break; 4152af69d88dSmrg case TGSI_SEMANTIC_CLIPVERTEX: 4153af69d88dSmrg j--; 4154af69d88dSmrg break; 4155af69d88dSmrg case TGSI_SEMANTIC_CLIPDIST: 4156af69d88dSmrg output[j].array_base = next_clip_base++; 4157af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4158af69d88dSmrg pos_emitted = true; 4159af69d88dSmrg /* spi_sid is 0 for clipdistance outputs that were generated 4160af69d88dSmrg * for clipvertex - we don't need to pass them to PS */ 4161af69d88dSmrg if (shader->output[i].spi_sid) { 4162af69d88dSmrg j++; 4163af69d88dSmrg /* duplicate it as PARAM to pass to the pixel shader */ 4164af69d88dSmrg memcpy(&output[j], 
&output[j-1], sizeof(struct r600_bytecode_output)); 4165af69d88dSmrg output[j].array_base = next_param_base++; 4166af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4167af69d88dSmrg } 4168af69d88dSmrg break; 4169af69d88dSmrg case TGSI_SEMANTIC_FOG: 4170af69d88dSmrg output[j].swizzle_y = 4; /* 0 */ 4171af69d88dSmrg output[j].swizzle_z = 4; /* 0 */ 4172af69d88dSmrg output[j].swizzle_w = 5; /* 1 */ 4173af69d88dSmrg break; 417401e04c3fSmrg case TGSI_SEMANTIC_PRIMID: 417501e04c3fSmrg output[j].swizzle_x = 2; 417601e04c3fSmrg output[j].swizzle_y = 4; /* 0 */ 417701e04c3fSmrg output[j].swizzle_z = 4; /* 0 */ 417801e04c3fSmrg output[j].swizzle_w = 4; /* 0 */ 417901e04c3fSmrg break; 4180af69d88dSmrg } 418101e04c3fSmrg 4182af69d88dSmrg break; 418301e04c3fSmrg case PIPE_SHADER_FRAGMENT: 4184af69d88dSmrg if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 4185af69d88dSmrg /* never export more colors than the number of CBs */ 4186af69d88dSmrg if (shader->output[i].sid >= max_color_exports) { 4187af69d88dSmrg /* skip export */ 4188af69d88dSmrg j--; 4189af69d88dSmrg continue; 4190af69d88dSmrg } 419101e04c3fSmrg output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 4192af69d88dSmrg output[j].array_base = shader->output[i].sid; 4193af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4194af69d88dSmrg shader->nr_ps_color_exports++; 419501e04c3fSmrg shader->ps_color_export_mask |= (0xf << (shader->output[i].sid * 4)); 419601e04c3fSmrg 419701e04c3fSmrg /* If the i-th target format is set, all previous target formats must 419801e04c3fSmrg * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well. 
419901e04c3fSmrg */ 420001e04c3fSmrg if (shader->output[i].sid > 0) 420101e04c3fSmrg for (unsigned x = 0; x < shader->output[i].sid; x++) 420201e04c3fSmrg shader->ps_color_export_mask |= (1 << (x*4)); 420301e04c3fSmrg 420401e04c3fSmrg if (shader->output[i].sid > shader->ps_export_highest) 420501e04c3fSmrg shader->ps_export_highest = shader->output[i].sid; 4206af69d88dSmrg if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 4207af69d88dSmrg for (k = 1; k < max_color_exports; k++) { 4208af69d88dSmrg j++; 4209af69d88dSmrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4210af69d88dSmrg output[j].gpr = shader->output[i].gpr; 4211af69d88dSmrg output[j].elem_size = 3; 4212af69d88dSmrg output[j].swizzle_x = 0; 4213af69d88dSmrg output[j].swizzle_y = 1; 4214af69d88dSmrg output[j].swizzle_z = 2; 421501e04c3fSmrg output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 4216af69d88dSmrg output[j].burst_count = 1; 4217af69d88dSmrg output[j].array_base = k; 4218af69d88dSmrg output[j].op = CF_OP_EXPORT; 4219af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4220af69d88dSmrg shader->nr_ps_color_exports++; 422101e04c3fSmrg if (k > shader->ps_export_highest) 422201e04c3fSmrg shader->ps_export_highest = k; 422301e04c3fSmrg shader->ps_color_export_mask |= (0xf << (j * 4)); 4224af69d88dSmrg } 4225af69d88dSmrg } 4226af69d88dSmrg } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 4227af69d88dSmrg output[j].array_base = 61; 4228af69d88dSmrg output[j].swizzle_x = 2; 4229af69d88dSmrg output[j].swizzle_y = 7; 4230af69d88dSmrg output[j].swizzle_z = output[j].swizzle_w = 7; 4231af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4232af69d88dSmrg } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 4233af69d88dSmrg output[j].array_base = 61; 4234af69d88dSmrg output[j].swizzle_x = 7; 4235af69d88dSmrg output[j].swizzle_y = 1; 4236af69d88dSmrg output[j].swizzle_z = output[j].swizzle_w = 7; 4237af69d88dSmrg 
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 423801e04c3fSmrg } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 423901e04c3fSmrg output[j].array_base = 61; 424001e04c3fSmrg output[j].swizzle_x = 7; 424101e04c3fSmrg output[j].swizzle_y = 7; 424201e04c3fSmrg output[j].swizzle_z = 0; 424301e04c3fSmrg output[j].swizzle_w = 7; 424401e04c3fSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4245af69d88dSmrg } else { 4246af69d88dSmrg R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 4247af69d88dSmrg r = -EINVAL; 4248af69d88dSmrg goto out_err; 4249af69d88dSmrg } 4250af69d88dSmrg break; 425101e04c3fSmrg case PIPE_SHADER_TESS_CTRL: 425201e04c3fSmrg break; 4253af69d88dSmrg default: 4254af69d88dSmrg R600_ERR("unsupported processor type %d\n", ctx.type); 4255af69d88dSmrg r = -EINVAL; 4256af69d88dSmrg goto out_err; 4257af69d88dSmrg } 4258af69d88dSmrg 425901e04c3fSmrg if (output[j].type == 0xffffffff) { 4260af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4261af69d88dSmrg output[j].array_base = next_param_base++; 4262af69d88dSmrg } 4263af69d88dSmrg } 4264af69d88dSmrg 4265af69d88dSmrg /* add fake position export */ 426601e04c3fSmrg if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) { 4267af69d88dSmrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4268af69d88dSmrg output[j].gpr = 0; 4269af69d88dSmrg output[j].elem_size = 3; 4270af69d88dSmrg output[j].swizzle_x = 7; 4271af69d88dSmrg output[j].swizzle_y = 7; 4272af69d88dSmrg output[j].swizzle_z = 7; 4273af69d88dSmrg output[j].swizzle_w = 7; 4274af69d88dSmrg output[j].burst_count = 1; 4275af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4276af69d88dSmrg output[j].array_base = 60; 4277af69d88dSmrg output[j].op = CF_OP_EXPORT; 4278af69d88dSmrg j++; 4279af69d88dSmrg } 4280af69d88dSmrg 4281af69d88dSmrg /* add fake param output for vertex shader if no param is 
exported */ 428201e04c3fSmrg if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) { 4283af69d88dSmrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4284af69d88dSmrg output[j].gpr = 0; 4285af69d88dSmrg output[j].elem_size = 3; 4286af69d88dSmrg output[j].swizzle_x = 7; 4287af69d88dSmrg output[j].swizzle_y = 7; 4288af69d88dSmrg output[j].swizzle_z = 7; 4289af69d88dSmrg output[j].swizzle_w = 7; 4290af69d88dSmrg output[j].burst_count = 1; 4291af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4292af69d88dSmrg output[j].array_base = 0; 4293af69d88dSmrg output[j].op = CF_OP_EXPORT; 4294af69d88dSmrg j++; 4295af69d88dSmrg } 4296af69d88dSmrg 4297af69d88dSmrg /* add fake pixel export */ 429801e04c3fSmrg if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) { 4299af69d88dSmrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4300af69d88dSmrg output[j].gpr = 0; 4301af69d88dSmrg output[j].elem_size = 3; 4302af69d88dSmrg output[j].swizzle_x = 7; 4303af69d88dSmrg output[j].swizzle_y = 7; 4304af69d88dSmrg output[j].swizzle_z = 7; 4305af69d88dSmrg output[j].swizzle_w = 7; 4306af69d88dSmrg output[j].burst_count = 1; 4307af69d88dSmrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4308af69d88dSmrg output[j].array_base = 0; 4309af69d88dSmrg output[j].op = CF_OP_EXPORT; 4310af69d88dSmrg j++; 431101e04c3fSmrg shader->nr_ps_color_exports++; 431201e04c3fSmrg shader->ps_color_export_mask = 0xf; 4313af69d88dSmrg } 4314af69d88dSmrg 4315af69d88dSmrg noutput = j; 4316af69d88dSmrg 4317af69d88dSmrg /* set export done on last export of each type */ 431801e04c3fSmrg for (k = noutput - 1, output_done = 0; k >= 0; k--) { 431901e04c3fSmrg if (!(output_done & (1 << output[k].type))) { 432001e04c3fSmrg output_done |= (1 << output[k].type); 432101e04c3fSmrg output[k].op = CF_OP_EXPORT_DONE; 4322af69d88dSmrg } 4323af69d88dSmrg } 4324af69d88dSmrg /* add output to bytecode */ 
432501e04c3fSmrg for (i = 0; i < noutput; i++) { 432601e04c3fSmrg r = r600_bytecode_add_output(ctx.bc, &output[i]); 432701e04c3fSmrg if (r) 432801e04c3fSmrg goto out_err; 4329af69d88dSmrg } 4330af69d88dSmrg } 4331af69d88dSmrg 4332af69d88dSmrg /* add program end */ 433301e04c3fSmrg if (ctx.bc->chip_class == CAYMAN) 433401e04c3fSmrg cm_bytecode_add_cf_end(ctx.bc); 433501e04c3fSmrg else { 433601e04c3fSmrg const struct cf_op_info *last = NULL; 4337af69d88dSmrg 433801e04c3fSmrg if (ctx.bc->cf_last) 433901e04c3fSmrg last = r600_isa_cf(ctx.bc->cf_last->op); 4340af69d88dSmrg 434101e04c3fSmrg /* alu clause instructions don't have EOP bit, so add NOP */ 434201e04c3fSmrg if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_POP) 434301e04c3fSmrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 4344af69d88dSmrg 434501e04c3fSmrg ctx.bc->cf_last->end_of_program = 1; 4346af69d88dSmrg } 4347af69d88dSmrg 4348af69d88dSmrg /* check GPR limit - we have 124 = 128 - 4 4349af69d88dSmrg * (4 are reserved as alu clause temporary registers) */ 4350af69d88dSmrg if (ctx.bc->ngpr > 124) { 4351af69d88dSmrg R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 4352af69d88dSmrg r = -ENOMEM; 4353af69d88dSmrg goto out_err; 4354af69d88dSmrg } 4355af69d88dSmrg 435601e04c3fSmrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 4357af69d88dSmrg if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 4358af69d88dSmrg return r; 4359af69d88dSmrg } 4360af69d88dSmrg 436101e04c3fSmrg free(ctx.spilled_arrays); 436201e04c3fSmrg free(ctx.array_infos); 4363af69d88dSmrg free(ctx.literals); 4364af69d88dSmrg tgsi_parse_free(&ctx.parse); 4365af69d88dSmrg return 0; 43663464ebd5Sriastradhout_err: 436701e04c3fSmrg free(ctx.spilled_arrays); 436801e04c3fSmrg free(ctx.array_infos); 43693464ebd5Sriastradh free(ctx.literals); 43703464ebd5Sriastradh tgsi_parse_free(&ctx.parse); 43713464ebd5Sriastradh return r; 43723464ebd5Sriastradh} 43733464ebd5Sriastradh 
43743464ebd5Sriastradhstatic int tgsi_unsupported(struct r600_shader_ctx *ctx) 43753464ebd5Sriastradh{ 437601e04c3fSmrg const unsigned tgsi_opcode = 437701e04c3fSmrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 43783464ebd5Sriastradh R600_ERR("%s tgsi opcode unsupported\n", 437901e04c3fSmrg tgsi_get_opcode_name(tgsi_opcode)); 43803464ebd5Sriastradh return -EINVAL; 43813464ebd5Sriastradh} 43823464ebd5Sriastradh 438301e04c3fSmrgstatic int tgsi_end(struct r600_shader_ctx *ctx UNUSED) 43843464ebd5Sriastradh{ 43853464ebd5Sriastradh return 0; 43863464ebd5Sriastradh} 43873464ebd5Sriastradh 4388af69d88dSmrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 43893464ebd5Sriastradh const struct r600_shader_src *shader_src, 43903464ebd5Sriastradh unsigned chan) 43913464ebd5Sriastradh{ 43923464ebd5Sriastradh bc_src->sel = shader_src->sel; 43933464ebd5Sriastradh bc_src->chan = shader_src->swizzle[chan]; 43943464ebd5Sriastradh bc_src->neg = shader_src->neg; 43953464ebd5Sriastradh bc_src->abs = shader_src->abs; 43963464ebd5Sriastradh bc_src->rel = shader_src->rel; 43973464ebd5Sriastradh bc_src->value = shader_src->value[bc_src->chan]; 4398af69d88dSmrg bc_src->kc_bank = shader_src->kc_bank; 439901e04c3fSmrg bc_src->kc_rel = shader_src->kc_rel; 44003464ebd5Sriastradh} 44013464ebd5Sriastradh 4402af69d88dSmrgstatic void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 44033464ebd5Sriastradh{ 44043464ebd5Sriastradh bc_src->abs = 1; 44053464ebd5Sriastradh bc_src->neg = 0; 44063464ebd5Sriastradh} 44073464ebd5Sriastradh 4408af69d88dSmrgstatic void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 44093464ebd5Sriastradh{ 44103464ebd5Sriastradh bc_src->neg = !bc_src->neg; 44113464ebd5Sriastradh} 44123464ebd5Sriastradh 44133464ebd5Sriastradhstatic void tgsi_dst(struct r600_shader_ctx *ctx, 44143464ebd5Sriastradh const struct tgsi_full_dst_register *tgsi_dst, 44153464ebd5Sriastradh unsigned swizzle, 4416af69d88dSmrg struct 
r600_bytecode_alu_dst *r600_dst) 44173464ebd5Sriastradh{ 44183464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 44193464ebd5Sriastradh 442001e04c3fSmrg if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) { 442101e04c3fSmrg bool spilled; 442201e04c3fSmrg unsigned idx; 442301e04c3fSmrg 442401e04c3fSmrg idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_dst->Register.Index, &spilled); 442501e04c3fSmrg 442601e04c3fSmrg if (spilled) { 442701e04c3fSmrg struct r600_bytecode_output cf; 442801e04c3fSmrg int reg = 0; 442901e04c3fSmrg int r; 443001e04c3fSmrg bool add_pending_output = true; 443101e04c3fSmrg 443201e04c3fSmrg memset(&cf, 0, sizeof(struct r600_bytecode_output)); 443301e04c3fSmrg get_spilled_array_base_and_size(ctx, tgsi_dst->Register.Index, 443401e04c3fSmrg &cf.array_base, &cf.array_size); 443501e04c3fSmrg 443601e04c3fSmrg /* If no component has spilled, reserve a register and add the spill code 443701e04c3fSmrg * ctx->bc->n_pending_outputs is cleared after each instruction group */ 443801e04c3fSmrg if (ctx->bc->n_pending_outputs == 0) { 443901e04c3fSmrg reg = r600_get_temp(ctx); 444001e04c3fSmrg } else { 444101e04c3fSmrg /* If we are already spilling and the output address is the same like 444201e04c3fSmrg * before then just reuse the same slot */ 444301e04c3fSmrg struct r600_bytecode_output *tmpl = &ctx->bc->pending_outputs[ctx->bc->n_pending_outputs-1]; 444401e04c3fSmrg if ((cf.array_base + idx == tmpl->array_base) || 444501e04c3fSmrg (cf.array_base == tmpl->array_base && 444601e04c3fSmrg tmpl->index_gpr == ctx->bc->ar_reg && 444701e04c3fSmrg tgsi_dst->Register.Indirect)) { 444801e04c3fSmrg reg = ctx->bc->pending_outputs[0].gpr; 444901e04c3fSmrg add_pending_output = false; 445001e04c3fSmrg } else { 445101e04c3fSmrg reg = r600_get_temp(ctx); 445201e04c3fSmrg } 445301e04c3fSmrg } 445401e04c3fSmrg 445501e04c3fSmrg r600_dst->sel = reg; 445601e04c3fSmrg r600_dst->chan = swizzle; 445701e04c3fSmrg r600_dst->write = 1; 
445801e04c3fSmrg if (inst->Instruction.Saturate) { 445901e04c3fSmrg r600_dst->clamp = 1; 446001e04c3fSmrg } 446101e04c3fSmrg 446201e04c3fSmrg /* Add new outputs as pending */ 446301e04c3fSmrg if (add_pending_output) { 446401e04c3fSmrg cf.op = CF_OP_MEM_SCRATCH; 446501e04c3fSmrg cf.elem_size = 3; 446601e04c3fSmrg cf.gpr = reg; 446701e04c3fSmrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 446801e04c3fSmrg cf.mark = 1; 446901e04c3fSmrg cf.comp_mask = inst->Dst[0].Register.WriteMask; 447001e04c3fSmrg cf.swizzle_x = 0; 447101e04c3fSmrg cf.swizzle_y = 1; 447201e04c3fSmrg cf.swizzle_z = 2; 447301e04c3fSmrg cf.swizzle_w = 3; 447401e04c3fSmrg cf.burst_count = 1; 447501e04c3fSmrg 447601e04c3fSmrg if (tgsi_dst->Register.Indirect) { 447701e04c3fSmrg if (ctx->bc->chip_class < R700) 447801e04c3fSmrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 447901e04c3fSmrg else 448001e04c3fSmrg cf.type = 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK; 448101e04c3fSmrg cf.index_gpr = ctx->bc->ar_reg; 448201e04c3fSmrg } 448301e04c3fSmrg else { 448401e04c3fSmrg cf.array_base += idx; 448501e04c3fSmrg cf.array_size = 0; 448601e04c3fSmrg } 448701e04c3fSmrg 448801e04c3fSmrg r = r600_bytecode_add_pending_output(ctx->bc, &cf); 448901e04c3fSmrg if (r) 449001e04c3fSmrg return; 449101e04c3fSmrg 449201e04c3fSmrg if (ctx->bc->chip_class >= R700) 449301e04c3fSmrg r600_bytecode_need_wait_ack(ctx->bc, true); 449401e04c3fSmrg } 449501e04c3fSmrg return; 449601e04c3fSmrg } 449701e04c3fSmrg else { 449801e04c3fSmrg r600_dst->sel = idx; 449901e04c3fSmrg } 450001e04c3fSmrg } 450101e04c3fSmrg else { 450201e04c3fSmrg r600_dst->sel = tgsi_dst->Register.Index; 450301e04c3fSmrg r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 450401e04c3fSmrg } 45053464ebd5Sriastradh r600_dst->chan = swizzle; 45063464ebd5Sriastradh r600_dst->write = 1; 45073464ebd5Sriastradh if (inst->Instruction.Saturate) { 45083464ebd5Sriastradh r600_dst->clamp = 1; 45093464ebd5Sriastradh } 451001e04c3fSmrg 
if (ctx->type == PIPE_SHADER_TESS_CTRL) { 451101e04c3fSmrg if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { 451201e04c3fSmrg return; 45133464ebd5Sriastradh } 45143464ebd5Sriastradh } 451501e04c3fSmrg if (tgsi_dst->Register.Indirect) 451601e04c3fSmrg r600_dst->rel = V_SQ_REL_RELATIVE; 451701e04c3fSmrg 45183464ebd5Sriastradh} 45193464ebd5Sriastradh 452001e04c3fSmrgstatic int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap, int dest_temp, int op_override) 45213464ebd5Sriastradh{ 45223464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4523af69d88dSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 452401e04c3fSmrg struct r600_bytecode_alu alu; 4525af69d88dSmrg int i, j, r, lasti = tgsi_last_instruction(write_mask); 452601e04c3fSmrg int use_tmp = 0; 452701e04c3fSmrg int swizzle_x = inst->Src[0].Register.SwizzleX; 452801e04c3fSmrg 452901e04c3fSmrg if (singledest) { 453001e04c3fSmrg switch (write_mask) { 453101e04c3fSmrg case 0x1: 453201e04c3fSmrg if (swizzle_x == 2) { 453301e04c3fSmrg write_mask = 0xc; 453401e04c3fSmrg use_tmp = 3; 453501e04c3fSmrg } else 453601e04c3fSmrg write_mask = 0x3; 453701e04c3fSmrg break; 453801e04c3fSmrg case 0x2: 453901e04c3fSmrg if (swizzle_x == 2) { 454001e04c3fSmrg write_mask = 0xc; 454101e04c3fSmrg use_tmp = 3; 454201e04c3fSmrg } else { 454301e04c3fSmrg write_mask = 0x3; 454401e04c3fSmrg use_tmp = 1; 454501e04c3fSmrg } 454601e04c3fSmrg break; 454701e04c3fSmrg case 0x4: 454801e04c3fSmrg if (swizzle_x == 0) { 454901e04c3fSmrg write_mask = 0x3; 455001e04c3fSmrg use_tmp = 1; 455101e04c3fSmrg } else 455201e04c3fSmrg write_mask = 0xc; 455301e04c3fSmrg break; 455401e04c3fSmrg case 0x8: 455501e04c3fSmrg if (swizzle_x == 0) { 455601e04c3fSmrg write_mask = 0x3; 455701e04c3fSmrg use_tmp = 1; 455801e04c3fSmrg } else { 455901e04c3fSmrg write_mask = 0xc; 456001e04c3fSmrg use_tmp = 3; 456101e04c3fSmrg } 456201e04c3fSmrg break; 456301e04c3fSmrg } 456401e04c3fSmrg } 
4565af69d88dSmrg 456601e04c3fSmrg lasti = tgsi_last_instruction(write_mask); 4567af69d88dSmrg for (i = 0; i <= lasti; i++) { 456801e04c3fSmrg 4569af69d88dSmrg if (!(write_mask & (1 << i))) 45703464ebd5Sriastradh continue; 45713464ebd5Sriastradh 4572af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 457301e04c3fSmrg 457401e04c3fSmrg if (singledest) { 457501e04c3fSmrg if (use_tmp || dest_temp) { 457601e04c3fSmrg alu.dst.sel = use_tmp ? ctx->temp_reg : dest_temp; 457701e04c3fSmrg alu.dst.chan = i; 457801e04c3fSmrg alu.dst.write = 1; 457901e04c3fSmrg } else { 458001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 458101e04c3fSmrg } 458201e04c3fSmrg if (i == 1 || i == 3) 458301e04c3fSmrg alu.dst.write = 0; 4584af69d88dSmrg } else 4585af69d88dSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 45863464ebd5Sriastradh 458701e04c3fSmrg alu.op = op_override ? op_override : ctx->inst_info->op; 458801e04c3fSmrg if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 458901e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 459001e04c3fSmrg } else if (!swap) { 45913464ebd5Sriastradh for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 459201e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 45933464ebd5Sriastradh } 45943464ebd5Sriastradh } else { 459501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 459601e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 45973464ebd5Sriastradh } 459801e04c3fSmrg 45993464ebd5Sriastradh /* handle some special cases */ 460001e04c3fSmrg if (i == 1 || i == 3) { 460101e04c3fSmrg switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 460201e04c3fSmrg case TGSI_OPCODE_DABS: 460301e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 460401e04c3fSmrg break; 460501e04c3fSmrg default: 460601e04c3fSmrg break; 460701e04c3fSmrg } 46083464ebd5Sriastradh } 460901e04c3fSmrg if (i == lasti) { 46103464ebd5Sriastradh alu.last = 1; 
46113464ebd5Sriastradh } 4612af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 46133464ebd5Sriastradh if (r) 46143464ebd5Sriastradh return r; 46153464ebd5Sriastradh } 4616af69d88dSmrg 4617af69d88dSmrg if (use_tmp) { 461801e04c3fSmrg write_mask = inst->Dst[0].Register.WriteMask; 461901e04c3fSmrg 462001e04c3fSmrg lasti = tgsi_last_instruction(write_mask); 4621af69d88dSmrg /* move result from temp to dst */ 4622af69d88dSmrg for (i = 0; i <= lasti; i++) { 4623af69d88dSmrg if (!(write_mask & (1 << i))) 4624af69d88dSmrg continue; 4625af69d88dSmrg 4626af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4627af69d88dSmrg alu.op = ALU_OP1_MOV; 462801e04c3fSmrg 462901e04c3fSmrg if (dest_temp) { 463001e04c3fSmrg alu.dst.sel = dest_temp; 463101e04c3fSmrg alu.dst.chan = i; 463201e04c3fSmrg alu.dst.write = 1; 463301e04c3fSmrg } else 463401e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4635af69d88dSmrg alu.src[0].sel = ctx->temp_reg; 463601e04c3fSmrg alu.src[0].chan = use_tmp - 1; 4637af69d88dSmrg alu.last = (i == lasti); 4638af69d88dSmrg 4639af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4640af69d88dSmrg if (r) 4641af69d88dSmrg return r; 4642af69d88dSmrg } 4643af69d88dSmrg } 46443464ebd5Sriastradh return 0; 46453464ebd5Sriastradh} 46463464ebd5Sriastradh 464701e04c3fSmrgstatic int tgsi_op2_64(struct r600_shader_ctx *ctx) 46483464ebd5Sriastradh{ 464901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 465001e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 465101e04c3fSmrg /* confirm writemasking */ 465201e04c3fSmrg if ((write_mask & 0x3) != 0x3 && 465301e04c3fSmrg (write_mask & 0xc) != 0xc) { 465401e04c3fSmrg fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 465501e04c3fSmrg return -1; 465601e04c3fSmrg } 465701e04c3fSmrg return tgsi_op2_64_params(ctx, false, false, 0, 0); 465801e04c3fSmrg} 465901e04c3fSmrg 466001e04c3fSmrgstatic int tgsi_op2_64_single_dest(struct r600_shader_ctx 
*ctx) 46613464ebd5Sriastradh{ 466201e04c3fSmrg return tgsi_op2_64_params(ctx, true, false, 0, 0); 4663af69d88dSmrg} 4664af69d88dSmrg 466501e04c3fSmrgstatic int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 4666af69d88dSmrg{ 466701e04c3fSmrg return tgsi_op2_64_params(ctx, true, true, 0, 0); 4668af69d88dSmrg} 4669af69d88dSmrg 467001e04c3fSmrgstatic int tgsi_op3_64(struct r600_shader_ctx *ctx) 4671af69d88dSmrg{ 4672af69d88dSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4673af69d88dSmrg struct r600_bytecode_alu alu; 467401e04c3fSmrg int i, j, r; 467501e04c3fSmrg int lasti = 3; 467601e04c3fSmrg int tmp = r600_get_temp(ctx); 4677af69d88dSmrg 4678af69d88dSmrg for (i = 0; i < lasti + 1; i++) { 4679af69d88dSmrg 4680af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4681af69d88dSmrg alu.op = ctx->inst_info->op; 468201e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 468301e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 
0 : 1); 468401e04c3fSmrg } 4685af69d88dSmrg 468601e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << i)) 468701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 468801e04c3fSmrg else 468901e04c3fSmrg alu.dst.sel = tmp; 4690af69d88dSmrg 469101e04c3fSmrg alu.dst.chan = i; 469201e04c3fSmrg alu.is_op3 = 1; 4693af69d88dSmrg if (i == lasti) { 4694af69d88dSmrg alu.last = 1; 4695af69d88dSmrg } 4696af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4697af69d88dSmrg if (r) 4698af69d88dSmrg return r; 4699af69d88dSmrg } 4700af69d88dSmrg return 0; 47013464ebd5Sriastradh} 47023464ebd5Sriastradh 470301e04c3fSmrgstatic int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 47043464ebd5Sriastradh{ 47053464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4706af69d88dSmrg struct r600_bytecode_alu alu; 470701e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 470801e04c3fSmrg int i, j, r, lasti = tgsi_last_instruction(write_mask); 470901e04c3fSmrg /* use temp register if trans_only and more than one dst component */ 471001e04c3fSmrg int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 471101e04c3fSmrg unsigned op = ctx->inst_info->op; 471201e04c3fSmrg 471301e04c3fSmrg if (op == ALU_OP2_MUL_IEEE && 471401e04c3fSmrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 471501e04c3fSmrg op = ALU_OP2_MUL; 471601e04c3fSmrg 47177ec681f3Smrg /* nir_to_tgsi lowers nir_op_isub to UADD + negate, since r600 doesn't support 47187ec681f3Smrg * source modifiers with integer ops we switch back to SUB_INT */ 47197ec681f3Smrg bool src1_neg = ctx->src[1].neg; 47207ec681f3Smrg if (op == ALU_OP2_ADD_INT && src1_neg) { 47217ec681f3Smrg src1_neg = false; 47227ec681f3Smrg op = ALU_OP2_SUB_INT; 47237ec681f3Smrg } 47247ec681f3Smrg 472501e04c3fSmrg for (i = 0; i <= lasti; i++) { 472601e04c3fSmrg if (!(write_mask & (1 << i))) 472701e04c3fSmrg continue; 472801e04c3fSmrg 4729af69d88dSmrg memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 473001e04c3fSmrg if (use_tmp) { 473101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 473201e04c3fSmrg alu.dst.chan = i; 473301e04c3fSmrg alu.dst.write = 1; 473401e04c3fSmrg } else 473501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4736af69d88dSmrg 473701e04c3fSmrg alu.op = op; 473801e04c3fSmrg if (!swap) { 473901e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 474001e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4741af69d88dSmrg } 47427ec681f3Smrg alu.src[1].neg = src1_neg; 474301e04c3fSmrg } else { 474401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 474501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 47463464ebd5Sriastradh } 474701e04c3fSmrg if (i == lasti || trans_only) { 47483464ebd5Sriastradh alu.last = 1; 474901e04c3fSmrg } 4750af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4751af69d88dSmrg if (r) 4752af69d88dSmrg return r; 4753af69d88dSmrg } 4754af69d88dSmrg 475501e04c3fSmrg if (use_tmp) { 475601e04c3fSmrg /* move result from temp to dst */ 475701e04c3fSmrg for (i = 0; i <= lasti; i++) { 475801e04c3fSmrg if (!(write_mask & (1 << i))) 475901e04c3fSmrg continue; 4760af69d88dSmrg 4761af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 476201e04c3fSmrg alu.op = ALU_OP1_MOV; 476301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 476401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 476501e04c3fSmrg alu.src[0].chan = i; 476601e04c3fSmrg alu.last = (i == lasti); 476701e04c3fSmrg 4768af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4769af69d88dSmrg if (r) 4770af69d88dSmrg return r; 4771af69d88dSmrg } 4772af69d88dSmrg } 47733464ebd5Sriastradh return 0; 47743464ebd5Sriastradh} 47753464ebd5Sriastradh 477601e04c3fSmrgstatic int tgsi_op2(struct r600_shader_ctx *ctx) 47773464ebd5Sriastradh{ 477801e04c3fSmrg return tgsi_op2_s(ctx, 0, 0); 477901e04c3fSmrg} 47803464ebd5Sriastradh 478101e04c3fSmrgstatic int tgsi_op2_swap(struct r600_shader_ctx *ctx) 478201e04c3fSmrg{ 
478301e04c3fSmrg return tgsi_op2_s(ctx, 1, 0); 478401e04c3fSmrg} 47853464ebd5Sriastradh 478601e04c3fSmrgstatic int tgsi_op2_trans(struct r600_shader_ctx *ctx) 478701e04c3fSmrg{ 478801e04c3fSmrg return tgsi_op2_s(ctx, 0, 1); 47893464ebd5Sriastradh} 47903464ebd5Sriastradh 479101e04c3fSmrgstatic int tgsi_ineg(struct r600_shader_ctx *ctx) 47923464ebd5Sriastradh{ 47933464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4794af69d88dSmrg struct r600_bytecode_alu alu; 47953464ebd5Sriastradh int i, r; 479601e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 47973464ebd5Sriastradh 479801e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 47993464ebd5Sriastradh 480001e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 480101e04c3fSmrg continue; 4802af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4803af69d88dSmrg alu.op = ctx->inst_info->op; 480401e04c3fSmrg 480501e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 480601e04c3fSmrg 480701e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 48083464ebd5Sriastradh 48093464ebd5Sriastradh tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 48103464ebd5Sriastradh 481101e04c3fSmrg if (i == lasti) { 48123464ebd5Sriastradh alu.last = 1; 481301e04c3fSmrg } 4814af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 48153464ebd5Sriastradh if (r) 48163464ebd5Sriastradh return r; 48173464ebd5Sriastradh } 48183464ebd5Sriastradh return 0; 481901e04c3fSmrg 48203464ebd5Sriastradh} 48213464ebd5Sriastradh 482201e04c3fSmrgstatic int tgsi_dneg(struct r600_shader_ctx *ctx) 48233464ebd5Sriastradh{ 48243464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4825af69d88dSmrg struct r600_bytecode_alu alu; 48263464ebd5Sriastradh int i, r; 48273464ebd5Sriastradh int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 48283464ebd5Sriastradh 48293464ebd5Sriastradh for (i = 0; i < lasti + 1; i++) { 483001e04c3fSmrg 
48313464ebd5Sriastradh if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 48323464ebd5Sriastradh continue; 4833af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4834af69d88dSmrg alu.op = ALU_OP1_MOV; 48353464ebd5Sriastradh 483601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 483701e04c3fSmrg 483801e04c3fSmrg if (i == 1 || i == 3) 483901e04c3fSmrg r600_bytecode_src_toggle_neg(&alu.src[0]); 48403464ebd5Sriastradh tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 484101e04c3fSmrg 484201e04c3fSmrg if (i == lasti) { 48433464ebd5Sriastradh alu.last = 1; 484401e04c3fSmrg } 4845af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 48463464ebd5Sriastradh if (r) 48473464ebd5Sriastradh return r; 48483464ebd5Sriastradh } 48493464ebd5Sriastradh return 0; 485001e04c3fSmrg 48513464ebd5Sriastradh} 48523464ebd5Sriastradh 485301e04c3fSmrgstatic int tgsi_dfracexp(struct r600_shader_ctx *ctx) 48543464ebd5Sriastradh{ 48553464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4856af69d88dSmrg struct r600_bytecode_alu alu; 485701e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 485801e04c3fSmrg int i, j, r; 48593464ebd5Sriastradh 486001e04c3fSmrg for (i = 0; i <= 3; i++) { 486101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 486201e04c3fSmrg alu.op = ctx->inst_info->op; 48633464ebd5Sriastradh 486401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 486501e04c3fSmrg alu.dst.chan = i; 486601e04c3fSmrg alu.dst.write = 1; 486701e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 486801e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 486901e04c3fSmrg } 48703464ebd5Sriastradh 487101e04c3fSmrg if (i == 3) 487201e04c3fSmrg alu.last = 1; 487301e04c3fSmrg 487401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 487501e04c3fSmrg if (r) 487601e04c3fSmrg return r; 487701e04c3fSmrg } 487801e04c3fSmrg 487901e04c3fSmrg /* Replicate significand result across channels. 
*/ 488001e04c3fSmrg for (i = 0; i <= 3; i++) { 488101e04c3fSmrg if (!(write_mask & (1 << i))) 488201e04c3fSmrg continue; 488301e04c3fSmrg 488401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 488501e04c3fSmrg alu.op = ALU_OP1_MOV; 488601e04c3fSmrg alu.src[0].chan = (i & 1) + 2; 488701e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 488801e04c3fSmrg 488901e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 489001e04c3fSmrg alu.dst.write = 1; 489101e04c3fSmrg alu.last = 1; 489201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 489301e04c3fSmrg if (r) 489401e04c3fSmrg return r; 489501e04c3fSmrg } 48963464ebd5Sriastradh 489701e04c3fSmrg for (i = 0; i <= 3; i++) { 489801e04c3fSmrg if (inst->Dst[1].Register.WriteMask & (1 << i)) { 489901e04c3fSmrg /* MOV third channels to writemask dst1 */ 490001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 490101e04c3fSmrg alu.op = ALU_OP1_MOV; 490201e04c3fSmrg alu.src[0].chan = 1; 49033464ebd5Sriastradh alu.src[0].sel = ctx->temp_reg; 490401e04c3fSmrg 490501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 49063464ebd5Sriastradh alu.last = 1; 4907af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 49083464ebd5Sriastradh if (r) 49093464ebd5Sriastradh return r; 491001e04c3fSmrg break; 49113464ebd5Sriastradh } 49123464ebd5Sriastradh } 491301e04c3fSmrg return 0; 491401e04c3fSmrg} 49153464ebd5Sriastradh 491601e04c3fSmrg 491701e04c3fSmrgstatic int egcm_int_to_double(struct r600_shader_ctx *ctx) 491801e04c3fSmrg{ 491901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 492001e04c3fSmrg struct r600_bytecode_alu alu; 492101e04c3fSmrg int i, c, r; 492201e04c3fSmrg int write_mask = inst->Dst[0].Register.WriteMask; 492301e04c3fSmrg int temp_reg = r600_get_temp(ctx); 492401e04c3fSmrg 492501e04c3fSmrg assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 492601e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_U2D); 492701e04c3fSmrg 492801e04c3fSmrg for (c = 0; c < 2; c++) { 
492901e04c3fSmrg int dchan = c * 2; 493001e04c3fSmrg if (write_mask & (0x3 << dchan)) { 493101e04c3fSmrg /* split into 24-bit int and 8-bit int */ 4932af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 493301e04c3fSmrg alu.op = ALU_OP2_AND_INT; 493401e04c3fSmrg alu.dst.sel = temp_reg; 493501e04c3fSmrg alu.dst.chan = dchan; 493601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], c); 493701e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 493801e04c3fSmrg alu.src[1].value = 0xffffff00; 493901e04c3fSmrg alu.dst.write = 1; 494001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 494101e04c3fSmrg if (r) 494201e04c3fSmrg return r; 49433464ebd5Sriastradh 494401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 494501e04c3fSmrg alu.op = ALU_OP2_AND_INT; 494601e04c3fSmrg alu.dst.sel = temp_reg; 494701e04c3fSmrg alu.dst.chan = dchan + 1; 494801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], c); 494901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 495001e04c3fSmrg alu.src[1].value = 0xff; 495101e04c3fSmrg alu.dst.write = 1; 49523464ebd5Sriastradh alu.last = 1; 4953af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 49543464ebd5Sriastradh if (r) 49553464ebd5Sriastradh return r; 49563464ebd5Sriastradh } 49573464ebd5Sriastradh } 49583464ebd5Sriastradh 495901e04c3fSmrg for (c = 0; c < 2; c++) { 496001e04c3fSmrg int dchan = c * 2; 496101e04c3fSmrg if (write_mask & (0x3 << dchan)) { 496201e04c3fSmrg for (i = dchan; i <= dchan + 1; i++) { 496301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 496401e04c3fSmrg alu.op = i == dchan ? 
ctx->inst_info->op : ALU_OP1_UINT_TO_FLT; 49653464ebd5Sriastradh 496601e04c3fSmrg alu.src[0].sel = temp_reg; 496701e04c3fSmrg alu.src[0].chan = i; 496801e04c3fSmrg alu.dst.sel = temp_reg; 496901e04c3fSmrg alu.dst.chan = i; 497001e04c3fSmrg alu.dst.write = 1; 497101e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) 497201e04c3fSmrg alu.last = i == dchan + 1; 497301e04c3fSmrg else 497401e04c3fSmrg alu.last = 1; /* trans only ops on evergreen */ 497501e04c3fSmrg 497601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 497701e04c3fSmrg if (r) 497801e04c3fSmrg return r; 497901e04c3fSmrg } 498001e04c3fSmrg } 498101e04c3fSmrg } 49823464ebd5Sriastradh 498301e04c3fSmrg for (c = 0; c < 2; c++) { 498401e04c3fSmrg int dchan = c * 2; 498501e04c3fSmrg if (write_mask & (0x3 << dchan)) { 498601e04c3fSmrg for (i = 0; i < 4; i++) { 498701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 498801e04c3fSmrg alu.op = ALU_OP1_FLT32_TO_FLT64; 498901e04c3fSmrg 499001e04c3fSmrg alu.src[0].chan = dchan + (i / 2); 499101e04c3fSmrg if (i == 0 || i == 2) 499201e04c3fSmrg alu.src[0].sel = temp_reg; 499301e04c3fSmrg else { 499401e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 499501e04c3fSmrg alu.src[0].value = 0x0; 499601e04c3fSmrg } 499701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 499801e04c3fSmrg alu.dst.chan = i; 499901e04c3fSmrg alu.last = i == 3; 500001e04c3fSmrg alu.dst.write = 1; 50013464ebd5Sriastradh 500201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 500301e04c3fSmrg if (r) 500401e04c3fSmrg return r; 500501e04c3fSmrg } 50063464ebd5Sriastradh 500701e04c3fSmrg for (i = 0; i <= 1; i++) { 500801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 500901e04c3fSmrg alu.op = ALU_OP2_ADD_64; 50103464ebd5Sriastradh 501101e04c3fSmrg alu.src[0].chan = fp64_switch(i); 501201e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 501301e04c3fSmrg 501401e04c3fSmrg alu.src[1].chan = fp64_switch(i + 2); 501501e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 501601e04c3fSmrg tgsi_dst(ctx, 
&inst->Dst[0], dchan + i, &alu.dst); 501701e04c3fSmrg alu.last = i == 1; 501801e04c3fSmrg 501901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 502001e04c3fSmrg if (r) 502101e04c3fSmrg return r; 502201e04c3fSmrg } 502301e04c3fSmrg } 50243464ebd5Sriastradh } 50253464ebd5Sriastradh 502601e04c3fSmrg return 0; 502701e04c3fSmrg} 50283464ebd5Sriastradh 502901e04c3fSmrgstatic int egcm_double_to_int(struct r600_shader_ctx *ctx) 503001e04c3fSmrg{ 503101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 503201e04c3fSmrg struct r600_bytecode_alu alu; 503301e04c3fSmrg int i, r; 503401e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 503501e04c3fSmrg int treg = r600_get_temp(ctx); 503601e04c3fSmrg assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 503701e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_D2U); 50383464ebd5Sriastradh 503901e04c3fSmrg /* do a 64->32 into a temp register */ 504001e04c3fSmrg r = tgsi_op2_64_params(ctx, true, false, treg, ALU_OP1_FLT64_TO_FLT32); 504101e04c3fSmrg if (r) 504201e04c3fSmrg return r; 50433464ebd5Sriastradh 504401e04c3fSmrg for (i = 0; i <= lasti; i++) { 504501e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 504601e04c3fSmrg continue; 504701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 504801e04c3fSmrg alu.op = ctx->inst_info->op; 50493464ebd5Sriastradh 505001e04c3fSmrg alu.src[0].chan = i; 505101e04c3fSmrg alu.src[0].sel = treg; 505201e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 505301e04c3fSmrg alu.last = (i == lasti); 50543464ebd5Sriastradh 5055af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 50563464ebd5Sriastradh if (r) 50573464ebd5Sriastradh return r; 50583464ebd5Sriastradh } 50593464ebd5Sriastradh 50603464ebd5Sriastradh return 0; 50613464ebd5Sriastradh} 50623464ebd5Sriastradh 506301e04c3fSmrgstatic int cayman_emit_unary_double_raw(struct r600_bytecode *bc, 506401e04c3fSmrg unsigned op, 506501e04c3fSmrg int dst_reg, 
506601e04c3fSmrg struct r600_shader_src *src, 506701e04c3fSmrg bool abs) 50683464ebd5Sriastradh{ 5069af69d88dSmrg struct r600_bytecode_alu alu; 507001e04c3fSmrg const int last_slot = 3; 507101e04c3fSmrg int r; 50723464ebd5Sriastradh 507301e04c3fSmrg /* these have to write the result to X/Y by the looks of it */ 507401e04c3fSmrg for (int i = 0 ; i < last_slot; i++) { 5075af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 507601e04c3fSmrg alu.op = op; 50773464ebd5Sriastradh 507801e04c3fSmrg r600_bytecode_src(&alu.src[0], src, 1); 507901e04c3fSmrg r600_bytecode_src(&alu.src[1], src, 0); 50803464ebd5Sriastradh 508101e04c3fSmrg if (abs) 508201e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[1]); 50833464ebd5Sriastradh 508401e04c3fSmrg alu.dst.sel = dst_reg; 508501e04c3fSmrg alu.dst.chan = i; 508601e04c3fSmrg alu.dst.write = (i == 0 || i == 1); 508701e04c3fSmrg 508801e04c3fSmrg if (bc->chip_class != CAYMAN || i == last_slot - 1) 50893464ebd5Sriastradh alu.last = 1; 509001e04c3fSmrg r = r600_bytecode_add_alu(bc, &alu); 50913464ebd5Sriastradh if (r) 50923464ebd5Sriastradh return r; 50933464ebd5Sriastradh } 50943464ebd5Sriastradh 50953464ebd5Sriastradh return 0; 50963464ebd5Sriastradh} 50973464ebd5Sriastradh 509801e04c3fSmrgstatic int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 50993464ebd5Sriastradh{ 51003464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 510101e04c3fSmrg int i, r; 5102af69d88dSmrg struct r600_bytecode_alu alu; 510301e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 510401e04c3fSmrg int t1 = ctx->temp_reg; 51053464ebd5Sriastradh 510601e04c3fSmrg /* should only be one src regs */ 510701e04c3fSmrg assert(inst->Instruction.NumSrcRegs == 1); 51083464ebd5Sriastradh 510901e04c3fSmrg /* only support one double at a time */ 511001e04c3fSmrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 511101e04c3fSmrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 
51123464ebd5Sriastradh 511301e04c3fSmrg r = cayman_emit_unary_double_raw( 511401e04c3fSmrg ctx->bc, ctx->inst_info->op, t1, 511501e04c3fSmrg &ctx->src[0], 511601e04c3fSmrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 511701e04c3fSmrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT); 51183464ebd5Sriastradh if (r) 51193464ebd5Sriastradh return r; 51203464ebd5Sriastradh 512101e04c3fSmrg for (i = 0 ; i <= lasti; i++) { 512201e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 512301e04c3fSmrg continue; 5124af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 512501e04c3fSmrg alu.op = ALU_OP1_MOV; 512601e04c3fSmrg alu.src[0].sel = t1; 512701e04c3fSmrg alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 512801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 51293464ebd5Sriastradh alu.dst.write = 1; 513001e04c3fSmrg if (i == lasti) 513101e04c3fSmrg alu.last = 1; 5132af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 51333464ebd5Sriastradh if (r) 51343464ebd5Sriastradh return r; 513501e04c3fSmrg } 513601e04c3fSmrg return 0; 513701e04c3fSmrg} 51383464ebd5Sriastradh 513901e04c3fSmrgstatic int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 514001e04c3fSmrg{ 514101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 514201e04c3fSmrg int i, j, r; 514301e04c3fSmrg struct r600_bytecode_alu alu; 514401e04c3fSmrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 51453464ebd5Sriastradh 514601e04c3fSmrg for (i = 0 ; i < last_slot; i++) { 514701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 514801e04c3fSmrg alu.op = ctx->inst_info->op; 514901e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 515001e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 51513464ebd5Sriastradh 515201e04c3fSmrg /* RSQ should take the absolute value of src */ 515301e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 515401e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[j]); 515501e04c3fSmrg } 515601e04c3fSmrg } 515701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 515801e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 51593464ebd5Sriastradh 516001e04c3fSmrg if (i == last_slot - 1) 516101e04c3fSmrg alu.last = 1; 516201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 516301e04c3fSmrg if (r) 516401e04c3fSmrg return r; 516501e04c3fSmrg } 51663464ebd5Sriastradh return 0; 51673464ebd5Sriastradh} 51683464ebd5Sriastradh 516901e04c3fSmrgstatic int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 51703464ebd5Sriastradh{ 51713464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 517201e04c3fSmrg int i, j, k, r; 5173af69d88dSmrg struct r600_bytecode_alu alu; 517401e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 517501e04c3fSmrg int t1 = ctx->temp_reg; 51763464ebd5Sriastradh 517701e04c3fSmrg for (k = 0; k <= lasti; k++) { 517801e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 517901e04c3fSmrg continue; 51803464ebd5Sriastradh 518101e04c3fSmrg for (i = 0 ; i < 4; i++) { 518201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 518301e04c3fSmrg alu.op = ctx->inst_info->op; 518401e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 518501e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 518601e04c3fSmrg } 518701e04c3fSmrg alu.dst.sel = t1; 518801e04c3fSmrg alu.dst.chan = 
i; 518901e04c3fSmrg alu.dst.write = (i == k); 519001e04c3fSmrg if (i == 3) 519101e04c3fSmrg alu.last = 1; 519201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 519301e04c3fSmrg if (r) 519401e04c3fSmrg return r; 519501e04c3fSmrg } 519601e04c3fSmrg } 51973464ebd5Sriastradh 519801e04c3fSmrg for (i = 0 ; i <= lasti; i++) { 519901e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 520001e04c3fSmrg continue; 520101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 520201e04c3fSmrg alu.op = ALU_OP1_MOV; 520301e04c3fSmrg alu.src[0].sel = t1; 520401e04c3fSmrg alu.src[0].chan = i; 520501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 520601e04c3fSmrg alu.dst.write = 1; 520701e04c3fSmrg if (i == lasti) 520801e04c3fSmrg alu.last = 1; 520901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 521001e04c3fSmrg if (r) 521101e04c3fSmrg return r; 52123464ebd5Sriastradh } 521301e04c3fSmrg 521401e04c3fSmrg return 0; 52153464ebd5Sriastradh} 52163464ebd5Sriastradh 521701e04c3fSmrg 521801e04c3fSmrgstatic int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 52193464ebd5Sriastradh{ 52203464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 522101e04c3fSmrg int i, j, k, r; 5222af69d88dSmrg struct r600_bytecode_alu alu; 522301e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 522401e04c3fSmrg int t1 = ctx->temp_reg; 522501e04c3fSmrg 522601e04c3fSmrg /* t1 would get overwritten below if we actually tried to 522701e04c3fSmrg * multiply two pairs of doubles at a time. */ 522801e04c3fSmrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 522901e04c3fSmrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 523001e04c3fSmrg 523101e04c3fSmrg k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 
0 : 1; 52323464ebd5Sriastradh 52333464ebd5Sriastradh for (i = 0; i < 4; i++) { 5234af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 523501e04c3fSmrg alu.op = ctx->inst_info->op; 523601e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 523701e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); 523801e04c3fSmrg } 523901e04c3fSmrg alu.dst.sel = t1; 52403464ebd5Sriastradh alu.dst.chan = i; 524101e04c3fSmrg alu.dst.write = 1; 52423464ebd5Sriastradh if (i == 3) 52433464ebd5Sriastradh alu.last = 1; 5244af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 52453464ebd5Sriastradh if (r) 52463464ebd5Sriastradh return r; 52473464ebd5Sriastradh } 524801e04c3fSmrg 524901e04c3fSmrg for (i = 0; i <= lasti; i++) { 525001e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 525101e04c3fSmrg continue; 525201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 525301e04c3fSmrg alu.op = ALU_OP1_MOV; 525401e04c3fSmrg alu.src[0].sel = t1; 525501e04c3fSmrg alu.src[0].chan = i; 525601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 525701e04c3fSmrg alu.dst.write = 1; 525801e04c3fSmrg if (i == lasti) 525901e04c3fSmrg alu.last = 1; 526001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 526101e04c3fSmrg if (r) 526201e04c3fSmrg return r; 526301e04c3fSmrg } 526401e04c3fSmrg 52653464ebd5Sriastradh return 0; 52663464ebd5Sriastradh} 52673464ebd5Sriastradh 526801e04c3fSmrg/* 526901e04c3fSmrg * Emit RECIP_64 + MUL_64 to implement division. 527001e04c3fSmrg */ 527101e04c3fSmrgstatic int cayman_ddiv_instr(struct r600_shader_ctx *ctx) 52723464ebd5Sriastradh{ 52733464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 527401e04c3fSmrg int r; 5275af69d88dSmrg struct r600_bytecode_alu alu; 527601e04c3fSmrg int t1 = ctx->temp_reg; 527701e04c3fSmrg int k; 52783464ebd5Sriastradh 527901e04c3fSmrg /* Only support one double at a time. 
This is the same constraint as 528001e04c3fSmrg * in DMUL lowering. */ 528101e04c3fSmrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 528201e04c3fSmrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 528301e04c3fSmrg 528401e04c3fSmrg k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; 528501e04c3fSmrg 528601e04c3fSmrg r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false); 52873464ebd5Sriastradh if (r) 52883464ebd5Sriastradh return r; 52893464ebd5Sriastradh 529001e04c3fSmrg for (int i = 0; i < 4; i++) { 5291af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 529201e04c3fSmrg alu.op = ALU_OP2_MUL_64; 529301e04c3fSmrg 529401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1)); 529501e04c3fSmrg 529601e04c3fSmrg alu.src[1].sel = t1; 529701e04c3fSmrg alu.src[1].chan = (i == 3) ? 0 : 1; 529801e04c3fSmrg 529901e04c3fSmrg alu.dst.sel = t1; 53003464ebd5Sriastradh alu.dst.chan = i; 53013464ebd5Sriastradh alu.dst.write = 1; 530201e04c3fSmrg if (i == 3) 53033464ebd5Sriastradh alu.last = 1; 5304af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 53053464ebd5Sriastradh if (r) 53063464ebd5Sriastradh return r; 53073464ebd5Sriastradh } 53083464ebd5Sriastradh 530901e04c3fSmrg for (int i = 0; i < 2; i++) { 5310af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 531101e04c3fSmrg alu.op = ALU_OP1_MOV; 531201e04c3fSmrg alu.src[0].sel = t1; 531301e04c3fSmrg alu.src[0].chan = i; 531401e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst); 531501e04c3fSmrg alu.dst.write = 1; 531601e04c3fSmrg if (i == 1) 53173464ebd5Sriastradh alu.last = 1; 5318af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 53193464ebd5Sriastradh if (r) 53203464ebd5Sriastradh return r; 53213464ebd5Sriastradh } 53223464ebd5Sriastradh return 0; 53233464ebd5Sriastradh} 53243464ebd5Sriastradh 532501e04c3fSmrg/* 532601e04c3fSmrg * r600 - trunc to -PI..PI range 532701e04c3fSmrg * r700 - 
normalize by dividing by 2PI 532801e04c3fSmrg * see fdo bug 27901 532901e04c3fSmrg */ 533001e04c3fSmrgstatic int tgsi_setup_trig(struct r600_shader_ctx *ctx) 53313464ebd5Sriastradh{ 53323464ebd5Sriastradh int r; 533301e04c3fSmrg struct r600_bytecode_alu alu; 53343464ebd5Sriastradh 5335af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 533601e04c3fSmrg alu.op = ALU_OP3_MULADD; 533701e04c3fSmrg alu.is_op3 = 1; 533801e04c3fSmrg 533901e04c3fSmrg alu.dst.chan = 0; 53403464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 53413464ebd5Sriastradh alu.dst.write = 1; 534201e04c3fSmrg 534301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 534401e04c3fSmrg 534501e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 534601e04c3fSmrg alu.src[1].chan = 0; 534701e04c3fSmrg alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI); 534801e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_0_5; 534901e04c3fSmrg alu.src[2].chan = 0; 53503464ebd5Sriastradh alu.last = 1; 5351af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 53523464ebd5Sriastradh if (r) 53533464ebd5Sriastradh return r; 535401e04c3fSmrg 5355af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 535601e04c3fSmrg alu.op = ALU_OP1_FRACT; 535701e04c3fSmrg 535801e04c3fSmrg alu.dst.chan = 0; 53593464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 53603464ebd5Sriastradh alu.dst.write = 1; 536101e04c3fSmrg 536201e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 536301e04c3fSmrg alu.src[0].chan = 0; 53643464ebd5Sriastradh alu.last = 1; 5365af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 53663464ebd5Sriastradh if (r) 53673464ebd5Sriastradh return r; 536801e04c3fSmrg 5369af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 537001e04c3fSmrg alu.op = ALU_OP3_MULADD; 537101e04c3fSmrg alu.is_op3 = 1; 537201e04c3fSmrg 537301e04c3fSmrg alu.dst.chan = 0; 53743464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 53753464ebd5Sriastradh alu.dst.write = 1; 537601e04c3fSmrg 537701e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 
537801e04c3fSmrg alu.src[0].chan = 0; 537901e04c3fSmrg 538001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 538101e04c3fSmrg alu.src[1].chan = 0; 538201e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 538301e04c3fSmrg alu.src[2].chan = 0; 538401e04c3fSmrg 538501e04c3fSmrg if (ctx->bc->chip_class == R600) { 538601e04c3fSmrg alu.src[1].value = u_bitcast_f2u(2.0f * M_PI); 538701e04c3fSmrg alu.src[2].value = u_bitcast_f2u(-M_PI); 538801e04c3fSmrg } else { 538901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1; 539001e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_0_5; 539101e04c3fSmrg alu.src[2].neg = 1; 539201e04c3fSmrg } 539301e04c3fSmrg 53943464ebd5Sriastradh alu.last = 1; 5395af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 53963464ebd5Sriastradh if (r) 53973464ebd5Sriastradh return r; 539801e04c3fSmrg return 0; 53993464ebd5Sriastradh} 54003464ebd5Sriastradh 540101e04c3fSmrgstatic int cayman_trig(struct r600_shader_ctx *ctx) 5402af69d88dSmrg{ 5403af69d88dSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5404af69d88dSmrg struct r600_bytecode_alu alu; 540501e04c3fSmrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 540601e04c3fSmrg int i, r; 5407af69d88dSmrg 540801e04c3fSmrg r = tgsi_setup_trig(ctx); 540901e04c3fSmrg if (r) 541001e04c3fSmrg return r; 5411af69d88dSmrg 5412af69d88dSmrg 541301e04c3fSmrg for (i = 0; i < last_slot; i++) { 541401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 541501e04c3fSmrg alu.op = ctx->inst_info->op; 541601e04c3fSmrg alu.dst.chan = i; 5417af69d88dSmrg 541801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 541901e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5420af69d88dSmrg 542101e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 542201e04c3fSmrg alu.src[0].chan = 0; 542301e04c3fSmrg if (i == last_slot - 1) 5424af69d88dSmrg alu.last = 1; 542501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 542601e04c3fSmrg if (r) 542701e04c3fSmrg return r; 542801e04c3fSmrg } 542901e04c3fSmrg return 0; 543001e04c3fSmrg} 5431af69d88dSmrg 543201e04c3fSmrgstatic int tgsi_trig(struct r600_shader_ctx *ctx) 543301e04c3fSmrg{ 543401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 543501e04c3fSmrg struct r600_bytecode_alu alu; 543601e04c3fSmrg int i, r; 543701e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5438af69d88dSmrg 543901e04c3fSmrg r = tgsi_setup_trig(ctx); 544001e04c3fSmrg if (r) 544101e04c3fSmrg return r; 5442af69d88dSmrg 544301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 544401e04c3fSmrg alu.op = ctx->inst_info->op; 544501e04c3fSmrg alu.dst.chan = 0; 544601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 544701e04c3fSmrg alu.dst.write = 1; 5448af69d88dSmrg 544901e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 545001e04c3fSmrg alu.src[0].chan = 0; 545101e04c3fSmrg alu.last = 1; 545201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 545301e04c3fSmrg if (r) 545401e04c3fSmrg return r; 5455af69d88dSmrg 545601e04c3fSmrg /* replicate result */ 545701e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 545801e04c3fSmrg if 
(!(inst->Dst[0].Register.WriteMask & (1 << i))) 545901e04c3fSmrg continue; 5460af69d88dSmrg 546101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 546201e04c3fSmrg alu.op = ALU_OP1_MOV; 5463af69d88dSmrg 546401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 546501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 546601e04c3fSmrg if (i == lasti) 546701e04c3fSmrg alu.last = 1; 546801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 546901e04c3fSmrg if (r) 547001e04c3fSmrg return r; 547101e04c3fSmrg } 547201e04c3fSmrg return 0; 547301e04c3fSmrg} 5474af69d88dSmrg 547501e04c3fSmrgstatic int tgsi_kill(struct r600_shader_ctx *ctx) 547601e04c3fSmrg{ 547701e04c3fSmrg const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 547801e04c3fSmrg struct r600_bytecode_alu alu; 547901e04c3fSmrg int i, r; 5480af69d88dSmrg 548101e04c3fSmrg for (i = 0; i < 4; i++) { 548201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 548301e04c3fSmrg alu.op = ctx->inst_info->op; 5484af69d88dSmrg 548501e04c3fSmrg alu.dst.chan = i; 5486af69d88dSmrg 548701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 5488af69d88dSmrg 548901e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 549001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1; 549101e04c3fSmrg alu.src[1].neg = 1; 549201e04c3fSmrg } else { 5493af69d88dSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 549401e04c3fSmrg } 549501e04c3fSmrg if (i == 3) { 5496af69d88dSmrg alu.last = 1; 549701e04c3fSmrg } 549801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 549901e04c3fSmrg if (r) 550001e04c3fSmrg return r; 550101e04c3fSmrg } 5502af69d88dSmrg 550301e04c3fSmrg /* kill must be last in ALU */ 550401e04c3fSmrg ctx->bc->force_add_cf = 1; 550501e04c3fSmrg ctx->shader->uses_kill = TRUE; 550601e04c3fSmrg return 0; 550701e04c3fSmrg} 5508af69d88dSmrg 550901e04c3fSmrgstatic int tgsi_lit(struct r600_shader_ctx *ctx) 551001e04c3fSmrg{ 551101e04c3fSmrg struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 551201e04c3fSmrg struct r600_bytecode_alu alu; 551301e04c3fSmrg int r; 5514af69d88dSmrg 551501e04c3fSmrg /* tmp.x = max(src.y, 0.0) */ 551601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 551701e04c3fSmrg alu.op = ALU_OP2_MAX; 551801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 551901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 552001e04c3fSmrg alu.src[1].chan = 1; 5521af69d88dSmrg 552201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 552301e04c3fSmrg alu.dst.chan = 0; 552401e04c3fSmrg alu.dst.write = 1; 5525af69d88dSmrg 552601e04c3fSmrg alu.last = 1; 552701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 552801e04c3fSmrg if (r) 552901e04c3fSmrg return r; 553001e04c3fSmrg 553101e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << 2)) 553201e04c3fSmrg { 553301e04c3fSmrg int chan; 553401e04c3fSmrg int sel; 553501e04c3fSmrg unsigned i; 5536af69d88dSmrg 5537af69d88dSmrg if (ctx->bc->chip_class == CAYMAN) { 553801e04c3fSmrg for (i = 0; i < 3; i++) { 553901e04c3fSmrg /* tmp.z = log(tmp.x) */ 5540af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 554101e04c3fSmrg alu.op = ALU_OP1_LOG_CLAMPED; 554201e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 5543af69d88dSmrg alu.src[0].chan = 0; 554401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 554501e04c3fSmrg alu.dst.chan = i; 554601e04c3fSmrg if (i == 2) { 554701e04c3fSmrg alu.dst.write = 1; 5548af69d88dSmrg alu.last = 1; 554901e04c3fSmrg } else 555001e04c3fSmrg alu.dst.write = 0; 555101e04c3fSmrg 555201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 555301e04c3fSmrg if (r) 5554af69d88dSmrg return r; 5555af69d88dSmrg } 555601e04c3fSmrg } else { 555701e04c3fSmrg /* tmp.z = log(tmp.x) */ 5558af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 555901e04c3fSmrg alu.op = ALU_OP1_LOG_CLAMPED; 556001e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 5561af69d88dSmrg alu.src[0].chan = 0; 556201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 556301e04c3fSmrg 
alu.dst.chan = 2; 5564af69d88dSmrg alu.dst.write = 1; 5565af69d88dSmrg alu.last = 1; 5566af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5567af69d88dSmrg if (r) 5568af69d88dSmrg return r; 5569af69d88dSmrg } 5570af69d88dSmrg 557101e04c3fSmrg chan = alu.dst.chan; 557201e04c3fSmrg sel = alu.dst.sel; 5573af69d88dSmrg 557401e04c3fSmrg /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 5575af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 557601e04c3fSmrg alu.op = ALU_OP3_MUL_LIT; 557701e04c3fSmrg alu.src[0].sel = sel; 557801e04c3fSmrg alu.src[0].chan = chan; 557901e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 558001e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 558101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 558201e04c3fSmrg alu.dst.chan = 0; 5583af69d88dSmrg alu.dst.write = 1; 558401e04c3fSmrg alu.is_op3 = 1; 5585af69d88dSmrg alu.last = 1; 558601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 558701e04c3fSmrg if (r) 5588af69d88dSmrg return r; 5589af69d88dSmrg 5590af69d88dSmrg if (ctx->bc->chip_class == CAYMAN) { 559101e04c3fSmrg for (i = 0; i < 3; i++) { 559201e04c3fSmrg /* dst.z = exp(tmp.x) */ 5593af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 559401e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 559501e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 5596af69d88dSmrg alu.src[0].chan = 0; 559701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 559801e04c3fSmrg if (i == 2) { 559901e04c3fSmrg alu.dst.write = 1; 560001e04c3fSmrg alu.last = 1; 560101e04c3fSmrg } else 560201e04c3fSmrg alu.dst.write = 0; 560301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 560401e04c3fSmrg if (r) 5605af69d88dSmrg return r; 5606af69d88dSmrg } 5607af69d88dSmrg } else { 560801e04c3fSmrg /* dst.z = exp(tmp.x) */ 5609af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 561001e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 561101e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 5612af69d88dSmrg alu.src[0].chan = 0; 561301e04c3fSmrg 
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 5614af69d88dSmrg alu.last = 1; 561501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 561601e04c3fSmrg if (r) 5617af69d88dSmrg return r; 5618af69d88dSmrg } 561901e04c3fSmrg } 5620af69d88dSmrg 562101e04c3fSmrg /* dst.x, <- 1.0 */ 562201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 562301e04c3fSmrg alu.op = ALU_OP1_MOV; 562401e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 562501e04c3fSmrg alu.src[0].chan = 0; 562601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 562701e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 562801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 562901e04c3fSmrg if (r) 563001e04c3fSmrg return r; 5631af69d88dSmrg 563201e04c3fSmrg /* dst.y = max(src.x, 0.0) */ 563301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 563401e04c3fSmrg alu.op = ALU_OP2_MAX; 563501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 563601e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 563701e04c3fSmrg alu.src[1].chan = 0; 563801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 563901e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 564001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 564101e04c3fSmrg if (r) 564201e04c3fSmrg return r; 5643af69d88dSmrg 564401e04c3fSmrg /* dst.w, <- 1.0 */ 564501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 564601e04c3fSmrg alu.op = ALU_OP1_MOV; 564701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; 564801e04c3fSmrg alu.src[0].chan = 0; 564901e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 565001e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 565101e04c3fSmrg alu.last = 1; 565201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 565301e04c3fSmrg if (r) 565401e04c3fSmrg return r; 5655af69d88dSmrg 565601e04c3fSmrg return 0; 565701e04c3fSmrg} 5658af69d88dSmrg 565901e04c3fSmrgstatic int tgsi_rsq(struct r600_shader_ctx *ctx) 566001e04c3fSmrg{ 
566101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 566201e04c3fSmrg struct r600_bytecode_alu alu; 566301e04c3fSmrg int i, r; 5664af69d88dSmrg 566501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5666af69d88dSmrg 566701e04c3fSmrg alu.op = ALU_OP1_RECIPSQRT_IEEE; 5668af69d88dSmrg 566901e04c3fSmrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 567001e04c3fSmrg r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 567101e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[i]); 567201e04c3fSmrg } 567301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 567401e04c3fSmrg alu.dst.write = 1; 567501e04c3fSmrg alu.last = 1; 567601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 567701e04c3fSmrg if (r) 567801e04c3fSmrg return r; 567901e04c3fSmrg /* replicate result */ 568001e04c3fSmrg return tgsi_helper_tempx_replicate(ctx); 568101e04c3fSmrg} 5682af69d88dSmrg 568301e04c3fSmrgstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 568401e04c3fSmrg{ 568501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 568601e04c3fSmrg struct r600_bytecode_alu alu; 568701e04c3fSmrg int i, r; 5688af69d88dSmrg 568901e04c3fSmrg for (i = 0; i < 4; i++) { 5690af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 569101e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 569201e04c3fSmrg alu.op = ALU_OP1_MOV; 569301e04c3fSmrg alu.dst.chan = i; 569401e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 569501e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 569601e04c3fSmrg if (i == 3) 569701e04c3fSmrg alu.last = 1; 569801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 569901e04c3fSmrg if (r) 5700af69d88dSmrg return r; 570101e04c3fSmrg } 570201e04c3fSmrg return 0; 570301e04c3fSmrg} 5704af69d88dSmrg 570501e04c3fSmrgstatic int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 570601e04c3fSmrg{ 570701e04c3fSmrg struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 570801e04c3fSmrg struct r600_bytecode_alu alu; 570901e04c3fSmrg int i, r; 5710af69d88dSmrg 571101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 571201e04c3fSmrg alu.op = ctx->inst_info->op; 571301e04c3fSmrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 571401e04c3fSmrg r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 571501e04c3fSmrg } 571601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 571701e04c3fSmrg alu.dst.write = 1; 571801e04c3fSmrg alu.last = 1; 571901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 572001e04c3fSmrg if (r) 572101e04c3fSmrg return r; 572201e04c3fSmrg /* replicate result */ 572301e04c3fSmrg return tgsi_helper_tempx_replicate(ctx); 572401e04c3fSmrg} 5725af69d88dSmrg 572601e04c3fSmrgstatic int cayman_pow(struct r600_shader_ctx *ctx) 572701e04c3fSmrg{ 572801e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 572901e04c3fSmrg int i, r; 573001e04c3fSmrg struct r600_bytecode_alu alu; 573101e04c3fSmrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 5732af69d88dSmrg 573301e04c3fSmrg for (i = 0; i < 3; i++) { 5734af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 573501e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 573601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 573701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 573801e04c3fSmrg alu.dst.chan = i; 5739af69d88dSmrg alu.dst.write = 1; 574001e04c3fSmrg if (i == 2) 574101e04c3fSmrg alu.last = 1; 574201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 574301e04c3fSmrg if (r) 5744af69d88dSmrg return r; 574501e04c3fSmrg } 5746af69d88dSmrg 574701e04c3fSmrg /* b * LOG2(a) */ 574801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 574901e04c3fSmrg alu.op = ALU_OP2_MUL; 575001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 575101e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 575201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 575301e04c3fSmrg alu.dst.write = 1; 575401e04c3fSmrg alu.last = 1; 575501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 575601e04c3fSmrg if (r) 575701e04c3fSmrg return r; 5758af69d88dSmrg 575901e04c3fSmrg for (i = 0; i < last_slot; i++) { 576001e04c3fSmrg /* POW(a,b) = EXP2(b * LOG2(a))*/ 576101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 576201e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 576301e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 5764af69d88dSmrg 576501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 576601e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 576701e04c3fSmrg if (i == last_slot - 1) 576801e04c3fSmrg alu.last = 1; 576901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 577001e04c3fSmrg if (r) 577101e04c3fSmrg return r; 577201e04c3fSmrg } 577301e04c3fSmrg return 0; 577401e04c3fSmrg} 5775af69d88dSmrg 577601e04c3fSmrgstatic int tgsi_pow(struct r600_shader_ctx *ctx) 577701e04c3fSmrg{ 577801e04c3fSmrg struct r600_bytecode_alu alu; 577901e04c3fSmrg int r; 5780af69d88dSmrg 578101e04c3fSmrg /* LOG2(a) */ 578201e04c3fSmrg memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 578301e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 578401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 578501e04c3fSmrg alu.dst.sel = ctx->temp_reg; 578601e04c3fSmrg alu.dst.write = 1; 578701e04c3fSmrg alu.last = 1; 578801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 578901e04c3fSmrg if (r) 579001e04c3fSmrg return r; 579101e04c3fSmrg /* b * LOG2(a) */ 579201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 579301e04c3fSmrg alu.op = ALU_OP2_MUL; 579401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 579501e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 579601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 579701e04c3fSmrg alu.dst.write = 1; 579801e04c3fSmrg alu.last = 1; 579901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 580001e04c3fSmrg if (r) 580101e04c3fSmrg return r; 580201e04c3fSmrg /* POW(a,b) = EXP2(b * LOG2(a))*/ 580301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 580401e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 580501e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 580601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 580701e04c3fSmrg alu.dst.write = 1; 580801e04c3fSmrg alu.last = 1; 580901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 581001e04c3fSmrg if (r) 581101e04c3fSmrg return r; 581201e04c3fSmrg return tgsi_helper_tempx_replicate(ctx); 581301e04c3fSmrg} 5814af69d88dSmrg 581501e04c3fSmrgstatic int emit_mul_int_op(struct r600_bytecode *bc, 581601e04c3fSmrg struct r600_bytecode_alu *alu_src) 581701e04c3fSmrg{ 581801e04c3fSmrg struct r600_bytecode_alu alu; 581901e04c3fSmrg int i, r; 582001e04c3fSmrg alu = *alu_src; 582101e04c3fSmrg if (bc->chip_class == CAYMAN) { 582201e04c3fSmrg for (i = 0; i < 4; i++) { 582301e04c3fSmrg alu.dst.chan = i; 582401e04c3fSmrg alu.dst.write = (i == alu_src->dst.chan); 582501e04c3fSmrg alu.last = (i == 3); 5826af69d88dSmrg 582701e04c3fSmrg r = r600_bytecode_add_alu(bc, &alu); 582801e04c3fSmrg if (r) 5829af69d88dSmrg return r; 5830af69d88dSmrg } 583101e04c3fSmrg } else { 
583201e04c3fSmrg alu.last = 1; 583301e04c3fSmrg r = r600_bytecode_add_alu(bc, &alu); 583401e04c3fSmrg if (r) 583501e04c3fSmrg return r; 583601e04c3fSmrg } 583701e04c3fSmrg return 0; 583801e04c3fSmrg} 5839af69d88dSmrg 584001e04c3fSmrgstatic int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 584101e04c3fSmrg{ 584201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 584301e04c3fSmrg struct r600_bytecode_alu alu; 584401e04c3fSmrg int i, r, j; 584501e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 584601e04c3fSmrg int lasti = tgsi_last_instruction(write_mask); 584701e04c3fSmrg int tmp0 = ctx->temp_reg; 584801e04c3fSmrg int tmp1 = r600_get_temp(ctx); 584901e04c3fSmrg int tmp2 = r600_get_temp(ctx); 585001e04c3fSmrg int tmp3 = r600_get_temp(ctx); 585101e04c3fSmrg int tmp4 = 0; 5852af69d88dSmrg 585301e04c3fSmrg /* Use additional temp if dst register and src register are the same */ 585401e04c3fSmrg if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index || 585501e04c3fSmrg inst->Src[1].Register.Index == inst->Dst[0].Register.Index) { 585601e04c3fSmrg tmp4 = r600_get_temp(ctx); 585701e04c3fSmrg } 5858af69d88dSmrg 585901e04c3fSmrg /* Unsigned path: 586001e04c3fSmrg * 586101e04c3fSmrg * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 586201e04c3fSmrg * 586301e04c3fSmrg * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 586401e04c3fSmrg * 2. tmp0.z = lo (tmp0.x * src2) 586501e04c3fSmrg * 3. tmp0.w = -tmp0.z 586601e04c3fSmrg * 4. tmp0.y = hi (tmp0.x * src2) 586701e04c3fSmrg * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 586801e04c3fSmrg * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 586901e04c3fSmrg * 7. tmp1.x = tmp0.x - tmp0.w 587001e04c3fSmrg * 8. tmp1.y = tmp0.x + tmp0.w 587101e04c3fSmrg * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 587201e04c3fSmrg * 10. tmp0.z = hi(tmp0.x * src1) = q 587301e04c3fSmrg * 11. 
tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 587401e04c3fSmrg * 587501e04c3fSmrg * 12. tmp0.w = src1 - tmp0.y = r 587601e04c3fSmrg * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 587701e04c3fSmrg * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 587801e04c3fSmrg * 587901e04c3fSmrg * if DIV 588001e04c3fSmrg * 588101e04c3fSmrg * 15. tmp1.z = tmp0.z + 1 = q + 1 588201e04c3fSmrg * 16. tmp1.w = tmp0.z - 1 = q - 1 588301e04c3fSmrg * 588401e04c3fSmrg * else MOD 588501e04c3fSmrg * 588601e04c3fSmrg * 15. tmp1.z = tmp0.w - src2 = r - src2 588701e04c3fSmrg * 16. tmp1.w = tmp0.w + src2 = r + src2 588801e04c3fSmrg * 588901e04c3fSmrg * endif 589001e04c3fSmrg * 589101e04c3fSmrg * 17. tmp1.x = tmp1.x & tmp1.y 589201e04c3fSmrg * 589301e04c3fSmrg * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 589401e04c3fSmrg * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 589501e04c3fSmrg * 589601e04c3fSmrg * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 589701e04c3fSmrg * 20. dst = src2==0 ? MAX_UINT : tmp0.z 589801e04c3fSmrg * 589901e04c3fSmrg * Signed path: 590001e04c3fSmrg * 590101e04c3fSmrg * Same as unsigned, using abs values of the operands, 590201e04c3fSmrg * and fixing the sign of the result in the end. 
590301e04c3fSmrg */ 5904af69d88dSmrg 590501e04c3fSmrg for (i = 0; i < 4; i++) { 590601e04c3fSmrg if (!(write_mask & (1<<i))) 590701e04c3fSmrg continue; 5908af69d88dSmrg 590901e04c3fSmrg if (signed_op) { 591001e04c3fSmrg 591101e04c3fSmrg /* tmp2.x = -src0 */ 5912af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 591301e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 5914af69d88dSmrg 591501e04c3fSmrg alu.dst.sel = tmp2; 591601e04c3fSmrg alu.dst.chan = 0; 5917af69d88dSmrg alu.dst.write = 1; 5918af69d88dSmrg 591901e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 592001e04c3fSmrg 592101e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5922af69d88dSmrg 5923af69d88dSmrg alu.last = 1; 5924af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5925af69d88dSmrg return r; 5926af69d88dSmrg 592701e04c3fSmrg /* tmp2.y = -src1 */ 592801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 592901e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 5930af69d88dSmrg 593101e04c3fSmrg alu.dst.sel = tmp2; 593201e04c3fSmrg alu.dst.chan = 1; 593301e04c3fSmrg alu.dst.write = 1; 5934af69d88dSmrg 593501e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 5936af69d88dSmrg 593701e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5938af69d88dSmrg 593901e04c3fSmrg alu.last = 1; 594001e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 594101e04c3fSmrg return r; 5942af69d88dSmrg 594301e04c3fSmrg /* tmp2.z sign bit is set if src0 and src2 signs are different */ 594401e04c3fSmrg /* it will be a sign of the quotient */ 594501e04c3fSmrg if (!mod) { 5946af69d88dSmrg 594701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 594801e04c3fSmrg alu.op = ALU_OP2_XOR_INT; 5949af69d88dSmrg 595001e04c3fSmrg alu.dst.sel = tmp2; 595101e04c3fSmrg alu.dst.chan = 2; 595201e04c3fSmrg alu.dst.write = 1; 5953af69d88dSmrg 595401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 595501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5956af69d88dSmrg 595701e04c3fSmrg alu.last = 
1; 595801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 595901e04c3fSmrg return r; 596001e04c3fSmrg } 5961af69d88dSmrg 596201e04c3fSmrg /* tmp2.x = |src0| */ 596301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 596401e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 596501e04c3fSmrg alu.is_op3 = 1; 5966af69d88dSmrg 596701e04c3fSmrg alu.dst.sel = tmp2; 596801e04c3fSmrg alu.dst.chan = 0; 596901e04c3fSmrg alu.dst.write = 1; 5970af69d88dSmrg 597101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 597201e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 597301e04c3fSmrg alu.src[2].sel = tmp2; 597401e04c3fSmrg alu.src[2].chan = 0; 5975af69d88dSmrg 597601e04c3fSmrg alu.last = 1; 597701e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 597801e04c3fSmrg return r; 5979af69d88dSmrg 598001e04c3fSmrg /* tmp2.y = |src1| */ 5981af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 598201e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 598301e04c3fSmrg alu.is_op3 = 1; 5984af69d88dSmrg 598501e04c3fSmrg alu.dst.sel = tmp2; 598601e04c3fSmrg alu.dst.chan = 1; 5987af69d88dSmrg alu.dst.write = 1; 5988af69d88dSmrg 598901e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 599001e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 599101e04c3fSmrg alu.src[2].sel = tmp2; 599201e04c3fSmrg alu.src[2].chan = 1; 5993af69d88dSmrg 5994af69d88dSmrg alu.last = 1; 5995af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5996af69d88dSmrg return r; 5997af69d88dSmrg 599801e04c3fSmrg } 599901e04c3fSmrg 600001e04c3fSmrg /* 1. 
tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 600101e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 600201e04c3fSmrg /* tmp3.x = u2f(src2) */ 6003af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 600401e04c3fSmrg alu.op = ALU_OP1_UINT_TO_FLT; 6005af69d88dSmrg 600601e04c3fSmrg alu.dst.sel = tmp3; 600701e04c3fSmrg alu.dst.chan = 0; 6008af69d88dSmrg alu.dst.write = 1; 6009af69d88dSmrg 6010af69d88dSmrg if (signed_op) { 601101e04c3fSmrg alu.src[0].sel = tmp2; 601201e04c3fSmrg alu.src[0].chan = 1; 6013af69d88dSmrg } else { 601401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6015af69d88dSmrg } 6016af69d88dSmrg 6017af69d88dSmrg alu.last = 1; 6018af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6019af69d88dSmrg return r; 6020af69d88dSmrg 602101e04c3fSmrg /* tmp0.x = recip(tmp3.x) */ 602201e04c3fSmrg for (j = 0 ; j < 3; j++) { 602301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 602401e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 6025af69d88dSmrg 602601e04c3fSmrg alu.dst.sel = tmp0; 602701e04c3fSmrg alu.dst.chan = j; 602801e04c3fSmrg alu.dst.write = (j == 0); 6029af69d88dSmrg 603001e04c3fSmrg alu.src[0].sel = tmp3; 603101e04c3fSmrg alu.src[0].chan = 0; 603201e04c3fSmrg 603301e04c3fSmrg if (j == 2) 603401e04c3fSmrg alu.last = 1; 603501e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 603601e04c3fSmrg return r; 603701e04c3fSmrg } 603801e04c3fSmrg 603901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 604001e04c3fSmrg alu.op = ALU_OP2_MUL; 6041af69d88dSmrg 6042af69d88dSmrg alu.src[0].sel = tmp0; 604301e04c3fSmrg alu.src[0].chan = 0; 604401e04c3fSmrg 604501e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 604601e04c3fSmrg alu.src[1].value = 0x4f800000; 6047af69d88dSmrg 604801e04c3fSmrg alu.dst.sel = tmp3; 604901e04c3fSmrg alu.dst.write = 1; 6050af69d88dSmrg alu.last = 1; 605101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 605201e04c3fSmrg if (r) 6053af69d88dSmrg return r; 
6054af69d88dSmrg 6055af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 605601e04c3fSmrg alu.op = ALU_OP1_FLT_TO_UINT; 6057af69d88dSmrg 605801e04c3fSmrg alu.dst.sel = tmp0; 605901e04c3fSmrg alu.dst.chan = 0; 6060af69d88dSmrg alu.dst.write = 1; 6061af69d88dSmrg 606201e04c3fSmrg alu.src[0].sel = tmp3; 606301e04c3fSmrg alu.src[0].chan = 0; 6064af69d88dSmrg 6065af69d88dSmrg alu.last = 1; 6066af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6067af69d88dSmrg return r; 6068af69d88dSmrg 606901e04c3fSmrg } else { 607001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 607101e04c3fSmrg alu.op = ALU_OP1_RECIP_UINT; 607201e04c3fSmrg 607301e04c3fSmrg alu.dst.sel = tmp0; 607401e04c3fSmrg alu.dst.chan = 0; 607501e04c3fSmrg alu.dst.write = 1; 607601e04c3fSmrg 607701e04c3fSmrg if (signed_op) { 607801e04c3fSmrg alu.src[0].sel = tmp2; 607901e04c3fSmrg alu.src[0].chan = 1; 608001e04c3fSmrg } else { 608101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 608201e04c3fSmrg } 608301e04c3fSmrg 608401e04c3fSmrg alu.last = 1; 608501e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 608601e04c3fSmrg return r; 6087af69d88dSmrg } 6088af69d88dSmrg 608901e04c3fSmrg /* 2. tmp0.z = lo (tmp0.x * src2) */ 6090af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 609101e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 6092af69d88dSmrg 609301e04c3fSmrg alu.dst.sel = tmp0; 609401e04c3fSmrg alu.dst.chan = 2; 6095af69d88dSmrg alu.dst.write = 1; 6096af69d88dSmrg 609701e04c3fSmrg alu.src[0].sel = tmp0; 6098af69d88dSmrg alu.src[0].chan = 0; 609901e04c3fSmrg if (signed_op) { 610001e04c3fSmrg alu.src[1].sel = tmp2; 610101e04c3fSmrg alu.src[1].chan = 1; 610201e04c3fSmrg } else { 610301e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 610401e04c3fSmrg } 6105af69d88dSmrg 610601e04c3fSmrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6107af69d88dSmrg return r; 6108af69d88dSmrg 610901e04c3fSmrg /* 3. 
tmp0.w = -tmp0.z */ 6110af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 611101e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 6112af69d88dSmrg 6113af69d88dSmrg alu.dst.sel = tmp0; 611401e04c3fSmrg alu.dst.chan = 3; 6115af69d88dSmrg alu.dst.write = 1; 6116af69d88dSmrg 611701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 6118af69d88dSmrg alu.src[1].sel = tmp0; 611901e04c3fSmrg alu.src[1].chan = 2; 6120af69d88dSmrg 6121af69d88dSmrg alu.last = 1; 6122af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6123af69d88dSmrg return r; 6124af69d88dSmrg 612501e04c3fSmrg /* 4. tmp0.y = hi (tmp0.x * src2) */ 6126af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 612701e04c3fSmrg alu.op = ALU_OP2_MULHI_UINT; 612801e04c3fSmrg 612901e04c3fSmrg alu.dst.sel = tmp0; 613001e04c3fSmrg alu.dst.chan = 1; 613101e04c3fSmrg alu.dst.write = 1; 613201e04c3fSmrg 613301e04c3fSmrg alu.src[0].sel = tmp0; 613401e04c3fSmrg alu.src[0].chan = 0; 6135af69d88dSmrg 6136af69d88dSmrg if (signed_op) { 613701e04c3fSmrg alu.src[1].sel = tmp2; 613801e04c3fSmrg alu.src[1].chan = 1; 6139af69d88dSmrg } else { 614001e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6141af69d88dSmrg } 6142af69d88dSmrg 614301e04c3fSmrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 614401e04c3fSmrg return r; 614501e04c3fSmrg 614601e04c3fSmrg /* 5. tmp0.z = (tmp0.y == 0 ? 
tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 614701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 614801e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 614901e04c3fSmrg alu.is_op3 = 1; 615001e04c3fSmrg 615101e04c3fSmrg alu.dst.sel = tmp0; 615201e04c3fSmrg alu.dst.chan = 2; 615301e04c3fSmrg alu.dst.write = 1; 615401e04c3fSmrg 615501e04c3fSmrg alu.src[0].sel = tmp0; 6156af69d88dSmrg alu.src[0].chan = 1; 615701e04c3fSmrg alu.src[1].sel = tmp0; 6158af69d88dSmrg alu.src[1].chan = 3; 6159af69d88dSmrg alu.src[2].sel = tmp0; 6160af69d88dSmrg alu.src[2].chan = 2; 6161af69d88dSmrg 6162af69d88dSmrg alu.last = 1; 6163af69d88dSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6164af69d88dSmrg return r; 6165af69d88dSmrg 616601e04c3fSmrg /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 616701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 616801e04c3fSmrg alu.op = ALU_OP2_MULHI_UINT; 6169af69d88dSmrg 617001e04c3fSmrg alu.dst.sel = tmp0; 617101e04c3fSmrg alu.dst.chan = 3; 617201e04c3fSmrg alu.dst.write = 1; 6173af69d88dSmrg 617401e04c3fSmrg alu.src[0].sel = tmp0; 617501e04c3fSmrg alu.src[0].chan = 2; 6176af69d88dSmrg 617701e04c3fSmrg alu.src[1].sel = tmp0; 617801e04c3fSmrg alu.src[1].chan = 0; 6179af69d88dSmrg 618001e04c3fSmrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 618101e04c3fSmrg return r; 6182af69d88dSmrg 618301e04c3fSmrg /* 7. tmp1.x = tmp0.x - tmp0.w */ 618401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 618501e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 6186af69d88dSmrg 618701e04c3fSmrg alu.dst.sel = tmp1; 618801e04c3fSmrg alu.dst.chan = 0; 618901e04c3fSmrg alu.dst.write = 1; 6190af69d88dSmrg 619101e04c3fSmrg alu.src[0].sel = tmp0; 619201e04c3fSmrg alu.src[0].chan = 0; 619301e04c3fSmrg alu.src[1].sel = tmp0; 619401e04c3fSmrg alu.src[1].chan = 3; 6195af69d88dSmrg 619601e04c3fSmrg alu.last = 1; 619701e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 619801e04c3fSmrg return r; 6199af69d88dSmrg 620001e04c3fSmrg /* 8. 
tmp1.y = tmp0.x + tmp0.w */ 620101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 620201e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 6203af69d88dSmrg 620401e04c3fSmrg alu.dst.sel = tmp1; 620501e04c3fSmrg alu.dst.chan = 1; 620601e04c3fSmrg alu.dst.write = 1; 6207af69d88dSmrg 620801e04c3fSmrg alu.src[0].sel = tmp0; 620901e04c3fSmrg alu.src[0].chan = 0; 621001e04c3fSmrg alu.src[1].sel = tmp0; 621101e04c3fSmrg alu.src[1].chan = 3; 6212af69d88dSmrg 621301e04c3fSmrg alu.last = 1; 621401e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 621501e04c3fSmrg return r; 6216af69d88dSmrg 621701e04c3fSmrg /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 621801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 621901e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 622001e04c3fSmrg alu.is_op3 = 1; 6221af69d88dSmrg 622201e04c3fSmrg alu.dst.sel = tmp0; 622301e04c3fSmrg alu.dst.chan = 0; 622401e04c3fSmrg alu.dst.write = 1; 6225af69d88dSmrg 622601e04c3fSmrg alu.src[0].sel = tmp0; 622701e04c3fSmrg alu.src[0].chan = 1; 622801e04c3fSmrg alu.src[1].sel = tmp1; 622901e04c3fSmrg alu.src[1].chan = 1; 623001e04c3fSmrg alu.src[2].sel = tmp1; 623101e04c3fSmrg alu.src[2].chan = 0; 6232af69d88dSmrg 623301e04c3fSmrg alu.last = 1; 623401e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 623501e04c3fSmrg return r; 6236af69d88dSmrg 623701e04c3fSmrg /* 10. 
tmp0.z = hi(tmp0.x * src1) = q */ 623801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 623901e04c3fSmrg alu.op = ALU_OP2_MULHI_UINT; 6240af69d88dSmrg 624101e04c3fSmrg alu.dst.sel = tmp0; 624201e04c3fSmrg alu.dst.chan = 2; 624301e04c3fSmrg alu.dst.write = 1; 6244af69d88dSmrg 624501e04c3fSmrg alu.src[0].sel = tmp0; 624601e04c3fSmrg alu.src[0].chan = 0; 6247af69d88dSmrg 624801e04c3fSmrg if (signed_op) { 624901e04c3fSmrg alu.src[1].sel = tmp2; 625001e04c3fSmrg alu.src[1].chan = 0; 625101e04c3fSmrg } else { 625201e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 625301e04c3fSmrg } 6254af69d88dSmrg 625501e04c3fSmrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 625601e04c3fSmrg return r; 6257af69d88dSmrg 625801e04c3fSmrg /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 625901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 626001e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 6261af69d88dSmrg 626201e04c3fSmrg alu.dst.sel = tmp0; 626301e04c3fSmrg alu.dst.chan = 1; 626401e04c3fSmrg alu.dst.write = 1; 6265af69d88dSmrg 626601e04c3fSmrg if (signed_op) { 626701e04c3fSmrg alu.src[0].sel = tmp2; 626801e04c3fSmrg alu.src[0].chan = 1; 626901e04c3fSmrg } else { 627001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 627101e04c3fSmrg } 6272af69d88dSmrg 627301e04c3fSmrg alu.src[1].sel = tmp0; 627401e04c3fSmrg alu.src[1].chan = 2; 6275af69d88dSmrg 627601e04c3fSmrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 627701e04c3fSmrg return r; 6278af69d88dSmrg 627901e04c3fSmrg /* 12. 
tmp0.w = src1 - tmp0.y = r */ 6280af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 628101e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 6282af69d88dSmrg 628301e04c3fSmrg alu.dst.sel = tmp0; 628401e04c3fSmrg alu.dst.chan = 3; 6285af69d88dSmrg alu.dst.write = 1; 6286af69d88dSmrg 628701e04c3fSmrg if (signed_op) { 628801e04c3fSmrg alu.src[0].sel = tmp2; 628901e04c3fSmrg alu.src[0].chan = 0; 629001e04c3fSmrg } else { 629101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 629201e04c3fSmrg } 6293af69d88dSmrg 629401e04c3fSmrg alu.src[1].sel = tmp0; 629501e04c3fSmrg alu.src[1].chan = 1; 6296af69d88dSmrg 629701e04c3fSmrg alu.last = 1; 629801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6299af69d88dSmrg return r; 6300af69d88dSmrg 630101e04c3fSmrg /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */ 6302af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 630301e04c3fSmrg alu.op = ALU_OP2_SETGE_UINT; 6304af69d88dSmrg 630501e04c3fSmrg alu.dst.sel = tmp1; 630601e04c3fSmrg alu.dst.chan = 0; 6307af69d88dSmrg alu.dst.write = 1; 6308af69d88dSmrg 630901e04c3fSmrg alu.src[0].sel = tmp0; 631001e04c3fSmrg alu.src[0].chan = 3; 631101e04c3fSmrg if (signed_op) { 631201e04c3fSmrg alu.src[1].sel = tmp2; 631301e04c3fSmrg alu.src[1].chan = 1; 631401e04c3fSmrg } else { 631501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 631601e04c3fSmrg } 6317af69d88dSmrg 631801e04c3fSmrg alu.last = 1; 631901e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6320af69d88dSmrg return r; 6321af69d88dSmrg 632201e04c3fSmrg /* 14. 
tmp1.y = src1 >= tmp0.y = r >= 0 */ 6323af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 632401e04c3fSmrg alu.op = ALU_OP2_SETGE_UINT; 632501e04c3fSmrg 632601e04c3fSmrg alu.dst.sel = tmp1; 632701e04c3fSmrg alu.dst.chan = 1; 6328af69d88dSmrg alu.dst.write = 1; 6329af69d88dSmrg 633001e04c3fSmrg if (signed_op) { 633101e04c3fSmrg alu.src[0].sel = tmp2; 633201e04c3fSmrg alu.src[0].chan = 0; 633301e04c3fSmrg } else { 633401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 633501e04c3fSmrg } 6336af69d88dSmrg 633701e04c3fSmrg alu.src[1].sel = tmp0; 633801e04c3fSmrg alu.src[1].chan = 1; 6339af69d88dSmrg 634001e04c3fSmrg alu.last = 1; 634101e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6342af69d88dSmrg return r; 6343af69d88dSmrg 634401e04c3fSmrg if (mod) { /* UMOD */ 6345af69d88dSmrg 634601e04c3fSmrg /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 634701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 634801e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 6349af69d88dSmrg 635001e04c3fSmrg alu.dst.sel = tmp1; 635101e04c3fSmrg alu.dst.chan = 2; 635201e04c3fSmrg alu.dst.write = 1; 6353af69d88dSmrg 635401e04c3fSmrg alu.src[0].sel = tmp0; 635501e04c3fSmrg alu.src[0].chan = 3; 6356af69d88dSmrg 635701e04c3fSmrg if (signed_op) { 635801e04c3fSmrg alu.src[1].sel = tmp2; 635901e04c3fSmrg alu.src[1].chan = 1; 636001e04c3fSmrg } else { 636101e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 636201e04c3fSmrg } 6363af69d88dSmrg 6364af69d88dSmrg alu.last = 1; 636501e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 636601e04c3fSmrg return r; 6367af69d88dSmrg 636801e04c3fSmrg /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 636901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 637001e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 6371af69d88dSmrg 637201e04c3fSmrg alu.dst.sel = tmp1; 637301e04c3fSmrg alu.dst.chan = 3; 637401e04c3fSmrg alu.dst.write = 1; 6375af69d88dSmrg 637601e04c3fSmrg alu.src[0].sel = tmp0; 637701e04c3fSmrg alu.src[0].chan = 3; 637801e04c3fSmrg if (signed_op) { 637901e04c3fSmrg alu.src[1].sel = tmp2; 638001e04c3fSmrg alu.src[1].chan = 1; 638101e04c3fSmrg } else { 638201e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 638301e04c3fSmrg } 6384af69d88dSmrg 638501e04c3fSmrg alu.last = 1; 638601e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 638701e04c3fSmrg return r; 6388af69d88dSmrg 638901e04c3fSmrg } else { /* UDIV */ 6390af69d88dSmrg 639101e04c3fSmrg /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 639201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 639301e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 639401e04c3fSmrg 639501e04c3fSmrg alu.dst.sel = tmp1; 639601e04c3fSmrg alu.dst.chan = 2; 639701e04c3fSmrg alu.dst.write = 1; 639801e04c3fSmrg 639901e04c3fSmrg alu.src[0].sel = tmp0; 640001e04c3fSmrg alu.src[0].chan = 2; 640101e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6402af69d88dSmrg 6403af69d88dSmrg alu.last = 1; 640401e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 640501e04c3fSmrg return r; 6406af69d88dSmrg 640701e04c3fSmrg /* 16. 
tmp1.w = tmp0.z - 1 = q - 1 */ 640801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 640901e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 6410af69d88dSmrg 641101e04c3fSmrg alu.dst.sel = tmp1; 641201e04c3fSmrg alu.dst.chan = 3; 641301e04c3fSmrg alu.dst.write = 1; 6414af69d88dSmrg 641501e04c3fSmrg alu.src[0].sel = tmp0; 641601e04c3fSmrg alu.src[0].chan = 2; 641701e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 6418af69d88dSmrg 641901e04c3fSmrg alu.last = 1; 642001e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 642101e04c3fSmrg return r; 642201e04c3fSmrg 642301e04c3fSmrg } 642401e04c3fSmrg 642501e04c3fSmrg /* 17. tmp1.x = tmp1.x & tmp1.y */ 6426af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 642701e04c3fSmrg alu.op = ALU_OP2_AND_INT; 6428af69d88dSmrg 642901e04c3fSmrg alu.dst.sel = tmp1; 643001e04c3fSmrg alu.dst.chan = 0; 643101e04c3fSmrg alu.dst.write = 1; 6432af69d88dSmrg 643301e04c3fSmrg alu.src[0].sel = tmp1; 643401e04c3fSmrg alu.src[0].chan = 0; 643501e04c3fSmrg alu.src[1].sel = tmp1; 643601e04c3fSmrg alu.src[1].chan = 1; 6437af69d88dSmrg 643801e04c3fSmrg alu.last = 1; 643901e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6440af69d88dSmrg return r; 6441af69d88dSmrg 644201e04c3fSmrg /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 644301e04c3fSmrg /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 6444af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 644501e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 6446af69d88dSmrg alu.is_op3 = 1; 6447af69d88dSmrg 644801e04c3fSmrg alu.dst.sel = tmp0; 644901e04c3fSmrg alu.dst.chan = 2; 645001e04c3fSmrg alu.dst.write = 1; 6451af69d88dSmrg 645201e04c3fSmrg alu.src[0].sel = tmp1; 645301e04c3fSmrg alu.src[0].chan = 0; 645401e04c3fSmrg alu.src[1].sel = tmp0; 645501e04c3fSmrg alu.src[1].chan = mod ? 
3 : 2; 645601e04c3fSmrg alu.src[2].sel = tmp1; 645701e04c3fSmrg alu.src[2].chan = 2; 6458af69d88dSmrg 645901e04c3fSmrg alu.last = 1; 646001e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 646101e04c3fSmrg return r; 6462af69d88dSmrg 646301e04c3fSmrg /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 646401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 646501e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 646601e04c3fSmrg alu.is_op3 = 1; 646701e04c3fSmrg 646801e04c3fSmrg if (signed_op) { 646901e04c3fSmrg alu.dst.sel = tmp0; 647001e04c3fSmrg alu.dst.chan = 2; 647101e04c3fSmrg alu.dst.write = 1; 647201e04c3fSmrg } else { 647301e04c3fSmrg if (tmp4 > 0) { 647401e04c3fSmrg alu.dst.sel = tmp4; 647501e04c3fSmrg alu.dst.chan = i; 647601e04c3fSmrg alu.dst.write = 1; 647701e04c3fSmrg } else { 647801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 647901e04c3fSmrg } 648001e04c3fSmrg } 648101e04c3fSmrg 648201e04c3fSmrg alu.src[0].sel = tmp1; 648301e04c3fSmrg alu.src[0].chan = 1; 648401e04c3fSmrg alu.src[1].sel = tmp1; 648501e04c3fSmrg alu.src[1].chan = 3; 648601e04c3fSmrg alu.src[2].sel = tmp0; 648701e04c3fSmrg alu.src[2].chan = 2; 648801e04c3fSmrg 648901e04c3fSmrg alu.last = 1; 649001e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6491af69d88dSmrg return r; 649201e04c3fSmrg 649301e04c3fSmrg if (signed_op) { 649401e04c3fSmrg 649501e04c3fSmrg /* fix the sign of the result */ 649601e04c3fSmrg 649701e04c3fSmrg if (mod) { 649801e04c3fSmrg 649901e04c3fSmrg /* tmp0.x = -tmp0.z */ 650001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 650101e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 650201e04c3fSmrg 650301e04c3fSmrg alu.dst.sel = tmp0; 650401e04c3fSmrg alu.dst.chan = 0; 650501e04c3fSmrg alu.dst.write = 1; 650601e04c3fSmrg 650701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 650801e04c3fSmrg alu.src[1].sel = tmp0; 650901e04c3fSmrg alu.src[1].chan = 2; 651001e04c3fSmrg 651101e04c3fSmrg alu.last = 1; 651201e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, 
&alu))) 651301e04c3fSmrg return r; 651401e04c3fSmrg 651501e04c3fSmrg /* sign of the remainder is the same as the sign of src0 */ 651601e04c3fSmrg /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ 651701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 651801e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 651901e04c3fSmrg alu.is_op3 = 1; 652001e04c3fSmrg 652101e04c3fSmrg if (tmp4 > 0) { 652201e04c3fSmrg alu.dst.sel = tmp4; 652301e04c3fSmrg alu.dst.chan = i; 652401e04c3fSmrg alu.dst.write = 1; 652501e04c3fSmrg } else { 652601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 652701e04c3fSmrg } 652801e04c3fSmrg 652901e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 653001e04c3fSmrg alu.src[1].sel = tmp0; 653101e04c3fSmrg alu.src[1].chan = 2; 653201e04c3fSmrg alu.src[2].sel = tmp0; 653301e04c3fSmrg alu.src[2].chan = 0; 653401e04c3fSmrg 653501e04c3fSmrg alu.last = 1; 653601e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 653701e04c3fSmrg return r; 653801e04c3fSmrg 653901e04c3fSmrg } else { 654001e04c3fSmrg 654101e04c3fSmrg /* tmp0.x = -tmp0.z */ 654201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 654301e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 654401e04c3fSmrg 654501e04c3fSmrg alu.dst.sel = tmp0; 654601e04c3fSmrg alu.dst.chan = 0; 654701e04c3fSmrg alu.dst.write = 1; 654801e04c3fSmrg 654901e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 655001e04c3fSmrg alu.src[1].sel = tmp0; 655101e04c3fSmrg alu.src[1].chan = 2; 655201e04c3fSmrg 655301e04c3fSmrg alu.last = 1; 655401e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 655501e04c3fSmrg return r; 655601e04c3fSmrg 655701e04c3fSmrg /* fix the quotient sign (same as the sign of src0*src1) */ 655801e04c3fSmrg /* tmp0.x = tmp2.z>=0 ? 
tmp0.z : tmp0.x */ 655901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 656001e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 656101e04c3fSmrg alu.is_op3 = 1; 656201e04c3fSmrg 656301e04c3fSmrg if (tmp4 > 0) { 656401e04c3fSmrg alu.dst.sel = tmp4; 656501e04c3fSmrg alu.dst.chan = i; 656601e04c3fSmrg alu.dst.write = 1; 656701e04c3fSmrg } else { 656801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 656901e04c3fSmrg } 657001e04c3fSmrg 657101e04c3fSmrg alu.src[0].sel = tmp2; 657201e04c3fSmrg alu.src[0].chan = 2; 657301e04c3fSmrg alu.src[1].sel = tmp0; 657401e04c3fSmrg alu.src[1].chan = 2; 657501e04c3fSmrg alu.src[2].sel = tmp0; 657601e04c3fSmrg alu.src[2].chan = 0; 657701e04c3fSmrg 657801e04c3fSmrg alu.last = 1; 657901e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 658001e04c3fSmrg return r; 658101e04c3fSmrg } 658201e04c3fSmrg } 658301e04c3fSmrg } 658401e04c3fSmrg 658501e04c3fSmrg if (tmp4 > 0) { 658601e04c3fSmrg for (i = 0; i <= lasti; ++i) { 658701e04c3fSmrg if (!(write_mask & (1<<i))) 658801e04c3fSmrg continue; 658901e04c3fSmrg 659001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 659101e04c3fSmrg alu.op = ALU_OP1_MOV; 659201e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 659301e04c3fSmrg alu.src[0].sel = tmp4; 659401e04c3fSmrg alu.src[0].chan = i; 659501e04c3fSmrg 659601e04c3fSmrg if (i == lasti) 659701e04c3fSmrg alu.last = 1; 659801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 659901e04c3fSmrg return r; 660001e04c3fSmrg } 6601af69d88dSmrg } 660201e04c3fSmrg 6603af69d88dSmrg return 0; 6604af69d88dSmrg} 6605af69d88dSmrg 660601e04c3fSmrgstatic int tgsi_udiv(struct r600_shader_ctx *ctx) 6607af69d88dSmrg{ 660801e04c3fSmrg return tgsi_divmod(ctx, 0, 0); 660901e04c3fSmrg} 6610af69d88dSmrg 661101e04c3fSmrgstatic int tgsi_umod(struct r600_shader_ctx *ctx) 661201e04c3fSmrg{ 661301e04c3fSmrg return tgsi_divmod(ctx, 1, 0); 661401e04c3fSmrg} 6615af69d88dSmrg 661601e04c3fSmrgstatic int tgsi_idiv(struct r600_shader_ctx *ctx) 
661701e04c3fSmrg{ 661801e04c3fSmrg return tgsi_divmod(ctx, 0, 1); 661901e04c3fSmrg} 6620af69d88dSmrg 662101e04c3fSmrgstatic int tgsi_imod(struct r600_shader_ctx *ctx) 662201e04c3fSmrg{ 662301e04c3fSmrg return tgsi_divmod(ctx, 1, 1); 662401e04c3fSmrg} 6625af69d88dSmrg 6626af69d88dSmrg 662701e04c3fSmrgstatic int tgsi_f2i(struct r600_shader_ctx *ctx) 662801e04c3fSmrg{ 662901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 663001e04c3fSmrg struct r600_bytecode_alu alu; 663101e04c3fSmrg int i, r; 663201e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 663301e04c3fSmrg int last_inst = tgsi_last_instruction(write_mask); 6634af69d88dSmrg 6635af69d88dSmrg for (i = 0; i < 4; i++) { 6636af69d88dSmrg if (!(write_mask & (1<<i))) 6637af69d88dSmrg continue; 6638af69d88dSmrg 6639af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 664001e04c3fSmrg alu.op = ALU_OP1_TRUNC; 664101e04c3fSmrg 664201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 6643af69d88dSmrg alu.dst.chan = i; 6644af69d88dSmrg alu.dst.write = 1; 6645af69d88dSmrg 664601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 664701e04c3fSmrg if (i == last_inst) 664801e04c3fSmrg alu.last = 1; 6649af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6650af69d88dSmrg if (r) 6651af69d88dSmrg return r; 6652af69d88dSmrg } 6653af69d88dSmrg 6654af69d88dSmrg for (i = 0; i < 4; i++) { 6655af69d88dSmrg if (!(write_mask & (1<<i))) 6656af69d88dSmrg continue; 6657af69d88dSmrg 6658af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 665901e04c3fSmrg alu.op = ctx->inst_info->op; 666001e04c3fSmrg 6661af69d88dSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6662af69d88dSmrg 666301e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 6664af69d88dSmrg alu.src[0].chan = i; 6665af69d88dSmrg 666601e04c3fSmrg if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 666701e04c3fSmrg alu.last = 1; 6668af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6669af69d88dSmrg if (r) 
6670af69d88dSmrg return r; 6671af69d88dSmrg } 6672af69d88dSmrg 6673af69d88dSmrg return 0; 6674af69d88dSmrg} 6675af69d88dSmrg 667601e04c3fSmrgstatic int tgsi_iabs(struct r600_shader_ctx *ctx) 66773464ebd5Sriastradh{ 66783464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6679af69d88dSmrg struct r600_bytecode_alu alu; 668001e04c3fSmrg int i, r; 6681af69d88dSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6682af69d88dSmrg int last_inst = tgsi_last_instruction(write_mask); 6683af69d88dSmrg 668401e04c3fSmrg /* tmp = -src */ 66853464ebd5Sriastradh for (i = 0; i < 4; i++) { 6686af69d88dSmrg if (!(write_mask & (1<<i))) 6687af69d88dSmrg continue; 66883464ebd5Sriastradh 6689af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 669001e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 669101e04c3fSmrg 669201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 66933464ebd5Sriastradh alu.dst.chan = i; 6694af69d88dSmrg alu.dst.write = 1; 66953464ebd5Sriastradh 669601e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 669701e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 66983464ebd5Sriastradh 669901e04c3fSmrg if (i == last_inst) 670001e04c3fSmrg alu.last = 1; 6701af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6702af69d88dSmrg if (r) 6703af69d88dSmrg return r; 6704af69d88dSmrg } 6705af69d88dSmrg 670601e04c3fSmrg /* dst = (src >= 0 ? 
src : tmp) */ 6707af69d88dSmrg for (i = 0; i < 4; i++) { 6708af69d88dSmrg if (!(write_mask & (1<<i))) 6709af69d88dSmrg continue; 6710af69d88dSmrg 6711af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 671201e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 671301e04c3fSmrg alu.is_op3 = 1; 6714af69d88dSmrg alu.dst.write = 1; 6715af69d88dSmrg 671601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6717af69d88dSmrg 671801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 671901e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 672001e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 672101e04c3fSmrg alu.src[2].chan = i; 672201e04c3fSmrg 672301e04c3fSmrg if (i == last_inst) 672401e04c3fSmrg alu.last = 1; 6725af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 67263464ebd5Sriastradh if (r) 67273464ebd5Sriastradh return r; 67283464ebd5Sriastradh } 672901e04c3fSmrg return 0; 673001e04c3fSmrg} 673101e04c3fSmrg 673201e04c3fSmrgstatic int tgsi_issg(struct r600_shader_ctx *ctx) 673301e04c3fSmrg{ 673401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 673501e04c3fSmrg struct r600_bytecode_alu alu; 673601e04c3fSmrg int i, r; 673701e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 673801e04c3fSmrg int last_inst = tgsi_last_instruction(write_mask); 67393464ebd5Sriastradh 674001e04c3fSmrg /* tmp = (src >= 0 ? 
src : -1) */ 67413464ebd5Sriastradh for (i = 0; i < 4; i++) { 6742af69d88dSmrg if (!(write_mask & (1<<i))) 6743af69d88dSmrg continue; 6744af69d88dSmrg 6745af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6746af69d88dSmrg alu.op = ALU_OP3_CNDGE_INT; 67473464ebd5Sriastradh alu.is_op3 = 1; 674801e04c3fSmrg 674901e04c3fSmrg alu.dst.sel = ctx->temp_reg; 6750af69d88dSmrg alu.dst.chan = i; 6751af69d88dSmrg alu.dst.write = 1; 67523464ebd5Sriastradh 675301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 675401e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 675501e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 67563464ebd5Sriastradh 675701e04c3fSmrg if (i == last_inst) 675801e04c3fSmrg alu.last = 1; 6759af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 67603464ebd5Sriastradh if (r) 67613464ebd5Sriastradh return r; 67623464ebd5Sriastradh } 6763af69d88dSmrg 676401e04c3fSmrg /* dst = (tmp > 0 ? 1 : tmp) */ 67653464ebd5Sriastradh for (i = 0; i < 4; i++) { 676601e04c3fSmrg if (!(write_mask & (1<<i))) 676701e04c3fSmrg continue; 676801e04c3fSmrg 6769af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 677001e04c3fSmrg alu.op = ALU_OP3_CNDGT_INT; 677101e04c3fSmrg alu.is_op3 = 1; 677201e04c3fSmrg alu.dst.write = 1; 677301e04c3fSmrg 677401e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 677501e04c3fSmrg 677601e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 677701e04c3fSmrg alu.src[0].chan = i; 677801e04c3fSmrg 677901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 678001e04c3fSmrg 678101e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 678201e04c3fSmrg alu.src[2].chan = i; 678301e04c3fSmrg 678401e04c3fSmrg if (i == last_inst) 67853464ebd5Sriastradh alu.last = 1; 6786af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 67873464ebd5Sriastradh if (r) 67883464ebd5Sriastradh return r; 67893464ebd5Sriastradh } 67903464ebd5Sriastradh return 0; 67913464ebd5Sriastradh} 67923464ebd5Sriastradh 679301e04c3fSmrg 679401e04c3fSmrg 
679501e04c3fSmrgstatic int tgsi_ssg(struct r600_shader_ctx *ctx) 67963464ebd5Sriastradh{ 67973464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 679801e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 679901e04c3fSmrg int last_inst = tgsi_last_instruction(write_mask); 6800af69d88dSmrg struct r600_bytecode_alu alu; 680101e04c3fSmrg int i, r; 68023464ebd5Sriastradh 680301e04c3fSmrg /* tmp = (src > 0 ? 1 : src) */ 680401e04c3fSmrg for (i = 0; i <= last_inst; i++) { 680501e04c3fSmrg if (!(write_mask & (1 << i))) 68063464ebd5Sriastradh continue; 6807af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 680801e04c3fSmrg alu.op = ALU_OP3_CNDGT; 680901e04c3fSmrg alu.is_op3 = 1; 68103464ebd5Sriastradh 681101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 68123464ebd5Sriastradh alu.dst.chan = i; 681301e04c3fSmrg 681401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 681501e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1; 681601e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 681701e04c3fSmrg 681801e04c3fSmrg if (i == last_inst) 68193464ebd5Sriastradh alu.last = 1; 6820af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 68213464ebd5Sriastradh if (r) 68223464ebd5Sriastradh return r; 68233464ebd5Sriastradh } 68243464ebd5Sriastradh 682501e04c3fSmrg /* dst = (-tmp > 0 ? 
-1 : tmp) */ 682601e04c3fSmrg for (i = 0; i <= last_inst; i++) { 682701e04c3fSmrg if (!(write_mask & (1 << i))) 682801e04c3fSmrg continue; 6829af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 683001e04c3fSmrg alu.op = ALU_OP3_CNDGT; 683101e04c3fSmrg alu.is_op3 = 1; 68323464ebd5Sriastradh tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 683301e04c3fSmrg 683401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 683501e04c3fSmrg alu.src[0].chan = i; 683601e04c3fSmrg alu.src[0].neg = 1; 683701e04c3fSmrg 683801e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1; 683901e04c3fSmrg alu.src[1].neg = 1; 684001e04c3fSmrg 684101e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 684201e04c3fSmrg alu.src[2].chan = i; 684301e04c3fSmrg 684401e04c3fSmrg if (i == last_inst) 68453464ebd5Sriastradh alu.last = 1; 6846af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 68473464ebd5Sriastradh if (r) 68483464ebd5Sriastradh return r; 68493464ebd5Sriastradh } 68503464ebd5Sriastradh return 0; 68513464ebd5Sriastradh} 68523464ebd5Sriastradh 685301e04c3fSmrgstatic int tgsi_bfi(struct r600_shader_ctx *ctx) 68543464ebd5Sriastradh{ 68553464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6856af69d88dSmrg struct r600_bytecode_alu alu; 685701e04c3fSmrg int i, r, t1, t2; 6858af69d88dSmrg 685901e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 686001e04c3fSmrg int last_inst = tgsi_last_instruction(write_mask); 6861af69d88dSmrg 686201e04c3fSmrg t1 = r600_get_temp(ctx); 6863af69d88dSmrg 686401e04c3fSmrg for (i = 0; i < 4; i++) { 686501e04c3fSmrg if (!(write_mask & (1<<i))) 686601e04c3fSmrg continue; 6867af69d88dSmrg 686801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 686901e04c3fSmrg alu.op = ALU_OP2_SETGE_INT; 687001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 687101e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 687201e04c3fSmrg alu.src[1].value = 32; 687301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 687401e04c3fSmrg alu.dst.chan 
= i; 687501e04c3fSmrg alu.dst.write = 1; 687601e04c3fSmrg alu.last = i == last_inst; 687701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 687801e04c3fSmrg if (r) 687901e04c3fSmrg return r; 688001e04c3fSmrg } 6881af69d88dSmrg 6882af69d88dSmrg for (i = 0; i < 4; i++) { 688301e04c3fSmrg if (!(write_mask & (1<<i))) 6884af69d88dSmrg continue; 6885af69d88dSmrg 688601e04c3fSmrg /* create mask tmp */ 6887af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 688801e04c3fSmrg alu.op = ALU_OP2_BFM_INT; 688901e04c3fSmrg alu.dst.sel = t1; 6890af69d88dSmrg alu.dst.chan = i; 6891af69d88dSmrg alu.dst.write = 1; 689201e04c3fSmrg alu.last = i == last_inst; 6893af69d88dSmrg 689401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 689501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6896af69d88dSmrg 6897af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6898af69d88dSmrg if (r) 6899af69d88dSmrg return r; 6900af69d88dSmrg } 6901af69d88dSmrg 690201e04c3fSmrg t2 = r600_get_temp(ctx); 690301e04c3fSmrg 690401e04c3fSmrg for (i = 0; i < 4; i++) { 690501e04c3fSmrg if (!(write_mask & (1<<i))) 690601e04c3fSmrg continue; 690701e04c3fSmrg 690801e04c3fSmrg /* shift insert left */ 6909af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 691001e04c3fSmrg alu.op = ALU_OP2_LSHL_INT; 691101e04c3fSmrg alu.dst.sel = t2; 691201e04c3fSmrg alu.dst.chan = i; 691301e04c3fSmrg alu.dst.write = 1; 691401e04c3fSmrg alu.last = i == last_inst; 6915af69d88dSmrg 691601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 691701e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 691801e04c3fSmrg 691901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 692001e04c3fSmrg if (r) 692101e04c3fSmrg return r; 692201e04c3fSmrg } 692301e04c3fSmrg 692401e04c3fSmrg for (i = 0; i < 4; i++) { 692501e04c3fSmrg if (!(write_mask & (1<<i))) 692601e04c3fSmrg continue; 692701e04c3fSmrg 692801e04c3fSmrg /* actual bitfield insert */ 692901e04c3fSmrg memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 693001e04c3fSmrg alu.op = ALU_OP3_BFI_INT; 693101e04c3fSmrg alu.is_op3 = 1; 693201e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 693301e04c3fSmrg alu.dst.chan = i; 6934af69d88dSmrg alu.dst.write = 1; 693501e04c3fSmrg alu.last = i == last_inst; 6936af69d88dSmrg 693701e04c3fSmrg alu.src[0].sel = t1; 693801e04c3fSmrg alu.src[0].chan = i; 693901e04c3fSmrg alu.src[1].sel = t2; 694001e04c3fSmrg alu.src[1].chan = i; 694101e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6942af69d88dSmrg 694301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 694401e04c3fSmrg if (r) 694501e04c3fSmrg return r; 694601e04c3fSmrg } 6947af69d88dSmrg 694801e04c3fSmrg for (i = 0; i < 4; i++) { 694901e04c3fSmrg if (!(write_mask & (1<<i))) 695001e04c3fSmrg continue; 695101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 695201e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 695301e04c3fSmrg alu.is_op3 = 1; 695401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 695501e04c3fSmrg alu.src[0].chan = i; 695601e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 695701e04c3fSmrg 695801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 695901e04c3fSmrg 696001e04c3fSmrg alu.src[1].sel = alu.dst.sel; 696101e04c3fSmrg alu.src[1].chan = i; 696201e04c3fSmrg 696301e04c3fSmrg alu.last = i == last_inst; 6964af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6965af69d88dSmrg if (r) 6966af69d88dSmrg return r; 6967af69d88dSmrg } 6968af69d88dSmrg return 0; 6969af69d88dSmrg} 6970af69d88dSmrg 697101e04c3fSmrgstatic int tgsi_msb(struct r600_shader_ctx *ctx) 6972af69d88dSmrg{ 6973af69d88dSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6974af69d88dSmrg struct r600_bytecode_alu alu; 697501e04c3fSmrg int i, r, t1, t2; 6976af69d88dSmrg 697701e04c3fSmrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 697801e04c3fSmrg int last_inst = tgsi_last_instruction(write_mask); 6979af69d88dSmrg 698001e04c3fSmrg 
assert(ctx->inst_info->op == ALU_OP1_FFBH_INT || 698101e04c3fSmrg ctx->inst_info->op == ALU_OP1_FFBH_UINT); 698201e04c3fSmrg 698301e04c3fSmrg t1 = ctx->temp_reg; 698401e04c3fSmrg 698501e04c3fSmrg /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ 698601e04c3fSmrg for (i = 0; i < 4; i++) { 698701e04c3fSmrg if (!(write_mask & (1<<i))) 698801e04c3fSmrg continue; 698901e04c3fSmrg 699001e04c3fSmrg /* t1 = FFBH_INT / FFBH_UINT */ 699101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 699201e04c3fSmrg alu.op = ctx->inst_info->op; 699301e04c3fSmrg alu.dst.sel = t1; 699401e04c3fSmrg alu.dst.chan = i; 699501e04c3fSmrg alu.dst.write = 1; 699601e04c3fSmrg alu.last = i == last_inst; 699701e04c3fSmrg 699801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 699901e04c3fSmrg 700001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 700101e04c3fSmrg if (r) 700201e04c3fSmrg return r; 7003af69d88dSmrg } 7004af69d88dSmrg 700501e04c3fSmrg t2 = r600_get_temp(ctx); 7006af69d88dSmrg 700701e04c3fSmrg for (i = 0; i < 4; i++) { 700801e04c3fSmrg if (!(write_mask & (1<<i))) 700901e04c3fSmrg continue; 7010af69d88dSmrg 701101e04c3fSmrg /* t2 = 31 - t1 */ 701201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 701301e04c3fSmrg alu.op = ALU_OP2_SUB_INT; 701401e04c3fSmrg alu.dst.sel = t2; 701501e04c3fSmrg alu.dst.chan = i; 701601e04c3fSmrg alu.dst.write = 1; 701701e04c3fSmrg alu.last = i == last_inst; 7018af69d88dSmrg 701901e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 702001e04c3fSmrg alu.src[0].value = 31; 702101e04c3fSmrg alu.src[1].sel = t1; 702201e04c3fSmrg alu.src[1].chan = i; 7023af69d88dSmrg 702401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 702501e04c3fSmrg if (r) 702601e04c3fSmrg return r; 702701e04c3fSmrg } 7028af69d88dSmrg 702901e04c3fSmrg for (i = 0; i < 4; i++) { 703001e04c3fSmrg if (!(write_mask & (1<<i))) 703101e04c3fSmrg continue; 70323464ebd5Sriastradh 703301e04c3fSmrg /* result = t1 >= 0 ? 
t2 : t1 */ 703401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 703501e04c3fSmrg alu.op = ALU_OP3_CNDGE_INT; 703601e04c3fSmrg alu.is_op3 = 1; 703701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 703801e04c3fSmrg alu.dst.chan = i; 703901e04c3fSmrg alu.dst.write = 1; 704001e04c3fSmrg alu.last = i == last_inst; 70413464ebd5Sriastradh 704201e04c3fSmrg alu.src[0].sel = t1; 704301e04c3fSmrg alu.src[0].chan = i; 704401e04c3fSmrg alu.src[1].sel = t2; 704501e04c3fSmrg alu.src[1].chan = i; 704601e04c3fSmrg alu.src[2].sel = t1; 704701e04c3fSmrg alu.src[2].chan = i; 704801e04c3fSmrg 704901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 705001e04c3fSmrg if (r) 705101e04c3fSmrg return r; 7052af69d88dSmrg } 7053af69d88dSmrg 705401e04c3fSmrg return 0; 705501e04c3fSmrg} 70563464ebd5Sriastradh 705701e04c3fSmrgstatic int tgsi_interp_egcm(struct r600_shader_ctx *ctx) 705801e04c3fSmrg{ 705901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 706001e04c3fSmrg struct r600_bytecode_alu alu; 706101e04c3fSmrg int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti; 706201e04c3fSmrg unsigned location; 706301e04c3fSmrg const int input = inst->Src[0].Register.Index + ctx->shader->nsys_inputs; 70643464ebd5Sriastradh 706501e04c3fSmrg assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); 70663464ebd5Sriastradh 706701e04c3fSmrg /* Interpolators have been marked for use already by allocate_system_value_inputs */ 706801e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 706901e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 707001e04c3fSmrg location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */ 707101e04c3fSmrg } 707201e04c3fSmrg else { 707301e04c3fSmrg location = TGSI_INTERPOLATE_LOC_CENTROID; 70747ec681f3Smrg ctx->shader->input[input].uses_interpolate_at_centroid = 1; 707501e04c3fSmrg } 70763464ebd5Sriastradh 707701e04c3fSmrg k = 
eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); 707801e04c3fSmrg if (k < 0) 707901e04c3fSmrg k = 0; 708001e04c3fSmrg interp_gpr = ctx->eg_interpolators[k].ij_index / 2; 708101e04c3fSmrg interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); 708201e04c3fSmrg 708301e04c3fSmrg /* NOTE: currently offset is not perspective correct */ 708401e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 708501e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 708601e04c3fSmrg int sample_gpr = -1; 708701e04c3fSmrg int gradientsH, gradientsV; 708801e04c3fSmrg struct r600_bytecode_tex tex; 708901e04c3fSmrg 709001e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 709101e04c3fSmrg sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]); 709201e04c3fSmrg } 709301e04c3fSmrg 709401e04c3fSmrg gradientsH = r600_get_temp(ctx); 709501e04c3fSmrg gradientsV = r600_get_temp(ctx); 709601e04c3fSmrg for (i = 0; i < 2; i++) { 709701e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 709801e04c3fSmrg tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V; 709901e04c3fSmrg tex.src_gpr = interp_gpr; 710001e04c3fSmrg tex.src_sel_x = interp_base_chan + 0; 710101e04c3fSmrg tex.src_sel_y = interp_base_chan + 1; 710201e04c3fSmrg tex.src_sel_z = 0; 710301e04c3fSmrg tex.src_sel_w = 0; 710401e04c3fSmrg tex.dst_gpr = i == 0 ? 
gradientsH : gradientsV; 710501e04c3fSmrg tex.dst_sel_x = 0; 710601e04c3fSmrg tex.dst_sel_y = 1; 710701e04c3fSmrg tex.dst_sel_z = 7; 710801e04c3fSmrg tex.dst_sel_w = 7; 710901e04c3fSmrg tex.inst_mod = 1; // Use per pixel gradient calculation 711001e04c3fSmrg tex.sampler_id = 0; 711101e04c3fSmrg tex.resource_id = tex.sampler_id; 7112af69d88dSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 71133464ebd5Sriastradh if (r) 71143464ebd5Sriastradh return r; 71153464ebd5Sriastradh } 71163464ebd5Sriastradh 711701e04c3fSmrg for (i = 0; i < 2; i++) { 7118af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 711901e04c3fSmrg alu.op = ALU_OP3_MULADD; 712001e04c3fSmrg alu.is_op3 = 1; 712101e04c3fSmrg alu.src[0].sel = gradientsH; 712201e04c3fSmrg alu.src[0].chan = i; 712301e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 712401e04c3fSmrg alu.src[1].sel = sample_gpr; 712501e04c3fSmrg alu.src[1].chan = 2; 712601e04c3fSmrg } 712701e04c3fSmrg else { 712801e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 712901e04c3fSmrg } 713001e04c3fSmrg alu.src[2].sel = interp_gpr; 713101e04c3fSmrg alu.src[2].chan = interp_base_chan + i; 71323464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 713301e04c3fSmrg alu.dst.chan = i; 713401e04c3fSmrg alu.last = i == 1; 713501e04c3fSmrg 7136af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 71373464ebd5Sriastradh if (r) 71383464ebd5Sriastradh return r; 71393464ebd5Sriastradh } 71403464ebd5Sriastradh 714101e04c3fSmrg for (i = 0; i < 2; i++) { 7142af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 714301e04c3fSmrg alu.op = ALU_OP3_MULADD; 714401e04c3fSmrg alu.is_op3 = 1; 714501e04c3fSmrg alu.src[0].sel = gradientsV; 714601e04c3fSmrg alu.src[0].chan = i; 714701e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 714801e04c3fSmrg alu.src[1].sel = sample_gpr; 714901e04c3fSmrg alu.src[1].chan = 3; 715001e04c3fSmrg } 715101e04c3fSmrg else { 715201e04c3fSmrg r600_bytecode_src(&alu.src[1], 
&ctx->src[1], 1); 715301e04c3fSmrg } 715401e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 715501e04c3fSmrg alu.src[2].chan = i; 71563464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 71573464ebd5Sriastradh alu.dst.chan = i; 715801e04c3fSmrg alu.last = i == 1; 715901e04c3fSmrg 7160af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 71613464ebd5Sriastradh if (r) 71623464ebd5Sriastradh return r; 71633464ebd5Sriastradh } 71643464ebd5Sriastradh } 71653464ebd5Sriastradh 716601e04c3fSmrg tmp = r600_get_temp(ctx); 716701e04c3fSmrg for (i = 0; i < 8; i++) { 716801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 716901e04c3fSmrg alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY; 71703464ebd5Sriastradh 717101e04c3fSmrg alu.dst.sel = tmp; 717201e04c3fSmrg if ((i > 1 && i < 6)) { 71733464ebd5Sriastradh alu.dst.write = 1; 71743464ebd5Sriastradh } 717501e04c3fSmrg else { 717601e04c3fSmrg alu.dst.write = 0; 717701e04c3fSmrg } 717801e04c3fSmrg alu.dst.chan = i % 4; 71793464ebd5Sriastradh 718001e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 718101e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 71823464ebd5Sriastradh alu.src[0].sel = ctx->temp_reg; 718301e04c3fSmrg alu.src[0].chan = 1 - (i % 2); 718401e04c3fSmrg } else { 718501e04c3fSmrg alu.src[0].sel = interp_gpr; 718601e04c3fSmrg alu.src[0].chan = interp_base_chan + 1 - (i % 2); 71873464ebd5Sriastradh } 718801e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 718901e04c3fSmrg alu.src[1].chan = 0; 71903464ebd5Sriastradh 719101e04c3fSmrg alu.last = i % 4 == 3; 719201e04c3fSmrg alu.bank_swizzle_force = SQ_ALU_VEC_210; 71933464ebd5Sriastradh 719401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 719501e04c3fSmrg if (r) 719601e04c3fSmrg return r; 719701e04c3fSmrg } 71983464ebd5Sriastradh 719901e04c3fSmrg // INTERP can't swizzle dst 720001e04c3fSmrg lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 720101e04c3fSmrg for (i = 
0; i <= lasti; i++) { 720201e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 720301e04c3fSmrg continue; 72043464ebd5Sriastradh 720501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 720601e04c3fSmrg alu.op = ALU_OP1_MOV; 720701e04c3fSmrg alu.src[0].sel = tmp; 720801e04c3fSmrg alu.src[0].chan = ctx->src[0].swizzle[i]; 720901e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 72103464ebd5Sriastradh alu.dst.write = 1; 721101e04c3fSmrg alu.last = i == lasti; 7212af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 72133464ebd5Sriastradh if (r) 72143464ebd5Sriastradh return r; 721501e04c3fSmrg } 72163464ebd5Sriastradh 721701e04c3fSmrg return 0; 721801e04c3fSmrg} 72193464ebd5Sriastradh 72203464ebd5Sriastradh 722101e04c3fSmrgstatic int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 722201e04c3fSmrg{ 722301e04c3fSmrg struct r600_bytecode_alu alu; 722401e04c3fSmrg int i, r; 7225af69d88dSmrg 722601e04c3fSmrg for (i = 0; i < 4; i++) { 722701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 722801e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 722901e04c3fSmrg alu.op = ALU_OP0_NOP; 723001e04c3fSmrg alu.dst.chan = i; 723101e04c3fSmrg } else { 7232af69d88dSmrg alu.op = ALU_OP1_MOV; 723301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 723401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 723501e04c3fSmrg alu.src[0].chan = i; 723601e04c3fSmrg } 723701e04c3fSmrg if (i == 3) { 7238af69d88dSmrg alu.last = 1; 7239af69d88dSmrg } 724001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 724101e04c3fSmrg if (r) 724201e04c3fSmrg return r; 724301e04c3fSmrg } 724401e04c3fSmrg return 0; 724501e04c3fSmrg} 7246af69d88dSmrg 724701e04c3fSmrgstatic int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, 724801e04c3fSmrg unsigned writemask, 724901e04c3fSmrg struct r600_bytecode_alu_src *bc_src, 725001e04c3fSmrg const struct r600_shader_src *shader_src) 725101e04c3fSmrg{ 725201e04c3fSmrg struct 
r600_bytecode_alu alu; 725301e04c3fSmrg int i, r; 725401e04c3fSmrg int lasti = tgsi_last_instruction(writemask); 725501e04c3fSmrg int temp_reg = 0; 7256af69d88dSmrg 725701e04c3fSmrg r600_bytecode_src(&bc_src[0], shader_src, 0); 725801e04c3fSmrg r600_bytecode_src(&bc_src[1], shader_src, 1); 725901e04c3fSmrg r600_bytecode_src(&bc_src[2], shader_src, 2); 726001e04c3fSmrg r600_bytecode_src(&bc_src[3], shader_src, 3); 7261af69d88dSmrg 726201e04c3fSmrg if (bc_src->abs) { 726301e04c3fSmrg temp_reg = r600_get_temp(ctx); 7264af69d88dSmrg 726501e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 726601e04c3fSmrg if (!(writemask & (1 << i))) 726701e04c3fSmrg continue; 7268af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7269af69d88dSmrg alu.op = ALU_OP1_MOV; 727001e04c3fSmrg alu.dst.sel = temp_reg; 727101e04c3fSmrg alu.dst.chan = i; 7272af69d88dSmrg alu.dst.write = 1; 727301e04c3fSmrg alu.src[0] = bc_src[i]; 727401e04c3fSmrg if (i == lasti) { 727501e04c3fSmrg alu.last = 1; 727601e04c3fSmrg } 7277af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7278af69d88dSmrg if (r) 7279af69d88dSmrg return r; 728001e04c3fSmrg memset(&bc_src[i], 0, sizeof(*bc_src)); 728101e04c3fSmrg bc_src[i].sel = temp_reg; 728201e04c3fSmrg bc_src[i].chan = i; 7283af69d88dSmrg } 7284af69d88dSmrg } 728501e04c3fSmrg return 0; 728601e04c3fSmrg} 7287af69d88dSmrg 728801e04c3fSmrgstatic int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst) 728901e04c3fSmrg{ 729001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 729101e04c3fSmrg struct r600_bytecode_alu alu; 729201e04c3fSmrg struct r600_bytecode_alu_src srcs[4][4]; 729301e04c3fSmrg int i, j, r; 729401e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 729501e04c3fSmrg unsigned op = ctx->inst_info->op; 729601e04c3fSmrg 729701e04c3fSmrg if (op == ALU_OP3_MULADD_IEEE && 729801e04c3fSmrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 729901e04c3fSmrg op = ALU_OP3_MULADD; 
730001e04c3fSmrg 730101e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 730201e04c3fSmrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 730301e04c3fSmrg srcs[j], &ctx->src[j]); 730401e04c3fSmrg if (r) 730501e04c3fSmrg return r; 730601e04c3fSmrg } 730701e04c3fSmrg 730801e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 730901e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 731001e04c3fSmrg continue; 731101e04c3fSmrg 731201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 731301e04c3fSmrg alu.op = op; 731401e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 731501e04c3fSmrg alu.src[j] = srcs[j][i]; 731601e04c3fSmrg } 731701e04c3fSmrg 731801e04c3fSmrg if (dst == -1) { 731901e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 732001e04c3fSmrg } else { 732101e04c3fSmrg alu.dst.sel = dst; 732201e04c3fSmrg } 732301e04c3fSmrg alu.dst.chan = i; 732401e04c3fSmrg alu.dst.write = 1; 732501e04c3fSmrg alu.is_op3 = 1; 732601e04c3fSmrg if (i == lasti) { 732701e04c3fSmrg alu.last = 1; 732801e04c3fSmrg } 732901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 733001e04c3fSmrg if (r) 733101e04c3fSmrg return r; 733201e04c3fSmrg } 733301e04c3fSmrg return 0; 733401e04c3fSmrg} 733501e04c3fSmrg 733601e04c3fSmrgstatic int tgsi_op3(struct r600_shader_ctx *ctx) 733701e04c3fSmrg{ 733801e04c3fSmrg return tgsi_op3_dst(ctx, -1); 733901e04c3fSmrg} 734001e04c3fSmrg 734101e04c3fSmrgstatic int tgsi_dp(struct r600_shader_ctx *ctx) 734201e04c3fSmrg{ 734301e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 734401e04c3fSmrg struct r600_bytecode_alu alu; 734501e04c3fSmrg int i, j, r; 734601e04c3fSmrg unsigned op = ctx->inst_info->op; 734701e04c3fSmrg if (op == ALU_OP2_DOT4_IEEE && 734801e04c3fSmrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 734901e04c3fSmrg op = ALU_OP2_DOT4; 735001e04c3fSmrg 735101e04c3fSmrg for (i = 0; i < 4; i++) { 735201e04c3fSmrg memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 735301e04c3fSmrg alu.op = op; 735401e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 735501e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 735601e04c3fSmrg } 735701e04c3fSmrg 735801e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 735901e04c3fSmrg alu.dst.chan = i; 736001e04c3fSmrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 736101e04c3fSmrg /* handle some special cases */ 736201e04c3fSmrg switch (inst->Instruction.Opcode) { 736301e04c3fSmrg case TGSI_OPCODE_DP2: 736401e04c3fSmrg if (i > 1) { 736501e04c3fSmrg alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 736601e04c3fSmrg alu.src[0].chan = alu.src[1].chan = 0; 736701e04c3fSmrg } 736801e04c3fSmrg break; 736901e04c3fSmrg case TGSI_OPCODE_DP3: 737001e04c3fSmrg if (i > 2) { 737101e04c3fSmrg alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 737201e04c3fSmrg alu.src[0].chan = alu.src[1].chan = 0; 737301e04c3fSmrg } 737401e04c3fSmrg break; 737501e04c3fSmrg default: 737601e04c3fSmrg break; 737701e04c3fSmrg } 737801e04c3fSmrg if (i == 3) { 737901e04c3fSmrg alu.last = 1; 738001e04c3fSmrg } 738101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 738201e04c3fSmrg if (r) 738301e04c3fSmrg return r; 738401e04c3fSmrg } 738501e04c3fSmrg return 0; 738601e04c3fSmrg} 738701e04c3fSmrg 738801e04c3fSmrgstatic inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 738901e04c3fSmrg unsigned index) 739001e04c3fSmrg{ 739101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 739201e04c3fSmrg return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 739301e04c3fSmrg inst->Src[index].Register.File != TGSI_FILE_INPUT && 739401e04c3fSmrg inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 739501e04c3fSmrg ctx->src[index].neg || ctx->src[index].abs || 739601e04c3fSmrg (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY); 739701e04c3fSmrg} 739801e04c3fSmrg 
739901e04c3fSmrgstatic inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 740001e04c3fSmrg unsigned index) 740101e04c3fSmrg{ 740201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 740301e04c3fSmrg return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 740401e04c3fSmrg} 740501e04c3fSmrg 740601e04c3fSmrgstatic int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 740701e04c3fSmrg{ 740801e04c3fSmrg struct r600_bytecode_vtx vtx; 740901e04c3fSmrg struct r600_bytecode_alu alu; 741001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 741101e04c3fSmrg int src_gpr, r, i; 741201e04c3fSmrg int id = tgsi_tex_get_src_gpr(ctx, 1); 741301e04c3fSmrg int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 741401e04c3fSmrg 741501e04c3fSmrg src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 741601e04c3fSmrg if (src_requires_loading) { 741701e04c3fSmrg for (i = 0; i < 4; i++) { 7418af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7419af69d88dSmrg alu.op = ALU_OP1_MOV; 7420af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7421af69d88dSmrg alu.dst.sel = ctx->temp_reg; 7422af69d88dSmrg alu.dst.chan = i; 7423af69d88dSmrg if (i == 3) 7424af69d88dSmrg alu.last = 1; 7425af69d88dSmrg alu.dst.write = 1; 7426af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7427af69d88dSmrg if (r) 7428af69d88dSmrg return r; 7429af69d88dSmrg } 7430af69d88dSmrg src_gpr = ctx->temp_reg; 7431af69d88dSmrg } 7432af69d88dSmrg 743301e04c3fSmrg memset(&vtx, 0, sizeof(vtx)); 743401e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 743501e04c3fSmrg vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 743601e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 743701e04c3fSmrg vtx.src_gpr = src_gpr; 743801e04c3fSmrg vtx.mega_fetch_count = 16; 743901e04c3fSmrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index; 744001e04c3fSmrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 744101e04c3fSmrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 744201e04c3fSmrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 744301e04c3fSmrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 744401e04c3fSmrg vtx.use_const_fields = 1; 744501e04c3fSmrg vtx.buffer_index_mode = sampler_index_mode; 7446af69d88dSmrg 744701e04c3fSmrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 744801e04c3fSmrg return r; 7449af69d88dSmrg 745001e04c3fSmrg if (ctx->bc->chip_class >= EVERGREEN) 745101e04c3fSmrg return 0; 7452af69d88dSmrg 745301e04c3fSmrg for (i = 0; i < 4; i++) { 745401e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 745501e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 745601e04c3fSmrg continue; 7457af69d88dSmrg 745801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 745901e04c3fSmrg alu.op = ALU_OP2_AND_INT; 7460af69d88dSmrg 746101e04c3fSmrg alu.dst.chan = i; 746201e04c3fSmrg alu.dst.sel = vtx.dst_gpr; 746301e04c3fSmrg alu.dst.write = 1; 7464af69d88dSmrg 746501e04c3fSmrg alu.src[0].sel = vtx.dst_gpr; 746601e04c3fSmrg alu.src[0].chan = i; 7467af69d88dSmrg 746801e04c3fSmrg alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 746901e04c3fSmrg alu.src[1].sel += (id * 2); 747001e04c3fSmrg alu.src[1].chan = i % 4; 747101e04c3fSmrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 747201e04c3fSmrg 747301e04c3fSmrg if (i == lasti) 747401e04c3fSmrg alu.last = 1; 747501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 747601e04c3fSmrg if (r) 747701e04c3fSmrg return r; 747801e04c3fSmrg } 747901e04c3fSmrg 748001e04c3fSmrg if (inst->Dst[0].Register.WriteMask & 3) { 748101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 748201e04c3fSmrg alu.op = ALU_OP2_OR_INT; 748301e04c3fSmrg 748401e04c3fSmrg alu.dst.chan = 3; 
748501e04c3fSmrg alu.dst.sel = vtx.dst_gpr; 748601e04c3fSmrg alu.dst.write = 1; 748701e04c3fSmrg 748801e04c3fSmrg alu.src[0].sel = vtx.dst_gpr; 748901e04c3fSmrg alu.src[0].chan = 3; 749001e04c3fSmrg 749101e04c3fSmrg alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; 749201e04c3fSmrg alu.src[1].chan = 0; 749301e04c3fSmrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 749401e04c3fSmrg 749501e04c3fSmrg alu.last = 1; 749601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 749701e04c3fSmrg if (r) 749801e04c3fSmrg return r; 749901e04c3fSmrg } 750001e04c3fSmrg return 0; 750101e04c3fSmrg} 750201e04c3fSmrg 750301e04c3fSmrgstatic int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset, int eg_buffer_base) 750401e04c3fSmrg{ 750501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 750601e04c3fSmrg int r; 750701e04c3fSmrg int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset; 750801e04c3fSmrg int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 750901e04c3fSmrg 751001e04c3fSmrg if (ctx->bc->chip_class < EVERGREEN) { 751101e04c3fSmrg struct r600_bytecode_alu alu; 751201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 751301e04c3fSmrg alu.op = ALU_OP1_MOV; 751401e04c3fSmrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 751501e04c3fSmrg /* r600 we have them at channel 2 of the second dword */ 751601e04c3fSmrg alu.src[0].sel += (id * 2) + 1; 751701e04c3fSmrg alu.src[0].chan = 1; 751801e04c3fSmrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 751901e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 752001e04c3fSmrg alu.last = 1; 752101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 752201e04c3fSmrg if (r) 752301e04c3fSmrg return r; 752401e04c3fSmrg return 0; 752501e04c3fSmrg } else { 752601e04c3fSmrg struct r600_bytecode_vtx vtx; 752701e04c3fSmrg memset(&vtx, 0, sizeof(vtx)); 752801e04c3fSmrg vtx.op = FETCH_OP_GET_BUFFER_RESINFO; 752901e04c3fSmrg vtx.buffer_id = id + eg_buffer_base; 753001e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 753101e04c3fSmrg vtx.src_gpr = 0; 753201e04c3fSmrg vtx.mega_fetch_count = 16; /* no idea here really... */ 753301e04c3fSmrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 753401e04c3fSmrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 753501e04c3fSmrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7; /* SEL_Y */ 753601e04c3fSmrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7; /* SEL_Z */ 753701e04c3fSmrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
4 : 7; /* SEL_W */ 753801e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 753901e04c3fSmrg vtx.buffer_index_mode = sampler_index_mode; 754001e04c3fSmrg 754101e04c3fSmrg if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) 754201e04c3fSmrg return r; 754301e04c3fSmrg return 0; 754401e04c3fSmrg } 754501e04c3fSmrg} 754601e04c3fSmrg 754701e04c3fSmrg 754801e04c3fSmrgstatic int tgsi_tex(struct r600_shader_ctx *ctx) 754901e04c3fSmrg{ 755001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 755101e04c3fSmrg struct r600_bytecode_tex tex; 755201e04c3fSmrg struct r600_bytecode_tex grad_offs[3]; 755301e04c3fSmrg struct r600_bytecode_alu alu; 755401e04c3fSmrg unsigned src_gpr; 755501e04c3fSmrg int r, i, j, n_grad_offs = 0; 755601e04c3fSmrg int opcode; 755701e04c3fSmrg bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 755801e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXF && 755901e04c3fSmrg (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 756001e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 756101e04c3fSmrg 756201e04c3fSmrg bool txf_add_offsets = inst->Texture.NumOffsets && 756301e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXF && 756401e04c3fSmrg inst->Texture.Texture != TGSI_TEXTURE_BUFFER; 756501e04c3fSmrg 756601e04c3fSmrg /* Texture fetch instructions can only use gprs as source. 
756701e04c3fSmrg * Also they cannot negate the source or take the absolute value */ 756801e04c3fSmrg const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 756901e04c3fSmrg tgsi_tex_src_requires_loading(ctx, 0)) || 757001e04c3fSmrg read_compressed_msaa || txf_add_offsets; 757101e04c3fSmrg 757201e04c3fSmrg boolean src_loaded = FALSE; 757301e04c3fSmrg unsigned sampler_src_reg = 1; 757401e04c3fSmrg int8_t offset_x = 0, offset_y = 0, offset_z = 0; 757501e04c3fSmrg boolean has_txq_cube_array_z = false; 757601e04c3fSmrg unsigned sampler_index_mode; 757701e04c3fSmrg int array_index_offset_channel = -1; 757801e04c3fSmrg 757901e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 758001e04c3fSmrg ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 758101e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 758201e04c3fSmrg if (inst->Dst[0].Register.WriteMask & 4) { 758301e04c3fSmrg ctx->shader->has_txq_cube_array_z_comp = true; 758401e04c3fSmrg has_txq_cube_array_z = true; 758501e04c3fSmrg } 758601e04c3fSmrg 758701e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 758801e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 758901e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 759001e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TG4) 759101e04c3fSmrg sampler_src_reg = 2; 759201e04c3fSmrg 759301e04c3fSmrg /* TGSI moves the sampler to src reg 3 for TXD */ 759401e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 759501e04c3fSmrg sampler_src_reg = 3; 759601e04c3fSmrg 759701e04c3fSmrg sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 759801e04c3fSmrg 759901e04c3fSmrg src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 760001e04c3fSmrg 760101e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 760201e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 760301e04c3fSmrg if (ctx->bc->chip_class < EVERGREEN) 760401e04c3fSmrg ctx->shader->uses_tex_buffers = true; 760501e04c3fSmrg return r600_do_buffer_txq(ctx, 1, 0, R600_MAX_CONST_BUFFERS); 760601e04c3fSmrg } 760701e04c3fSmrg else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 760801e04c3fSmrg if (ctx->bc->chip_class < EVERGREEN) 760901e04c3fSmrg ctx->shader->uses_tex_buffers = true; 761001e04c3fSmrg return do_vtx_fetch_inst(ctx, src_requires_loading); 761101e04c3fSmrg } 761201e04c3fSmrg } 761301e04c3fSmrg 761401e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 761501e04c3fSmrg int out_chan; 761601e04c3fSmrg /* Add perspective divide */ 761701e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 761801e04c3fSmrg out_chan = 2; 761901e04c3fSmrg for (i = 0; i < 3; i++) { 7620af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 762101e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 762201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 762301e04c3fSmrg 762401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 7625af69d88dSmrg alu.dst.chan = i; 762601e04c3fSmrg if (i == 2) 7627af69d88dSmrg alu.last = 1; 762801e04c3fSmrg if (out_chan == i) 762901e04c3fSmrg alu.dst.write = 1; 7630af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7631af69d88dSmrg if (r) 7632af69d88dSmrg return r; 7633af69d88dSmrg } 763401e04c3fSmrg 7635af69d88dSmrg } else { 763601e04c3fSmrg out_chan = 3; 7637af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 763801e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 763901e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 764001e04c3fSmrg 764101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 764201e04c3fSmrg alu.dst.chan = out_chan; 764301e04c3fSmrg alu.last = 1; 764401e04c3fSmrg 
alu.dst.write = 1; 764501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 764601e04c3fSmrg if (r) 764701e04c3fSmrg return r; 764801e04c3fSmrg } 764901e04c3fSmrg 765001e04c3fSmrg for (i = 0; i < 3; i++) { 765101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 765201e04c3fSmrg alu.op = ALU_OP2_MUL; 765301e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 765401e04c3fSmrg alu.src[0].chan = out_chan; 765501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 765601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 765701e04c3fSmrg alu.dst.chan = i; 765801e04c3fSmrg alu.dst.write = 1; 765901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 766001e04c3fSmrg if (r) 766101e04c3fSmrg return r; 766201e04c3fSmrg } 766301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 766401e04c3fSmrg alu.op = ALU_OP1_MOV; 766501e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; 766601e04c3fSmrg alu.src[0].chan = 0; 766701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 766801e04c3fSmrg alu.dst.chan = 3; 766901e04c3fSmrg alu.last = 1; 767001e04c3fSmrg alu.dst.write = 1; 767101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 767201e04c3fSmrg if (r) 767301e04c3fSmrg return r; 767401e04c3fSmrg src_loaded = TRUE; 767501e04c3fSmrg src_gpr = ctx->temp_reg; 767601e04c3fSmrg } 767701e04c3fSmrg 767801e04c3fSmrg 767901e04c3fSmrg if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 768001e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 768101e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 768201e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 768301e04c3fSmrg inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { 768401e04c3fSmrg 768501e04c3fSmrg static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 768601e04c3fSmrg static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 768701e04c3fSmrg 768801e04c3fSmrg /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 768901e04c3fSmrg for (i = 0; i < 4; i++) { 769001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
769101e04c3fSmrg alu.op = ALU_OP2_CUBE; 769201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 769301e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 769401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 769501e04c3fSmrg alu.dst.chan = i; 769601e04c3fSmrg if (i == 3) 769701e04c3fSmrg alu.last = 1; 769801e04c3fSmrg alu.dst.write = 1; 769901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 770001e04c3fSmrg if (r) 770101e04c3fSmrg return r; 770201e04c3fSmrg } 770301e04c3fSmrg 770401e04c3fSmrg /* tmp1.z = RCP_e(|tmp1.z|) */ 770501e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 770601e04c3fSmrg for (i = 0; i < 3; i++) { 770701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 770801e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 770901e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 771001e04c3fSmrg alu.src[0].chan = 2; 771101e04c3fSmrg alu.src[0].abs = 1; 771201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 771301e04c3fSmrg alu.dst.chan = i; 771401e04c3fSmrg if (i == 2) 771501e04c3fSmrg alu.dst.write = 1; 771601e04c3fSmrg if (i == 2) 771701e04c3fSmrg alu.last = 1; 771801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 771901e04c3fSmrg if (r) 772001e04c3fSmrg return r; 772101e04c3fSmrg } 772201e04c3fSmrg } else { 772301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 772401e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 772501e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 772601e04c3fSmrg alu.src[0].chan = 2; 772701e04c3fSmrg alu.src[0].abs = 1; 772801e04c3fSmrg alu.dst.sel = ctx->temp_reg; 772901e04c3fSmrg alu.dst.chan = 2; 7730af69d88dSmrg alu.dst.write = 1; 7731af69d88dSmrg alu.last = 1; 7732af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7733af69d88dSmrg if (r) 7734af69d88dSmrg return r; 7735af69d88dSmrg } 7736af69d88dSmrg 773701e04c3fSmrg /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 773801e04c3fSmrg * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 773901e04c3fSmrg * muladd has no writemask, have to use another temp 
774001e04c3fSmrg */ 774101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 774201e04c3fSmrg alu.op = ALU_OP3_MULADD; 774301e04c3fSmrg alu.is_op3 = 1; 774401e04c3fSmrg 774501e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 774601e04c3fSmrg alu.src[0].chan = 0; 774701e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 774801e04c3fSmrg alu.src[1].chan = 2; 774901e04c3fSmrg 775001e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 775101e04c3fSmrg alu.src[2].chan = 0; 775201e04c3fSmrg alu.src[2].value = u_bitcast_f2u(1.5f); 775301e04c3fSmrg 775401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 775501e04c3fSmrg alu.dst.chan = 0; 775601e04c3fSmrg alu.dst.write = 1; 775701e04c3fSmrg 775801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 775901e04c3fSmrg if (r) 776001e04c3fSmrg return r; 776101e04c3fSmrg 776201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 776301e04c3fSmrg alu.op = ALU_OP3_MULADD; 776401e04c3fSmrg alu.is_op3 = 1; 776501e04c3fSmrg 776601e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 776701e04c3fSmrg alu.src[0].chan = 1; 776801e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 776901e04c3fSmrg alu.src[1].chan = 2; 777001e04c3fSmrg 777101e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 777201e04c3fSmrg alu.src[2].chan = 0; 777301e04c3fSmrg alu.src[2].value = u_bitcast_f2u(1.5f); 777401e04c3fSmrg 777501e04c3fSmrg alu.dst.sel = ctx->temp_reg; 777601e04c3fSmrg alu.dst.chan = 1; 777701e04c3fSmrg alu.dst.write = 1; 777801e04c3fSmrg 777901e04c3fSmrg alu.last = 1; 778001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 778101e04c3fSmrg if (r) 778201e04c3fSmrg return r; 778301e04c3fSmrg /* write initial compare value into Z component 778401e04c3fSmrg - W src 0 for shadow cube 778501e04c3fSmrg - X src 1 for shadow cube array */ 778601e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 778701e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 778801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 778901e04c3fSmrg alu.op = 
ALU_OP1_MOV; 779001e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 779101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 779201e04c3fSmrg else 779301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 779401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 779501e04c3fSmrg alu.dst.chan = 2; 779601e04c3fSmrg alu.dst.write = 1; 779701e04c3fSmrg alu.last = 1; 779801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 779901e04c3fSmrg if (r) 780001e04c3fSmrg return r; 780101e04c3fSmrg } 780201e04c3fSmrg 780301e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 780401e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 780501e04c3fSmrg if (ctx->bc->chip_class >= EVERGREEN) { 780601e04c3fSmrg int mytmp = r600_get_temp(ctx); 780701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 780801e04c3fSmrg alu.op = ALU_OP1_MOV; 780901e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 781001e04c3fSmrg alu.src[0].chan = 3; 781101e04c3fSmrg alu.dst.sel = mytmp; 781201e04c3fSmrg alu.dst.chan = 0; 781301e04c3fSmrg alu.dst.write = 1; 781401e04c3fSmrg alu.last = 1; 781501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 781601e04c3fSmrg if (r) 781701e04c3fSmrg return r; 781801e04c3fSmrg 781901e04c3fSmrg /* Evaluate the array index according to floor(idx + 0.5). 
This 782001e04c3fSmrg * needs to be done before merging the face select value, because 782101e04c3fSmrg * otherwise the fractional part of the array index will interfere 782201e04c3fSmrg * with the face select value */ 782301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 782401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 782501e04c3fSmrg alu.op = ALU_OP1_RNDNE; 782601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 782701e04c3fSmrg alu.dst.chan = 3; 782801e04c3fSmrg alu.dst.write = 1; 782901e04c3fSmrg alu.last = 1; 783001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 783101e04c3fSmrg if (r) 783201e04c3fSmrg return r; 783301e04c3fSmrg 783401e04c3fSmrg /* Because the array slice index and the cube face index are merged 783501e04c3fSmrg * into one value we have to make sure the array slice index is >= 0, 783601e04c3fSmrg * otherwise the face selection will fail */ 783701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 783801e04c3fSmrg alu.op = ALU_OP2_MAX; 783901e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 784001e04c3fSmrg alu.src[0].chan = 3; 784101e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0; 784201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 784301e04c3fSmrg alu.dst.chan = 3; 784401e04c3fSmrg alu.dst.write = 1; 784501e04c3fSmrg alu.last = 1; 784601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 784701e04c3fSmrg if (r) 784801e04c3fSmrg return r; 784901e04c3fSmrg 785001e04c3fSmrg /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 785101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 785201e04c3fSmrg alu.op = ALU_OP3_MULADD; 785301e04c3fSmrg alu.is_op3 = 1; 785401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 785501e04c3fSmrg alu.src[0].chan = 3; 785601e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 785701e04c3fSmrg alu.src[1].chan = 0; 785801e04c3fSmrg alu.src[1].value = u_bitcast_f2u(8.0f); 785901e04c3fSmrg alu.src[2].sel = mytmp; 786001e04c3fSmrg alu.src[2].chan = 0; 786101e04c3fSmrg 
alu.dst.sel = ctx->temp_reg; 786201e04c3fSmrg alu.dst.chan = 3; 786301e04c3fSmrg alu.dst.write = 1; 786401e04c3fSmrg alu.last = 1; 786501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 786601e04c3fSmrg if (r) 786701e04c3fSmrg return r; 786801e04c3fSmrg } else if (ctx->bc->chip_class < EVERGREEN) { 786901e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 787001e04c3fSmrg tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 787101e04c3fSmrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 787201e04c3fSmrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 787301e04c3fSmrg tex.src_gpr = r600_get_temp(ctx); 787401e04c3fSmrg tex.src_sel_x = 0; 787501e04c3fSmrg tex.src_sel_y = 0; 787601e04c3fSmrg tex.src_sel_z = 0; 787701e04c3fSmrg tex.src_sel_w = 0; 787801e04c3fSmrg tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 787901e04c3fSmrg tex.coord_type_x = 1; 788001e04c3fSmrg tex.coord_type_y = 1; 788101e04c3fSmrg tex.coord_type_z = 1; 788201e04c3fSmrg tex.coord_type_w = 1; 788301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 788401e04c3fSmrg alu.op = ALU_OP1_MOV; 788501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 788601e04c3fSmrg alu.dst.sel = tex.src_gpr; 788701e04c3fSmrg alu.dst.chan = 0; 788801e04c3fSmrg alu.last = 1; 788901e04c3fSmrg alu.dst.write = 1; 789001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 789101e04c3fSmrg if (r) 789201e04c3fSmrg return r; 789301e04c3fSmrg 789401e04c3fSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 789501e04c3fSmrg if (r) 789601e04c3fSmrg return r; 789701e04c3fSmrg } 789801e04c3fSmrg 789901e04c3fSmrg } 790001e04c3fSmrg 790101e04c3fSmrg /* for cube forms of lod and bias we need to route things */ 790201e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 790301e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXL || 790401e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 790501e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 790601e04c3fSmrg 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 790701e04c3fSmrg alu.op = ALU_OP1_MOV; 790801e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 790901e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 791001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 791101e04c3fSmrg else 791201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 791301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 791401e04c3fSmrg alu.dst.chan = 2; 791501e04c3fSmrg alu.last = 1; 791601e04c3fSmrg alu.dst.write = 1; 791701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 791801e04c3fSmrg if (r) 791901e04c3fSmrg return r; 792001e04c3fSmrg } 792101e04c3fSmrg 792201e04c3fSmrg src_loaded = TRUE; 792301e04c3fSmrg src_gpr = ctx->temp_reg; 792401e04c3fSmrg } 792501e04c3fSmrg 792601e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 792701e04c3fSmrg int temp_h = 0, temp_v = 0; 792801e04c3fSmrg int start_val = 0; 792901e04c3fSmrg 793001e04c3fSmrg /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 793101e04c3fSmrg if (src_loaded == TRUE) 793201e04c3fSmrg start_val = 1; 793301e04c3fSmrg else 793401e04c3fSmrg src_loaded = TRUE; 793501e04c3fSmrg for (i = start_val; i < 3; i++) { 793601e04c3fSmrg int treg = r600_get_temp(ctx); 793701e04c3fSmrg 793801e04c3fSmrg if (i == 0) 793901e04c3fSmrg src_gpr = treg; 794001e04c3fSmrg else if (i == 1) 794101e04c3fSmrg temp_h = treg; 794201e04c3fSmrg else 794301e04c3fSmrg temp_v = treg; 794401e04c3fSmrg 794501e04c3fSmrg for (j = 0; j < 4; j++) { 794601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 794701e04c3fSmrg alu.op = ALU_OP1_MOV; 794801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 794901e04c3fSmrg alu.dst.sel = treg; 795001e04c3fSmrg alu.dst.chan = j; 795101e04c3fSmrg if (j == 3) 795201e04c3fSmrg alu.last = 1; 795301e04c3fSmrg alu.dst.write = 1; 795401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 795501e04c3fSmrg if (r) 795601e04c3fSmrg return r; 795701e04c3fSmrg } 795801e04c3fSmrg } 795901e04c3fSmrg for (i = 1; i < 3; i++) { 796001e04c3fSmrg /* set gradients h/v */ 796101e04c3fSmrg struct r600_bytecode_tex *t = &grad_offs[n_grad_offs++]; 796201e04c3fSmrg memset(t, 0, sizeof(struct r600_bytecode_tex)); 796301e04c3fSmrg t->op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 796401e04c3fSmrg FETCH_OP_SET_GRADIENTS_V; 796501e04c3fSmrg t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 796601e04c3fSmrg t->sampler_index_mode = sampler_index_mode; 796701e04c3fSmrg t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS; 796801e04c3fSmrg t->resource_index_mode = sampler_index_mode; 796901e04c3fSmrg 797001e04c3fSmrg t->src_gpr = (i == 1) ? 
temp_h : temp_v; 797101e04c3fSmrg t->src_sel_x = 0; 797201e04c3fSmrg t->src_sel_y = 1; 797301e04c3fSmrg t->src_sel_z = 2; 797401e04c3fSmrg t->src_sel_w = 3; 797501e04c3fSmrg 797601e04c3fSmrg t->dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 797701e04c3fSmrg t->dst_sel_x = t->dst_sel_y = t->dst_sel_z = t->dst_sel_w = 7; 797801e04c3fSmrg if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 797901e04c3fSmrg t->coord_type_x = 1; 798001e04c3fSmrg t->coord_type_y = 1; 798101e04c3fSmrg t->coord_type_z = 1; 798201e04c3fSmrg t->coord_type_w = 1; 798301e04c3fSmrg } 798401e04c3fSmrg } 798501e04c3fSmrg } 798601e04c3fSmrg 798701e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 798801e04c3fSmrg /* Gather4 should follow the same rules as bilinear filtering, but the hardware 798901e04c3fSmrg * incorrectly forces nearest filtering if the texture format is integer. 799001e04c3fSmrg * The only effect it has on Gather4, which always returns 4 texels for 799101e04c3fSmrg * bilinear filtering, is that the final coordinates are off by 0.5 of 799201e04c3fSmrg * the texel size. 799301e04c3fSmrg * 799401e04c3fSmrg * The workaround is to subtract 0.5 from the unnormalized coordinates, 799501e04c3fSmrg * or (0.5 / size) from the normalized coordinates. 799601e04c3fSmrg */ 799701e04c3fSmrg if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT || 799801e04c3fSmrg inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) { 799901e04c3fSmrg int treg = r600_get_temp(ctx); 800001e04c3fSmrg 800101e04c3fSmrg /* mov array and comparison oordinate to temp_reg if needed */ 800201e04c3fSmrg if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 800301e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 800401e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) && !src_loaded) { 800501e04c3fSmrg int end = inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ? 
3 : 2; 800601e04c3fSmrg for (i = 2; i <= end; i++) { 800701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 800801e04c3fSmrg alu.op = ALU_OP1_MOV; 800901e04c3fSmrg alu.dst.sel = ctx->temp_reg; 801001e04c3fSmrg alu.dst.chan = i; 801101e04c3fSmrg alu.dst.write = 1; 801201e04c3fSmrg alu.last = (i == end); 801301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 801401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 801501e04c3fSmrg if (r) 801601e04c3fSmrg return r; 801701e04c3fSmrg } 801801e04c3fSmrg } 801901e04c3fSmrg 802001e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_RECT || 802101e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) { 802201e04c3fSmrg for (i = 0; i < 2; i++) { 802301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 802401e04c3fSmrg alu.op = ALU_OP2_ADD; 802501e04c3fSmrg alu.dst.sel = ctx->temp_reg; 802601e04c3fSmrg alu.dst.chan = i; 802701e04c3fSmrg alu.dst.write = 1; 802801e04c3fSmrg alu.last = i == 1; 802901e04c3fSmrg if (src_loaded) { 803001e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 803101e04c3fSmrg alu.src[0].chan = i; 803201e04c3fSmrg } else 803301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 803401e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0_5; 803501e04c3fSmrg alu.src[1].neg = 1; 803601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 803701e04c3fSmrg if (r) 803801e04c3fSmrg return r; 803901e04c3fSmrg } 804001e04c3fSmrg } else { 804101e04c3fSmrg /* execute a TXQ */ 804201e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 804301e04c3fSmrg tex.op = FETCH_OP_GET_TEXTURE_RESINFO; 804401e04c3fSmrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 804501e04c3fSmrg tex.sampler_index_mode = sampler_index_mode; 804601e04c3fSmrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 804701e04c3fSmrg tex.resource_index_mode = sampler_index_mode; 804801e04c3fSmrg tex.dst_gpr = treg; 804901e04c3fSmrg tex.src_sel_x = 4; 805001e04c3fSmrg tex.src_sel_y = 4; 
805101e04c3fSmrg tex.src_sel_z = 4; 805201e04c3fSmrg tex.src_sel_w = 4; 805301e04c3fSmrg tex.dst_sel_x = 0; 805401e04c3fSmrg tex.dst_sel_y = 1; 805501e04c3fSmrg tex.dst_sel_z = 7; 805601e04c3fSmrg tex.dst_sel_w = 7; 805701e04c3fSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 805801e04c3fSmrg if (r) 805901e04c3fSmrg return r; 806001e04c3fSmrg 806101e04c3fSmrg /* coord.xy = -0.5 * (1.0/int_to_flt(size)) + coord.xy */ 806201e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 806301e04c3fSmrg /* */ 806401e04c3fSmrg for (i = 0; i < 2; i++) { 806501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 806601e04c3fSmrg alu.op = ALU_OP1_INT_TO_FLT; 806701e04c3fSmrg alu.dst.sel = treg; 806801e04c3fSmrg alu.dst.chan = i; 806901e04c3fSmrg alu.dst.write = 1; 807001e04c3fSmrg alu.src[0].sel = treg; 807101e04c3fSmrg alu.src[0].chan = i; 807201e04c3fSmrg alu.last = (i == 1) ? 1 : 0; 807301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 807401e04c3fSmrg if (r) 807501e04c3fSmrg return r; 807601e04c3fSmrg } 807701e04c3fSmrg for (j = 0; j < 2; j++) { 807801e04c3fSmrg for (i = 0; i < 3; i++) { 807901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 808001e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 808101e04c3fSmrg alu.src[0].sel = treg; 808201e04c3fSmrg alu.src[0].chan = j; 808301e04c3fSmrg alu.dst.sel = treg; 808401e04c3fSmrg alu.dst.chan = i; 808501e04c3fSmrg if (i == 2) 808601e04c3fSmrg alu.last = 1; 808701e04c3fSmrg if (i == j) 808801e04c3fSmrg alu.dst.write = 1; 808901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 809001e04c3fSmrg if (r) 809101e04c3fSmrg return r; 809201e04c3fSmrg } 809301e04c3fSmrg } 809401e04c3fSmrg } else { 809501e04c3fSmrg for (i = 0; i < 2; i++) { 809601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 809701e04c3fSmrg alu.op = ALU_OP1_INT_TO_FLT; 809801e04c3fSmrg alu.dst.sel = treg; 809901e04c3fSmrg alu.dst.chan = i; 810001e04c3fSmrg alu.dst.write = 1; 810101e04c3fSmrg alu.src[0].sel = treg; 810201e04c3fSmrg alu.src[0].chan 
= i; 810301e04c3fSmrg alu.last = 1; 810401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 810501e04c3fSmrg if (r) 810601e04c3fSmrg return r; 810701e04c3fSmrg } 810801e04c3fSmrg for (i = 0; i < 2; i++) { 810901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 811001e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 811101e04c3fSmrg alu.src[0].sel = treg; 811201e04c3fSmrg alu.src[0].chan = i; 811301e04c3fSmrg alu.dst.sel = treg; 811401e04c3fSmrg alu.dst.chan = i; 811501e04c3fSmrg alu.last = 1; 811601e04c3fSmrg alu.dst.write = 1; 811701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 811801e04c3fSmrg if (r) 811901e04c3fSmrg return r; 812001e04c3fSmrg } 812101e04c3fSmrg } 812201e04c3fSmrg for (i = 0; i < 2; i++) { 812301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 812401e04c3fSmrg alu.op = ALU_OP3_MULADD; 812501e04c3fSmrg alu.is_op3 = 1; 812601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 812701e04c3fSmrg alu.dst.chan = i; 812801e04c3fSmrg alu.dst.write = 1; 812901e04c3fSmrg alu.last = i == 1; 813001e04c3fSmrg alu.src[0].sel = treg; 813101e04c3fSmrg alu.src[0].chan = i; 813201e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0_5; 813301e04c3fSmrg alu.src[1].neg = 1; 813401e04c3fSmrg if (src_loaded) { 813501e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 813601e04c3fSmrg alu.src[2].chan = i; 813701e04c3fSmrg } else 813801e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 813901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 814001e04c3fSmrg if (r) 814101e04c3fSmrg return r; 814201e04c3fSmrg } 814301e04c3fSmrg } 814401e04c3fSmrg src_loaded = TRUE; 814501e04c3fSmrg src_gpr = ctx->temp_reg; 814601e04c3fSmrg } 814701e04c3fSmrg } 814801e04c3fSmrg 814901e04c3fSmrg if (src_requires_loading && !src_loaded) { 815001e04c3fSmrg for (i = 0; i < 4; i++) { 815101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 815201e04c3fSmrg alu.op = ALU_OP1_MOV; 815301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 815401e04c3fSmrg alu.dst.sel = 
ctx->temp_reg; 815501e04c3fSmrg alu.dst.chan = i; 815601e04c3fSmrg if (i == 3) 815701e04c3fSmrg alu.last = 1; 815801e04c3fSmrg alu.dst.write = 1; 815901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 816001e04c3fSmrg if (r) 816101e04c3fSmrg return r; 816201e04c3fSmrg } 816301e04c3fSmrg src_loaded = TRUE; 816401e04c3fSmrg src_gpr = ctx->temp_reg; 816501e04c3fSmrg } 816601e04c3fSmrg 816701e04c3fSmrg /* get offset values */ 816801e04c3fSmrg if (inst->Texture.NumOffsets) { 816901e04c3fSmrg assert(inst->Texture.NumOffsets == 1); 817001e04c3fSmrg 817101e04c3fSmrg /* The texture offset feature doesn't work with the TXF instruction 817201e04c3fSmrg * and must be emulated by adding the offset to the texture coordinates. */ 817301e04c3fSmrg if (txf_add_offsets) { 817401e04c3fSmrg const struct tgsi_texture_offset *off = inst->TexOffsets; 817501e04c3fSmrg 817601e04c3fSmrg switch (inst->Texture.Texture) { 817701e04c3fSmrg case TGSI_TEXTURE_3D: 817801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 817901e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 818001e04c3fSmrg alu.src[0].sel = src_gpr; 818101e04c3fSmrg alu.src[0].chan = 2; 818201e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 818301e04c3fSmrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 818401e04c3fSmrg alu.dst.sel = src_gpr; 818501e04c3fSmrg alu.dst.chan = 2; 818601e04c3fSmrg alu.dst.write = 1; 818701e04c3fSmrg alu.last = 1; 818801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 818901e04c3fSmrg if (r) 819001e04c3fSmrg return r; 81917ec681f3Smrg FALLTHROUGH; 819201e04c3fSmrg 819301e04c3fSmrg case TGSI_TEXTURE_2D: 819401e04c3fSmrg case TGSI_TEXTURE_SHADOW2D: 819501e04c3fSmrg case TGSI_TEXTURE_RECT: 819601e04c3fSmrg case TGSI_TEXTURE_SHADOWRECT: 819701e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY: 819801e04c3fSmrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 819901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 820001e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 820101e04c3fSmrg alu.src[0].sel = 
src_gpr; 820201e04c3fSmrg alu.src[0].chan = 1; 820301e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 820401e04c3fSmrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 820501e04c3fSmrg alu.dst.sel = src_gpr; 820601e04c3fSmrg alu.dst.chan = 1; 820701e04c3fSmrg alu.dst.write = 1; 820801e04c3fSmrg alu.last = 1; 820901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 821001e04c3fSmrg if (r) 821101e04c3fSmrg return r; 82127ec681f3Smrg FALLTHROUGH; 821301e04c3fSmrg 821401e04c3fSmrg case TGSI_TEXTURE_1D: 821501e04c3fSmrg case TGSI_TEXTURE_SHADOW1D: 821601e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 821701e04c3fSmrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 821801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 821901e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 822001e04c3fSmrg alu.src[0].sel = src_gpr; 822101e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 822201e04c3fSmrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 822301e04c3fSmrg alu.dst.sel = src_gpr; 822401e04c3fSmrg alu.dst.write = 1; 822501e04c3fSmrg alu.last = 1; 822601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 822701e04c3fSmrg if (r) 822801e04c3fSmrg return r; 822901e04c3fSmrg break; 823001e04c3fSmrg /* texture offsets do not apply to other texture targets */ 823101e04c3fSmrg } 823201e04c3fSmrg } else { 823301e04c3fSmrg switch (inst->Texture.Texture) { 823401e04c3fSmrg case TGSI_TEXTURE_3D: 823501e04c3fSmrg offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 82367ec681f3Smrg FALLTHROUGH; 823701e04c3fSmrg case TGSI_TEXTURE_2D: 823801e04c3fSmrg case TGSI_TEXTURE_SHADOW2D: 823901e04c3fSmrg case TGSI_TEXTURE_RECT: 824001e04c3fSmrg case TGSI_TEXTURE_SHADOWRECT: 824101e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY: 824201e04c3fSmrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 824301e04c3fSmrg offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 82447ec681f3Smrg FALLTHROUGH; 824501e04c3fSmrg case 
TGSI_TEXTURE_1D: 824601e04c3fSmrg case TGSI_TEXTURE_SHADOW1D: 824701e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 824801e04c3fSmrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 824901e04c3fSmrg offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 825001e04c3fSmrg } 825101e04c3fSmrg } 825201e04c3fSmrg } 825301e04c3fSmrg 825401e04c3fSmrg /* Obtain the sample index for reading a compressed MSAA color texture. 825501e04c3fSmrg * To read the FMASK, we use the ldfptr instruction, which tells us 825601e04c3fSmrg * where the samples are stored. 825701e04c3fSmrg * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 825801e04c3fSmrg * which is the identity mapping. Each nibble says which physical sample 825901e04c3fSmrg * should be fetched to get that sample. 826001e04c3fSmrg * 826101e04c3fSmrg * Assume src.z contains the sample index. It should be modified like this: 826201e04c3fSmrg * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 826301e04c3fSmrg * Then fetch the texel with src. 
826401e04c3fSmrg */ 826501e04c3fSmrg if (read_compressed_msaa) { 826601e04c3fSmrg unsigned sample_chan = 3; 826701e04c3fSmrg unsigned temp = r600_get_temp(ctx); 826801e04c3fSmrg assert(src_loaded); 826901e04c3fSmrg 827001e04c3fSmrg /* temp.w = ldfptr() */ 827101e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 827201e04c3fSmrg tex.op = FETCH_OP_LD; 827301e04c3fSmrg tex.inst_mod = 1; /* to indicate this is ldfptr */ 827401e04c3fSmrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 827501e04c3fSmrg tex.sampler_index_mode = sampler_index_mode; 827601e04c3fSmrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 827701e04c3fSmrg tex.resource_index_mode = sampler_index_mode; 827801e04c3fSmrg tex.src_gpr = src_gpr; 827901e04c3fSmrg tex.dst_gpr = temp; 828001e04c3fSmrg tex.dst_sel_x = 7; /* mask out these components */ 828101e04c3fSmrg tex.dst_sel_y = 7; 828201e04c3fSmrg tex.dst_sel_z = 7; 828301e04c3fSmrg tex.dst_sel_w = 0; /* store X */ 828401e04c3fSmrg tex.src_sel_x = 0; 828501e04c3fSmrg tex.src_sel_y = 1; 828601e04c3fSmrg tex.src_sel_z = 2; 828701e04c3fSmrg tex.src_sel_w = 3; 828801e04c3fSmrg tex.offset_x = offset_x; 828901e04c3fSmrg tex.offset_y = offset_y; 829001e04c3fSmrg tex.offset_z = offset_z; 829101e04c3fSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 829201e04c3fSmrg if (r) 829301e04c3fSmrg return r; 829401e04c3fSmrg 829501e04c3fSmrg /* temp.x = sample_index*4 */ 829601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 829701e04c3fSmrg alu.op = ALU_OP2_MULLO_INT; 829801e04c3fSmrg alu.src[0].sel = src_gpr; 829901e04c3fSmrg alu.src[0].chan = sample_chan; 830001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 830101e04c3fSmrg alu.src[1].value = 4; 830201e04c3fSmrg alu.dst.sel = temp; 830301e04c3fSmrg alu.dst.chan = 0; 830401e04c3fSmrg alu.dst.write = 1; 830501e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 830601e04c3fSmrg if (r) 830701e04c3fSmrg return r; 830801e04c3fSmrg 830901e04c3fSmrg /* sample_index = temp.w >> 
temp.x */ 831001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 831101e04c3fSmrg alu.op = ALU_OP2_LSHR_INT; 831201e04c3fSmrg alu.src[0].sel = temp; 831301e04c3fSmrg alu.src[0].chan = 3; 831401e04c3fSmrg alu.src[1].sel = temp; 831501e04c3fSmrg alu.src[1].chan = 0; 831601e04c3fSmrg alu.dst.sel = src_gpr; 831701e04c3fSmrg alu.dst.chan = sample_chan; 831801e04c3fSmrg alu.dst.write = 1; 831901e04c3fSmrg alu.last = 1; 832001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 832101e04c3fSmrg if (r) 832201e04c3fSmrg return r; 832301e04c3fSmrg 832401e04c3fSmrg /* sample_index & 0xF */ 832501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 832601e04c3fSmrg alu.op = ALU_OP2_AND_INT; 832701e04c3fSmrg alu.src[0].sel = src_gpr; 832801e04c3fSmrg alu.src[0].chan = sample_chan; 832901e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 833001e04c3fSmrg alu.src[1].value = 0xF; 833101e04c3fSmrg alu.dst.sel = src_gpr; 833201e04c3fSmrg alu.dst.chan = sample_chan; 833301e04c3fSmrg alu.dst.write = 1; 833401e04c3fSmrg alu.last = 1; 833501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 833601e04c3fSmrg if (r) 833701e04c3fSmrg return r; 833801e04c3fSmrg#if 0 833901e04c3fSmrg /* visualize the FMASK */ 834001e04c3fSmrg for (i = 0; i < 4; i++) { 834101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 834201e04c3fSmrg alu.op = ALU_OP1_INT_TO_FLT; 834301e04c3fSmrg alu.src[0].sel = src_gpr; 834401e04c3fSmrg alu.src[0].chan = sample_chan; 834501e04c3fSmrg alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 834601e04c3fSmrg alu.dst.chan = i; 834701e04c3fSmrg alu.dst.write = 1; 834801e04c3fSmrg alu.last = 1; 834901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 835001e04c3fSmrg if (r) 835101e04c3fSmrg return r; 835201e04c3fSmrg } 835301e04c3fSmrg return 0; 835401e04c3fSmrg#endif 835501e04c3fSmrg } 835601e04c3fSmrg 835701e04c3fSmrg /* does this shader want a num layers from TXQ for a cube array? 
*/ 835801e04c3fSmrg if (has_txq_cube_array_z) { 835901e04c3fSmrg int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 836001e04c3fSmrg 836101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 836201e04c3fSmrg alu.op = ALU_OP1_MOV; 836301e04c3fSmrg 836401e04c3fSmrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 836501e04c3fSmrg if (ctx->bc->chip_class >= EVERGREEN) { 836601e04c3fSmrg /* with eg each dword is number of cubes */ 836701e04c3fSmrg alu.src[0].sel += id / 4; 836801e04c3fSmrg alu.src[0].chan = id % 4; 836901e04c3fSmrg } else { 837001e04c3fSmrg /* r600 we have them at channel 2 of the second dword */ 837101e04c3fSmrg alu.src[0].sel += (id * 2) + 1; 837201e04c3fSmrg alu.src[0].chan = 2; 837301e04c3fSmrg } 837401e04c3fSmrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 837501e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 837601e04c3fSmrg alu.last = 1; 837701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 837801e04c3fSmrg if (r) 837901e04c3fSmrg return r; 838001e04c3fSmrg /* disable writemask from texture instruction */ 838101e04c3fSmrg inst->Dst[0].Register.WriteMask &= ~4; 838201e04c3fSmrg } 838301e04c3fSmrg 838401e04c3fSmrg opcode = ctx->inst_info->op; 838501e04c3fSmrg if (opcode == FETCH_OP_GATHER4 && 838601e04c3fSmrg inst->TexOffsets[0].File != TGSI_FILE_NULL && 838701e04c3fSmrg inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 838801e04c3fSmrg struct r600_bytecode_tex *t; 838901e04c3fSmrg opcode = FETCH_OP_GATHER4_O; 839001e04c3fSmrg 839101e04c3fSmrg /* GATHER4_O/GATHER4_C_O use offset values loaded by 839201e04c3fSmrg SET_TEXTURE_OFFSETS instruction. The immediate offset values 839301e04c3fSmrg encoded in the instruction are ignored. 
*/ 839401e04c3fSmrg t = &grad_offs[n_grad_offs++]; 839501e04c3fSmrg memset(t, 0, sizeof(struct r600_bytecode_tex)); 839601e04c3fSmrg t->op = FETCH_OP_SET_TEXTURE_OFFSETS; 839701e04c3fSmrg t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 839801e04c3fSmrg t->sampler_index_mode = sampler_index_mode; 839901e04c3fSmrg t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS; 840001e04c3fSmrg t->resource_index_mode = sampler_index_mode; 840101e04c3fSmrg 840201e04c3fSmrg t->src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 840301e04c3fSmrg t->src_sel_x = inst->TexOffsets[0].SwizzleX; 840401e04c3fSmrg t->src_sel_y = inst->TexOffsets[0].SwizzleY; 840501e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 840601e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 840701e04c3fSmrg /* make sure array index selector is 0, this is just a safety 840801e04c3fSmrg * precausion because TGSI seems to emit something strange here */ 840901e04c3fSmrg t->src_sel_z = 4; 841001e04c3fSmrg else 841101e04c3fSmrg t->src_sel_z = inst->TexOffsets[0].SwizzleZ; 841201e04c3fSmrg 841301e04c3fSmrg t->src_sel_w = 4; 841401e04c3fSmrg 841501e04c3fSmrg t->dst_sel_x = 7; 841601e04c3fSmrg t->dst_sel_y = 7; 841701e04c3fSmrg t->dst_sel_z = 7; 841801e04c3fSmrg t->dst_sel_w = 7; 841901e04c3fSmrg } 842001e04c3fSmrg 842101e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 842201e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 842301e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 842401e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 842501e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 842601e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 842701e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 842801e04c3fSmrg switch (opcode) { 842901e04c3fSmrg case FETCH_OP_SAMPLE: 843001e04c3fSmrg opcode = FETCH_OP_SAMPLE_C; 843101e04c3fSmrg break; 
843201e04c3fSmrg case FETCH_OP_SAMPLE_L: 843301e04c3fSmrg opcode = FETCH_OP_SAMPLE_C_L; 843401e04c3fSmrg break; 843501e04c3fSmrg case FETCH_OP_SAMPLE_LB: 843601e04c3fSmrg opcode = FETCH_OP_SAMPLE_C_LB; 843701e04c3fSmrg break; 843801e04c3fSmrg case FETCH_OP_SAMPLE_G: 843901e04c3fSmrg opcode = FETCH_OP_SAMPLE_C_G; 844001e04c3fSmrg break; 844101e04c3fSmrg /* Texture gather variants */ 844201e04c3fSmrg case FETCH_OP_GATHER4: 844301e04c3fSmrg opcode = FETCH_OP_GATHER4_C; 844401e04c3fSmrg break; 844501e04c3fSmrg case FETCH_OP_GATHER4_O: 844601e04c3fSmrg opcode = FETCH_OP_GATHER4_C_O; 844701e04c3fSmrg break; 844801e04c3fSmrg } 844901e04c3fSmrg } 845001e04c3fSmrg 845101e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 845201e04c3fSmrg tex.op = opcode; 845301e04c3fSmrg 845401e04c3fSmrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 845501e04c3fSmrg tex.sampler_index_mode = sampler_index_mode; 845601e04c3fSmrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 845701e04c3fSmrg tex.resource_index_mode = sampler_index_mode; 845801e04c3fSmrg tex.src_gpr = src_gpr; 845901e04c3fSmrg tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 846001e04c3fSmrg 846101e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 846201e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 846301e04c3fSmrg tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 846401e04c3fSmrg } 846501e04c3fSmrg 846601e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 846701e04c3fSmrg int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 846801e04c3fSmrg tex.inst_mod = texture_component_select; 846901e04c3fSmrg 847001e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 847101e04c3fSmrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 847201e04c3fSmrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 
1 : 7; 847301e04c3fSmrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 847401e04c3fSmrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 847501e04c3fSmrg } else { 847601e04c3fSmrg /* GATHER4 result order is different from TGSI TG4 */ 847701e04c3fSmrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 1 : 7; 847801e04c3fSmrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 2 : 7; 847901e04c3fSmrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 0 : 7; 848001e04c3fSmrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 848101e04c3fSmrg } 848201e04c3fSmrg } 848301e04c3fSmrg else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 848401e04c3fSmrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 848501e04c3fSmrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 848601e04c3fSmrg tex.dst_sel_z = 7; 848701e04c3fSmrg tex.dst_sel_w = 7; 848801e04c3fSmrg } 848901e04c3fSmrg else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 849001e04c3fSmrg tex.dst_sel_x = 3; 849101e04c3fSmrg tex.dst_sel_y = 7; 849201e04c3fSmrg tex.dst_sel_z = 7; 849301e04c3fSmrg tex.dst_sel_w = 7; 849401e04c3fSmrg } 849501e04c3fSmrg else { 849601e04c3fSmrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 849701e04c3fSmrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 849801e04c3fSmrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 849901e04c3fSmrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 850001e04c3fSmrg } 850101e04c3fSmrg 850201e04c3fSmrg 850301e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 850401e04c3fSmrg tex.src_sel_x = 4; 850501e04c3fSmrg tex.src_sel_y = 4; 850601e04c3fSmrg tex.src_sel_z = 4; 850701e04c3fSmrg tex.src_sel_w = 4; 850801e04c3fSmrg } else if (src_loaded) { 850901e04c3fSmrg tex.src_sel_x = 0; 851001e04c3fSmrg tex.src_sel_y = 1; 851101e04c3fSmrg tex.src_sel_z = 2; 851201e04c3fSmrg tex.src_sel_w = 3; 851301e04c3fSmrg } else { 851401e04c3fSmrg tex.src_sel_x = ctx->src[0].swizzle[0]; 851501e04c3fSmrg tex.src_sel_y = ctx->src[0].swizzle[1]; 851601e04c3fSmrg tex.src_sel_z = ctx->src[0].swizzle[2]; 851701e04c3fSmrg tex.src_sel_w = ctx->src[0].swizzle[3]; 851801e04c3fSmrg tex.src_rel = ctx->src[0].rel; 851901e04c3fSmrg } 852001e04c3fSmrg 852101e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 852201e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 852301e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 852401e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 852501e04c3fSmrg tex.src_sel_x = 1; 852601e04c3fSmrg tex.src_sel_y = 0; 852701e04c3fSmrg tex.src_sel_z = 3; 852801e04c3fSmrg tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 852901e04c3fSmrg } 853001e04c3fSmrg 853101e04c3fSmrg if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 853201e04c3fSmrg inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 853301e04c3fSmrg tex.coord_type_x = 1; 853401e04c3fSmrg tex.coord_type_y = 1; 853501e04c3fSmrg } 853601e04c3fSmrg tex.coord_type_z = 1; 853701e04c3fSmrg tex.coord_type_w = 1; 853801e04c3fSmrg 853901e04c3fSmrg tex.offset_x = offset_x; 854001e04c3fSmrg tex.offset_y = offset_y; 854101e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 854201e04c3fSmrg (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 854301e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 854401e04c3fSmrg tex.offset_z = 0; 854501e04c3fSmrg } 854601e04c3fSmrg else { 
854701e04c3fSmrg tex.offset_z = offset_z; 854801e04c3fSmrg } 854901e04c3fSmrg 855001e04c3fSmrg /* Put the depth for comparison in W. 855101e04c3fSmrg * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 855201e04c3fSmrg * Some instructions expect the depth in Z. */ 855301e04c3fSmrg if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 855401e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 855501e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 855601e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 855701e04c3fSmrg opcode != FETCH_OP_SAMPLE_C_L && 855801e04c3fSmrg opcode != FETCH_OP_SAMPLE_C_LB) { 855901e04c3fSmrg tex.src_sel_w = tex.src_sel_z; 856001e04c3fSmrg } 856101e04c3fSmrg 856201e04c3fSmrg if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 856301e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 856401e04c3fSmrg if (opcode == FETCH_OP_SAMPLE_C_L || 856501e04c3fSmrg opcode == FETCH_OP_SAMPLE_C_LB) { 856601e04c3fSmrg /* the array index is read from Y */ 856701e04c3fSmrg tex.coord_type_y = 0; 856801e04c3fSmrg array_index_offset_channel = tex.src_sel_y; 856901e04c3fSmrg } else { 857001e04c3fSmrg /* the array index is read from Z */ 857101e04c3fSmrg tex.coord_type_z = 0; 857201e04c3fSmrg tex.src_sel_z = tex.src_sel_y; 857301e04c3fSmrg array_index_offset_channel = tex.src_sel_z; 857401e04c3fSmrg } 857501e04c3fSmrg } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 857601e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 857701e04c3fSmrg tex.coord_type_z = 0; 857801e04c3fSmrg array_index_offset_channel = tex.src_sel_z; 857901e04c3fSmrg } else if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 858001e04c3fSmrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 858101e04c3fSmrg (ctx->bc->chip_class >= EVERGREEN)) 858201e04c3fSmrg /* the array index is read from Z, coordinate will be corrected elsewhere */ 858301e04c3fSmrg tex.coord_type_z = 0; 858401e04c3fSmrg 
858501e04c3fSmrg /* We have array access to 1D or 2D ARRAY, the coordinates are not int -> 858601e04c3fSmrg * evaluate the array index */ 858701e04c3fSmrg if (array_index_offset_channel >= 0 && 858801e04c3fSmrg opcode != FETCH_OP_LD && 858901e04c3fSmrg opcode != FETCH_OP_GET_TEXTURE_RESINFO) { 859001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 859101e04c3fSmrg alu.src[0].sel = tex.src_gpr; 859201e04c3fSmrg alu.src[0].chan = array_index_offset_channel; 859301e04c3fSmrg alu.src[0].rel = tex.src_rel; 859401e04c3fSmrg alu.op = ALU_OP1_RNDNE; 859501e04c3fSmrg alu.dst.sel = tex.src_gpr; 859601e04c3fSmrg alu.dst.chan = array_index_offset_channel; 859701e04c3fSmrg alu.dst.rel = tex.src_rel; 859801e04c3fSmrg alu.dst.write = 1; 859901e04c3fSmrg alu.last = 1; 860001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 860101e04c3fSmrg if (r) 860201e04c3fSmrg return r; 860301e04c3fSmrg } 860401e04c3fSmrg 860501e04c3fSmrg /* mask unused source components */ 860601e04c3fSmrg if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 860701e04c3fSmrg switch (inst->Texture.Texture) { 860801e04c3fSmrg case TGSI_TEXTURE_2D: 860901e04c3fSmrg case TGSI_TEXTURE_RECT: 861001e04c3fSmrg tex.src_sel_z = 7; 861101e04c3fSmrg tex.src_sel_w = 7; 861201e04c3fSmrg break; 861301e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 861401e04c3fSmrg tex.src_sel_y = 7; 861501e04c3fSmrg tex.src_sel_w = 7; 861601e04c3fSmrg break; 861701e04c3fSmrg case TGSI_TEXTURE_1D: 861801e04c3fSmrg tex.src_sel_y = 7; 861901e04c3fSmrg tex.src_sel_z = 7; 862001e04c3fSmrg tex.src_sel_w = 7; 862101e04c3fSmrg break; 862201e04c3fSmrg } 862301e04c3fSmrg } 862401e04c3fSmrg 862501e04c3fSmrg /* Emit set gradient and offset instructions. 
*/ 862601e04c3fSmrg for (i = 0; i < n_grad_offs; ++i) { 862701e04c3fSmrg r = r600_bytecode_add_tex(ctx->bc, &grad_offs[i]); 862801e04c3fSmrg if (r) 862901e04c3fSmrg return r; 863001e04c3fSmrg } 863101e04c3fSmrg 863201e04c3fSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 863301e04c3fSmrg if (r) 863401e04c3fSmrg return r; 863501e04c3fSmrg 863601e04c3fSmrg /* add shadow ambient support - gallium doesn't do it yet */ 863701e04c3fSmrg return 0; 863801e04c3fSmrg} 863901e04c3fSmrg 864001e04c3fSmrgstatic int find_hw_atomic_counter(struct r600_shader_ctx *ctx, 864101e04c3fSmrg struct tgsi_full_src_register *src) 864201e04c3fSmrg{ 864301e04c3fSmrg unsigned i; 864401e04c3fSmrg 864501e04c3fSmrg if (src->Register.Indirect) { 864601e04c3fSmrg for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) { 864701e04c3fSmrg if (src->Indirect.ArrayID == ctx->shader->atomics[i].array_id) 864801e04c3fSmrg return ctx->shader->atomics[i].hw_idx; 864901e04c3fSmrg } 865001e04c3fSmrg } else { 865101e04c3fSmrg uint32_t index = src->Register.Index; 865201e04c3fSmrg for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) { 865301e04c3fSmrg if (ctx->shader->atomics[i].buffer_id != (unsigned)src->Dimension.Index) 865401e04c3fSmrg continue; 865501e04c3fSmrg if (index > ctx->shader->atomics[i].end) 865601e04c3fSmrg continue; 865701e04c3fSmrg if (index < ctx->shader->atomics[i].start) 865801e04c3fSmrg continue; 865901e04c3fSmrg uint32_t offset = (index - ctx->shader->atomics[i].start); 866001e04c3fSmrg return ctx->shader->atomics[i].hw_idx + offset; 866101e04c3fSmrg } 866201e04c3fSmrg } 866301e04c3fSmrg assert(0); 866401e04c3fSmrg return -1; 866501e04c3fSmrg} 866601e04c3fSmrg 866701e04c3fSmrgstatic int tgsi_set_gds_temp(struct r600_shader_ctx *ctx, 866801e04c3fSmrg int *uav_id_p, int *uav_index_mode_p) 866901e04c3fSmrg{ 867001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 867101e04c3fSmrg int uav_id, uav_index_mode = 0; 867201e04c3fSmrg int r; 867301e04c3fSmrg bool is_cm 
= (ctx->bc->chip_class == CAYMAN); 867401e04c3fSmrg 867501e04c3fSmrg uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]); 867601e04c3fSmrg 867701e04c3fSmrg if (inst->Src[0].Register.Indirect) { 867801e04c3fSmrg if (is_cm) { 867901e04c3fSmrg struct r600_bytecode_alu alu; 868001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 868101e04c3fSmrg alu.op = ALU_OP2_LSHL_INT; 868201e04c3fSmrg alu.src[0].sel = get_address_file_reg(ctx, inst->Src[0].Indirect.Index); 868301e04c3fSmrg alu.src[0].chan = 0; 868401e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 868501e04c3fSmrg alu.src[1].value = 2; 868601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 868701e04c3fSmrg alu.dst.chan = 0; 868801e04c3fSmrg alu.dst.write = 1; 868901e04c3fSmrg alu.last = 1; 869001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 869101e04c3fSmrg if (r) 869201e04c3fSmrg return r; 869301e04c3fSmrg 869401e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 869501e04c3fSmrg ctx->temp_reg, 0, 869601e04c3fSmrg ctx->temp_reg, 0, 869701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, uav_id * 4); 869801e04c3fSmrg if (r) 869901e04c3fSmrg return r; 870001e04c3fSmrg } else 870101e04c3fSmrg uav_index_mode = 2; 870201e04c3fSmrg } else if (is_cm) { 870301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 870401e04c3fSmrg ctx->temp_reg, 0, 870501e04c3fSmrg V_SQ_ALU_SRC_LITERAL, uav_id * 4, 870601e04c3fSmrg 0, 0); 870701e04c3fSmrg if (r) 870801e04c3fSmrg return r; 870901e04c3fSmrg } 871001e04c3fSmrg *uav_id_p = uav_id; 871101e04c3fSmrg *uav_index_mode_p = uav_index_mode; 871201e04c3fSmrg return 0; 871301e04c3fSmrg} 871401e04c3fSmrg 871501e04c3fSmrgstatic int tgsi_load_gds(struct r600_shader_ctx *ctx) 871601e04c3fSmrg{ 871701e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 871801e04c3fSmrg int r; 871901e04c3fSmrg struct r600_bytecode_gds gds; 872001e04c3fSmrg int uav_id = 0; 872101e04c3fSmrg int uav_index_mode = 0; 872201e04c3fSmrg bool is_cm = (ctx->bc->chip_class == CAYMAN); 
872301e04c3fSmrg 872401e04c3fSmrg r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode); 872501e04c3fSmrg if (r) 872601e04c3fSmrg return r; 872701e04c3fSmrg 872801e04c3fSmrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 872901e04c3fSmrg gds.op = FETCH_OP_GDS_READ_RET; 873001e04c3fSmrg gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 873101e04c3fSmrg gds.uav_id = is_cm ? 0 : uav_id; 873201e04c3fSmrg gds.uav_index_mode = is_cm ? 0 : uav_index_mode; 873301e04c3fSmrg gds.src_gpr = ctx->temp_reg; 873401e04c3fSmrg gds.src_sel_x = (is_cm) ? 0 : 4; 873501e04c3fSmrg gds.src_sel_y = 4; 873601e04c3fSmrg gds.src_sel_z = 4; 873701e04c3fSmrg gds.dst_sel_x = 0; 873801e04c3fSmrg gds.dst_sel_y = 7; 873901e04c3fSmrg gds.dst_sel_z = 7; 874001e04c3fSmrg gds.dst_sel_w = 7; 874101e04c3fSmrg gds.src_gpr2 = 0; 874201e04c3fSmrg gds.alloc_consume = !is_cm; 874301e04c3fSmrg r = r600_bytecode_add_gds(ctx->bc, &gds); 874401e04c3fSmrg if (r) 874501e04c3fSmrg return r; 874601e04c3fSmrg 874701e04c3fSmrg ctx->bc->cf_last->vpm = 1; 874801e04c3fSmrg return 0; 874901e04c3fSmrg} 875001e04c3fSmrg 875101e04c3fSmrg/* this fixes up 1D arrays properly */ 875201e04c3fSmrgstatic int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr) 875301e04c3fSmrg{ 875401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 875501e04c3fSmrg int r, i; 875601e04c3fSmrg struct r600_bytecode_alu alu; 875701e04c3fSmrg int temp_reg = r600_get_temp(ctx); 875801e04c3fSmrg 875901e04c3fSmrg for (i = 0; i < 4; i++) { 876001e04c3fSmrg bool def_val = true, write_zero = false; 876101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 876201e04c3fSmrg alu.op = ALU_OP1_MOV; 876301e04c3fSmrg alu.dst.sel = temp_reg; 876401e04c3fSmrg alu.dst.chan = i; 876501e04c3fSmrg 876601e04c3fSmrg switch (inst->Memory.Texture) { 876701e04c3fSmrg case TGSI_TEXTURE_BUFFER: 876801e04c3fSmrg case TGSI_TEXTURE_1D: 876901e04c3fSmrg if (i == 1 || i == 2 
|| i == 3) { 877001e04c3fSmrg write_zero = true; 877101e04c3fSmrg } 877201e04c3fSmrg break; 877301e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 877401e04c3fSmrg if (i == 1 || i == 3) 877501e04c3fSmrg write_zero = true; 877601e04c3fSmrg else if (i == 2) { 877701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1); 877801e04c3fSmrg def_val = false; 877901e04c3fSmrg } 878001e04c3fSmrg break; 878101e04c3fSmrg case TGSI_TEXTURE_2D: 878201e04c3fSmrg if (i == 2 || i == 3) 878301e04c3fSmrg write_zero = true; 878401e04c3fSmrg break; 878501e04c3fSmrg default: 878601e04c3fSmrg if (i == 3) 878701e04c3fSmrg write_zero = true; 878801e04c3fSmrg break; 878901e04c3fSmrg } 879001e04c3fSmrg 879101e04c3fSmrg if (write_zero) { 879201e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 879301e04c3fSmrg alu.src[0].value = 0; 879401e04c3fSmrg } else if (def_val) { 879501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i); 879601e04c3fSmrg } 879701e04c3fSmrg 879801e04c3fSmrg if (i == 3) 879901e04c3fSmrg alu.last = 1; 880001e04c3fSmrg alu.dst.write = 1; 880101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 880201e04c3fSmrg if (r) 880301e04c3fSmrg return r; 880401e04c3fSmrg } 880501e04c3fSmrg *idx_gpr = temp_reg; 880601e04c3fSmrg return 0; 880701e04c3fSmrg} 880801e04c3fSmrg 880901e04c3fSmrgstatic int load_buffer_coord(struct r600_shader_ctx *ctx, int src_idx, 881001e04c3fSmrg int temp_reg) 881101e04c3fSmrg{ 881201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 881301e04c3fSmrg int r; 881401e04c3fSmrg if (inst->Src[src_idx].Register.File == TGSI_FILE_IMMEDIATE) { 881501e04c3fSmrg int value = (ctx->literals[4 * inst->Src[src_idx].Register.Index + inst->Src[src_idx].Register.SwizzleX]); 881601e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 881701e04c3fSmrg temp_reg, 0, 881801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, value >> 2, 881901e04c3fSmrg 0, 0); 882001e04c3fSmrg if (r) 882101e04c3fSmrg return r; 882201e04c3fSmrg } else { 
882301e04c3fSmrg struct r600_bytecode_alu alu; 882401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 882501e04c3fSmrg alu.op = ALU_OP2_LSHR_INT; 882601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[src_idx], 0); 882701e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 882801e04c3fSmrg alu.src[1].value = 2; 882901e04c3fSmrg alu.dst.sel = temp_reg; 883001e04c3fSmrg alu.dst.write = 1; 883101e04c3fSmrg alu.last = 1; 883201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 883301e04c3fSmrg if (r) 883401e04c3fSmrg return r; 883501e04c3fSmrg } 883601e04c3fSmrg return 0; 883701e04c3fSmrg} 883801e04c3fSmrg 883901e04c3fSmrgstatic int tgsi_load_buffer(struct r600_shader_ctx *ctx) 884001e04c3fSmrg{ 884101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 884201e04c3fSmrg /* have to work out the offset into the RAT immediate return buffer */ 884301e04c3fSmrg struct r600_bytecode_vtx vtx; 884401e04c3fSmrg struct r600_bytecode_cf *cf; 884501e04c3fSmrg int r; 884601e04c3fSmrg int temp_reg = r600_get_temp(ctx); 884701e04c3fSmrg unsigned rat_index_mode; 884801e04c3fSmrg unsigned base; 884901e04c3fSmrg 885001e04c3fSmrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 885101e04c3fSmrg base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE]; 885201e04c3fSmrg 885301e04c3fSmrg r = load_buffer_coord(ctx, 1, temp_reg); 885401e04c3fSmrg if (r) 885501e04c3fSmrg return r; 885601e04c3fSmrg ctx->bc->cf_last->barrier = 1; 885701e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 885801e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 885901e04c3fSmrg vtx.buffer_id = inst->Src[0].Register.Index + base; 886001e04c3fSmrg vtx.buffer_index_mode = rat_index_mode; 886101e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 886201e04c3fSmrg vtx.src_gpr = temp_reg; 886301e04c3fSmrg vtx.src_sel_x = 0; 886401e04c3fSmrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 886501e04c3fSmrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 886601e04c3fSmrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 886701e04c3fSmrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 886801e04c3fSmrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; /* SEL_W */ 886901e04c3fSmrg vtx.num_format_all = 1; 887001e04c3fSmrg vtx.format_comp_all = 1; 887101e04c3fSmrg vtx.srf_mode_all = 0; 887201e04c3fSmrg 887301e04c3fSmrg if (inst->Dst[0].Register.WriteMask & 8) { 887401e04c3fSmrg vtx.data_format = FMT_32_32_32_32; 887501e04c3fSmrg vtx.use_const_fields = 0; 887601e04c3fSmrg } else if (inst->Dst[0].Register.WriteMask & 4) { 887701e04c3fSmrg vtx.data_format = FMT_32_32_32; 887801e04c3fSmrg vtx.use_const_fields = 0; 887901e04c3fSmrg } else if (inst->Dst[0].Register.WriteMask & 2) { 888001e04c3fSmrg vtx.data_format = FMT_32_32; 888101e04c3fSmrg vtx.use_const_fields = 0; 888201e04c3fSmrg } else { 888301e04c3fSmrg vtx.data_format = FMT_32; 888401e04c3fSmrg vtx.use_const_fields = 0; 888501e04c3fSmrg } 888601e04c3fSmrg 888701e04c3fSmrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 888801e04c3fSmrg if (r) 888901e04c3fSmrg return r; 889001e04c3fSmrg cf = ctx->bc->cf_last; 889101e04c3fSmrg cf->barrier = 1; 889201e04c3fSmrg return 0; 889301e04c3fSmrg} 889401e04c3fSmrg 889501e04c3fSmrgstatic int tgsi_load_rat(struct r600_shader_ctx *ctx) 889601e04c3fSmrg{ 889701e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 889801e04c3fSmrg /* have to work out the offset into the RAT immediate return buffer */ 889901e04c3fSmrg struct r600_bytecode_vtx vtx; 890001e04c3fSmrg struct r600_bytecode_cf *cf; 890101e04c3fSmrg int r; 890201e04c3fSmrg int idx_gpr; 890301e04c3fSmrg unsigned format, num_format, format_comp, endian; 890401e04c3fSmrg const struct util_format_description *desc; 890501e04c3fSmrg unsigned rat_index_mode; 890601e04c3fSmrg unsigned immed_base; 890701e04c3fSmrg 890801e04c3fSmrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 890901e04c3fSmrg 891001e04c3fSmrg immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET; 891101e04c3fSmrg r = load_index_src(ctx, 1, &idx_gpr); 891201e04c3fSmrg if (r) 891301e04c3fSmrg return r; 891401e04c3fSmrg 891501e04c3fSmrg if (rat_index_mode) 891601e04c3fSmrg egcm_load_index_reg(ctx->bc, 1, false); 891701e04c3fSmrg 891801e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 891901e04c3fSmrg cf = ctx->bc->cf_last; 892001e04c3fSmrg 892101e04c3fSmrg cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index; 892201e04c3fSmrg cf->rat.inst = V_RAT_INST_NOP_RTN; 892301e04c3fSmrg cf->rat.index_mode = rat_index_mode; 892401e04c3fSmrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 892501e04c3fSmrg cf->output.gpr = ctx->thread_id_gpr; 892601e04c3fSmrg cf->output.index_gpr = idx_gpr; 892701e04c3fSmrg cf->output.comp_mask = 0xf; 892801e04c3fSmrg cf->output.burst_count = 1; 892901e04c3fSmrg cf->vpm = 1; 893001e04c3fSmrg cf->barrier = 1; 893101e04c3fSmrg cf->mark = 1; 893201e04c3fSmrg cf->output.elem_size = 0; 893301e04c3fSmrg 893401e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 893501e04c3fSmrg cf = ctx->bc->cf_last; 893601e04c3fSmrg cf->barrier = 1; 893701e04c3fSmrg 893801e04c3fSmrg desc = util_format_description(inst->Memory.Format); 893901e04c3fSmrg r600_vertex_data_type(inst->Memory.Format, 894001e04c3fSmrg &format, &num_format, &format_comp, &endian); 894101e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 894201e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 894301e04c3fSmrg vtx.buffer_id = immed_base + inst->Src[0].Register.Index; 894401e04c3fSmrg vtx.buffer_index_mode = rat_index_mode; 894501e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 894601e04c3fSmrg vtx.src_gpr = ctx->thread_id_gpr; 894701e04c3fSmrg vtx.src_sel_x = 1; 894801e04c3fSmrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 894901e04c3fSmrg vtx.dst_sel_x = desc->swizzle[0]; 
895001e04c3fSmrg vtx.dst_sel_y = desc->swizzle[1]; 895101e04c3fSmrg vtx.dst_sel_z = desc->swizzle[2]; 895201e04c3fSmrg vtx.dst_sel_w = desc->swizzle[3]; 895301e04c3fSmrg vtx.srf_mode_all = 1; 895401e04c3fSmrg vtx.data_format = format; 895501e04c3fSmrg vtx.num_format_all = num_format; 895601e04c3fSmrg vtx.format_comp_all = format_comp; 895701e04c3fSmrg vtx.endian = endian; 895801e04c3fSmrg vtx.offset = 0; 895901e04c3fSmrg vtx.mega_fetch_count = 3; 896001e04c3fSmrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 896101e04c3fSmrg if (r) 896201e04c3fSmrg return r; 896301e04c3fSmrg cf = ctx->bc->cf_last; 896401e04c3fSmrg cf->barrier = 1; 896501e04c3fSmrg return 0; 896601e04c3fSmrg} 896701e04c3fSmrg 896801e04c3fSmrgstatic int tgsi_load_lds(struct r600_shader_ctx *ctx) 896901e04c3fSmrg{ 897001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 897101e04c3fSmrg struct r600_bytecode_alu alu; 897201e04c3fSmrg int r; 897301e04c3fSmrg int temp_reg = r600_get_temp(ctx); 897401e04c3fSmrg 897501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 897601e04c3fSmrg alu.op = ALU_OP1_MOV; 897701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 897801e04c3fSmrg alu.dst.sel = temp_reg; 897901e04c3fSmrg alu.dst.write = 1; 898001e04c3fSmrg alu.last = 1; 898101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 898201e04c3fSmrg if (r) 898301e04c3fSmrg return r; 898401e04c3fSmrg 898501e04c3fSmrg r = do_lds_fetch_values(ctx, temp_reg, 898601e04c3fSmrg ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index, inst->Dst[0].Register.WriteMask); 898701e04c3fSmrg if (r) 898801e04c3fSmrg return r; 898901e04c3fSmrg return 0; 899001e04c3fSmrg} 899101e04c3fSmrg 899201e04c3fSmrgstatic int tgsi_load(struct r600_shader_ctx *ctx) 899301e04c3fSmrg{ 899401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 899501e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 899601e04c3fSmrg return 
tgsi_load_rat(ctx); 899701e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) 899801e04c3fSmrg return tgsi_load_gds(ctx); 899901e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 900001e04c3fSmrg return tgsi_load_buffer(ctx); 900101e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 900201e04c3fSmrg return tgsi_load_lds(ctx); 900301e04c3fSmrg return 0; 900401e04c3fSmrg} 900501e04c3fSmrg 900601e04c3fSmrgstatic int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx) 900701e04c3fSmrg{ 900801e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 900901e04c3fSmrg struct r600_bytecode_cf *cf; 901001e04c3fSmrg int r, i; 901101e04c3fSmrg unsigned rat_index_mode; 901201e04c3fSmrg int lasti; 901301e04c3fSmrg int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx); 901401e04c3fSmrg 901501e04c3fSmrg r = load_buffer_coord(ctx, 0, treg2); 901601e04c3fSmrg if (r) 901701e04c3fSmrg return r; 901801e04c3fSmrg 901901e04c3fSmrg rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 902001e04c3fSmrg if (rat_index_mode) 902101e04c3fSmrg egcm_load_index_reg(ctx->bc, 1, false); 902201e04c3fSmrg 902301e04c3fSmrg for (i = 0; i <= 3; i++) { 902401e04c3fSmrg struct r600_bytecode_alu alu; 902501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 902601e04c3fSmrg alu.op = ALU_OP1_MOV; 902701e04c3fSmrg alu.dst.sel = temp_reg; 902801e04c3fSmrg alu.dst.chan = i; 902901e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_0; 903001e04c3fSmrg alu.last = (i == 3); 903101e04c3fSmrg alu.dst.write = 1; 903201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 903301e04c3fSmrg if (r) 903401e04c3fSmrg return r; 903501e04c3fSmrg } 903601e04c3fSmrg 903701e04c3fSmrg lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 903801e04c3fSmrg for (i = 0; i <= lasti; i++) { 903901e04c3fSmrg struct r600_bytecode_alu alu; 904001e04c3fSmrg if (!((1 << i) & inst->Dst[0].Register.WriteMask)) 904101e04c3fSmrg continue; 904201e04c3fSmrg 904301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 904401e04c3fSmrg temp_reg, 0, 904501e04c3fSmrg treg2, 0, 904601e04c3fSmrg V_SQ_ALU_SRC_LITERAL, i); 904701e04c3fSmrg if (r) 904801e04c3fSmrg return r; 904901e04c3fSmrg 905001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 905101e04c3fSmrg alu.op = ALU_OP1_MOV; 905201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 905301e04c3fSmrg alu.dst.chan = 0; 905401e04c3fSmrg 905501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 905601e04c3fSmrg alu.last = 1; 905701e04c3fSmrg alu.dst.write = 1; 905801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 905901e04c3fSmrg if (r) 906001e04c3fSmrg return r; 906101e04c3fSmrg 906201e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 906301e04c3fSmrg cf = ctx->bc->cf_last; 906401e04c3fSmrg 906501e04c3fSmrg cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE]; 906601e04c3fSmrg cf->rat.inst = V_RAT_INST_STORE_TYPED; 906701e04c3fSmrg 
cf->rat.index_mode = rat_index_mode; 906801e04c3fSmrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 906901e04c3fSmrg cf->output.gpr = ctx->temp_reg; 907001e04c3fSmrg cf->output.index_gpr = temp_reg; 907101e04c3fSmrg cf->output.comp_mask = 1; 907201e04c3fSmrg cf->output.burst_count = 1; 907301e04c3fSmrg cf->vpm = 1; 907401e04c3fSmrg cf->barrier = 1; 907501e04c3fSmrg cf->output.elem_size = 0; 907601e04c3fSmrg } 907701e04c3fSmrg return 0; 907801e04c3fSmrg} 907901e04c3fSmrg 908001e04c3fSmrgstatic int tgsi_store_rat(struct r600_shader_ctx *ctx) 908101e04c3fSmrg{ 908201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 908301e04c3fSmrg struct r600_bytecode_cf *cf; 908401e04c3fSmrg bool src_requires_loading = false; 908501e04c3fSmrg int val_gpr, idx_gpr; 908601e04c3fSmrg int r, i; 908701e04c3fSmrg unsigned rat_index_mode; 908801e04c3fSmrg 908901e04c3fSmrg rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 909001e04c3fSmrg 909101e04c3fSmrg r = load_index_src(ctx, 0, &idx_gpr); 909201e04c3fSmrg if (r) 909301e04c3fSmrg return r; 909401e04c3fSmrg 909501e04c3fSmrg if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY) 909601e04c3fSmrg src_requires_loading = true; 909701e04c3fSmrg 909801e04c3fSmrg if (src_requires_loading) { 909901e04c3fSmrg struct r600_bytecode_alu alu; 910001e04c3fSmrg for (i = 0; i < 4; i++) { 910101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 910201e04c3fSmrg alu.op = ALU_OP1_MOV; 910301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 910401e04c3fSmrg alu.dst.chan = i; 910501e04c3fSmrg 910601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 910701e04c3fSmrg if (i == 3) 910801e04c3fSmrg alu.last = 1; 910901e04c3fSmrg alu.dst.write = 1; 911001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 911101e04c3fSmrg if (r) 911201e04c3fSmrg return r; 911301e04c3fSmrg } 911401e04c3fSmrg val_gpr = ctx->temp_reg; 911501e04c3fSmrg } else 911601e04c3fSmrg 
val_gpr = tgsi_tex_get_src_gpr(ctx, 1); 911701e04c3fSmrg if (rat_index_mode) 911801e04c3fSmrg egcm_load_index_reg(ctx->bc, 1, false); 911901e04c3fSmrg 912001e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 912101e04c3fSmrg cf = ctx->bc->cf_last; 912201e04c3fSmrg 912301e04c3fSmrg cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index; 912401e04c3fSmrg cf->rat.inst = V_RAT_INST_STORE_TYPED; 912501e04c3fSmrg cf->rat.index_mode = rat_index_mode; 912601e04c3fSmrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 912701e04c3fSmrg cf->output.gpr = val_gpr; 912801e04c3fSmrg cf->output.index_gpr = idx_gpr; 912901e04c3fSmrg cf->output.comp_mask = 0xf; 913001e04c3fSmrg cf->output.burst_count = 1; 913101e04c3fSmrg cf->vpm = 1; 913201e04c3fSmrg cf->barrier = 1; 913301e04c3fSmrg cf->output.elem_size = 0; 913401e04c3fSmrg return 0; 913501e04c3fSmrg} 913601e04c3fSmrg 913701e04c3fSmrgstatic int tgsi_store_lds(struct r600_shader_ctx *ctx) 913801e04c3fSmrg{ 913901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 914001e04c3fSmrg struct r600_bytecode_alu alu; 914101e04c3fSmrg int r, i, lasti; 914201e04c3fSmrg int write_mask = inst->Dst[0].Register.WriteMask; 914301e04c3fSmrg int temp_reg = r600_get_temp(ctx); 914401e04c3fSmrg 914501e04c3fSmrg /* LDS write */ 914601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 914701e04c3fSmrg alu.op = ALU_OP1_MOV; 914801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 914901e04c3fSmrg alu.dst.sel = temp_reg; 915001e04c3fSmrg alu.dst.write = 1; 915101e04c3fSmrg alu.last = 1; 915201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 915301e04c3fSmrg if (r) 915401e04c3fSmrg return r; 915501e04c3fSmrg 915601e04c3fSmrg lasti = tgsi_last_instruction(write_mask); 915701e04c3fSmrg for (i = 1; i <= lasti; i++) { 915801e04c3fSmrg if (!(write_mask & (1 << i))) 915901e04c3fSmrg continue; 916001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 916101e04c3fSmrg 
temp_reg, i, 916201e04c3fSmrg temp_reg, 0, 916301e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 4 * i); 916401e04c3fSmrg if (r) 916501e04c3fSmrg return r; 916601e04c3fSmrg } 916701e04c3fSmrg for (i = 0; i <= lasti; i++) { 916801e04c3fSmrg if (!(write_mask & (1 << i))) 916901e04c3fSmrg continue; 917001e04c3fSmrg 917101e04c3fSmrg if ((i == 0 && ((write_mask & 3) == 3)) || 917201e04c3fSmrg (i == 2 && ((write_mask & 0xc) == 0xc))) { 917301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 917401e04c3fSmrg alu.op = LDS_OP3_LDS_WRITE_REL; 917501e04c3fSmrg 917601e04c3fSmrg alu.src[0].sel = temp_reg; 917701e04c3fSmrg alu.src[0].chan = i; 917801e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 917901e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i + 1); 918001e04c3fSmrg alu.last = 1; 918101e04c3fSmrg alu.is_lds_idx_op = true; 918201e04c3fSmrg alu.lds_idx = 1; 918301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 918401e04c3fSmrg if (r) 918501e04c3fSmrg return r; 918601e04c3fSmrg i += 1; 918701e04c3fSmrg continue; 918801e04c3fSmrg } 918901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 919001e04c3fSmrg alu.op = LDS_OP2_LDS_WRITE; 919101e04c3fSmrg 919201e04c3fSmrg alu.src[0].sel = temp_reg; 919301e04c3fSmrg alu.src[0].chan = i; 919401e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 919501e04c3fSmrg 919601e04c3fSmrg alu.last = 1; 919701e04c3fSmrg alu.is_lds_idx_op = true; 919801e04c3fSmrg 919901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 920001e04c3fSmrg if (r) 920101e04c3fSmrg return r; 920201e04c3fSmrg } 920301e04c3fSmrg return 0; 920401e04c3fSmrg} 920501e04c3fSmrg 920601e04c3fSmrgstatic int tgsi_store(struct r600_shader_ctx *ctx) 920701e04c3fSmrg{ 920801e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 920901e04c3fSmrg if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 921001e04c3fSmrg return tgsi_store_buffer_rat(ctx); 921101e04c3fSmrg else if (inst->Dst[0].Register.File == 
TGSI_FILE_MEMORY) 921201e04c3fSmrg return tgsi_store_lds(ctx); 921301e04c3fSmrg else 921401e04c3fSmrg return tgsi_store_rat(ctx); 921501e04c3fSmrg} 921601e04c3fSmrg 921701e04c3fSmrgstatic int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx) 921801e04c3fSmrg{ 921901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 922001e04c3fSmrg /* have to work out the offset into the RAT immediate return buffer */ 922101e04c3fSmrg struct r600_bytecode_alu alu; 922201e04c3fSmrg struct r600_bytecode_vtx vtx; 922301e04c3fSmrg struct r600_bytecode_cf *cf; 922401e04c3fSmrg int r; 922501e04c3fSmrg int idx_gpr; 922601e04c3fSmrg unsigned format, num_format, format_comp, endian; 922701e04c3fSmrg const struct util_format_description *desc; 922801e04c3fSmrg unsigned rat_index_mode; 922901e04c3fSmrg unsigned immed_base; 923001e04c3fSmrg unsigned rat_base; 923101e04c3fSmrg 923201e04c3fSmrg immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET; 923301e04c3fSmrg rat_base = ctx->shader->rat_base; 923401e04c3fSmrg 923501e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 923601e04c3fSmrg immed_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 923701e04c3fSmrg rat_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 923801e04c3fSmrg 923901e04c3fSmrg r = load_buffer_coord(ctx, 1, ctx->temp_reg); 924001e04c3fSmrg if (r) 924101e04c3fSmrg return r; 924201e04c3fSmrg idx_gpr = ctx->temp_reg; 924301e04c3fSmrg } else { 924401e04c3fSmrg r = load_index_src(ctx, 1, &idx_gpr); 924501e04c3fSmrg if (r) 924601e04c3fSmrg return r; 924701e04c3fSmrg } 924801e04c3fSmrg 924901e04c3fSmrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 925001e04c3fSmrg 925101e04c3fSmrg if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) { 9252af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 925301e04c3fSmrg alu.op = ALU_OP1_MOV; 925401e04c3fSmrg alu.dst.sel = ctx->thread_id_gpr; 925501e04c3fSmrg alu.dst.chan = 0; 9256af69d88dSmrg alu.dst.write = 1; 925701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[3], 0); 9258af69d88dSmrg alu.last = 1; 9259af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9260af69d88dSmrg if (r) 9261af69d88dSmrg return r; 9262af69d88dSmrg 9263af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 926401e04c3fSmrg alu.op = ALU_OP1_MOV; 926501e04c3fSmrg alu.dst.sel = ctx->thread_id_gpr; 926601e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) 926701e04c3fSmrg alu.dst.chan = 2; 926801e04c3fSmrg else 926901e04c3fSmrg alu.dst.chan = 3; 9270af69d88dSmrg alu.dst.write = 1; 927101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 9272af69d88dSmrg alu.last = 1; 9273af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9274af69d88dSmrg if (r) 9275af69d88dSmrg return r; 927601e04c3fSmrg } else { 927701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 927801e04c3fSmrg alu.op = ALU_OP1_MOV; 927901e04c3fSmrg alu.dst.sel = ctx->thread_id_gpr; 928001e04c3fSmrg alu.dst.chan = 0; 928101e04c3fSmrg alu.dst.write = 1; 928201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 928301e04c3fSmrg alu.last = 1; 928401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 928501e04c3fSmrg if (r) 928601e04c3fSmrg return r; 928701e04c3fSmrg } 928801e04c3fSmrg 928901e04c3fSmrg if (rat_index_mode) 929001e04c3fSmrg egcm_load_index_reg(ctx->bc, 1, false); 929101e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 929201e04c3fSmrg cf = ctx->bc->cf_last; 929301e04c3fSmrg 929401e04c3fSmrg cf->rat.id = rat_base + inst->Src[0].Register.Index; 929501e04c3fSmrg cf->rat.inst = ctx->inst_info->op; 929601e04c3fSmrg cf->rat.index_mode 
= rat_index_mode; 929701e04c3fSmrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 929801e04c3fSmrg cf->output.gpr = ctx->thread_id_gpr; 929901e04c3fSmrg cf->output.index_gpr = idx_gpr; 930001e04c3fSmrg cf->output.comp_mask = 0xf; 930101e04c3fSmrg cf->output.burst_count = 1; 930201e04c3fSmrg cf->vpm = 1; 930301e04c3fSmrg cf->barrier = 1; 930401e04c3fSmrg cf->mark = 1; 930501e04c3fSmrg cf->output.elem_size = 0; 930601e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 930701e04c3fSmrg cf = ctx->bc->cf_last; 930801e04c3fSmrg cf->barrier = 1; 930901e04c3fSmrg cf->cf_addr = 1; 931001e04c3fSmrg 931101e04c3fSmrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 931201e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) { 931301e04c3fSmrg desc = util_format_description(inst->Memory.Format); 931401e04c3fSmrg r600_vertex_data_type(inst->Memory.Format, 931501e04c3fSmrg &format, &num_format, &format_comp, &endian); 931601e04c3fSmrg vtx.dst_sel_x = desc->swizzle[0]; 931701e04c3fSmrg } else { 931801e04c3fSmrg format = FMT_32; 931901e04c3fSmrg num_format = 1; 932001e04c3fSmrg format_comp = 0; 932101e04c3fSmrg endian = 0; 932201e04c3fSmrg vtx.dst_sel_x = 0; 932301e04c3fSmrg } 932401e04c3fSmrg vtx.op = FETCH_OP_VFETCH; 932501e04c3fSmrg vtx.buffer_id = immed_base + inst->Src[0].Register.Index; 932601e04c3fSmrg vtx.buffer_index_mode = rat_index_mode; 932701e04c3fSmrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 932801e04c3fSmrg vtx.src_gpr = ctx->thread_id_gpr; 932901e04c3fSmrg vtx.src_sel_x = 1; 933001e04c3fSmrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 933101e04c3fSmrg vtx.dst_sel_y = 7; 933201e04c3fSmrg vtx.dst_sel_z = 7; 933301e04c3fSmrg vtx.dst_sel_w = 7; 933401e04c3fSmrg vtx.use_const_fields = 0; 933501e04c3fSmrg vtx.srf_mode_all = 1; 933601e04c3fSmrg vtx.data_format = format; 933701e04c3fSmrg vtx.num_format_all = num_format; 933801e04c3fSmrg vtx.format_comp_all = format_comp; 
933901e04c3fSmrg vtx.endian = endian; 934001e04c3fSmrg vtx.offset = 0; 934101e04c3fSmrg vtx.mega_fetch_count = 0xf; 934201e04c3fSmrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 934301e04c3fSmrg if (r) 934401e04c3fSmrg return r; 934501e04c3fSmrg cf = ctx->bc->cf_last; 934601e04c3fSmrg cf->vpm = 1; 934701e04c3fSmrg cf->barrier = 1; 934801e04c3fSmrg return 0; 934901e04c3fSmrg} 935001e04c3fSmrg 935101e04c3fSmrgstatic int get_gds_op(int opcode) 935201e04c3fSmrg{ 935301e04c3fSmrg switch (opcode) { 935401e04c3fSmrg case TGSI_OPCODE_ATOMUADD: 935501e04c3fSmrg return FETCH_OP_GDS_ADD_RET; 935601e04c3fSmrg case TGSI_OPCODE_ATOMAND: 935701e04c3fSmrg return FETCH_OP_GDS_AND_RET; 935801e04c3fSmrg case TGSI_OPCODE_ATOMOR: 935901e04c3fSmrg return FETCH_OP_GDS_OR_RET; 936001e04c3fSmrg case TGSI_OPCODE_ATOMXOR: 936101e04c3fSmrg return FETCH_OP_GDS_XOR_RET; 936201e04c3fSmrg case TGSI_OPCODE_ATOMUMIN: 936301e04c3fSmrg return FETCH_OP_GDS_MIN_UINT_RET; 936401e04c3fSmrg case TGSI_OPCODE_ATOMUMAX: 936501e04c3fSmrg return FETCH_OP_GDS_MAX_UINT_RET; 936601e04c3fSmrg case TGSI_OPCODE_ATOMXCHG: 936701e04c3fSmrg return FETCH_OP_GDS_XCHG_RET; 936801e04c3fSmrg case TGSI_OPCODE_ATOMCAS: 936901e04c3fSmrg return FETCH_OP_GDS_CMP_XCHG_RET; 937001e04c3fSmrg default: 937101e04c3fSmrg return -1; 937201e04c3fSmrg } 937301e04c3fSmrg} 937401e04c3fSmrg 937501e04c3fSmrgstatic int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx) 937601e04c3fSmrg{ 937701e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 937801e04c3fSmrg struct r600_bytecode_gds gds; 937901e04c3fSmrg struct r600_bytecode_alu alu; 938001e04c3fSmrg int gds_op = get_gds_op(inst->Instruction.Opcode); 938101e04c3fSmrg int r; 938201e04c3fSmrg int uav_id = 0; 938301e04c3fSmrg int uav_index_mode = 0; 938401e04c3fSmrg bool is_cm = (ctx->bc->chip_class == CAYMAN); 938501e04c3fSmrg 938601e04c3fSmrg if (gds_op == -1) { 938701e04c3fSmrg fprintf(stderr, "unknown GDS op for opcode %d\n", inst->Instruction.Opcode); 
938801e04c3fSmrg return -1; 938901e04c3fSmrg } 939001e04c3fSmrg 939101e04c3fSmrg r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode); 939201e04c3fSmrg if (r) 939301e04c3fSmrg return r; 939401e04c3fSmrg 939501e04c3fSmrg if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) { 939601e04c3fSmrg if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) { 939701e04c3fSmrg int value = (ctx->literals[4 * inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]); 9398af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 939901e04c3fSmrg alu.op = ALU_OP1_MOV; 940001e04c3fSmrg alu.dst.sel = ctx->temp_reg; 940101e04c3fSmrg alu.dst.chan = is_cm ? 2 : 1; 940201e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 940301e04c3fSmrg alu.src[0].value = value; 940401e04c3fSmrg alu.last = 1; 9405af69d88dSmrg alu.dst.write = 1; 940601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 940701e04c3fSmrg if (r) 940801e04c3fSmrg return r; 940901e04c3fSmrg } else { 941001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 941101e04c3fSmrg alu.op = ALU_OP1_MOV; 941201e04c3fSmrg alu.dst.sel = ctx->temp_reg; 941301e04c3fSmrg alu.dst.chan = is_cm ? 2 : 1; 941401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[3], 0); 9415af69d88dSmrg alu.last = 1; 941601e04c3fSmrg alu.dst.write = 1; 9417af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9418af69d88dSmrg if (r) 9419af69d88dSmrg return r; 9420af69d88dSmrg } 9421af69d88dSmrg } 942201e04c3fSmrg if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) { 942301e04c3fSmrg int value = (ctx->literals[4 * inst->Src[2].Register.Index + inst->Src[2].Register.SwizzleX]); 942401e04c3fSmrg int abs_value = abs(value); 942501e04c3fSmrg if (abs_value != value && gds_op == FETCH_OP_GDS_ADD_RET) 942601e04c3fSmrg gds_op = FETCH_OP_GDS_SUB_RET; 9427af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9428af69d88dSmrg alu.op = ALU_OP1_MOV; 942901e04c3fSmrg alu.dst.sel = ctx->temp_reg; 943001e04c3fSmrg alu.dst.chan = is_cm ? 
1 : 0; 943101e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 943201e04c3fSmrg alu.src[0].value = abs_value; 94333464ebd5Sriastradh alu.last = 1; 943401e04c3fSmrg alu.dst.write = 1; 943501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 943601e04c3fSmrg if (r) 943701e04c3fSmrg return r; 943801e04c3fSmrg } else { 943901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 944001e04c3fSmrg alu.op = ALU_OP1_MOV; 944101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 944201e04c3fSmrg alu.dst.chan = is_cm ? 1 : 0; 944301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 944401e04c3fSmrg alu.last = 1; 944501e04c3fSmrg alu.dst.write = 1; 9446af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 94473464ebd5Sriastradh if (r) 94483464ebd5Sriastradh return r; 94493464ebd5Sriastradh } 94503464ebd5Sriastradh 9451af69d88dSmrg 945201e04c3fSmrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 945301e04c3fSmrg gds.op = gds_op; 945401e04c3fSmrg gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 945501e04c3fSmrg gds.uav_id = is_cm ? 0 : uav_id; 945601e04c3fSmrg gds.uav_index_mode = is_cm ? 0 : uav_index_mode; 945701e04c3fSmrg gds.src_gpr = ctx->temp_reg; 945801e04c3fSmrg gds.src_gpr2 = 0; 945901e04c3fSmrg gds.src_sel_x = is_cm ? 0 : 4; 946001e04c3fSmrg gds.src_sel_y = is_cm ? 1 : 0; 946101e04c3fSmrg if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) 946201e04c3fSmrg gds.src_sel_z = is_cm ? 
2 : 1; 946301e04c3fSmrg else 946401e04c3fSmrg gds.src_sel_z = 7; 946501e04c3fSmrg gds.dst_sel_x = 0; 946601e04c3fSmrg gds.dst_sel_y = 7; 946701e04c3fSmrg gds.dst_sel_z = 7; 946801e04c3fSmrg gds.dst_sel_w = 7; 946901e04c3fSmrg gds.alloc_consume = !is_cm; 947001e04c3fSmrg 947101e04c3fSmrg r = r600_bytecode_add_gds(ctx->bc, &gds); 947201e04c3fSmrg if (r) 947301e04c3fSmrg return r; 947401e04c3fSmrg ctx->bc->cf_last->vpm = 1; 947501e04c3fSmrg return 0; 947601e04c3fSmrg} 9477af69d88dSmrg 947801e04c3fSmrgstatic int get_lds_op(int opcode) 947901e04c3fSmrg{ 948001e04c3fSmrg switch (opcode) { 948101e04c3fSmrg case TGSI_OPCODE_ATOMUADD: 948201e04c3fSmrg return LDS_OP2_LDS_ADD_RET; 948301e04c3fSmrg case TGSI_OPCODE_ATOMAND: 948401e04c3fSmrg return LDS_OP2_LDS_AND_RET; 948501e04c3fSmrg case TGSI_OPCODE_ATOMOR: 948601e04c3fSmrg return LDS_OP2_LDS_OR_RET; 948701e04c3fSmrg case TGSI_OPCODE_ATOMXOR: 948801e04c3fSmrg return LDS_OP2_LDS_XOR_RET; 948901e04c3fSmrg case TGSI_OPCODE_ATOMUMIN: 949001e04c3fSmrg return LDS_OP2_LDS_MIN_UINT_RET; 949101e04c3fSmrg case TGSI_OPCODE_ATOMUMAX: 949201e04c3fSmrg return LDS_OP2_LDS_MAX_UINT_RET; 949301e04c3fSmrg case TGSI_OPCODE_ATOMIMIN: 949401e04c3fSmrg return LDS_OP2_LDS_MIN_INT_RET; 949501e04c3fSmrg case TGSI_OPCODE_ATOMIMAX: 949601e04c3fSmrg return LDS_OP2_LDS_MAX_INT_RET; 949701e04c3fSmrg case TGSI_OPCODE_ATOMXCHG: 949801e04c3fSmrg return LDS_OP2_LDS_XCHG_RET; 949901e04c3fSmrg case TGSI_OPCODE_ATOMCAS: 950001e04c3fSmrg return LDS_OP3_LDS_CMP_XCHG_RET; 950101e04c3fSmrg default: 950201e04c3fSmrg return -1; 95033464ebd5Sriastradh } 950401e04c3fSmrg} 95053464ebd5Sriastradh 950601e04c3fSmrgstatic int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx) 950701e04c3fSmrg{ 950801e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 950901e04c3fSmrg int lds_op = get_lds_op(inst->Instruction.Opcode); 951001e04c3fSmrg int r; 95113464ebd5Sriastradh 951201e04c3fSmrg struct r600_bytecode_alu alu; 951301e04c3fSmrg memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 951401e04c3fSmrg alu.op = lds_op; 951501e04c3fSmrg alu.is_lds_idx_op = true; 951601e04c3fSmrg alu.last = 1; 951701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 951801e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], 0); 951901e04c3fSmrg if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET) 952001e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[3], 0); 952101e04c3fSmrg else 952201e04c3fSmrg alu.src[2].sel = V_SQ_ALU_SRC_0; 952301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 952401e04c3fSmrg if (r) 952501e04c3fSmrg return r; 95263464ebd5Sriastradh 952701e04c3fSmrg /* then read from LDS_OQ_A_POP */ 952801e04c3fSmrg memset(&alu, 0, sizeof(alu)); 9529af69d88dSmrg 953001e04c3fSmrg alu.op = ALU_OP1_MOV; 953101e04c3fSmrg alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 953201e04c3fSmrg alu.src[0].chan = 0; 953301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 953401e04c3fSmrg alu.dst.write = 1; 953501e04c3fSmrg alu.last = 1; 953601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 953701e04c3fSmrg if (r) 953801e04c3fSmrg return r; 9539af69d88dSmrg 954001e04c3fSmrg return 0; 954101e04c3fSmrg} 9542af69d88dSmrg 954301e04c3fSmrgstatic int tgsi_atomic_op(struct r600_shader_ctx *ctx) 954401e04c3fSmrg{ 954501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 954601e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 954701e04c3fSmrg return tgsi_atomic_op_rat(ctx); 954801e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) 954901e04c3fSmrg return tgsi_atomic_op_gds(ctx); 955001e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 955101e04c3fSmrg return tgsi_atomic_op_rat(ctx); 955201e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 955301e04c3fSmrg return tgsi_atomic_op_lds(ctx); 955401e04c3fSmrg return 0; 955501e04c3fSmrg} 9556af69d88dSmrg 955701e04c3fSmrgstatic int tgsi_resq(struct r600_shader_ctx *ctx) 955801e04c3fSmrg{ 955901e04c3fSmrg 
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 956001e04c3fSmrg unsigned sampler_index_mode; 956101e04c3fSmrg struct r600_bytecode_tex tex; 956201e04c3fSmrg int r; 956301e04c3fSmrg boolean has_txq_cube_array_z = false; 9564af69d88dSmrg 956501e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || 956601e04c3fSmrg (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { 956701e04c3fSmrg if (ctx->bc->chip_class < EVERGREEN) 956801e04c3fSmrg ctx->shader->uses_tex_buffers = true; 956901e04c3fSmrg unsigned eg_buffer_base = 0; 957001e04c3fSmrg eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET; 957101e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 957201e04c3fSmrg eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 957301e04c3fSmrg return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset, eg_buffer_base); 95743464ebd5Sriastradh } 95753464ebd5Sriastradh 957601e04c3fSmrg if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY && 957701e04c3fSmrg inst->Dst[0].Register.WriteMask & 4) { 957801e04c3fSmrg ctx->shader->has_txq_cube_array_z_comp = true; 957901e04c3fSmrg has_txq_cube_array_z = true; 95803464ebd5Sriastradh } 95813464ebd5Sriastradh 958201e04c3fSmrg sampler_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 958301e04c3fSmrg if (sampler_index_mode) 958401e04c3fSmrg egcm_load_index_reg(ctx->bc, 1, false); 9585af69d88dSmrg 95863464ebd5Sriastradh 958701e04c3fSmrg /* does this shader want a num layers from TXQ for a cube array? 
*/ 958801e04c3fSmrg if (has_txq_cube_array_z) { 958901e04c3fSmrg int id = tgsi_tex_get_src_gpr(ctx, 0) + ctx->shader->image_size_const_offset; 959001e04c3fSmrg struct r600_bytecode_alu alu; 9591af69d88dSmrg 959201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 959301e04c3fSmrg alu.op = ALU_OP1_MOV; 95943464ebd5Sriastradh 959501e04c3fSmrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 959601e04c3fSmrg /* with eg each dword is either number of cubes */ 959701e04c3fSmrg alu.src[0].sel += id / 4; 959801e04c3fSmrg alu.src[0].chan = id % 4; 959901e04c3fSmrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 960001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 960101e04c3fSmrg alu.last = 1; 960201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 960301e04c3fSmrg if (r) 960401e04c3fSmrg return r; 960501e04c3fSmrg /* disable writemask from texture instruction */ 960601e04c3fSmrg inst->Dst[0].Register.WriteMask &= ~4; 9607af69d88dSmrg } 960801e04c3fSmrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 960901e04c3fSmrg tex.op = ctx->inst_info->op; 961001e04c3fSmrg tex.sampler_id = R600_IMAGE_REAL_RESOURCE_OFFSET + inst->Src[0].Register.Index; 961101e04c3fSmrg tex.sampler_index_mode = sampler_index_mode; 961201e04c3fSmrg tex.resource_id = tex.sampler_id; 961301e04c3fSmrg tex.resource_index_mode = sampler_index_mode; 961401e04c3fSmrg tex.src_sel_x = 4; 961501e04c3fSmrg tex.src_sel_y = 4; 961601e04c3fSmrg tex.src_sel_z = 4; 961701e04c3fSmrg tex.src_sel_w = 4; 961801e04c3fSmrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 961901e04c3fSmrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 962001e04c3fSmrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 962101e04c3fSmrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 962201e04c3fSmrg tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 9623af69d88dSmrg r = r600_bytecode_add_tex(ctx->bc, &tex); 96243464ebd5Sriastradh if (r) 96253464ebd5Sriastradh return r; 96263464ebd5Sriastradh 96273464ebd5Sriastradh return 0; 96283464ebd5Sriastradh} 96293464ebd5Sriastradh 96303464ebd5Sriastradhstatic int tgsi_lrp(struct r600_shader_ctx *ctx) 96313464ebd5Sriastradh{ 96323464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9633af69d88dSmrg struct r600_bytecode_alu alu; 963401e04c3fSmrg unsigned lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 963501e04c3fSmrg struct r600_bytecode_alu_src srcs[2][4]; 96363464ebd5Sriastradh unsigned i; 96373464ebd5Sriastradh int r; 96383464ebd5Sriastradh 96393464ebd5Sriastradh /* optimize if it's just an equal balance */ 96403464ebd5Sriastradh if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 96413464ebd5Sriastradh for (i = 0; i < lasti + 1; i++) { 96423464ebd5Sriastradh if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 96433464ebd5Sriastradh continue; 96443464ebd5Sriastradh 9645af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9646af69d88dSmrg alu.op = ALU_OP2_ADD; 9647af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 9648af69d88dSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 96493464ebd5Sriastradh alu.omod = 3; 96503464ebd5Sriastradh tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 96513464ebd5Sriastradh alu.dst.chan = i; 96523464ebd5Sriastradh if (i == lasti) { 96533464ebd5Sriastradh alu.last = 1; 96543464ebd5Sriastradh } 9655af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 96563464ebd5Sriastradh if (r) 96573464ebd5Sriastradh return r; 96583464ebd5Sriastradh } 96593464ebd5Sriastradh return 0; 96603464ebd5Sriastradh } 96613464ebd5Sriastradh 96623464ebd5Sriastradh /* 1 - src0 */ 96633464ebd5Sriastradh for (i = 0; i < lasti + 1; i++) { 96643464ebd5Sriastradh if 
(!(inst->Dst[0].Register.WriteMask & (1 << i))) 96653464ebd5Sriastradh continue; 96663464ebd5Sriastradh 9667af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9668af69d88dSmrg alu.op = ALU_OP2_ADD; 96693464ebd5Sriastradh alu.src[0].sel = V_SQ_ALU_SRC_1; 96703464ebd5Sriastradh alu.src[0].chan = 0; 9671af69d88dSmrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 9672af69d88dSmrg r600_bytecode_src_toggle_neg(&alu.src[1]); 96733464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 96743464ebd5Sriastradh alu.dst.chan = i; 96753464ebd5Sriastradh if (i == lasti) { 96763464ebd5Sriastradh alu.last = 1; 96773464ebd5Sriastradh } 96783464ebd5Sriastradh alu.dst.write = 1; 9679af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 96803464ebd5Sriastradh if (r) 96813464ebd5Sriastradh return r; 96823464ebd5Sriastradh } 96833464ebd5Sriastradh 96843464ebd5Sriastradh /* (1 - src0) * src2 */ 96853464ebd5Sriastradh for (i = 0; i < lasti + 1; i++) { 96863464ebd5Sriastradh if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 96873464ebd5Sriastradh continue; 96883464ebd5Sriastradh 9689af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9690af69d88dSmrg alu.op = ALU_OP2_MUL; 96913464ebd5Sriastradh alu.src[0].sel = ctx->temp_reg; 96923464ebd5Sriastradh alu.src[0].chan = i; 9693af69d88dSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 96943464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 96953464ebd5Sriastradh alu.dst.chan = i; 96963464ebd5Sriastradh if (i == lasti) { 96973464ebd5Sriastradh alu.last = 1; 96983464ebd5Sriastradh } 96993464ebd5Sriastradh alu.dst.write = 1; 9700af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 97013464ebd5Sriastradh if (r) 97023464ebd5Sriastradh return r; 97033464ebd5Sriastradh } 97043464ebd5Sriastradh 97053464ebd5Sriastradh /* src0 * src1 + (1 - src0) * src2 */ 970601e04c3fSmrg 970701e04c3fSmrg for (i = 0; i < 2; i++) { 970801e04c3fSmrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 970901e04c3fSmrg srcs[i], 
&ctx->src[i]); 971001e04c3fSmrg if (r) 971101e04c3fSmrg return r; 971201e04c3fSmrg } 971301e04c3fSmrg 971401e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 971501e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 971601e04c3fSmrg continue; 971701e04c3fSmrg 971801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 971901e04c3fSmrg alu.op = ALU_OP3_MULADD; 972001e04c3fSmrg alu.is_op3 = 1; 972101e04c3fSmrg alu.src[0] = srcs[0][i]; 972201e04c3fSmrg alu.src[1] = srcs[1][i]; 972301e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 972401e04c3fSmrg alu.src[2].chan = i; 972501e04c3fSmrg 972601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 972701e04c3fSmrg alu.dst.chan = i; 972801e04c3fSmrg if (i == lasti) { 972901e04c3fSmrg alu.last = 1; 973001e04c3fSmrg } 973101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 973201e04c3fSmrg if (r) 973301e04c3fSmrg return r; 973401e04c3fSmrg } 973501e04c3fSmrg return 0; 973601e04c3fSmrg} 973701e04c3fSmrg 973801e04c3fSmrgstatic int tgsi_cmp(struct r600_shader_ctx *ctx) 973901e04c3fSmrg{ 974001e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 974101e04c3fSmrg struct r600_bytecode_alu alu; 974201e04c3fSmrg int i, r, j; 974301e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 974401e04c3fSmrg struct r600_bytecode_alu_src srcs[3][4]; 974501e04c3fSmrg 974601e04c3fSmrg unsigned op; 974701e04c3fSmrg 974801e04c3fSmrg if (ctx->src[0].abs && ctx->src[0].neg) { 974901e04c3fSmrg op = ALU_OP3_CNDE; 975001e04c3fSmrg ctx->src[0].abs = 0; 975101e04c3fSmrg ctx->src[0].neg = 0; 975201e04c3fSmrg } else { 975301e04c3fSmrg op = ALU_OP3_CNDGE; 975401e04c3fSmrg } 975501e04c3fSmrg 975601e04c3fSmrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 975701e04c3fSmrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 975801e04c3fSmrg srcs[j], &ctx->src[j]); 975901e04c3fSmrg if (r) 976001e04c3fSmrg return r; 976101e04c3fSmrg } 976201e04c3fSmrg 976301e04c3fSmrg for (i 
= 0; i < lasti + 1; i++) { 976401e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 976501e04c3fSmrg continue; 976601e04c3fSmrg 976701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 976801e04c3fSmrg alu.op = op; 976901e04c3fSmrg alu.src[0] = srcs[0][i]; 977001e04c3fSmrg alu.src[1] = srcs[2][i]; 977101e04c3fSmrg alu.src[2] = srcs[1][i]; 977201e04c3fSmrg 977301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 977401e04c3fSmrg alu.dst.chan = i; 977501e04c3fSmrg alu.dst.write = 1; 977601e04c3fSmrg alu.is_op3 = 1; 977701e04c3fSmrg if (i == lasti) 977801e04c3fSmrg alu.last = 1; 977901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 978001e04c3fSmrg if (r) 978101e04c3fSmrg return r; 978201e04c3fSmrg } 978301e04c3fSmrg return 0; 978401e04c3fSmrg} 978501e04c3fSmrg 978601e04c3fSmrgstatic int tgsi_ucmp(struct r600_shader_ctx *ctx) 978701e04c3fSmrg{ 978801e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 978901e04c3fSmrg struct r600_bytecode_alu alu; 979001e04c3fSmrg int i, r; 979101e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 979201e04c3fSmrg 97933464ebd5Sriastradh for (i = 0; i < lasti + 1; i++) { 97943464ebd5Sriastradh if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 97953464ebd5Sriastradh continue; 97963464ebd5Sriastradh 979701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 979801e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 979901e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 980001e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 980101e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 980201e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 980301e04c3fSmrg alu.dst.chan = i; 980401e04c3fSmrg alu.dst.write = 1; 980501e04c3fSmrg alu.is_op3 = 1; 980601e04c3fSmrg if (i == lasti) 980701e04c3fSmrg alu.last = 1; 980801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 980901e04c3fSmrg if (r) 981001e04c3fSmrg return r; 
981101e04c3fSmrg } 981201e04c3fSmrg return 0; 981301e04c3fSmrg} 981401e04c3fSmrg 981501e04c3fSmrgstatic int tgsi_exp(struct r600_shader_ctx *ctx) 981601e04c3fSmrg{ 981701e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 981801e04c3fSmrg struct r600_bytecode_alu alu; 981901e04c3fSmrg int r; 982001e04c3fSmrg unsigned i; 982101e04c3fSmrg 982201e04c3fSmrg /* result.x = 2^floor(src); */ 982301e04c3fSmrg if (inst->Dst[0].Register.WriteMask & 1) { 982401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 982501e04c3fSmrg 982601e04c3fSmrg alu.op = ALU_OP1_FLOOR; 982701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 982801e04c3fSmrg 982901e04c3fSmrg alu.dst.sel = ctx->temp_reg; 983001e04c3fSmrg alu.dst.chan = 0; 983101e04c3fSmrg alu.dst.write = 1; 983201e04c3fSmrg alu.last = 1; 983301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 983401e04c3fSmrg if (r) 983501e04c3fSmrg return r; 983601e04c3fSmrg 983701e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 983801e04c3fSmrg for (i = 0; i < 3; i++) { 983901e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 984001e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 984101e04c3fSmrg alu.src[0].chan = 0; 984201e04c3fSmrg 984301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 984401e04c3fSmrg alu.dst.chan = i; 984501e04c3fSmrg alu.dst.write = i == 0; 984601e04c3fSmrg alu.last = i == 2; 984701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 984801e04c3fSmrg if (r) 984901e04c3fSmrg return r; 985001e04c3fSmrg } 985101e04c3fSmrg } else { 985201e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 985301e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 985401e04c3fSmrg alu.src[0].chan = 0; 985501e04c3fSmrg 985601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 985701e04c3fSmrg alu.dst.chan = 0; 985801e04c3fSmrg alu.dst.write = 1; 985901e04c3fSmrg alu.last = 1; 986001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 986101e04c3fSmrg if (r) 986201e04c3fSmrg return r; 986301e04c3fSmrg } 986401e04c3fSmrg } 986501e04c3fSmrg 
986601e04c3fSmrg /* result.y = tmp - floor(tmp); */ 986701e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 986801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 986901e04c3fSmrg 987001e04c3fSmrg alu.op = ALU_OP1_FRACT; 987101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 987201e04c3fSmrg 987301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 987401e04c3fSmrg#if 0 987501e04c3fSmrg r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 987601e04c3fSmrg if (r) 987701e04c3fSmrg return r; 987801e04c3fSmrg#endif 987901e04c3fSmrg alu.dst.write = 1; 988001e04c3fSmrg alu.dst.chan = 1; 988101e04c3fSmrg 988201e04c3fSmrg alu.last = 1; 98833464ebd5Sriastradh 9884af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 98853464ebd5Sriastradh if (r) 98863464ebd5Sriastradh return r; 98873464ebd5Sriastradh } 98883464ebd5Sriastradh 988901e04c3fSmrg /* result.z = RoughApprox2ToX(tmp);*/ 989001e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 989101e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 989201e04c3fSmrg for (i = 0; i < 3; i++) { 989301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 989401e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 989501e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9896af69d88dSmrg 989701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 989801e04c3fSmrg alu.dst.chan = i; 989901e04c3fSmrg if (i == 2) { 990001e04c3fSmrg alu.dst.write = 1; 990101e04c3fSmrg alu.last = 1; 990201e04c3fSmrg } 990301e04c3fSmrg 990401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 990501e04c3fSmrg if (r) 990601e04c3fSmrg return r; 990701e04c3fSmrg } 990801e04c3fSmrg } else { 990901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 991001e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 991101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 991201e04c3fSmrg 991301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 991401e04c3fSmrg alu.dst.write = 1; 991501e04c3fSmrg alu.dst.chan = 2; 9916af69d88dSmrg 9917af69d88dSmrg alu.last = 1; 
991801e04c3fSmrg 991901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 992001e04c3fSmrg if (r) 992101e04c3fSmrg return r; 992201e04c3fSmrg } 9923af69d88dSmrg } 9924af69d88dSmrg 992501e04c3fSmrg /* result.w = 1.0;*/ 992601e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 992701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 99283464ebd5Sriastradh 992901e04c3fSmrg alu.op = ALU_OP1_MOV; 993001e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; 993101e04c3fSmrg alu.src[0].chan = 0; 99323464ebd5Sriastradh 993301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 993401e04c3fSmrg alu.dst.chan = 3; 99353464ebd5Sriastradh alu.dst.write = 1; 993601e04c3fSmrg alu.last = 1; 9937af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 99383464ebd5Sriastradh if (r) 99393464ebd5Sriastradh return r; 99403464ebd5Sriastradh } 994101e04c3fSmrg return tgsi_helper_copy(ctx, inst); 99423464ebd5Sriastradh} 99433464ebd5Sriastradh 994401e04c3fSmrgstatic int tgsi_log(struct r600_shader_ctx *ctx) 99453464ebd5Sriastradh{ 99463464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9947af69d88dSmrg struct r600_bytecode_alu alu; 994801e04c3fSmrg int r; 994901e04c3fSmrg unsigned i; 99503464ebd5Sriastradh 995101e04c3fSmrg /* result.x = floor(log2(|src|)); */ 995201e04c3fSmrg if (inst->Dst[0].Register.WriteMask & 1) { 995301e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 995401e04c3fSmrg for (i = 0; i < 3; i++) { 995501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 995601e04c3fSmrg 995701e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 995801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 995901e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 996001e04c3fSmrg 996101e04c3fSmrg alu.dst.sel = ctx->temp_reg; 996201e04c3fSmrg alu.dst.chan = i; 996301e04c3fSmrg if (i == 0) 996401e04c3fSmrg alu.dst.write = 1; 996501e04c3fSmrg if (i == 2) 996601e04c3fSmrg alu.last = 1; 996701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 
996801e04c3fSmrg if (r) 996901e04c3fSmrg return r; 997001e04c3fSmrg } 99713464ebd5Sriastradh 99723464ebd5Sriastradh } else { 997301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 997401e04c3fSmrg 997501e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 997601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 997701e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 997801e04c3fSmrg 997901e04c3fSmrg alu.dst.sel = ctx->temp_reg; 998001e04c3fSmrg alu.dst.chan = 0; 998101e04c3fSmrg alu.dst.write = 1; 998201e04c3fSmrg alu.last = 1; 998301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 998401e04c3fSmrg if (r) 998501e04c3fSmrg return r; 99863464ebd5Sriastradh } 99873464ebd5Sriastradh 998801e04c3fSmrg alu.op = ALU_OP1_FLOOR; 998901e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 999001e04c3fSmrg alu.src[0].chan = 0; 999101e04c3fSmrg 99923464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 999301e04c3fSmrg alu.dst.chan = 0; 99943464ebd5Sriastradh alu.dst.write = 1; 999501e04c3fSmrg alu.last = 1; 99963464ebd5Sriastradh 9997af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 99983464ebd5Sriastradh if (r) 99993464ebd5Sriastradh return r; 100003464ebd5Sriastradh } 100013464ebd5Sriastradh 1000201e04c3fSmrg /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 1000301e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 100043464ebd5Sriastradh 1000501e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 1000601e04c3fSmrg for (i = 0; i < 3; i++) { 1000701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1000801e04c3fSmrg 1000901e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 1001001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1001101e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 1001201e04c3fSmrg 1001301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1001401e04c3fSmrg alu.dst.chan = i; 1001501e04c3fSmrg if (i == 1) 1001601e04c3fSmrg alu.dst.write = 1; 1001701e04c3fSmrg if (i == 2) 1001801e04c3fSmrg alu.last = 1; 1001901e04c3fSmrg 1002001e04c3fSmrg r = 
r600_bytecode_add_alu(ctx->bc, &alu); 1002101e04c3fSmrg if (r) 1002201e04c3fSmrg return r; 1002301e04c3fSmrg } 100243464ebd5Sriastradh } else { 1002501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 100263464ebd5Sriastradh 1002701e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 1002801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1002901e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 100303464ebd5Sriastradh 100313464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 1003201e04c3fSmrg alu.dst.chan = 1; 1003301e04c3fSmrg alu.dst.write = 1; 100343464ebd5Sriastradh alu.last = 1; 100353464ebd5Sriastradh 1003601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1003701e04c3fSmrg if (r) 1003801e04c3fSmrg return r; 1003901e04c3fSmrg } 100403464ebd5Sriastradh 10041af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 100423464ebd5Sriastradh 10043af69d88dSmrg alu.op = ALU_OP1_FLOOR; 1004401e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1004501e04c3fSmrg alu.src[0].chan = 1; 100463464ebd5Sriastradh 100473464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 1004801e04c3fSmrg alu.dst.chan = 1; 100493464ebd5Sriastradh alu.dst.write = 1; 100503464ebd5Sriastradh alu.last = 1; 1005101e04c3fSmrg 10052af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 100533464ebd5Sriastradh if (r) 100543464ebd5Sriastradh return r; 100553464ebd5Sriastradh 10056af69d88dSmrg if (ctx->bc->chip_class == CAYMAN) { 100573464ebd5Sriastradh for (i = 0; i < 3; i++) { 1005801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1005901e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 1006001e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1006101e04c3fSmrg alu.src[0].chan = 1; 1006201e04c3fSmrg 1006301e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1006401e04c3fSmrg alu.dst.chan = i; 1006501e04c3fSmrg if (i == 1) 1006601e04c3fSmrg alu.dst.write = 1; 1006701e04c3fSmrg if (i == 2) 1006801e04c3fSmrg alu.last = 1; 1006901e04c3fSmrg 1007001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1007101e04c3fSmrg if 
(r) 1007201e04c3fSmrg return r; 1007301e04c3fSmrg } 1007401e04c3fSmrg } else { 1007501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1007601e04c3fSmrg alu.op = ALU_OP1_EXP_IEEE; 1007701e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1007801e04c3fSmrg alu.src[0].chan = 1; 1007901e04c3fSmrg 1008001e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1008101e04c3fSmrg alu.dst.chan = 1; 1008201e04c3fSmrg alu.dst.write = 1; 1008301e04c3fSmrg alu.last = 1; 1008401e04c3fSmrg 1008501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1008601e04c3fSmrg if (r) 1008701e04c3fSmrg return r; 1008801e04c3fSmrg } 1008901e04c3fSmrg 1009001e04c3fSmrg if (ctx->bc->chip_class == CAYMAN) { 1009101e04c3fSmrg for (i = 0; i < 3; i++) { 1009201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1009301e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 100943464ebd5Sriastradh alu.src[0].sel = ctx->temp_reg; 1009501e04c3fSmrg alu.src[0].chan = 1; 100963464ebd5Sriastradh 100973464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 100983464ebd5Sriastradh alu.dst.chan = i; 1009901e04c3fSmrg if (i == 1) 1010001e04c3fSmrg alu.dst.write = 1; 1010101e04c3fSmrg if (i == 2) 1010201e04c3fSmrg alu.last = 1; 1010301e04c3fSmrg 10104af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101053464ebd5Sriastradh if (r) 101063464ebd5Sriastradh return r; 101073464ebd5Sriastradh } 101083464ebd5Sriastradh } else { 1010901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1011001e04c3fSmrg alu.op = ALU_OP1_RECIP_IEEE; 101113464ebd5Sriastradh alu.src[0].sel = ctx->temp_reg; 1011201e04c3fSmrg alu.src[0].chan = 1; 101133464ebd5Sriastradh 101143464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 1011501e04c3fSmrg alu.dst.chan = 1; 101163464ebd5Sriastradh alu.dst.write = 1; 101173464ebd5Sriastradh alu.last = 1; 1011801e04c3fSmrg 10119af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101203464ebd5Sriastradh if (r) 101213464ebd5Sriastradh return r; 101223464ebd5Sriastradh } 101233464ebd5Sriastradh 10124af69d88dSmrg 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 101253464ebd5Sriastradh 1012601e04c3fSmrg alu.op = ALU_OP2_MUL; 1012701e04c3fSmrg 10128af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1012901e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 1013001e04c3fSmrg 1013101e04c3fSmrg alu.src[1].sel = ctx->temp_reg; 1013201e04c3fSmrg alu.src[1].chan = 1; 101333464ebd5Sriastradh 101343464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 101353464ebd5Sriastradh alu.dst.chan = 1; 1013601e04c3fSmrg alu.dst.write = 1; 101373464ebd5Sriastradh alu.last = 1; 101383464ebd5Sriastradh 10139af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101403464ebd5Sriastradh if (r) 101413464ebd5Sriastradh return r; 101423464ebd5Sriastradh } 101433464ebd5Sriastradh 1014401e04c3fSmrg /* result.z = log2(|src|);*/ 1014501e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 10146af69d88dSmrg if (ctx->bc->chip_class == CAYMAN) { 101473464ebd5Sriastradh for (i = 0; i < 3; i++) { 10148af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1014901e04c3fSmrg 1015001e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 10151af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1015201e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 101533464ebd5Sriastradh 101543464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 1015501e04c3fSmrg if (i == 2) 101563464ebd5Sriastradh alu.dst.write = 1; 1015701e04c3fSmrg alu.dst.chan = i; 1015801e04c3fSmrg if (i == 2) 101593464ebd5Sriastradh alu.last = 1; 101603464ebd5Sriastradh 10161af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101623464ebd5Sriastradh if (r) 101633464ebd5Sriastradh return r; 101643464ebd5Sriastradh } 101653464ebd5Sriastradh } else { 10166af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1016701e04c3fSmrg 1016801e04c3fSmrg alu.op = ALU_OP1_LOG_IEEE; 10169af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1017001e04c3fSmrg r600_bytecode_src_set_abs(&alu.src[0]); 101713464ebd5Sriastradh 
101723464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 101733464ebd5Sriastradh alu.dst.write = 1; 101743464ebd5Sriastradh alu.dst.chan = 2; 101753464ebd5Sriastradh alu.last = 1; 101763464ebd5Sriastradh 10177af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101783464ebd5Sriastradh if (r) 101793464ebd5Sriastradh return r; 101803464ebd5Sriastradh } 101813464ebd5Sriastradh } 101823464ebd5Sriastradh 1018301e04c3fSmrg /* result.w = 1.0; */ 1018401e04c3fSmrg if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 10185af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 101863464ebd5Sriastradh 10187af69d88dSmrg alu.op = ALU_OP1_MOV; 101883464ebd5Sriastradh alu.src[0].sel = V_SQ_ALU_SRC_1; 101893464ebd5Sriastradh alu.src[0].chan = 0; 101903464ebd5Sriastradh 101913464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 101923464ebd5Sriastradh alu.dst.chan = 3; 101933464ebd5Sriastradh alu.dst.write = 1; 101943464ebd5Sriastradh alu.last = 1; 1019501e04c3fSmrg 10196af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 101973464ebd5Sriastradh if (r) 101983464ebd5Sriastradh return r; 101993464ebd5Sriastradh } 1020001e04c3fSmrg 102013464ebd5Sriastradh return tgsi_helper_copy(ctx, inst); 102023464ebd5Sriastradh} 102033464ebd5Sriastradh 1020401e04c3fSmrgstatic int tgsi_eg_arl(struct r600_shader_ctx *ctx) 102053464ebd5Sriastradh{ 102063464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10207af69d88dSmrg struct r600_bytecode_alu alu; 102083464ebd5Sriastradh int r; 1020901e04c3fSmrg int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1021001e04c3fSmrg unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index); 102113464ebd5Sriastradh 1021201e04c3fSmrg assert(inst->Dst[0].Register.Index < 3); 1021301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 102143464ebd5Sriastradh 1021501e04c3fSmrg switch (inst->Instruction.Opcode) { 1021601e04c3fSmrg case TGSI_OPCODE_ARL: 1021701e04c3fSmrg alu.op = 
ALU_OP1_FLT_TO_INT_FLOOR; 1021801e04c3fSmrg break; 1021901e04c3fSmrg case TGSI_OPCODE_ARR: 1022001e04c3fSmrg alu.op = ALU_OP1_FLT_TO_INT; 1022101e04c3fSmrg break; 1022201e04c3fSmrg case TGSI_OPCODE_UARL: 1022301e04c3fSmrg alu.op = ALU_OP1_MOV; 1022401e04c3fSmrg break; 1022501e04c3fSmrg default: 1022601e04c3fSmrg assert(0); 1022701e04c3fSmrg return -1; 1022801e04c3fSmrg } 1022901e04c3fSmrg 1023001e04c3fSmrg for (i = 0; i <= lasti; ++i) { 1023101e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1023201e04c3fSmrg continue; 1023301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1023401e04c3fSmrg alu.last = i == lasti; 1023501e04c3fSmrg alu.dst.sel = reg; 1023601e04c3fSmrg alu.dst.chan = i; 1023701e04c3fSmrg alu.dst.write = 1; 1023801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1023901e04c3fSmrg if (r) 1024001e04c3fSmrg return r; 1024101e04c3fSmrg } 1024201e04c3fSmrg 1024301e04c3fSmrg if (inst->Dst[0].Register.Index > 0) 1024401e04c3fSmrg ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 1024501e04c3fSmrg else 1024601e04c3fSmrg ctx->bc->ar_loaded = 0; 1024701e04c3fSmrg 1024801e04c3fSmrg return 0; 1024901e04c3fSmrg} 1025001e04c3fSmrgstatic int tgsi_r600_arl(struct r600_shader_ctx *ctx) 1025101e04c3fSmrg{ 1025201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1025301e04c3fSmrg struct r600_bytecode_alu alu; 1025401e04c3fSmrg int r; 1025501e04c3fSmrg int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1025601e04c3fSmrg 1025701e04c3fSmrg switch (inst->Instruction.Opcode) { 1025801e04c3fSmrg case TGSI_OPCODE_ARL: 1025901e04c3fSmrg memset(&alu, 0, sizeof(alu)); 1026001e04c3fSmrg alu.op = ALU_OP1_FLOOR; 1026101e04c3fSmrg alu.dst.sel = ctx->bc->ar_reg; 1026201e04c3fSmrg alu.dst.write = 1; 1026301e04c3fSmrg for (i = 0; i <= lasti; ++i) { 1026401e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 102653464ebd5Sriastradh alu.dst.chan = i; 1026601e04c3fSmrg 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1026701e04c3fSmrg alu.last = i == lasti; 1026801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1026901e04c3fSmrg return r; 1027001e04c3fSmrg } 1027101e04c3fSmrg } 1027201e04c3fSmrg 1027301e04c3fSmrg memset(&alu, 0, sizeof(alu)); 1027401e04c3fSmrg alu.op = ALU_OP1_FLT_TO_INT; 1027501e04c3fSmrg alu.src[0].sel = ctx->bc->ar_reg; 1027601e04c3fSmrg alu.dst.sel = ctx->bc->ar_reg; 1027701e04c3fSmrg alu.dst.write = 1; 1027801e04c3fSmrg /* FLT_TO_INT is trans-only on r600/r700 */ 1027901e04c3fSmrg alu.last = TRUE; 1028001e04c3fSmrg for (i = 0; i <= lasti; ++i) { 1028101e04c3fSmrg alu.dst.chan = i; 1028201e04c3fSmrg alu.src[0].chan = i; 1028301e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1028401e04c3fSmrg return r; 1028501e04c3fSmrg } 1028601e04c3fSmrg break; 1028701e04c3fSmrg case TGSI_OPCODE_ARR: 1028801e04c3fSmrg memset(&alu, 0, sizeof(alu)); 1028901e04c3fSmrg alu.op = ALU_OP1_FLT_TO_INT; 1029001e04c3fSmrg alu.dst.sel = ctx->bc->ar_reg; 1029101e04c3fSmrg alu.dst.write = 1; 1029201e04c3fSmrg /* FLT_TO_INT is trans-only on r600/r700 */ 1029301e04c3fSmrg alu.last = TRUE; 1029401e04c3fSmrg for (i = 0; i <= lasti; ++i) { 1029501e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1029601e04c3fSmrg alu.dst.chan = i; 1029701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1029801e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 102993464ebd5Sriastradh return r; 103003464ebd5Sriastradh } 1030101e04c3fSmrg } 1030201e04c3fSmrg break; 1030301e04c3fSmrg case TGSI_OPCODE_UARL: 1030401e04c3fSmrg memset(&alu, 0, sizeof(alu)); 1030501e04c3fSmrg alu.op = ALU_OP1_MOV; 1030601e04c3fSmrg alu.dst.sel = ctx->bc->ar_reg; 1030701e04c3fSmrg alu.dst.write = 1; 1030801e04c3fSmrg for (i = 0; i <= lasti; ++i) { 1030901e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1031001e04c3fSmrg alu.dst.chan = i; 1031101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1031201e04c3fSmrg 
alu.last = i == lasti; 1031301e04c3fSmrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1031401e04c3fSmrg return r; 1031501e04c3fSmrg } 1031601e04c3fSmrg } 1031701e04c3fSmrg break; 1031801e04c3fSmrg default: 1031901e04c3fSmrg assert(0); 1032001e04c3fSmrg return -1; 1032101e04c3fSmrg } 1032201e04c3fSmrg 1032301e04c3fSmrg ctx->bc->ar_loaded = 0; 1032401e04c3fSmrg return 0; 1032501e04c3fSmrg} 1032601e04c3fSmrg 1032701e04c3fSmrgstatic int tgsi_opdst(struct r600_shader_ctx *ctx) 1032801e04c3fSmrg{ 1032901e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1033001e04c3fSmrg struct r600_bytecode_alu alu; 1033101e04c3fSmrg int i, r = 0; 1033201e04c3fSmrg 1033301e04c3fSmrg for (i = 0; i < 4; i++) { 1033401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1033501e04c3fSmrg 1033601e04c3fSmrg alu.op = ALU_OP2_MUL; 1033701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1033801e04c3fSmrg 1033901e04c3fSmrg if (i == 0 || i == 3) { 1034001e04c3fSmrg alu.src[0].sel = V_SQ_ALU_SRC_1; 1034101e04c3fSmrg } else { 1034201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1034301e04c3fSmrg } 1034401e04c3fSmrg 1034501e04c3fSmrg if (i == 0 || i == 2) { 1034601e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_1; 1034701e04c3fSmrg } else { 1034801e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 1034901e04c3fSmrg } 1035001e04c3fSmrg if (i == 3) 1035101e04c3fSmrg alu.last = 1; 1035201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1035301e04c3fSmrg if (r) 1035401e04c3fSmrg return r; 1035501e04c3fSmrg } 1035601e04c3fSmrg return 0; 1035701e04c3fSmrg} 1035801e04c3fSmrg 1035901e04c3fSmrgstatic int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type, 1036001e04c3fSmrg struct r600_bytecode_alu_src *src) 1036101e04c3fSmrg{ 1036201e04c3fSmrg struct r600_bytecode_alu alu; 1036301e04c3fSmrg int r; 1036401e04c3fSmrg 1036501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1036601e04c3fSmrg alu.op = opcode; 
1036701e04c3fSmrg alu.execute_mask = 1; 1036801e04c3fSmrg alu.update_pred = 1; 1036901e04c3fSmrg 1037001e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1037101e04c3fSmrg alu.dst.write = 1; 1037201e04c3fSmrg alu.dst.chan = 0; 1037301e04c3fSmrg 1037401e04c3fSmrg alu.src[0] = *src; 1037501e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_0; 1037601e04c3fSmrg alu.src[1].chan = 0; 1037701e04c3fSmrg 1037801e04c3fSmrg alu.last = 1; 1037901e04c3fSmrg 1038001e04c3fSmrg r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 1038101e04c3fSmrg if (r) 1038201e04c3fSmrg return r; 1038301e04c3fSmrg return 0; 1038401e04c3fSmrg} 1038501e04c3fSmrg 1038601e04c3fSmrgstatic int pops(struct r600_shader_ctx *ctx, int pops) 1038701e04c3fSmrg{ 1038801e04c3fSmrg unsigned force_pop = ctx->bc->force_add_cf; 103893464ebd5Sriastradh 1039001e04c3fSmrg if (!force_pop) { 1039101e04c3fSmrg int alu_pop = 3; 1039201e04c3fSmrg if (ctx->bc->cf_last) { 1039301e04c3fSmrg if (ctx->bc->cf_last->op == CF_OP_ALU) 1039401e04c3fSmrg alu_pop = 0; 1039501e04c3fSmrg else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 1039601e04c3fSmrg alu_pop = 1; 1039701e04c3fSmrg } 1039801e04c3fSmrg alu_pop += pops; 1039901e04c3fSmrg if (alu_pop == 1) { 1040001e04c3fSmrg ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 1040101e04c3fSmrg ctx->bc->force_add_cf = 1; 1040201e04c3fSmrg } else if (alu_pop == 2) { 1040301e04c3fSmrg ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 1040401e04c3fSmrg ctx->bc->force_add_cf = 1; 104053464ebd5Sriastradh } else { 1040601e04c3fSmrg force_pop = 1; 104073464ebd5Sriastradh } 1040801e04c3fSmrg } 104093464ebd5Sriastradh 1041001e04c3fSmrg if (force_pop) { 1041101e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 1041201e04c3fSmrg ctx->bc->cf_last->pop_count = pops; 1041301e04c3fSmrg ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 104143464ebd5Sriastradh } 104153464ebd5Sriastradh 1041601e04c3fSmrg return 0; 1041701e04c3fSmrg} 104183464ebd5Sriastradh 1041901e04c3fSmrgstatic inline int 
callstack_update_max_depth(struct r600_shader_ctx *ctx, 1042001e04c3fSmrg unsigned reason) 1042101e04c3fSmrg{ 1042201e04c3fSmrg struct r600_stack_info *stack = &ctx->bc->stack; 1042301e04c3fSmrg unsigned elements; 1042401e04c3fSmrg int entries; 104253464ebd5Sriastradh 1042601e04c3fSmrg unsigned entry_size = stack->entry_size; 104273464ebd5Sriastradh 1042801e04c3fSmrg elements = (stack->loop + stack->push_wqm ) * entry_size; 1042901e04c3fSmrg elements += stack->push; 104303464ebd5Sriastradh 1043101e04c3fSmrg switch (ctx->bc->chip_class) { 1043201e04c3fSmrg case R600: 1043301e04c3fSmrg case R700: 1043401e04c3fSmrg /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 1043501e04c3fSmrg * the stack must be reserved to hold the current active/continue 1043601e04c3fSmrg * masks */ 1043701e04c3fSmrg if (reason == FC_PUSH_VPM || stack->push > 0) { 1043801e04c3fSmrg elements += 2; 1043901e04c3fSmrg } 1044001e04c3fSmrg break; 104413464ebd5Sriastradh 1044201e04c3fSmrg case CAYMAN: 1044301e04c3fSmrg /* r9xx: any stack operation on empty stack consumes 2 additional 1044401e04c3fSmrg * elements */ 1044501e04c3fSmrg elements += 2; 104463464ebd5Sriastradh 104477ec681f3Smrg FALLTHROUGH; 1044801e04c3fSmrg /* FIXME: do the two elements added above cover the cases for the 1044901e04c3fSmrg * r8xx+ below? */ 1045001e04c3fSmrg 1045101e04c3fSmrg case EVERGREEN: 1045201e04c3fSmrg /* r8xx+: 2 extra elements are not always required, but one extra 1045301e04c3fSmrg * element must be added for each of the following cases: 1045401e04c3fSmrg * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 1045501e04c3fSmrg * stack usage. 1045601e04c3fSmrg * (Currently we don't use ALU_ELSE_AFTER.) 1045701e04c3fSmrg * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 1045801e04c3fSmrg * PUSH instruction executed. 1045901e04c3fSmrg * 1046001e04c3fSmrg * NOTE: it seems we also need to reserve additional element in some 1046101e04c3fSmrg * other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, 1046201e04c3fSmrg * then STACK_SIZE should be 2 instead of 1 */ 1046301e04c3fSmrg if (reason == FC_PUSH_VPM || stack->push > 0) { 1046401e04c3fSmrg elements += 1; 104653464ebd5Sriastradh } 1046601e04c3fSmrg break; 104673464ebd5Sriastradh 1046801e04c3fSmrg default: 1046901e04c3fSmrg assert(0); 1047001e04c3fSmrg break; 1047101e04c3fSmrg } 104723464ebd5Sriastradh 1047301e04c3fSmrg /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 1047401e04c3fSmrg * for all chips, so we use 4 in the final formula, not the real entry_size 1047501e04c3fSmrg * for the chip */ 1047601e04c3fSmrg entry_size = 4; 104773464ebd5Sriastradh 1047801e04c3fSmrg entries = (elements + (entry_size - 1)) / entry_size; 104793464ebd5Sriastradh 1048001e04c3fSmrg if (entries > stack->max_entries) 1048101e04c3fSmrg stack->max_entries = entries; 1048201e04c3fSmrg return elements; 1048301e04c3fSmrg} 104843464ebd5Sriastradh 1048501e04c3fSmrgstatic inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 1048601e04c3fSmrg{ 1048701e04c3fSmrg switch(reason) { 1048801e04c3fSmrg case FC_PUSH_VPM: 1048901e04c3fSmrg --ctx->bc->stack.push; 1049001e04c3fSmrg assert(ctx->bc->stack.push >= 0); 1049101e04c3fSmrg break; 1049201e04c3fSmrg case FC_PUSH_WQM: 1049301e04c3fSmrg --ctx->bc->stack.push_wqm; 1049401e04c3fSmrg assert(ctx->bc->stack.push_wqm >= 0); 1049501e04c3fSmrg break; 1049601e04c3fSmrg case FC_LOOP: 1049701e04c3fSmrg --ctx->bc->stack.loop; 1049801e04c3fSmrg assert(ctx->bc->stack.loop >= 0); 1049901e04c3fSmrg break; 1050001e04c3fSmrg default: 1050101e04c3fSmrg assert(0); 1050201e04c3fSmrg break; 1050301e04c3fSmrg } 1050401e04c3fSmrg} 105053464ebd5Sriastradh 1050601e04c3fSmrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 1050701e04c3fSmrg{ 1050801e04c3fSmrg switch (reason) { 1050901e04c3fSmrg case FC_PUSH_VPM: 1051001e04c3fSmrg ++ctx->bc->stack.push; 1051101e04c3fSmrg break; 1051201e04c3fSmrg case 
FC_PUSH_WQM: 1051301e04c3fSmrg ++ctx->bc->stack.push_wqm; 1051401e04c3fSmrg break; 1051501e04c3fSmrg case FC_LOOP: 1051601e04c3fSmrg ++ctx->bc->stack.loop; 1051701e04c3fSmrg break; 1051801e04c3fSmrg default: 1051901e04c3fSmrg assert(0); 1052001e04c3fSmrg } 105213464ebd5Sriastradh 1052201e04c3fSmrg return callstack_update_max_depth(ctx, reason); 1052301e04c3fSmrg} 105243464ebd5Sriastradh 1052501e04c3fSmrgstatic void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 1052601e04c3fSmrg{ 1052701e04c3fSmrg struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 105283464ebd5Sriastradh 1052901e04c3fSmrg sp->mid = realloc((void *)sp->mid, 1053001e04c3fSmrg sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 1053101e04c3fSmrg sp->mid[sp->num_mid] = ctx->bc->cf_last; 1053201e04c3fSmrg sp->num_mid++; 1053301e04c3fSmrg} 105343464ebd5Sriastradh 1053501e04c3fSmrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 1053601e04c3fSmrg{ 1053701e04c3fSmrg assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack)); 1053801e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 1053901e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 1054001e04c3fSmrg ctx->bc->fc_sp++; 1054101e04c3fSmrg} 105423464ebd5Sriastradh 1054301e04c3fSmrgstatic void fc_poplevel(struct r600_shader_ctx *ctx) 1054401e04c3fSmrg{ 1054501e04c3fSmrg struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1]; 1054601e04c3fSmrg free(sp->mid); 1054701e04c3fSmrg sp->mid = NULL; 1054801e04c3fSmrg sp->num_mid = 0; 1054901e04c3fSmrg sp->start = NULL; 1055001e04c3fSmrg sp->type = 0; 1055101e04c3fSmrg ctx->bc->fc_sp--; 1055201e04c3fSmrg} 105533464ebd5Sriastradh 1055401e04c3fSmrg#if 0 1055501e04c3fSmrgstatic int emit_return(struct r600_shader_ctx *ctx) 1055601e04c3fSmrg{ 1055701e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 1055801e04c3fSmrg return 0; 1055901e04c3fSmrg} 105603464ebd5Sriastradh 1056101e04c3fSmrgstatic int emit_jump_to_offset(struct r600_shader_ctx 
*ctx, int pops, int offset) 1056201e04c3fSmrg{ 1056301e04c3fSmrg 1056401e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 1056501e04c3fSmrg ctx->bc->cf_last->pop_count = pops; 1056601e04c3fSmrg /* XXX work out offset */ 1056701e04c3fSmrg return 0; 1056801e04c3fSmrg} 105693464ebd5Sriastradh 1057001e04c3fSmrgstatic int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 1057101e04c3fSmrg{ 1057201e04c3fSmrg return 0; 1057301e04c3fSmrg} 105743464ebd5Sriastradh 1057501e04c3fSmrgstatic void emit_testflag(struct r600_shader_ctx *ctx) 1057601e04c3fSmrg{ 105773464ebd5Sriastradh 1057801e04c3fSmrg} 105793464ebd5Sriastradh 1058001e04c3fSmrgstatic void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 1058101e04c3fSmrg{ 1058201e04c3fSmrg emit_testflag(ctx); 1058301e04c3fSmrg emit_jump_to_offset(ctx, 1, 4); 1058401e04c3fSmrg emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 1058501e04c3fSmrg pops(ctx, ifidx + 1); 1058601e04c3fSmrg emit_return(ctx); 1058701e04c3fSmrg} 105883464ebd5Sriastradh 1058901e04c3fSmrgstatic void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 1059001e04c3fSmrg{ 1059101e04c3fSmrg emit_testflag(ctx); 105923464ebd5Sriastradh 1059301e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 1059401e04c3fSmrg ctx->bc->cf_last->pop_count = 1; 105953464ebd5Sriastradh 1059601e04c3fSmrg fc_set_mid(ctx, fc_sp); 105973464ebd5Sriastradh 1059801e04c3fSmrg pops(ctx, 1); 1059901e04c3fSmrg} 1060001e04c3fSmrg#endif 106013464ebd5Sriastradh 1060201e04c3fSmrgstatic int emit_if(struct r600_shader_ctx *ctx, int opcode, 1060301e04c3fSmrg struct r600_bytecode_alu_src *src) 1060401e04c3fSmrg{ 1060501e04c3fSmrg int alu_type = CF_OP_ALU_PUSH_BEFORE; 1060601e04c3fSmrg bool needs_workaround = false; 1060701e04c3fSmrg int elems = callstack_push(ctx, FC_PUSH_VPM); 106083464ebd5Sriastradh 1060901e04c3fSmrg if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) 1061001e04c3fSmrg needs_workaround = true; 
106113464ebd5Sriastradh 1061201e04c3fSmrg if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) { 1061301e04c3fSmrg unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size; 1061401e04c3fSmrg unsigned dmod2 = (elems) % ctx->bc->stack.entry_size; 106153464ebd5Sriastradh 1061601e04c3fSmrg if (elems && (!dmod1 || !dmod2)) 1061701e04c3fSmrg needs_workaround = true; 1061801e04c3fSmrg } 106193464ebd5Sriastradh 1062001e04c3fSmrg /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 1062101e04c3fSmrg * LOOP_STARTxxx for nested loops may put the branch stack into a state 1062201e04c3fSmrg * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this 1062301e04c3fSmrg * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 1062401e04c3fSmrg if (needs_workaround) { 1062501e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 1062601e04c3fSmrg ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 1062701e04c3fSmrg alu_type = CF_OP_ALU; 106283464ebd5Sriastradh } 106293464ebd5Sriastradh 1063001e04c3fSmrg emit_logic_pred(ctx, opcode, alu_type, src); 106313464ebd5Sriastradh 1063201e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 106333464ebd5Sriastradh 1063401e04c3fSmrg fc_pushlevel(ctx, FC_IF); 106353464ebd5Sriastradh 1063601e04c3fSmrg return 0; 1063701e04c3fSmrg} 106383464ebd5Sriastradh 1063901e04c3fSmrgstatic int tgsi_if(struct r600_shader_ctx *ctx) 1064001e04c3fSmrg{ 1064101e04c3fSmrg struct r600_bytecode_alu_src alu_src; 1064201e04c3fSmrg r600_bytecode_src(&alu_src, &ctx->src[0], 0); 1064301e04c3fSmrg 1064401e04c3fSmrg return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src); 106453464ebd5Sriastradh} 106463464ebd5Sriastradh 1064701e04c3fSmrgstatic int tgsi_uif(struct r600_shader_ctx *ctx) 106483464ebd5Sriastradh{ 1064901e04c3fSmrg struct r600_bytecode_alu_src alu_src; 1065001e04c3fSmrg r600_bytecode_src(&alu_src, &ctx->src[0], 0); 1065101e04c3fSmrg return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1065201e04c3fSmrg} 
106533464ebd5Sriastradh 1065401e04c3fSmrgstatic int tgsi_else(struct r600_shader_ctx *ctx) 1065501e04c3fSmrg{ 1065601e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 1065701e04c3fSmrg ctx->bc->cf_last->pop_count = 1; 106583464ebd5Sriastradh 1065901e04c3fSmrg fc_set_mid(ctx, ctx->bc->fc_sp - 1); 1066001e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id; 1066101e04c3fSmrg return 0; 1066201e04c3fSmrg} 1066301e04c3fSmrg 1066401e04c3fSmrgstatic int tgsi_endif(struct r600_shader_ctx *ctx) 1066501e04c3fSmrg{ 1066601e04c3fSmrg int offset = 2; 1066701e04c3fSmrg pops(ctx, 1); 1066801e04c3fSmrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) { 1066901e04c3fSmrg R600_ERR("if/endif unbalanced in shader\n"); 106703464ebd5Sriastradh return -1; 106713464ebd5Sriastradh } 106723464ebd5Sriastradh 1067301e04c3fSmrg /* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */ 1067401e04c3fSmrg if (ctx->bc->cf_last->eg_alu_extended) 1067501e04c3fSmrg offset += 2; 1067601e04c3fSmrg 1067701e04c3fSmrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) { 1067801e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + offset; 1067901e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1; 1068001e04c3fSmrg } else { 1068101e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset; 10682af69d88dSmrg } 1068301e04c3fSmrg fc_poplevel(ctx); 106843464ebd5Sriastradh 1068501e04c3fSmrg callstack_pop(ctx, FC_PUSH_VPM); 106863464ebd5Sriastradh return 0; 106873464ebd5Sriastradh} 1068801e04c3fSmrg 1068901e04c3fSmrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx) 106903464ebd5Sriastradh{ 1069101e04c3fSmrg /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 1069201e04c3fSmrg * limited to 4096 iterations, like the other LOOP_* instructions. 
*/ 1069301e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 106943464ebd5Sriastradh 1069501e04c3fSmrg fc_pushlevel(ctx, FC_LOOP); 106963464ebd5Sriastradh 1069701e04c3fSmrg /* check stack depth */ 1069801e04c3fSmrg callstack_push(ctx, FC_LOOP); 1069901e04c3fSmrg return 0; 1070001e04c3fSmrg} 1070101e04c3fSmrg 1070201e04c3fSmrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx) 1070301e04c3fSmrg{ 1070401e04c3fSmrg int i; 1070501e04c3fSmrg 1070601e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 1070701e04c3fSmrg 1070801e04c3fSmrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) { 1070901e04c3fSmrg R600_ERR("loop/endloop in shader code are not paired.\n"); 1071001e04c3fSmrg return -EINVAL; 1071101e04c3fSmrg } 1071201e04c3fSmrg 1071301e04c3fSmrg /* fixup loop pointers - from r600isa 1071401e04c3fSmrg LOOP END points to CF after LOOP START, 1071501e04c3fSmrg LOOP START point to CF after LOOP END 1071601e04c3fSmrg BRK/CONT point to LOOP END CF 1071701e04c3fSmrg */ 1071801e04c3fSmrg ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2; 1071901e04c3fSmrg 1072001e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2; 1072101e04c3fSmrg 1072201e04c3fSmrg for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) { 1072301e04c3fSmrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id; 1072401e04c3fSmrg } 1072501e04c3fSmrg /* XXX add LOOPRET support */ 1072601e04c3fSmrg fc_poplevel(ctx); 1072701e04c3fSmrg callstack_pop(ctx, FC_LOOP); 1072801e04c3fSmrg return 0; 1072901e04c3fSmrg} 1073001e04c3fSmrg 1073101e04c3fSmrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 1073201e04c3fSmrg{ 1073301e04c3fSmrg unsigned int fscp; 1073401e04c3fSmrg 1073501e04c3fSmrg for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 1073601e04c3fSmrg { 1073701e04c3fSmrg if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type) 1073801e04c3fSmrg break; 107393464ebd5Sriastradh } 
107403464ebd5Sriastradh 1074101e04c3fSmrg if (fscp == 0) { 1074201e04c3fSmrg R600_ERR("Break not inside loop/endloop pair\n"); 1074301e04c3fSmrg return -EINVAL; 1074401e04c3fSmrg } 1074501e04c3fSmrg 1074601e04c3fSmrg r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 1074701e04c3fSmrg 1074801e04c3fSmrg fc_set_mid(ctx, fscp - 1); 1074901e04c3fSmrg 107503464ebd5Sriastradh return 0; 107513464ebd5Sriastradh} 107523464ebd5Sriastradh 1075301e04c3fSmrgstatic int tgsi_gs_emit(struct r600_shader_ctx *ctx) 1075401e04c3fSmrg{ 1075501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1075601e04c3fSmrg int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX]; 1075701e04c3fSmrg int r; 1075801e04c3fSmrg 1075901e04c3fSmrg if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 1076001e04c3fSmrg emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE); 1076101e04c3fSmrg 1076201e04c3fSmrg r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 1076301e04c3fSmrg if (!r) { 1076401e04c3fSmrg ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream 1076501e04c3fSmrg if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 1076601e04c3fSmrg return emit_inc_ring_offset(ctx, stream, TRUE); 1076701e04c3fSmrg } 1076801e04c3fSmrg return r; 1076901e04c3fSmrg} 1077001e04c3fSmrg 1077101e04c3fSmrgstatic int tgsi_umad(struct r600_shader_ctx *ctx) 107723464ebd5Sriastradh{ 107733464ebd5Sriastradh struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10774af69d88dSmrg struct r600_bytecode_alu alu; 1077501e04c3fSmrg int i, j, r; 1077601e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1077701e04c3fSmrg 1077801e04c3fSmrg /* src0 * src1 */ 1077901e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1078001e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1078101e04c3fSmrg continue; 107823464ebd5Sriastradh 10783af69d88dSmrg memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 107843464ebd5Sriastradh 1078501e04c3fSmrg alu.dst.chan = i; 1078601e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1078701e04c3fSmrg alu.dst.write = 1; 107883464ebd5Sriastradh 1078901e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 1079001e04c3fSmrg for (j = 0; j < 2; j++) { 1079101e04c3fSmrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 107923464ebd5Sriastradh } 107933464ebd5Sriastradh 1079401e04c3fSmrg alu.last = 1; 1079501e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 1079601e04c3fSmrg if (r) 1079701e04c3fSmrg return r; 1079801e04c3fSmrg } 1079901e04c3fSmrg 1080001e04c3fSmrg 1080101e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1080201e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1080301e04c3fSmrg continue; 1080401e04c3fSmrg 1080501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1080601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1080701e04c3fSmrg 1080801e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 1080901e04c3fSmrg 1081001e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1081101e04c3fSmrg alu.src[0].chan = i; 1081201e04c3fSmrg 1081301e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 1081401e04c3fSmrg if (i == lasti) { 108153464ebd5Sriastradh alu.last = 1; 1081601e04c3fSmrg } 10817af69d88dSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 108183464ebd5Sriastradh if (r) 108193464ebd5Sriastradh return r; 108203464ebd5Sriastradh } 108213464ebd5Sriastradh return 0; 108223464ebd5Sriastradh} 108233464ebd5Sriastradh 1082401e04c3fSmrgstatic int tgsi_pk2h(struct r600_shader_ctx *ctx) 108253464ebd5Sriastradh{ 1082601e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10827af69d88dSmrg struct r600_bytecode_alu alu; 1082801e04c3fSmrg int r, i; 1082901e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 108303464ebd5Sriastradh 1083101e04c3fSmrg /* temp.xy = f32_to_f16(src) */ 10832af69d88dSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1083301e04c3fSmrg 
alu.op = ALU_OP1_FLT32_TO_FLT16; 1083401e04c3fSmrg alu.dst.chan = 0; 108353464ebd5Sriastradh alu.dst.sel = ctx->temp_reg; 108363464ebd5Sriastradh alu.dst.write = 1; 10837af69d88dSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1083801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1083901e04c3fSmrg if (r) 1084001e04c3fSmrg return r; 1084101e04c3fSmrg alu.dst.chan = 1; 1084201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 108433464ebd5Sriastradh alu.last = 1; 1084401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 108453464ebd5Sriastradh if (r) 108463464ebd5Sriastradh return r; 108473464ebd5Sriastradh 1084801e04c3fSmrg /* dst.x = temp.y * 0x10000 + temp.x */ 1084901e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1085001e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1085101e04c3fSmrg continue; 108523464ebd5Sriastradh 1085301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1085401e04c3fSmrg alu.op = ALU_OP3_MULADD_UINT24; 1085501e04c3fSmrg alu.is_op3 = 1; 1085601e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1085701e04c3fSmrg alu.last = i == lasti; 1085801e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1085901e04c3fSmrg alu.src[0].chan = 1; 1086001e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1086101e04c3fSmrg alu.src[1].value = 0x10000; 1086201e04c3fSmrg alu.src[2].sel = ctx->temp_reg; 1086301e04c3fSmrg alu.src[2].chan = 0; 1086401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1086501e04c3fSmrg if (r) 1086601e04c3fSmrg return r; 108673464ebd5Sriastradh } 108683464ebd5Sriastradh 108693464ebd5Sriastradh return 0; 108703464ebd5Sriastradh} 108713464ebd5Sriastradh 1087201e04c3fSmrgstatic int tgsi_up2h(struct r600_shader_ctx *ctx) 10873af69d88dSmrg{ 1087401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1087501e04c3fSmrg struct r600_bytecode_alu alu; 1087601e04c3fSmrg int r, i; 1087701e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 
10878af69d88dSmrg 1087901e04c3fSmrg /* temp.x = src.x */ 1088001e04c3fSmrg /* note: no need to mask out the high bits */ 1088101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1088201e04c3fSmrg alu.op = ALU_OP1_MOV; 1088301e04c3fSmrg alu.dst.chan = 0; 1088401e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1088501e04c3fSmrg alu.dst.write = 1; 1088601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1088701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1088801e04c3fSmrg if (r) 1088901e04c3fSmrg return r; 10890af69d88dSmrg 1089101e04c3fSmrg /* temp.y = src.x >> 16 */ 1089201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1089301e04c3fSmrg alu.op = ALU_OP2_LSHR_INT; 1089401e04c3fSmrg alu.dst.chan = 1; 1089501e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1089601e04c3fSmrg alu.dst.write = 1; 1089701e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1089801e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1089901e04c3fSmrg alu.src[1].value = 16; 1090001e04c3fSmrg alu.last = 1; 1090101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1090201e04c3fSmrg if (r) 1090301e04c3fSmrg return r; 109043464ebd5Sriastradh 1090501e04c3fSmrg /* dst.wz = dst.xy = f16_to_f32(temp.xy) */ 1090601e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1090701e04c3fSmrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1090801e04c3fSmrg continue; 1090901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1091001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1091101e04c3fSmrg alu.op = ALU_OP1_FLT16_TO_FLT32; 1091201e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1091301e04c3fSmrg alu.src[0].chan = i % 2; 1091401e04c3fSmrg alu.last = i == lasti; 1091501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1091601e04c3fSmrg if (r) 1091701e04c3fSmrg return r; 109183464ebd5Sriastradh } 109193464ebd5Sriastradh 1092001e04c3fSmrg return 0; 109213464ebd5Sriastradh} 109223464ebd5Sriastradh 1092301e04c3fSmrgstatic int tgsi_bfe(struct r600_shader_ctx *ctx) 
109243464ebd5Sriastradh{ 1092501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1092601e04c3fSmrg struct r600_bytecode_alu alu; 1092701e04c3fSmrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1092801e04c3fSmrg int r, i; 1092901e04c3fSmrg int dst = -1; 109303464ebd5Sriastradh 1093101e04c3fSmrg if ((inst->Src[0].Register.File == inst->Dst[0].Register.File && 1093201e04c3fSmrg inst->Src[0].Register.Index == inst->Dst[0].Register.Index) || 1093301e04c3fSmrg (inst->Src[2].Register.File == inst->Dst[0].Register.File && 1093401e04c3fSmrg inst->Src[2].Register.Index == inst->Dst[0].Register.Index)) 1093501e04c3fSmrg dst = r600_get_temp(ctx); 109363464ebd5Sriastradh 1093701e04c3fSmrg r = tgsi_op3_dst(ctx, dst); 1093801e04c3fSmrg if (r) 1093901e04c3fSmrg return r; 109403464ebd5Sriastradh 1094101e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1094201e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1094301e04c3fSmrg alu.op = ALU_OP2_SETGE_INT; 1094401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[2], i); 1094501e04c3fSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1094601e04c3fSmrg alu.src[1].value = 32; 1094701e04c3fSmrg alu.dst.sel = ctx->temp_reg; 1094801e04c3fSmrg alu.dst.chan = i; 1094901e04c3fSmrg alu.dst.write = 1; 1095001e04c3fSmrg if (i == lasti) 1095101e04c3fSmrg alu.last = 1; 1095201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1095301e04c3fSmrg if (r) 1095401e04c3fSmrg return r; 1095501e04c3fSmrg } 109563464ebd5Sriastradh 1095701e04c3fSmrg for (i = 0; i < lasti + 1; i++) { 1095801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1095901e04c3fSmrg alu.op = ALU_OP3_CNDE_INT; 1096001e04c3fSmrg alu.is_op3 = 1; 1096101e04c3fSmrg alu.src[0].sel = ctx->temp_reg; 1096201e04c3fSmrg alu.src[0].chan = i; 109633464ebd5Sriastradh 1096401e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1096501e04c3fSmrg if (dst != -1) 1096601e04c3fSmrg alu.src[1].sel = dst; 1096701e04c3fSmrg else 
1096801e04c3fSmrg alu.src[1].sel = alu.dst.sel; 1096901e04c3fSmrg alu.src[1].chan = i; 1097001e04c3fSmrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 1097101e04c3fSmrg alu.dst.write = 1; 1097201e04c3fSmrg if (i == lasti) 1097301e04c3fSmrg alu.last = 1; 1097401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1097501e04c3fSmrg if (r) 1097601e04c3fSmrg return r; 1097701e04c3fSmrg } 109783464ebd5Sriastradh 109793464ebd5Sriastradh return 0; 109803464ebd5Sriastradh} 109813464ebd5Sriastradh 1098201e04c3fSmrgstatic int tgsi_clock(struct r600_shader_ctx *ctx) 109833464ebd5Sriastradh{ 1098401e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1098501e04c3fSmrg struct r600_bytecode_alu alu; 1098601e04c3fSmrg int r; 1098701e04c3fSmrg 1098801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1098901e04c3fSmrg alu.op = ALU_OP1_MOV; 1099001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1099101e04c3fSmrg alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO; 1099201e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1099301e04c3fSmrg if (r) 1099401e04c3fSmrg return r; 1099501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1099601e04c3fSmrg alu.op = ALU_OP1_MOV; 1099701e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1099801e04c3fSmrg alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI; 1099901e04c3fSmrg alu.last = 1; 1100001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1100101e04c3fSmrg if (r) 1100201e04c3fSmrg return r; 110033464ebd5Sriastradh return 0; 110043464ebd5Sriastradh} 110053464ebd5Sriastradh 1100601e04c3fSmrgstatic int emit_u64add(struct r600_shader_ctx *ctx, int op, 1100701e04c3fSmrg int treg, 1100801e04c3fSmrg int src0_sel, int src0_chan, 1100901e04c3fSmrg int src1_sel, int src1_chan) 110103464ebd5Sriastradh{ 1101101e04c3fSmrg struct r600_bytecode_alu alu; 1101201e04c3fSmrg int r; 1101301e04c3fSmrg int opc; 110143464ebd5Sriastradh 1101501e04c3fSmrg if (op == ALU_OP2_ADD_INT) 1101601e04c3fSmrg opc = 
ALU_OP2_ADDC_UINT; 1101701e04c3fSmrg else 1101801e04c3fSmrg opc = ALU_OP2_SUBB_UINT; 110193464ebd5Sriastradh 1102001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1102101e04c3fSmrg alu.op = op; ; 1102201e04c3fSmrg alu.dst.sel = treg; 1102301e04c3fSmrg alu.dst.chan = 0; 1102401e04c3fSmrg alu.dst.write = 1; 1102501e04c3fSmrg alu.src[0].sel = src0_sel; 1102601e04c3fSmrg alu.src[0].chan = src0_chan + 0; 1102701e04c3fSmrg alu.src[1].sel = src1_sel; 1102801e04c3fSmrg alu.src[1].chan = src1_chan + 0; 1102901e04c3fSmrg alu.src[1].neg = 0; 1103001e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1103101e04c3fSmrg if (r) 1103201e04c3fSmrg return r; 110333464ebd5Sriastradh 1103401e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1103501e04c3fSmrg alu.op = op; 1103601e04c3fSmrg alu.dst.sel = treg; 1103701e04c3fSmrg alu.dst.chan = 1; 1103801e04c3fSmrg alu.dst.write = 1; 1103901e04c3fSmrg alu.src[0].sel = src0_sel; 1104001e04c3fSmrg alu.src[0].chan = src0_chan + 1; 1104101e04c3fSmrg alu.src[1].sel = src1_sel; 1104201e04c3fSmrg alu.src[1].chan = src1_chan + 1; 1104301e04c3fSmrg alu.src[1].neg = 0; 1104401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1104501e04c3fSmrg if (r) 1104601e04c3fSmrg return r; 110473464ebd5Sriastradh 1104801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1104901e04c3fSmrg alu.op = opc; 1105001e04c3fSmrg alu.dst.sel = treg; 1105101e04c3fSmrg alu.dst.chan = 2; 1105201e04c3fSmrg alu.dst.write = 1; 1105301e04c3fSmrg alu.last = 1; 1105401e04c3fSmrg alu.src[0].sel = src0_sel; 1105501e04c3fSmrg alu.src[0].chan = src0_chan + 0; 1105601e04c3fSmrg alu.src[1].sel = src1_sel; 1105701e04c3fSmrg alu.src[1].chan = src1_chan + 0; 1105801e04c3fSmrg alu.src[1].neg = 0; 1105901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1106001e04c3fSmrg if (r) 1106101e04c3fSmrg return r; 110623464ebd5Sriastradh 1106301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1106401e04c3fSmrg alu.op = op; 1106501e04c3fSmrg 
alu.dst.sel = treg; 1106601e04c3fSmrg alu.dst.chan = 1; 1106701e04c3fSmrg alu.dst.write = 1; 1106801e04c3fSmrg alu.src[0].sel = treg; 1106901e04c3fSmrg alu.src[0].chan = 1; 1107001e04c3fSmrg alu.src[1].sel = treg; 1107101e04c3fSmrg alu.src[1].chan = 2; 1107201e04c3fSmrg alu.last = 1; 1107301e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1107401e04c3fSmrg if (r) 1107501e04c3fSmrg return r; 1107601e04c3fSmrg return 0; 110773464ebd5Sriastradh} 110783464ebd5Sriastradh 1107901e04c3fSmrgstatic int egcm_u64add(struct r600_shader_ctx *ctx) 110803464ebd5Sriastradh{ 1108101e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1108201e04c3fSmrg struct r600_bytecode_alu alu; 1108301e04c3fSmrg int r; 1108401e04c3fSmrg int treg = ctx->temp_reg; 1108501e04c3fSmrg int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT; 11086af69d88dSmrg 1108701e04c3fSmrg if (ctx->src[1].neg) { 1108801e04c3fSmrg op = ALU_OP2_SUB_INT; 1108901e04c3fSmrg opc = ALU_OP2_SUBB_UINT; 11090af69d88dSmrg } 1109101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1109201e04c3fSmrg alu.op = op; ; 1109301e04c3fSmrg alu.dst.sel = treg; 1109401e04c3fSmrg alu.dst.chan = 0; 1109501e04c3fSmrg alu.dst.write = 1; 1109601e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1109701e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1109801e04c3fSmrg alu.src[1].neg = 0; 1109901e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1110001e04c3fSmrg if (r) 1110101e04c3fSmrg return r; 11102af69d88dSmrg 1110301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1110401e04c3fSmrg alu.op = op; 1110501e04c3fSmrg alu.dst.sel = treg; 1110601e04c3fSmrg alu.dst.chan = 1; 1110701e04c3fSmrg alu.dst.write = 1; 1110801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1110901e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 1111001e04c3fSmrg alu.src[1].neg = 0; 1111101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1111201e04c3fSmrg if (r) 
1111301e04c3fSmrg return r; 111143464ebd5Sriastradh 1111501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1111601e04c3fSmrg alu.op = opc ; 1111701e04c3fSmrg alu.dst.sel = treg; 1111801e04c3fSmrg alu.dst.chan = 2; 1111901e04c3fSmrg alu.dst.write = 1; 1112001e04c3fSmrg alu.last = 1; 1112101e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1112201e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1112301e04c3fSmrg alu.src[1].neg = 0; 1112401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1112501e04c3fSmrg if (r) 1112601e04c3fSmrg return r; 111273464ebd5Sriastradh 1112801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1112901e04c3fSmrg alu.op = op; 1113001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1113101e04c3fSmrg alu.src[0].sel = treg; 1113201e04c3fSmrg alu.src[0].chan = 1; 1113301e04c3fSmrg alu.src[1].sel = treg; 1113401e04c3fSmrg alu.src[1].chan = 2; 1113501e04c3fSmrg alu.last = 1; 1113601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1113701e04c3fSmrg if (r) 1113801e04c3fSmrg return r; 1113901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1114001e04c3fSmrg alu.op = ALU_OP1_MOV; 1114101e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1114201e04c3fSmrg alu.src[0].sel = treg; 1114301e04c3fSmrg alu.src[0].chan = 0; 1114401e04c3fSmrg alu.last = 1; 1114501e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1114601e04c3fSmrg if (r) 1114701e04c3fSmrg return r; 111483464ebd5Sriastradh return 0; 111493464ebd5Sriastradh} 111503464ebd5Sriastradh 111517ec681f3Smrg 111527ec681f3Smrgstatic int egcm_i64neg(struct r600_shader_ctx *ctx) 111537ec681f3Smrg{ 111547ec681f3Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 111557ec681f3Smrg struct r600_bytecode_alu alu; 111567ec681f3Smrg int r; 111577ec681f3Smrg int treg = ctx->temp_reg; 111587ec681f3Smrg const int op = ALU_OP2_SUB_INT; 111597ec681f3Smrg const int opc = ALU_OP2_SUBB_UINT; 111607ec681f3Smrg 
111617ec681f3Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 111627ec681f3Smrg alu.op = op; ; 111637ec681f3Smrg alu.dst.sel = treg; 111647ec681f3Smrg alu.dst.chan = 0; 111657ec681f3Smrg alu.dst.write = 1; 111667ec681f3Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 111677ec681f3Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 111687ec681f3Smrg alu.src[1].neg = 0; 111697ec681f3Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 111707ec681f3Smrg if (r) 111717ec681f3Smrg return r; 111727ec681f3Smrg 111737ec681f3Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 111747ec681f3Smrg alu.op = op; 111757ec681f3Smrg alu.dst.sel = treg; 111767ec681f3Smrg alu.dst.chan = 1; 111777ec681f3Smrg alu.dst.write = 1; 111787ec681f3Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 111797ec681f3Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], 1); 111807ec681f3Smrg alu.src[1].neg = 0; 111817ec681f3Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 111827ec681f3Smrg if (r) 111837ec681f3Smrg return r; 111847ec681f3Smrg 111857ec681f3Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 111867ec681f3Smrg alu.op = opc ; 111877ec681f3Smrg alu.dst.sel = treg; 111887ec681f3Smrg alu.dst.chan = 2; 111897ec681f3Smrg alu.dst.write = 1; 111907ec681f3Smrg alu.last = 1; 111917ec681f3Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 111927ec681f3Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 111937ec681f3Smrg alu.src[1].neg = 0; 111947ec681f3Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 111957ec681f3Smrg if (r) 111967ec681f3Smrg return r; 111977ec681f3Smrg 111987ec681f3Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 111997ec681f3Smrg alu.op = op; 112007ec681f3Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 112017ec681f3Smrg alu.src[0].sel = treg; 112027ec681f3Smrg alu.src[0].chan = 1; 112037ec681f3Smrg alu.src[1].sel = treg; 112047ec681f3Smrg alu.src[1].chan = 2; 112057ec681f3Smrg alu.last = 1; 112067ec681f3Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 112077ec681f3Smrg if (r) 112087ec681f3Smrg return r; 
112097ec681f3Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 112107ec681f3Smrg alu.op = ALU_OP1_MOV; 112117ec681f3Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 112127ec681f3Smrg alu.src[0].sel = treg; 112137ec681f3Smrg alu.src[0].chan = 0; 112147ec681f3Smrg alu.last = 1; 112157ec681f3Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 112167ec681f3Smrg if (r) 112177ec681f3Smrg return r; 112187ec681f3Smrg return 0; 112197ec681f3Smrg} 112207ec681f3Smrg 1122101e04c3fSmrg/* result.y = mul_high a, b 1122201e04c3fSmrg result.x = mul a,b 1122301e04c3fSmrg result.y += a.x * b.y + a.y * b.x; 1122401e04c3fSmrg*/ 1122501e04c3fSmrgstatic int egcm_u64mul(struct r600_shader_ctx *ctx) 11226af69d88dSmrg{ 1122701e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1122801e04c3fSmrg struct r600_bytecode_alu alu; 1122901e04c3fSmrg int r; 1123001e04c3fSmrg int treg = ctx->temp_reg; 11231af69d88dSmrg 1123201e04c3fSmrg /* temp.x = mul_lo a.x, b.x */ 1123301e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1123401e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 1123501e04c3fSmrg alu.dst.sel = treg; 1123601e04c3fSmrg alu.dst.chan = 0; 1123701e04c3fSmrg alu.dst.write = 1; 1123801e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1123901e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1124001e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 1124101e04c3fSmrg if (r) 1124201e04c3fSmrg return r; 11243af69d88dSmrg 1124401e04c3fSmrg /* temp.y = mul_hi a.x, b.x */ 1124501e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1124601e04c3fSmrg alu.op = ALU_OP2_MULHI_UINT; 1124701e04c3fSmrg alu.dst.sel = treg; 1124801e04c3fSmrg alu.dst.chan = 1; 1124901e04c3fSmrg alu.dst.write = 1; 1125001e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1125101e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1125201e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 1125301e04c3fSmrg if (r) 1125401e04c3fSmrg return r; 
112553464ebd5Sriastradh 1125601e04c3fSmrg /* temp.z = mul a.x, b.y */ 1125701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1125801e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 1125901e04c3fSmrg alu.dst.sel = treg; 1126001e04c3fSmrg alu.dst.chan = 2; 1126101e04c3fSmrg alu.dst.write = 1; 1126201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1126301e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 1126401e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 1126501e04c3fSmrg if (r) 1126601e04c3fSmrg return r; 112673464ebd5Sriastradh 1126801e04c3fSmrg /* temp.w = mul a.y, b.x */ 1126901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1127001e04c3fSmrg alu.op = ALU_OP2_MULLO_UINT; 1127101e04c3fSmrg alu.dst.sel = treg; 1127201e04c3fSmrg alu.dst.chan = 3; 1127301e04c3fSmrg alu.dst.write = 1; 1127401e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1127501e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1127601e04c3fSmrg r = emit_mul_int_op(ctx->bc, &alu); 1127701e04c3fSmrg if (r) 1127801e04c3fSmrg return r; 112793464ebd5Sriastradh 1128001e04c3fSmrg /* temp.z = temp.z + temp.w */ 1128101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1128201e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 1128301e04c3fSmrg alu.dst.sel = treg; 1128401e04c3fSmrg alu.dst.chan = 2; 1128501e04c3fSmrg alu.dst.write = 1; 1128601e04c3fSmrg alu.src[0].sel = treg; 1128701e04c3fSmrg alu.src[0].chan = 2; 1128801e04c3fSmrg alu.src[1].sel = treg; 1128901e04c3fSmrg alu.src[1].chan = 3; 1129001e04c3fSmrg alu.last = 1; 1129101e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1129201e04c3fSmrg if (r) 1129301e04c3fSmrg return r; 1129401e04c3fSmrg 1129501e04c3fSmrg /* temp.y = temp.y + temp.z */ 1129601e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1129701e04c3fSmrg alu.op = ALU_OP2_ADD_INT; 1129801e04c3fSmrg alu.dst.sel = treg; 1129901e04c3fSmrg alu.dst.chan = 1; 1130001e04c3fSmrg alu.dst.write = 1; 1130101e04c3fSmrg 
alu.src[0].sel = treg; 1130201e04c3fSmrg alu.src[0].chan = 1; 1130301e04c3fSmrg alu.src[1].sel = treg; 1130401e04c3fSmrg alu.src[1].chan = 2; 1130501e04c3fSmrg alu.last = 1; 1130601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1130701e04c3fSmrg if (r) 1130801e04c3fSmrg return r; 1130901e04c3fSmrg 1131001e04c3fSmrg /* dst.x = temp.x */ 1131101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1131201e04c3fSmrg alu.op = ALU_OP1_MOV; 1131301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1131401e04c3fSmrg alu.src[0].sel = treg; 1131501e04c3fSmrg alu.src[0].chan = 0; 1131601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1131701e04c3fSmrg if (r) 1131801e04c3fSmrg return r; 1131901e04c3fSmrg 1132001e04c3fSmrg /* dst.y = temp.y */ 1132101e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1132201e04c3fSmrg alu.op = ALU_OP1_MOV; 1132301e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1132401e04c3fSmrg alu.src[0].sel = treg; 1132501e04c3fSmrg alu.src[0].chan = 1; 1132601e04c3fSmrg alu.last = 1; 1132701e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1132801e04c3fSmrg if (r) 1132901e04c3fSmrg return r; 113303464ebd5Sriastradh 113313464ebd5Sriastradh return 0; 113323464ebd5Sriastradh} 113333464ebd5Sriastradh 1133401e04c3fSmrgstatic int emit_u64sge(struct r600_shader_ctx *ctx, 1133501e04c3fSmrg int treg, 1133601e04c3fSmrg int src0_sel, int src0_base_chan, 1133701e04c3fSmrg int src1_sel, int src1_base_chan) 113383464ebd5Sriastradh{ 1133901e04c3fSmrg int r; 1134001e04c3fSmrg /* for 64-bit sge */ 1134101e04c3fSmrg /* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */ 1134201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT, 1134301e04c3fSmrg treg, 1, 1134401e04c3fSmrg src0_sel, src0_base_chan + 1, 1134501e04c3fSmrg src1_sel, src1_base_chan + 1); 1134601e04c3fSmrg if (r) 1134701e04c3fSmrg return r; 113483464ebd5Sriastradh 1134901e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 1135001e04c3fSmrg 
treg, 0, 1135101e04c3fSmrg src0_sel, src0_base_chan, 1135201e04c3fSmrg src1_sel, src1_base_chan); 1135301e04c3fSmrg if (r) 1135401e04c3fSmrg return r; 113553464ebd5Sriastradh 1135601e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETE_INT, 1135701e04c3fSmrg treg, 2, 1135801e04c3fSmrg src0_sel, src0_base_chan + 1, 1135901e04c3fSmrg src1_sel, src1_base_chan + 1); 1136001e04c3fSmrg if (r) 1136101e04c3fSmrg return r; 1136201e04c3fSmrg 1136301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 1136401e04c3fSmrg treg, 0, 1136501e04c3fSmrg treg, 0, 1136601e04c3fSmrg treg, 2); 1136701e04c3fSmrg if (r) 1136801e04c3fSmrg return r; 1136901e04c3fSmrg 1137001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 1137101e04c3fSmrg treg, 0, 1137201e04c3fSmrg treg, 0, 1137301e04c3fSmrg treg, 1); 1137401e04c3fSmrg if (r) 1137501e04c3fSmrg return r; 113763464ebd5Sriastradh return 0; 113773464ebd5Sriastradh} 113783464ebd5Sriastradh 1137901e04c3fSmrg/* this isn't a complete div it's just enough for qbo shader to work */ 1138001e04c3fSmrgstatic int egcm_u64div(struct r600_shader_ctx *ctx) 113813464ebd5Sriastradh{ 1138201e04c3fSmrg struct r600_bytecode_alu alu; 1138301e04c3fSmrg struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src; 1138401e04c3fSmrg int r, i; 1138501e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 113863464ebd5Sriastradh 1138701e04c3fSmrg /* make sure we are dividing my a const with 0 in the high bits */ 1138801e04c3fSmrg if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL) 1138901e04c3fSmrg return -1; 1139001e04c3fSmrg if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0) 1139101e04c3fSmrg return -1; 1139201e04c3fSmrg /* make sure we are doing one division */ 1139301e04c3fSmrg if (inst->Dst[0].Register.WriteMask != 0x3) 1139401e04c3fSmrg return -1; 113953464ebd5Sriastradh 1139601e04c3fSmrg /* emit_if uses ctx->temp_reg so we can't */ 1139701e04c3fSmrg int treg = r600_get_temp(ctx); 1139801e04c3fSmrg int tmp_num = 
r600_get_temp(ctx); 1139901e04c3fSmrg int sub_tmp = r600_get_temp(ctx); 1140001e04c3fSmrg 1140101e04c3fSmrg /* tmp quot are tmp_num.zw */ 1140201e04c3fSmrg r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0); 1140301e04c3fSmrg r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1); 1140401e04c3fSmrg r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0); 1140501e04c3fSmrg r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1); 1140601e04c3fSmrg 1140701e04c3fSmrg /* MOV tmp_num.xy, numerator */ 1140801e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1140901e04c3fSmrg tmp_num, 0, 1141001e04c3fSmrg alu_num_lo.sel, alu_num_lo.chan, 1141101e04c3fSmrg 0, 0); 1141201e04c3fSmrg if (r) 1141301e04c3fSmrg return r; 1141401e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1141501e04c3fSmrg tmp_num, 1, 1141601e04c3fSmrg alu_num_hi.sel, alu_num_hi.chan, 1141701e04c3fSmrg 0, 0); 1141801e04c3fSmrg if (r) 1141901e04c3fSmrg return r; 114203464ebd5Sriastradh 1142101e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1142201e04c3fSmrg tmp_num, 2, 1142301e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 0, 1142401e04c3fSmrg 0, 0); 1142501e04c3fSmrg if (r) 1142601e04c3fSmrg return r; 114273464ebd5Sriastradh 1142801e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1142901e04c3fSmrg tmp_num, 3, 1143001e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 0, 1143101e04c3fSmrg 0, 0); 1143201e04c3fSmrg if (r) 1143301e04c3fSmrg return r; 114343464ebd5Sriastradh 1143501e04c3fSmrg /* treg 0 is log2_denom */ 1143601e04c3fSmrg /* normally this gets the MSB for the denom high value 1143701e04c3fSmrg - however we know this will always be 0 here. 
*/ 1143801e04c3fSmrg r = single_alu_op2(ctx, 1143901e04c3fSmrg ALU_OP1_MOV, 1144001e04c3fSmrg treg, 0, 1144101e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 32, 1144201e04c3fSmrg 0, 0); 1144301e04c3fSmrg if (r) 1144401e04c3fSmrg return r; 11445af69d88dSmrg 1144601e04c3fSmrg /* normally check demon hi for 0, but we know it is already */ 1144701e04c3fSmrg /* t0.z = num_hi >= denom_lo */ 1144801e04c3fSmrg r = single_alu_op2(ctx, 1144901e04c3fSmrg ALU_OP2_SETGE_UINT, 1145001e04c3fSmrg treg, 1, 1145101e04c3fSmrg alu_num_hi.sel, alu_num_hi.chan, 1145201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 1145301e04c3fSmrg if (r) 1145401e04c3fSmrg return r; 11455af69d88dSmrg 1145601e04c3fSmrg memset(&alu_src, 0, sizeof(alu_src)); 1145701e04c3fSmrg alu_src.sel = treg; 1145801e04c3fSmrg alu_src.chan = 1; 1145901e04c3fSmrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1146001e04c3fSmrg if (r) 1146101e04c3fSmrg return r; 1146201e04c3fSmrg 1146301e04c3fSmrg /* for loops in here */ 1146401e04c3fSmrg /* get msb t0.x = msb(src[1].x) first */ 1146501e04c3fSmrg int msb_lo = util_last_bit(alu_denom_lo.value); 1146601e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1146701e04c3fSmrg treg, 0, 1146801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, msb_lo, 1146901e04c3fSmrg 0, 0); 1147001e04c3fSmrg if (r) 1147101e04c3fSmrg return r; 11472af69d88dSmrg 1147301e04c3fSmrg /* unroll the asm here */ 1147401e04c3fSmrg for (i = 0; i < 31; i++) { 1147501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 1147601e04c3fSmrg treg, 2, 1147701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, i, 1147801e04c3fSmrg treg, 0); 11479af69d88dSmrg if (r) 11480af69d88dSmrg return r; 11481af69d88dSmrg 1148201e04c3fSmrg /* we can do this on the CPU */ 1148301e04c3fSmrg uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i); 1148401e04c3fSmrg /* t0.z = tmp_num.y >= t0.z */ 1148501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 1148601e04c3fSmrg treg, 1, 1148701e04c3fSmrg tmp_num, 1, 1148801e04c3fSmrg V_SQ_ALU_SRC_LITERAL, denom_lo_shl); 
11489af69d88dSmrg if (r) 11490af69d88dSmrg return r; 11491af69d88dSmrg 1149201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 1149301e04c3fSmrg treg, 1, 1149401e04c3fSmrg treg, 1, 1149501e04c3fSmrg treg, 2); 11496af69d88dSmrg if (r) 11497af69d88dSmrg return r; 11498af69d88dSmrg 1149901e04c3fSmrg memset(&alu_src, 0, sizeof(alu_src)); 1150001e04c3fSmrg alu_src.sel = treg; 1150101e04c3fSmrg alu_src.chan = 1; 1150201e04c3fSmrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1150301e04c3fSmrg if (r) 1150401e04c3fSmrg return r; 115053464ebd5Sriastradh 1150601e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SUB_INT, 1150701e04c3fSmrg tmp_num, 1, 1150801e04c3fSmrg tmp_num, 1, 1150901e04c3fSmrg V_SQ_ALU_SRC_LITERAL, denom_lo_shl); 1151001e04c3fSmrg if (r) 1151101e04c3fSmrg return r; 115123464ebd5Sriastradh 1151301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 1151401e04c3fSmrg tmp_num, 3, 1151501e04c3fSmrg tmp_num, 3, 1151601e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); 1151701e04c3fSmrg if (r) 1151801e04c3fSmrg return r; 115193464ebd5Sriastradh 1152001e04c3fSmrg r = tgsi_endif(ctx); 1152101e04c3fSmrg if (r) 1152201e04c3fSmrg return r; 115233464ebd5Sriastradh } 115243464ebd5Sriastradh 1152501e04c3fSmrg /* log2_denom is always <= 31, so manually peel the last loop 1152601e04c3fSmrg * iteration. 
1152701e04c3fSmrg */ 1152801e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 1152901e04c3fSmrg treg, 1, 1153001e04c3fSmrg tmp_num, 1, 1153101e04c3fSmrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 1153201e04c3fSmrg if (r) 1153301e04c3fSmrg return r; 115343464ebd5Sriastradh 1153501e04c3fSmrg memset(&alu_src, 0, sizeof(alu_src)); 1153601e04c3fSmrg alu_src.sel = treg; 1153701e04c3fSmrg alu_src.chan = 1; 1153801e04c3fSmrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1153901e04c3fSmrg if (r) 1154001e04c3fSmrg return r; 115413464ebd5Sriastradh 1154201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SUB_INT, 1154301e04c3fSmrg tmp_num, 1, 1154401e04c3fSmrg tmp_num, 1, 1154501e04c3fSmrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 1154601e04c3fSmrg if (r) 1154701e04c3fSmrg return r; 11548af69d88dSmrg 1154901e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 1155001e04c3fSmrg tmp_num, 3, 1155101e04c3fSmrg tmp_num, 3, 1155201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 1U); 1155301e04c3fSmrg if (r) 1155401e04c3fSmrg return r; 1155501e04c3fSmrg r = tgsi_endif(ctx); 1155601e04c3fSmrg if (r) 1155701e04c3fSmrg return r; 11558af69d88dSmrg 1155901e04c3fSmrg r = tgsi_endif(ctx); 1156001e04c3fSmrg if (r) 1156101e04c3fSmrg return r; 11562af69d88dSmrg 1156301e04c3fSmrg /* onto the second loop to unroll */ 1156401e04c3fSmrg for (i = 0; i < 31; i++) { 1156501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 1156601e04c3fSmrg treg, 1, 1156701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)), 1156801e04c3fSmrg treg, 0); 1156901e04c3fSmrg if (r) 1157001e04c3fSmrg return r; 11571af69d88dSmrg 1157201e04c3fSmrg uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i); 1157301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1157401e04c3fSmrg treg, 2, 1157501e04c3fSmrg V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), 1157601e04c3fSmrg 0, 0); 1157701e04c3fSmrg if (r) 1157801e04c3fSmrg return r; 11579af69d88dSmrg 1158001e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1158101e04c3fSmrg treg, 3, 
1158201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), 1158301e04c3fSmrg 0, 0); 1158401e04c3fSmrg if (r) 1158501e04c3fSmrg return r; 11586af69d88dSmrg 1158701e04c3fSmrg r = emit_u64sge(ctx, sub_tmp, 1158801e04c3fSmrg tmp_num, 0, 1158901e04c3fSmrg treg, 2); 1159001e04c3fSmrg if (r) 1159101e04c3fSmrg return r; 11592af69d88dSmrg 1159301e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 1159401e04c3fSmrg treg, 1, 1159501e04c3fSmrg treg, 1, 1159601e04c3fSmrg sub_tmp, 0); 1159701e04c3fSmrg if (r) 1159801e04c3fSmrg return r; 11599af69d88dSmrg 1160001e04c3fSmrg memset(&alu_src, 0, sizeof(alu_src)); 1160101e04c3fSmrg alu_src.sel = treg; 1160201e04c3fSmrg alu_src.chan = 1; 1160301e04c3fSmrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1160401e04c3fSmrg if (r) 1160501e04c3fSmrg return r; 11606af69d88dSmrg 11607af69d88dSmrg 1160801e04c3fSmrg r = emit_u64add(ctx, ALU_OP2_SUB_INT, 1160901e04c3fSmrg sub_tmp, 1161001e04c3fSmrg tmp_num, 0, 1161101e04c3fSmrg treg, 2); 1161201e04c3fSmrg if (r) 1161301e04c3fSmrg return r; 11614af69d88dSmrg 1161501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1161601e04c3fSmrg tmp_num, 0, 1161701e04c3fSmrg sub_tmp, 0, 1161801e04c3fSmrg 0, 0); 1161901e04c3fSmrg if (r) 1162001e04c3fSmrg return r; 11621af69d88dSmrg 1162201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1162301e04c3fSmrg tmp_num, 1, 1162401e04c3fSmrg sub_tmp, 1, 1162501e04c3fSmrg 0, 0); 1162601e04c3fSmrg if (r) 1162701e04c3fSmrg return r; 11628af69d88dSmrg 1162901e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 1163001e04c3fSmrg tmp_num, 2, 1163101e04c3fSmrg tmp_num, 2, 1163201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); 1163301e04c3fSmrg if (r) 1163401e04c3fSmrg return r; 11635af69d88dSmrg 1163601e04c3fSmrg r = tgsi_endif(ctx); 11637af69d88dSmrg if (r) 11638af69d88dSmrg return r; 11639af69d88dSmrg } 1164001e04c3fSmrg 1164101e04c3fSmrg /* log2_denom is always <= 63, so manually peel the last loop 1164201e04c3fSmrg * iteration. 
1164301e04c3fSmrg */ 1164401e04c3fSmrg uint64_t denom_shl = (uint64_t)alu_denom_lo.value; 1164501e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1164601e04c3fSmrg treg, 2, 1164701e04c3fSmrg V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), 1164801e04c3fSmrg 0, 0); 1164901e04c3fSmrg if (r) 1165001e04c3fSmrg return r; 1165101e04c3fSmrg 1165201e04c3fSmrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1165301e04c3fSmrg treg, 3, 1165401e04c3fSmrg V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), 1165501e04c3fSmrg 0, 0); 1165601e04c3fSmrg if (r) 1165701e04c3fSmrg return r; 1165801e04c3fSmrg 1165901e04c3fSmrg r = emit_u64sge(ctx, sub_tmp, 1166001e04c3fSmrg tmp_num, 0, 1166101e04c3fSmrg treg, 2); 1166201e04c3fSmrg if (r) 1166301e04c3fSmrg return r; 1166401e04c3fSmrg 1166501e04c3fSmrg memset(&alu_src, 0, sizeof(alu_src)); 1166601e04c3fSmrg alu_src.sel = sub_tmp; 1166701e04c3fSmrg alu_src.chan = 0; 1166801e04c3fSmrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 1166901e04c3fSmrg if (r) 1167001e04c3fSmrg return r; 1167101e04c3fSmrg 1167201e04c3fSmrg r = emit_u64add(ctx, ALU_OP2_SUB_INT, 1167301e04c3fSmrg sub_tmp, 1167401e04c3fSmrg tmp_num, 0, 1167501e04c3fSmrg treg, 2); 1167601e04c3fSmrg if (r) 1167701e04c3fSmrg return r; 1167801e04c3fSmrg 1167901e04c3fSmrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 1168001e04c3fSmrg tmp_num, 2, 1168101e04c3fSmrg tmp_num, 2, 1168201e04c3fSmrg V_SQ_ALU_SRC_LITERAL, 1U); 1168301e04c3fSmrg if (r) 1168401e04c3fSmrg return r; 1168501e04c3fSmrg r = tgsi_endif(ctx); 1168601e04c3fSmrg if (r) 1168701e04c3fSmrg return r; 1168801e04c3fSmrg 1168901e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1169001e04c3fSmrg alu.op = ALU_OP1_MOV; 1169101e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1169201e04c3fSmrg alu.src[0].sel = tmp_num; 1169301e04c3fSmrg alu.src[0].chan = 2; 1169401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1169501e04c3fSmrg if (r) 1169601e04c3fSmrg return r; 1169701e04c3fSmrg 1169801e04c3fSmrg memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 1169901e04c3fSmrg alu.op = ALU_OP1_MOV; 1170001e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1170101e04c3fSmrg alu.src[0].sel = tmp_num; 1170201e04c3fSmrg alu.src[0].chan = 3; 1170301e04c3fSmrg alu.last = 1; 1170401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1170501e04c3fSmrg if (r) 1170601e04c3fSmrg return r; 117073464ebd5Sriastradh return 0; 117083464ebd5Sriastradh} 117093464ebd5Sriastradh 1171001e04c3fSmrgstatic int egcm_u64sne(struct r600_shader_ctx *ctx) 1171101e04c3fSmrg{ 1171201e04c3fSmrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1171301e04c3fSmrg struct r600_bytecode_alu alu; 1171401e04c3fSmrg int r; 1171501e04c3fSmrg int treg = ctx->temp_reg; 1171601e04c3fSmrg 1171701e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1171801e04c3fSmrg alu.op = ALU_OP2_SETNE_INT; 1171901e04c3fSmrg alu.dst.sel = treg; 1172001e04c3fSmrg alu.dst.chan = 0; 1172101e04c3fSmrg alu.dst.write = 1; 1172201e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1172301e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 1172401e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1172501e04c3fSmrg if (r) 1172601e04c3fSmrg return r; 117273464ebd5Sriastradh 1172801e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1172901e04c3fSmrg alu.op = ALU_OP2_SETNE_INT; 1173001e04c3fSmrg alu.dst.sel = treg; 1173101e04c3fSmrg alu.dst.chan = 1; 1173201e04c3fSmrg alu.dst.write = 1; 1173301e04c3fSmrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1173401e04c3fSmrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 1173501e04c3fSmrg alu.last = 1; 1173601e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1173701e04c3fSmrg if (r) 1173801e04c3fSmrg return r; 1173901e04c3fSmrg 1174001e04c3fSmrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1174101e04c3fSmrg alu.op = ALU_OP2_OR_INT; 1174201e04c3fSmrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1174301e04c3fSmrg alu.src[0].sel = treg; 
1174401e04c3fSmrg alu.src[0].chan = 0; 1174501e04c3fSmrg alu.src[1].sel = treg; 1174601e04c3fSmrg alu.src[1].chan = 1; 1174701e04c3fSmrg alu.last = 1; 1174801e04c3fSmrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1174901e04c3fSmrg if (r) 1175001e04c3fSmrg return r; 1175101e04c3fSmrg return 0; 1175201e04c3fSmrg} 1175301e04c3fSmrg 1175401e04c3fSmrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 1175501e04c3fSmrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 1175601e04c3fSmrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 1175701e04c3fSmrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 1175801e04c3fSmrg 1175901e04c3fSmrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 1176001e04c3fSmrg 1176101e04c3fSmrg [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 1176201e04c3fSmrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 1176301e04c3fSmrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 1176401e04c3fSmrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 1176501e04c3fSmrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 1176601e04c3fSmrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1176701e04c3fSmrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1176801e04c3fSmrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 1176901e04c3fSmrg /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */ 1177001e04c3fSmrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 1177101e04c3fSmrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 1177201e04c3fSmrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 1177301e04c3fSmrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 1177401e04c3fSmrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 1177501e04c3fSmrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 1177601e04c3fSmrg [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 1177701e04c3fSmrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 1177801e04c3fSmrg [21] = { ALU_OP0_NOP, 
tgsi_unsupported}, 1177901e04c3fSmrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 1178001e04c3fSmrg [23] = { ALU_OP0_NOP, tgsi_unsupported}, 1178101e04c3fSmrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 1178201e04c3fSmrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 1178301e04c3fSmrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 1178401e04c3fSmrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 1178501e04c3fSmrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 1178601e04c3fSmrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 1178701e04c3fSmrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 1178801e04c3fSmrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 1178901e04c3fSmrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 1179001e04c3fSmrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_unsupported}, 1179101e04c3fSmrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 1179201e04c3fSmrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 1179301e04c3fSmrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 1179401e04c3fSmrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 1179501e04c3fSmrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 1179601e04c3fSmrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 1179701e04c3fSmrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 1179801e04c3fSmrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1179901e04c3fSmrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1180001e04c3fSmrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1180101e04c3fSmrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 1180201e04c3fSmrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 1180301e04c3fSmrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 1180401e04c3fSmrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 1180501e04c3fSmrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 1180601e04c3fSmrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 1180701e04c3fSmrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 
1180801e04c3fSmrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 1180901e04c3fSmrg [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 1181001e04c3fSmrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 1181101e04c3fSmrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 1181201e04c3fSmrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 1181301e04c3fSmrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1181401e04c3fSmrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1181501e04c3fSmrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1181601e04c3fSmrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 1181701e04c3fSmrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 1181801e04c3fSmrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 1181901e04c3fSmrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 1182001e04c3fSmrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 1182101e04c3fSmrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 1182201e04c3fSmrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 1182301e04c3fSmrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 1182401e04c3fSmrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 1182501e04c3fSmrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1182601e04c3fSmrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 1182701e04c3fSmrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 1182801e04c3fSmrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1182901e04c3fSmrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1183001e04c3fSmrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 1183101e04c3fSmrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 1183201e04c3fSmrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 1183301e04c3fSmrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 1183401e04c3fSmrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 1183501e04c3fSmrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 1183601e04c3fSmrg [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 1183701e04c3fSmrg [TGSI_OPCODE_DDY_FINE] = { 
ALU_OP0_NOP, tgsi_unsupported}, 1183801e04c3fSmrg [81] = { ALU_OP0_NOP, tgsi_unsupported}, 1183901e04c3fSmrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 1184001e04c3fSmrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 1184101e04c3fSmrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 1184201e04c3fSmrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 1184301e04c3fSmrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 1184401e04c3fSmrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 1184501e04c3fSmrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 1184601e04c3fSmrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 1184701e04c3fSmrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 1184801e04c3fSmrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 1184901e04c3fSmrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 1185001e04c3fSmrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 1185101e04c3fSmrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 1185201e04c3fSmrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1185301e04c3fSmrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 1185401e04c3fSmrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 1185501e04c3fSmrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 1185601e04c3fSmrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 1185701e04c3fSmrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1185801e04c3fSmrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 1185901e04c3fSmrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1186001e04c3fSmrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1186101e04c3fSmrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 1186201e04c3fSmrg [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported}, 1186301e04c3fSmrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 1186401e04c3fSmrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 1186501e04c3fSmrg [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 
1186601e04c3fSmrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 1186701e04c3fSmrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 1186801e04c3fSmrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 1186901e04c3fSmrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported}, 1187001e04c3fSmrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 1187101e04c3fSmrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 1187201e04c3fSmrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 1187301e04c3fSmrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 1187401e04c3fSmrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 1187501e04c3fSmrg [TGSI_OPCODE_DFMA] = { ALU_OP0_NOP, tgsi_unsupported}, 1187601e04c3fSmrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 1187701e04c3fSmrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 1187801e04c3fSmrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 1187901e04c3fSmrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 1188001e04c3fSmrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 1188101e04c3fSmrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 1188201e04c3fSmrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 1188301e04c3fSmrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 1188401e04c3fSmrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 1188501e04c3fSmrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 1188601e04c3fSmrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 1188701e04c3fSmrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 1188801e04c3fSmrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 1188901e04c3fSmrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 1189001e04c3fSmrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 1189101e04c3fSmrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 1189201e04c3fSmrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 1189301e04c3fSmrg [TGSI_OPCODE_USEQ] = { 
ALU_OP2_SETE_INT, tgsi_op2}, 1189401e04c3fSmrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 1189501e04c3fSmrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 1189601e04c3fSmrg [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 1189701e04c3fSmrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 1189801e04c3fSmrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1189901e04c3fSmrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 1190001e04c3fSmrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 1190101e04c3fSmrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1190201e04c3fSmrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 1190301e04c3fSmrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 1190401e04c3fSmrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 1190501e04c3fSmrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 1190601e04c3fSmrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 1190701e04c3fSmrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 1190801e04c3fSmrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 1190901e04c3fSmrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 1191001e04c3fSmrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 1191101e04c3fSmrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 1191201e04c3fSmrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 1191301e04c3fSmrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 1191401e04c3fSmrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 1191501e04c3fSmrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 1191601e04c3fSmrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 1191701e04c3fSmrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 1191801e04c3fSmrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 1191901e04c3fSmrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 1192001e04c3fSmrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 1192101e04c3fSmrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 1192201e04c3fSmrg [165] = { 
ALU_OP0_NOP, tgsi_unsupported}, 1192301e04c3fSmrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 1192401e04c3fSmrg [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 1192501e04c3fSmrg [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 1192601e04c3fSmrg [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 1192701e04c3fSmrg [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 1192801e04c3fSmrg [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 1192901e04c3fSmrg [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 1193001e04c3fSmrg [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 1193101e04c3fSmrg [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 1193201e04c3fSmrg [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 1193301e04c3fSmrg [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 1193401e04c3fSmrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 1193501e04c3fSmrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1193601e04c3fSmrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1193701e04c3fSmrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 1193801e04c3fSmrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 1193901e04c3fSmrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 1194001e04c3fSmrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 1194101e04c3fSmrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 1194201e04c3fSmrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 1194301e04c3fSmrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 1194401e04c3fSmrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 1194501e04c3fSmrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 1194601e04c3fSmrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 1194701e04c3fSmrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 1194801e04c3fSmrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, 
tgsi_unsupported}, 1194901e04c3fSmrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 1195001e04c3fSmrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 1195101e04c3fSmrg [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 1195201e04c3fSmrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 119533464ebd5Sriastradh}; 119543464ebd5Sriastradh 1195501e04c3fSmrgstatic const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 1195601e04c3fSmrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 1195701e04c3fSmrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 1195801e04c3fSmrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 1195901e04c3fSmrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 1196001e04c3fSmrg [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 1196101e04c3fSmrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 1196201e04c3fSmrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 1196301e04c3fSmrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 1196401e04c3fSmrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 1196501e04c3fSmrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1196601e04c3fSmrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1196701e04c3fSmrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 1196801e04c3fSmrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 1196901e04c3fSmrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 1197001e04c3fSmrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 1197101e04c3fSmrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 1197201e04c3fSmrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 1197301e04c3fSmrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 1197401e04c3fSmrg [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, 1197501e04c3fSmrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 1197601e04c3fSmrg [21] = { ALU_OP0_NOP, tgsi_unsupported}, 1197701e04c3fSmrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 1197801e04c3fSmrg 
[23] = { ALU_OP0_NOP, tgsi_unsupported}, 1197901e04c3fSmrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 1198001e04c3fSmrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 1198101e04c3fSmrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 1198201e04c3fSmrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 1198301e04c3fSmrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 1198401e04c3fSmrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 1198501e04c3fSmrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 1198601e04c3fSmrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 1198701e04c3fSmrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 1198801e04c3fSmrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock}, 1198901e04c3fSmrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 1199001e04c3fSmrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 1199101e04c3fSmrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 1199201e04c3fSmrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 1199301e04c3fSmrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 1199401e04c3fSmrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 1199501e04c3fSmrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h}, 1199601e04c3fSmrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1199701e04c3fSmrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1199801e04c3fSmrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1199901e04c3fSmrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 1200001e04c3fSmrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 1200101e04c3fSmrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 1200201e04c3fSmrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 1200301e04c3fSmrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 1200401e04c3fSmrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 1200501e04c3fSmrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 1200601e04c3fSmrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 1200701e04c3fSmrg [TGSI_OPCODE_TEX] = { 
FETCH_OP_SAMPLE, tgsi_tex}, 1200801e04c3fSmrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 1200901e04c3fSmrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 1201001e04c3fSmrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h}, 1201101e04c3fSmrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1201201e04c3fSmrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1201301e04c3fSmrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1201401e04c3fSmrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 1201501e04c3fSmrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 1201601e04c3fSmrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 1201701e04c3fSmrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 1201801e04c3fSmrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 1201901e04c3fSmrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 1202001e04c3fSmrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 1202101e04c3fSmrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 1202201e04c3fSmrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 1202301e04c3fSmrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1202401e04c3fSmrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 1202501e04c3fSmrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 1202601e04c3fSmrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1202701e04c3fSmrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1202801e04c3fSmrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 1202901e04c3fSmrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 1203001e04c3fSmrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 1203101e04c3fSmrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 1203201e04c3fSmrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 1203301e04c3fSmrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 1203401e04c3fSmrg [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 1203501e04c3fSmrg [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 1203601e04c3fSmrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 
1203701e04c3fSmrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 1203801e04c3fSmrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 1203901e04c3fSmrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 1204001e04c3fSmrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 1204101e04c3fSmrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 1204201e04c3fSmrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 1204301e04c3fSmrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 1204401e04c3fSmrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 1204501e04c3fSmrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 1204601e04c3fSmrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 1204701e04c3fSmrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 1204801e04c3fSmrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 1204901e04c3fSmrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1205001e04c3fSmrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 1205101e04c3fSmrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 1205201e04c3fSmrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 1205301e04c3fSmrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 1205401e04c3fSmrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1205501e04c3fSmrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 1205601e04c3fSmrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1205701e04c3fSmrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1205801e04c3fSmrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 1205901e04c3fSmrg [TGSI_OPCODE_RESQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq}, 1206001e04c3fSmrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 1206101e04c3fSmrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 1206201e04c3fSmrg [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 1206301e04c3fSmrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 1206401e04c3fSmrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 
1206501e04c3fSmrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 1206601e04c3fSmrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 1206701e04c3fSmrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 1206801e04c3fSmrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 1206901e04c3fSmrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 1207001e04c3fSmrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 1207101e04c3fSmrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 1207201e04c3fSmrg /* Refer below for TGSI_OPCODE_DFMA */ 1207301e04c3fSmrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 1207401e04c3fSmrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 1207501e04c3fSmrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 1207601e04c3fSmrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 1207701e04c3fSmrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 1207801e04c3fSmrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 1207901e04c3fSmrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 1208001e04c3fSmrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 1208101e04c3fSmrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 1208201e04c3fSmrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 1208301e04c3fSmrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 1208401e04c3fSmrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 1208501e04c3fSmrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 1208601e04c3fSmrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 1208701e04c3fSmrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 1208801e04c3fSmrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 1208901e04c3fSmrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 1209001e04c3fSmrg [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 1209101e04c3fSmrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 1209201e04c3fSmrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 1209301e04c3fSmrg 
[TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 1209401e04c3fSmrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 1209501e04c3fSmrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1209601e04c3fSmrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 1209701e04c3fSmrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 1209801e04c3fSmrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1209901e04c3fSmrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 1210001e04c3fSmrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 1210101e04c3fSmrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 1210201e04c3fSmrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 1210301e04c3fSmrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 1210401e04c3fSmrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 1210501e04c3fSmrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 1210601e04c3fSmrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 1210701e04c3fSmrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 1210801e04c3fSmrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 1210901e04c3fSmrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 1211001e04c3fSmrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 1211101e04c3fSmrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 1211201e04c3fSmrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 1211301e04c3fSmrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 1211401e04c3fSmrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 1211501e04c3fSmrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load}, 1211601e04c3fSmrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store}, 1211701e04c3fSmrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 1211801e04c3fSmrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 1211901e04c3fSmrg [165] = { ALU_OP0_NOP, tgsi_unsupported}, 1212001e04c3fSmrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 1212101e04c3fSmrg [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op}, 1212201e04c3fSmrg 
[TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op}, 1212301e04c3fSmrg [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op}, 1212401e04c3fSmrg [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op}, 1212501e04c3fSmrg [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op}, 1212601e04c3fSmrg [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op}, 1212701e04c3fSmrg [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op}, 1212801e04c3fSmrg [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op}, 1212901e04c3fSmrg [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op}, 1213001e04c3fSmrg [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op}, 1213101e04c3fSmrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 1213201e04c3fSmrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1213301e04c3fSmrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1213401e04c3fSmrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 1213501e04c3fSmrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 1213601e04c3fSmrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 1213701e04c3fSmrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 1213801e04c3fSmrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_bfe}, 1213901e04c3fSmrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_bfe}, 1214001e04c3fSmrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 1214101e04c3fSmrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 1214201e04c3fSmrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 1214301e04c3fSmrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 1214401e04c3fSmrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 1214501e04c3fSmrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 1214601e04c3fSmrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1214701e04c3fSmrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1214801e04c3fSmrg 
[TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1214901e04c3fSmrg [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 1215001e04c3fSmrg [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 1215101e04c3fSmrg [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 1215201e04c3fSmrg [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 1215301e04c3fSmrg [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 1215401e04c3fSmrg [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 1215501e04c3fSmrg [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr }, 1215601e04c3fSmrg [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 1215701e04c3fSmrg [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 1215801e04c3fSmrg [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 1215901e04c3fSmrg [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 1216001e04c3fSmrg [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 1216101e04c3fSmrg [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 1216201e04c3fSmrg [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 1216301e04c3fSmrg [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 1216401e04c3fSmrg [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 1216501e04c3fSmrg [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64}, 1216601e04c3fSmrg [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 1216701e04c3fSmrg [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 1216801e04c3fSmrg [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 1216901e04c3fSmrg [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 1217001e04c3fSmrg [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 1217101e04c3fSmrg [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 1217201e04c3fSmrg [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 1217301e04c3fSmrg [TGSI_OPCODE_DRSQ] = { 
ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 1217401e04c3fSmrg [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, 1217501e04c3fSmrg [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, 1217601e04c3fSmrg [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, 1217701e04c3fSmrg [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, 121787ec681f3Smrg [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, 1217901e04c3fSmrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 121803464ebd5Sriastradh}; 121813464ebd5Sriastradh 1218201e04c3fSmrgstatic const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 1218301e04c3fSmrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 1218401e04c3fSmrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 1218501e04c3fSmrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 1218601e04c3fSmrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 1218701e04c3fSmrg [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 1218801e04c3fSmrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 1218901e04c3fSmrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 1219001e04c3fSmrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 1219101e04c3fSmrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 1219201e04c3fSmrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1219301e04c3fSmrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1219401e04c3fSmrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 1219501e04c3fSmrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 1219601e04c3fSmrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 1219701e04c3fSmrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 1219801e04c3fSmrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 1219901e04c3fSmrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 1220001e04c3fSmrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 1220101e04c3fSmrg [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, 1220201e04c3fSmrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, 
cayman_emit_float_instr}, 1220301e04c3fSmrg [21] = { ALU_OP0_NOP, tgsi_unsupported}, 1220401e04c3fSmrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 1220501e04c3fSmrg [23] = { ALU_OP0_NOP, tgsi_unsupported}, 1220601e04c3fSmrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 1220701e04c3fSmrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 1220801e04c3fSmrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 1220901e04c3fSmrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 1221001e04c3fSmrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 1221101e04c3fSmrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 1221201e04c3fSmrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 1221301e04c3fSmrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 1221401e04c3fSmrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 1221501e04c3fSmrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock}, 1221601e04c3fSmrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 1221701e04c3fSmrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 1221801e04c3fSmrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 1221901e04c3fSmrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 1222001e04c3fSmrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 1222101e04c3fSmrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 1222201e04c3fSmrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h}, 1222301e04c3fSmrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1222401e04c3fSmrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1222501e04c3fSmrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1222601e04c3fSmrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 1222701e04c3fSmrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 1222801e04c3fSmrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 1222901e04c3fSmrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 1223001e04c3fSmrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 1223101e04c3fSmrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 
1223201e04c3fSmrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 1223301e04c3fSmrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 1223401e04c3fSmrg [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 1223501e04c3fSmrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 1223601e04c3fSmrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 1223701e04c3fSmrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h}, 1223801e04c3fSmrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 1223901e04c3fSmrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 1224001e04c3fSmrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 1224101e04c3fSmrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 1224201e04c3fSmrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 1224301e04c3fSmrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 1224401e04c3fSmrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 1224501e04c3fSmrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 1224601e04c3fSmrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 1224701e04c3fSmrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 1224801e04c3fSmrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 1224901e04c3fSmrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 1225001e04c3fSmrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1225101e04c3fSmrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 1225201e04c3fSmrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 1225301e04c3fSmrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 1225401e04c3fSmrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1225501e04c3fSmrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 1225601e04c3fSmrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 1225701e04c3fSmrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 1225801e04c3fSmrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 1225901e04c3fSmrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 1226001e04c3fSmrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 1226101e04c3fSmrg [TGSI_OPCODE_DDX_FINE] = { 
FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 1226201e04c3fSmrg [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 1226301e04c3fSmrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 1226401e04c3fSmrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 1226501e04c3fSmrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 1226601e04c3fSmrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 1226701e04c3fSmrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 1226801e04c3fSmrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 1226901e04c3fSmrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 1227001e04c3fSmrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 1227101e04c3fSmrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 1227201e04c3fSmrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 1227301e04c3fSmrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 1227401e04c3fSmrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 1227501e04c3fSmrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 1227601e04c3fSmrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1227701e04c3fSmrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 1227801e04c3fSmrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 1227901e04c3fSmrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 1228001e04c3fSmrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 1228101e04c3fSmrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1228201e04c3fSmrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 1228301e04c3fSmrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 1228401e04c3fSmrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 1228501e04c3fSmrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 1228601e04c3fSmrg [TGSI_OPCODE_RESQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq}, 1228701e04c3fSmrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 1228801e04c3fSmrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 1228901e04c3fSmrg [TGSI_OPCODE_FSEQ] = { 
ALU_OP2_SETE_DX10, tgsi_op2}, 1229001e04c3fSmrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 1229101e04c3fSmrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 1229201e04c3fSmrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 1229301e04c3fSmrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 1229401e04c3fSmrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 1229501e04c3fSmrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 1229601e04c3fSmrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 1229701e04c3fSmrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 1229801e04c3fSmrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 1229901e04c3fSmrg /* Refer below for TGSI_OPCODE_DFMA */ 1230001e04c3fSmrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 1230101e04c3fSmrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 1230201e04c3fSmrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 1230301e04c3fSmrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 1230401e04c3fSmrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 1230501e04c3fSmrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 1230601e04c3fSmrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 1230701e04c3fSmrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 1230801e04c3fSmrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 1230901e04c3fSmrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 1231001e04c3fSmrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 1231101e04c3fSmrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 1231201e04c3fSmrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 1231301e04c3fSmrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 1231401e04c3fSmrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 1231501e04c3fSmrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 1231601e04c3fSmrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 1231701e04c3fSmrg [TGSI_OPCODE_USEQ] = { 
ALU_OP2_SETE_INT, tgsi_op2}, 1231801e04c3fSmrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 1231901e04c3fSmrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 1232001e04c3fSmrg [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 1232101e04c3fSmrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 1232201e04c3fSmrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1232301e04c3fSmrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 1232401e04c3fSmrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 1232501e04c3fSmrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 1232601e04c3fSmrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 1232701e04c3fSmrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 1232801e04c3fSmrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 1232901e04c3fSmrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 1233001e04c3fSmrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 1233101e04c3fSmrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 1233201e04c3fSmrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 1233301e04c3fSmrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 1233401e04c3fSmrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 1233501e04c3fSmrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 1233601e04c3fSmrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 1233701e04c3fSmrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 1233801e04c3fSmrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 1233901e04c3fSmrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 1234001e04c3fSmrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 1234101e04c3fSmrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 1234201e04c3fSmrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load}, 1234301e04c3fSmrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store}, 1234401e04c3fSmrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 1234501e04c3fSmrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 1234601e04c3fSmrg [165] = { ALU_OP0_NOP, tgsi_unsupported}, 
1234701e04c3fSmrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 1234801e04c3fSmrg [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op}, 1234901e04c3fSmrg [TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op}, 1235001e04c3fSmrg [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op}, 1235101e04c3fSmrg [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op}, 1235201e04c3fSmrg [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op}, 1235301e04c3fSmrg [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op}, 1235401e04c3fSmrg [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op}, 1235501e04c3fSmrg [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op}, 1235601e04c3fSmrg [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op}, 1235701e04c3fSmrg [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op}, 1235801e04c3fSmrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 1235901e04c3fSmrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 1236001e04c3fSmrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 1236101e04c3fSmrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 1236201e04c3fSmrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 1236301e04c3fSmrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 1236401e04c3fSmrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 1236501e04c3fSmrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_bfe}, 1236601e04c3fSmrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_bfe}, 1236701e04c3fSmrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 1236801e04c3fSmrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 1236901e04c3fSmrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 1237001e04c3fSmrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 1237101e04c3fSmrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 1237201e04c3fSmrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, 
tgsi_msb}, 1237301e04c3fSmrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1237401e04c3fSmrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1237501e04c3fSmrg [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 1237601e04c3fSmrg [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 1237701e04c3fSmrg [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 1237801e04c3fSmrg [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 1237901e04c3fSmrg [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 1238001e04c3fSmrg [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 1238101e04c3fSmrg [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 1238201e04c3fSmrg [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr }, 1238301e04c3fSmrg [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 1238401e04c3fSmrg [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 1238501e04c3fSmrg [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 1238601e04c3fSmrg [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 1238701e04c3fSmrg [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 1238801e04c3fSmrg [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 1238901e04c3fSmrg [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 1239001e04c3fSmrg [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 1239101e04c3fSmrg [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 1239201e04c3fSmrg [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64}, 1239301e04c3fSmrg [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 1239401e04c3fSmrg [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 1239501e04c3fSmrg [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 1239601e04c3fSmrg [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 1239701e04c3fSmrg [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 1239801e04c3fSmrg 
[TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 1239901e04c3fSmrg [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 1240001e04c3fSmrg [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 1240101e04c3fSmrg [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, 1240201e04c3fSmrg [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, 1240301e04c3fSmrg [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, 1240401e04c3fSmrg [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, 124057ec681f3Smrg [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, 1240601e04c3fSmrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 124073464ebd5Sriastradh}; 12408