1848b8605Smrg/* 2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 8848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 9848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 22848b8605Smrg */ 23848b8605Smrg#include "r600_sq.h" 24848b8605Smrg#include "r600_formats.h" 25848b8605Smrg#include "r600_opcodes.h" 26848b8605Smrg#include "r600_shader.h" 27848b8605Smrg#include "r600d.h" 28848b8605Smrg 29848b8605Smrg#include "sb/sb_public.h" 30848b8605Smrg 31848b8605Smrg#include "pipe/p_shader_tokens.h" 32848b8605Smrg#include "tgsi/tgsi_info.h" 33848b8605Smrg#include "tgsi/tgsi_parse.h" 34848b8605Smrg#include "tgsi/tgsi_scan.h" 35848b8605Smrg#include "tgsi/tgsi_dump.h" 36b8e80941Smrg#include "util/u_bitcast.h" 37848b8605Smrg#include "util/u_memory.h" 38848b8605Smrg#include "util/u_math.h" 39848b8605Smrg#include <stdio.h> 40848b8605Smrg#include <errno.h> 41848b8605Smrg 42b8e80941Smrg/* CAYMAN notes 43848b8605SmrgWhy CAYMAN got loops for lots of instructions is explained here. 44848b8605Smrg 45848b8605Smrg-These 8xx t-slot only ops are implemented in all vector slots. 46848b8605SmrgMUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47b8e80941SmrgThese 8xx t-slot only opcodes become vector ops, with all four 48b8e80941Smrgslots expecting the arguments on sources a and b. Result is 49848b8605Smrgbroadcast to all channels. 50b8e80941SmrgMULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51b8e80941SmrgThese 8xx t-slot only opcodes become vector ops in the z, y, and 52848b8605Smrgx slots. 53848b8605SmrgEXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54848b8605SmrgRECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55848b8605SmrgSQRT_IEEE/_64 56848b8605SmrgSIN/COS 57b8e80941SmrgThe w slot may have an independent co-issued operation, or if the 58b8e80941Smrgresult is required to be in the w slot, the opcode above may be 59848b8605Smrgissued in the w slot as well. 60848b8605SmrgThe compiler must issue the source argument to slots z, y, and x 61848b8605Smrg*/ 62848b8605Smrg 63b8e80941Smrg/* Contents of r0 on entry to various shaders 64b8e80941Smrg 65b8e80941Smrg VS - .x = VertexID 66b8e80941Smrg .y = RelVertexID (??) 67b8e80941Smrg .w = InstanceID 68b8e80941Smrg 69b8e80941Smrg GS - r0.xyw, r1.xyz = per-vertex offsets 70b8e80941Smrg r0.z = PrimitiveID 71b8e80941Smrg 72b8e80941Smrg TCS - .x = PatchID 73b8e80941Smrg .y = RelPatchID (??) 74b8e80941Smrg .z = InvocationID 75b8e80941Smrg .w = tess factor base. 76b8e80941Smrg 77b8e80941Smrg TES - .x = TessCoord.x 78b8e80941Smrg - .y = TessCoord.y 79b8e80941Smrg - .z = RelPatchID (??) 80b8e80941Smrg - .w = PrimitiveID 81b8e80941Smrg 82b8e80941Smrg PS - face_gpr.z = SampleMask 83b8e80941Smrg face_gpr.w = SampleID 84b8e80941Smrg*/ 85b8e80941Smrg#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 86848b8605Smrgstatic int r600_shader_from_tgsi(struct r600_context *rctx, 87848b8605Smrg struct r600_pipe_shader *pipeshader, 88b8e80941Smrg union r600_shader_key key); 89848b8605Smrg 90848b8605Smrgstatic void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 91848b8605Smrg int size, unsigned comp_mask) { 92848b8605Smrg 93848b8605Smrg if (!size) 94848b8605Smrg return; 95848b8605Smrg 96848b8605Smrg if (ps->num_arrays == ps->max_arrays) { 97848b8605Smrg ps->max_arrays += 64; 98848b8605Smrg ps->arrays = realloc(ps->arrays, ps->max_arrays * 99848b8605Smrg sizeof(struct r600_shader_array)); 100848b8605Smrg } 101848b8605Smrg 102848b8605Smrg int n = ps->num_arrays; 103848b8605Smrg ++ps->num_arrays; 104848b8605Smrg 105848b8605Smrg ps->arrays[n].comp_mask = comp_mask; 106848b8605Smrg ps->arrays[n].gpr_start = start_gpr; 107848b8605Smrg ps->arrays[n].gpr_count = size; 108848b8605Smrg} 109848b8605Smrg 110848b8605Smrgstatic void r600_dump_streamout(struct pipe_stream_output_info *so) 111848b8605Smrg{ 112848b8605Smrg unsigned i; 113848b8605Smrg 114848b8605Smrg fprintf(stderr, "STREAMOUT\n"); 115848b8605Smrg for (i = 0; i < so->num_outputs; i++) { 116848b8605Smrg unsigned mask = ((1 << so->output[i].num_components) - 1) << 117848b8605Smrg so->output[i].start_component; 118b8e80941Smrg fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 119b8e80941Smrg i, 120b8e80941Smrg so->output[i].stream, 121b8e80941Smrg so->output[i].output_buffer, 122848b8605Smrg so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 123848b8605Smrg so->output[i].register_index, 124848b8605Smrg mask & 1 ? "x" : "", 125848b8605Smrg mask & 2 ? "y" : "", 126848b8605Smrg mask & 4 ? "z" : "", 127848b8605Smrg mask & 8 ? "w" : "", 128848b8605Smrg so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); 129848b8605Smrg } 130848b8605Smrg} 131848b8605Smrg 132848b8605Smrgstatic int store_shader(struct pipe_context *ctx, 133848b8605Smrg struct r600_pipe_shader *shader) 134848b8605Smrg{ 135848b8605Smrg struct r600_context *rctx = (struct r600_context *)ctx; 136848b8605Smrg uint32_t *ptr, i; 137848b8605Smrg 138848b8605Smrg if (shader->bo == NULL) { 139848b8605Smrg shader->bo = (struct r600_resource*) 140b8e80941Smrg pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 141848b8605Smrg if (shader->bo == NULL) { 142848b8605Smrg return -ENOMEM; 143848b8605Smrg } 144b8e80941Smrg ptr = r600_buffer_map_sync_with_rings( 145b8e80941Smrg &rctx->b, shader->bo, 146b8e80941Smrg PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); 147848b8605Smrg if (R600_BIG_ENDIAN) { 148848b8605Smrg for (i = 0; i < shader->shader.bc.ndw; ++i) { 149848b8605Smrg ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 150848b8605Smrg } 151848b8605Smrg } else { 152848b8605Smrg memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 153848b8605Smrg } 154b8e80941Smrg rctx->b.ws->buffer_unmap(shader->bo->buf); 155848b8605Smrg } 156848b8605Smrg 157848b8605Smrg return 0; 158848b8605Smrg} 159848b8605Smrg 160848b8605Smrgint r600_pipe_shader_create(struct pipe_context *ctx, 161848b8605Smrg struct r600_pipe_shader *shader, 162b8e80941Smrg union r600_shader_key key) 163848b8605Smrg{ 164848b8605Smrg struct r600_context *rctx = (struct r600_context *)ctx; 165848b8605Smrg struct r600_pipe_shader_selector *sel = shader->selector; 166848b8605Smrg int r; 167b8e80941Smrg bool dump = r600_can_dump_shader(&rctx->screen->b, 168b8e80941Smrg tgsi_get_processor_type(sel->tokens)); 169848b8605Smrg unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 170b8e80941Smrg unsigned sb_disasm; 171b8e80941Smrg unsigned export_shader; 172848b8605Smrg 173848b8605Smrg shader->shader.bc.isa = rctx->isa; 174848b8605Smrg 175848b8605Smrg if (dump) { 176848b8605Smrg fprintf(stderr, "--------------------------------------------------------------\n"); 177848b8605Smrg tgsi_dump(sel->tokens, 0); 178848b8605Smrg 179848b8605Smrg if (sel->so.num_outputs) { 180848b8605Smrg r600_dump_streamout(&sel->so); 181848b8605Smrg } 182848b8605Smrg } 183848b8605Smrg r = r600_shader_from_tgsi(rctx, shader, key); 184848b8605Smrg if (r) { 185848b8605Smrg R600_ERR("translation from TGSI failed !\n"); 186848b8605Smrg goto error; 187848b8605Smrg } 188b8e80941Smrg if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { 189b8e80941Smrg /* only disable for vertex shaders in tess paths */ 190b8e80941Smrg if (key.vs.as_ls) 191b8e80941Smrg use_sb = 0; 192b8e80941Smrg } 193b8e80941Smrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL); 194b8e80941Smrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL); 195b8e80941Smrg use_sb &= (shader->shader.processor_type != PIPE_SHADER_COMPUTE); 196848b8605Smrg 197b8e80941Smrg /* disable SB for shaders using doubles */ 198b8e80941Smrg use_sb &= !shader->shader.uses_doubles; 199848b8605Smrg 200b8e80941Smrg use_sb &= !shader->shader.uses_atomics; 201b8e80941Smrg use_sb &= !shader->shader.uses_images; 202b8e80941Smrg use_sb &= !shader->shader.uses_helper_invocation; 203b8e80941Smrg 204b8e80941Smrg /* Check if the bytecode has already been built. */ 205848b8605Smrg if (!shader->shader.bc.bytecode) { 206848b8605Smrg r = r600_bytecode_build(&shader->shader.bc); 207848b8605Smrg if (r) { 208848b8605Smrg R600_ERR("building bytecode failed !\n"); 209848b8605Smrg goto error; 210848b8605Smrg } 211848b8605Smrg } 212848b8605Smrg 213b8e80941Smrg sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 214848b8605Smrg if (dump && !sb_disasm) { 215848b8605Smrg fprintf(stderr, "--------------------------------------------------------------\n"); 216848b8605Smrg r600_bytecode_disasm(&shader->shader.bc); 217848b8605Smrg fprintf(stderr, "______________________________________________________________\n"); 218848b8605Smrg } else if ((dump && sb_disasm) || use_sb) { 219848b8605Smrg r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 220848b8605Smrg dump, use_sb); 221848b8605Smrg if (r) { 222848b8605Smrg R600_ERR("r600_sb_bytecode_process failed !\n"); 223848b8605Smrg goto error; 224848b8605Smrg } 225848b8605Smrg } 226848b8605Smrg 227848b8605Smrg if (shader->gs_copy_shader) { 228848b8605Smrg if (dump) { 229848b8605Smrg // dump copy shader 230848b8605Smrg r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 231848b8605Smrg &shader->gs_copy_shader->shader, dump, 0); 232848b8605Smrg if (r) 233848b8605Smrg goto error; 234848b8605Smrg } 235848b8605Smrg 236848b8605Smrg if ((r = store_shader(ctx, shader->gs_copy_shader))) 237848b8605Smrg goto error; 238848b8605Smrg } 239848b8605Smrg 240848b8605Smrg /* Store the shader in a buffer. */ 241848b8605Smrg if ((r = store_shader(ctx, shader))) 242848b8605Smrg goto error; 243848b8605Smrg 244848b8605Smrg /* Build state. */ 245848b8605Smrg switch (shader->shader.processor_type) { 246b8e80941Smrg case PIPE_SHADER_TESS_CTRL: 247b8e80941Smrg evergreen_update_hs_state(ctx, shader); 248b8e80941Smrg break; 249b8e80941Smrg case PIPE_SHADER_TESS_EVAL: 250b8e80941Smrg if (key.tes.as_es) 251b8e80941Smrg evergreen_update_es_state(ctx, shader); 252b8e80941Smrg else 253b8e80941Smrg evergreen_update_vs_state(ctx, shader); 254b8e80941Smrg break; 255b8e80941Smrg case PIPE_SHADER_GEOMETRY: 256848b8605Smrg if (rctx->b.chip_class >= EVERGREEN) { 257848b8605Smrg evergreen_update_gs_state(ctx, shader); 258848b8605Smrg evergreen_update_vs_state(ctx, shader->gs_copy_shader); 259848b8605Smrg } else { 260848b8605Smrg r600_update_gs_state(ctx, shader); 261848b8605Smrg r600_update_vs_state(ctx, shader->gs_copy_shader); 262848b8605Smrg } 263848b8605Smrg break; 264b8e80941Smrg case PIPE_SHADER_VERTEX: 265b8e80941Smrg export_shader = key.vs.as_es; 266848b8605Smrg if (rctx->b.chip_class >= EVERGREEN) { 267b8e80941Smrg if (key.vs.as_ls) 268b8e80941Smrg evergreen_update_ls_state(ctx, shader); 269b8e80941Smrg else if (key.vs.as_es) 270848b8605Smrg evergreen_update_es_state(ctx, shader); 271848b8605Smrg else 272848b8605Smrg evergreen_update_vs_state(ctx, shader); 273848b8605Smrg } else { 274848b8605Smrg if (export_shader) 275848b8605Smrg r600_update_es_state(ctx, shader); 276848b8605Smrg else 277848b8605Smrg r600_update_vs_state(ctx, shader); 278848b8605Smrg } 279848b8605Smrg break; 280b8e80941Smrg case PIPE_SHADER_FRAGMENT: 281848b8605Smrg if (rctx->b.chip_class >= EVERGREEN) { 282848b8605Smrg evergreen_update_ps_state(ctx, shader); 283848b8605Smrg } else { 284848b8605Smrg r600_update_ps_state(ctx, shader); 285848b8605Smrg } 286848b8605Smrg break; 287b8e80941Smrg case PIPE_SHADER_COMPUTE: 288b8e80941Smrg evergreen_update_ls_state(ctx, shader); 289b8e80941Smrg break; 290848b8605Smrg default: 291848b8605Smrg r = -EINVAL; 292848b8605Smrg goto error; 293848b8605Smrg } 294848b8605Smrg return 0; 295848b8605Smrg 296848b8605Smrgerror: 297848b8605Smrg r600_pipe_shader_destroy(ctx, shader); 298848b8605Smrg return r; 299848b8605Smrg} 300848b8605Smrg 301b8e80941Smrgvoid r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader) 302848b8605Smrg{ 303b8e80941Smrg r600_resource_reference(&shader->bo, NULL); 304848b8605Smrg r600_bytecode_clear(&shader->shader.bc); 305848b8605Smrg r600_release_command_buffer(&shader->command_buffer); 306848b8605Smrg} 307848b8605Smrg 308848b8605Smrg/* 309848b8605Smrg * tgsi -> r600 shader 310848b8605Smrg */ 311848b8605Smrgstruct r600_shader_tgsi_instruction; 312848b8605Smrg 313848b8605Smrgstruct r600_shader_src { 314848b8605Smrg unsigned sel; 315848b8605Smrg unsigned swizzle[4]; 316848b8605Smrg unsigned neg; 317848b8605Smrg unsigned abs; 318848b8605Smrg unsigned rel; 319848b8605Smrg unsigned kc_bank; 320b8e80941Smrg boolean kc_rel; /* true if cache bank is indexed */ 321848b8605Smrg uint32_t value[4]; 322848b8605Smrg}; 323848b8605Smrg 324b8e80941Smrgstruct eg_interp { 325b8e80941Smrg boolean enabled; 326b8e80941Smrg unsigned ij_index; 327b8e80941Smrg}; 328b8e80941Smrg 329848b8605Smrgstruct r600_shader_ctx { 330848b8605Smrg struct tgsi_shader_info info; 331b8e80941Smrg struct tgsi_array_info *array_infos; 332b8e80941Smrg /* flag for each tgsi temp array if its been spilled or not */ 333b8e80941Smrg bool *spilled_arrays; 334848b8605Smrg struct tgsi_parse_context parse; 335848b8605Smrg const struct tgsi_token *tokens; 336848b8605Smrg unsigned type; 337848b8605Smrg unsigned file_offset[TGSI_FILE_COUNT]; 338848b8605Smrg unsigned temp_reg; 339b8e80941Smrg const struct r600_shader_tgsi_instruction *inst_info; 340848b8605Smrg struct r600_bytecode *bc; 341848b8605Smrg struct r600_shader *shader; 342848b8605Smrg struct r600_shader_src src[4]; 343848b8605Smrg uint32_t *literals; 344848b8605Smrg uint32_t nliterals; 345848b8605Smrg uint32_t max_driver_temp_used; 346848b8605Smrg /* needed for evergreen interpolation */ 347b8e80941Smrg struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid 348848b8605Smrg /* evergreen/cayman also store sample mask in face register */ 349848b8605Smrg int face_gpr; 350b8e80941Smrg /* sample id is .w component stored in fixed point position register */ 351b8e80941Smrg int fixed_pt_position_gpr; 352848b8605Smrg int colors_used; 353848b8605Smrg boolean clip_vertex_write; 354848b8605Smrg unsigned cv_output; 355848b8605Smrg unsigned edgeflag_output; 356b8e80941Smrg int helper_invoc_reg; 357b8e80941Smrg int cs_block_size_reg; 358b8e80941Smrg int cs_grid_size_reg; 359b8e80941Smrg bool cs_block_size_loaded, cs_grid_size_loaded; 360848b8605Smrg int fragcoord_input; 361848b8605Smrg int next_ring_offset; 362848b8605Smrg int gs_out_ring_offset; 363848b8605Smrg int gs_next_vertex; 364848b8605Smrg struct r600_shader *gs_for_vs; 365b8e80941Smrg int gs_export_gpr_tregs[4]; 366b8e80941Smrg int gs_rotated_input[2]; 367b8e80941Smrg const struct pipe_stream_output_info *gs_stream_output_info; 368b8e80941Smrg unsigned enabled_stream_buffers_mask; 369b8e80941Smrg unsigned tess_input_info; /* temp with tess input offsets */ 370b8e80941Smrg unsigned tess_output_info; /* temp with tess input offsets */ 371b8e80941Smrg unsigned thread_id_gpr; /* temp with thread id calculated for images */ 372848b8605Smrg}; 373848b8605Smrg 374848b8605Smrgstruct r600_shader_tgsi_instruction { 375848b8605Smrg unsigned op; 376848b8605Smrg int (*process)(struct r600_shader_ctx *ctx); 377848b8605Smrg}; 378848b8605Smrg 379b8e80941Smrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); 380b8e80941Smrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 381848b8605Smrgstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 382b8e80941Smrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 383848b8605Smrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 384848b8605Smrgstatic int tgsi_else(struct r600_shader_ctx *ctx); 385848b8605Smrgstatic int tgsi_endif(struct r600_shader_ctx *ctx); 386848b8605Smrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx); 387848b8605Smrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx); 388848b8605Smrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 389b8e80941Smrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 390b8e80941Smrg unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 391b8e80941Smrg unsigned int dst_reg); 392b8e80941Smrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 393b8e80941Smrg const struct r600_shader_src *shader_src, 394b8e80941Smrg unsigned chan); 395b8e80941Smrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 396b8e80941Smrg unsigned dst_reg, unsigned mask); 397b8e80941Smrg 398b8e80941Smrgstatic bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) 399b8e80941Smrg{ 400b8e80941Smrg if (ctx->bc->family == CHIP_HEMLOCK || 401b8e80941Smrg ctx->bc->family == CHIP_CYPRESS || 402b8e80941Smrg ctx->bc->family == CHIP_JUNIPER) 403b8e80941Smrg return false; 404b8e80941Smrg return true; 405b8e80941Smrg} 406b8e80941Smrg 407b8e80941Smrgstatic int tgsi_last_instruction(unsigned writemask) 408b8e80941Smrg{ 409b8e80941Smrg int i, lasti = 0; 410b8e80941Smrg 411b8e80941Smrg for (i = 0; i < 4; i++) { 412b8e80941Smrg if (writemask & (1 << i)) { 413b8e80941Smrg lasti = i; 414b8e80941Smrg } 415b8e80941Smrg } 416b8e80941Smrg return lasti; 417b8e80941Smrg} 418848b8605Smrg 419848b8605Smrgstatic int tgsi_is_supported(struct r600_shader_ctx *ctx) 420848b8605Smrg{ 421848b8605Smrg struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 422b8e80941Smrg unsigned j; 423848b8605Smrg 424b8e80941Smrg if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) { 425848b8605Smrg R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 426848b8605Smrg return -EINVAL; 427848b8605Smrg } 428848b8605Smrg#if 0 429848b8605Smrg if (i->Instruction.Label) { 430848b8605Smrg R600_ERR("label unsupported\n"); 431848b8605Smrg return -EINVAL; 432848b8605Smrg } 433848b8605Smrg#endif 434848b8605Smrg for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 435848b8605Smrg if (i->Src[j].Register.Dimension) { 436848b8605Smrg switch (i->Src[j].Register.File) { 437848b8605Smrg case TGSI_FILE_CONSTANT: 438b8e80941Smrg case TGSI_FILE_HW_ATOMIC: 439848b8605Smrg break; 440848b8605Smrg case TGSI_FILE_INPUT: 441b8e80941Smrg if (ctx->type == PIPE_SHADER_GEOMETRY || 442b8e80941Smrg ctx->type == PIPE_SHADER_TESS_CTRL || 443b8e80941Smrg ctx->type == PIPE_SHADER_TESS_EVAL) 444b8e80941Smrg break; 445b8e80941Smrg case TGSI_FILE_OUTPUT: 446b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL) 447848b8605Smrg break; 448848b8605Smrg default: 449b8e80941Smrg R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, 450b8e80941Smrg i->Src[j].Register.File, 451848b8605Smrg i->Src[j].Register.Dimension); 452848b8605Smrg return -EINVAL; 453848b8605Smrg } 454848b8605Smrg } 455848b8605Smrg } 456848b8605Smrg for (j = 0; j < i->Instruction.NumDstRegs; j++) { 457848b8605Smrg if (i->Dst[j].Register.Dimension) { 458b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL) 459b8e80941Smrg continue; 460848b8605Smrg R600_ERR("unsupported dst (dimension)\n"); 461848b8605Smrg return -EINVAL; 462848b8605Smrg } 463848b8605Smrg } 464848b8605Smrg return 0; 465848b8605Smrg} 466848b8605Smrg 467b8e80941Smrgint eg_get_interpolator_index(unsigned interpolate, unsigned location) 468848b8605Smrg{ 469b8e80941Smrg if (interpolate == TGSI_INTERPOLATE_COLOR || 470b8e80941Smrg interpolate == TGSI_INTERPOLATE_LINEAR || 471b8e80941Smrg interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 472b8e80941Smrg { 473b8e80941Smrg int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; 474b8e80941Smrg int loc; 475848b8605Smrg 476b8e80941Smrg switch(location) { 477b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTER: 478b8e80941Smrg loc = 1; 479b8e80941Smrg break; 480b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTROID: 481b8e80941Smrg loc = 2; 482b8e80941Smrg break; 483b8e80941Smrg case TGSI_INTERPOLATE_LOC_SAMPLE: 484b8e80941Smrg default: 485b8e80941Smrg loc = 0; break; 486848b8605Smrg } 487b8e80941Smrg 488b8e80941Smrg return is_linear * 3 + loc; 489848b8605Smrg } 490848b8605Smrg 491b8e80941Smrg return -1; 492b8e80941Smrg} 493b8e80941Smrg 494b8e80941Smrgstatic void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 495b8e80941Smrg int input) 496b8e80941Smrg{ 497b8e80941Smrg int i = eg_get_interpolator_index( 498b8e80941Smrg ctx->shader->input[input].interpolate, 499b8e80941Smrg ctx->shader->input[input].interpolate_location); 500b8e80941Smrg assert(i >= 0); 501b8e80941Smrg ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; 502848b8605Smrg} 503848b8605Smrg 504848b8605Smrgstatic int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 505848b8605Smrg{ 506848b8605Smrg int i, r; 507848b8605Smrg struct r600_bytecode_alu alu; 508848b8605Smrg int gpr = 0, base_chan = 0; 509848b8605Smrg int ij_index = ctx->shader->input[input].ij_index; 510848b8605Smrg 511848b8605Smrg /* work out gpr and base_chan from index */ 512848b8605Smrg gpr = ij_index / 2; 513848b8605Smrg base_chan = (2 * (ij_index % 2)) + 1; 514848b8605Smrg 515848b8605Smrg for (i = 0; i < 8; i++) { 516848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 517848b8605Smrg 518848b8605Smrg if (i < 4) 519848b8605Smrg alu.op = ALU_OP2_INTERP_ZW; 520848b8605Smrg else 521848b8605Smrg alu.op = ALU_OP2_INTERP_XY; 522848b8605Smrg 523848b8605Smrg if ((i > 1) && (i < 6)) { 524848b8605Smrg alu.dst.sel = ctx->shader->input[input].gpr; 525848b8605Smrg alu.dst.write = 1; 526848b8605Smrg } 527848b8605Smrg 528848b8605Smrg alu.dst.chan = i % 4; 529848b8605Smrg 530848b8605Smrg alu.src[0].sel = gpr; 531848b8605Smrg alu.src[0].chan = (base_chan - (i % 2)); 532848b8605Smrg 533848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 534848b8605Smrg 535848b8605Smrg alu.bank_swizzle_force = SQ_ALU_VEC_210; 536848b8605Smrg if ((i % 4) == 3) 537848b8605Smrg alu.last = 1; 538848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 539848b8605Smrg if (r) 540848b8605Smrg return r; 541848b8605Smrg } 542848b8605Smrg return 0; 543848b8605Smrg} 544848b8605Smrg 545848b8605Smrgstatic int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 546848b8605Smrg{ 547848b8605Smrg int i, r; 548848b8605Smrg struct r600_bytecode_alu alu; 549848b8605Smrg 550848b8605Smrg for (i = 0; i < 4; i++) { 551848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 552848b8605Smrg 553848b8605Smrg alu.op = ALU_OP1_INTERP_LOAD_P0; 554848b8605Smrg 555848b8605Smrg alu.dst.sel = ctx->shader->input[input].gpr; 556848b8605Smrg alu.dst.write = 1; 557848b8605Smrg 558848b8605Smrg alu.dst.chan = i; 559848b8605Smrg 560848b8605Smrg alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 561848b8605Smrg alu.src[0].chan = i; 562848b8605Smrg 563848b8605Smrg if (i == 3) 564848b8605Smrg alu.last = 1; 565848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 566848b8605Smrg if (r) 567848b8605Smrg return r; 568848b8605Smrg } 569848b8605Smrg return 0; 570848b8605Smrg} 571848b8605Smrg 572848b8605Smrg/* 573848b8605Smrg * Special export handling in shaders 574848b8605Smrg * 575848b8605Smrg * shader export ARRAY_BASE for EXPORT_POS: 576848b8605Smrg * 60 is position 577848b8605Smrg * 61 is misc vector 578848b8605Smrg * 62, 63 are clip distance vectors 579848b8605Smrg * 580848b8605Smrg * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 581848b8605Smrg * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 582848b8605Smrg * USE_VTX_POINT_SIZE - point size in the X channel of export 61 583848b8605Smrg * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 584848b8605Smrg * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 585848b8605Smrg * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 586848b8605Smrg * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 587848b8605Smrg * exclusive from render target index) 588848b8605Smrg * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 589848b8605Smrg * 590848b8605Smrg * 591848b8605Smrg * shader export ARRAY_BASE for EXPORT_PIXEL: 592848b8605Smrg * 0-7 CB targets 593848b8605Smrg * 61 computed Z vector 594848b8605Smrg * 595848b8605Smrg * The use of the values exported in the computed Z vector are controlled 596848b8605Smrg * by DB_SHADER_CONTROL: 597848b8605Smrg * Z_EXPORT_ENABLE - Z as a float in RED 598848b8605Smrg * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 599848b8605Smrg * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 600848b8605Smrg * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 601848b8605Smrg * DB_SOURCE_FORMAT - export control restrictions 602848b8605Smrg * 603848b8605Smrg */ 604848b8605Smrg 605848b8605Smrg 606848b8605Smrg/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 607848b8605Smrgstatic int r600_spi_sid(struct r600_shader_io * io) 608848b8605Smrg{ 609848b8605Smrg int index, name = io->name; 610848b8605Smrg 611848b8605Smrg /* These params are handled differently, they don't need 612848b8605Smrg * semantic indices, so we'll use 0 for them. 613848b8605Smrg */ 614848b8605Smrg if (name == TGSI_SEMANTIC_POSITION || 615848b8605Smrg name == TGSI_SEMANTIC_PSIZE || 616848b8605Smrg name == TGSI_SEMANTIC_EDGEFLAG || 617848b8605Smrg name == TGSI_SEMANTIC_FACE || 618848b8605Smrg name == TGSI_SEMANTIC_SAMPLEMASK) 619848b8605Smrg index = 0; 620848b8605Smrg else { 621848b8605Smrg if (name == TGSI_SEMANTIC_GENERIC) { 622848b8605Smrg /* For generic params simply use sid from tgsi */ 623848b8605Smrg index = io->sid; 624848b8605Smrg } else { 625848b8605Smrg /* For non-generic params - pack name and sid into 8 bits */ 626848b8605Smrg index = 0x80 | (name<<3) | (io->sid); 627848b8605Smrg } 628848b8605Smrg 629848b8605Smrg /* Make sure that all really used indices have nonzero value, so 630848b8605Smrg * we can just compare it to 0 later instead of comparing the name 631848b8605Smrg * with different values to detect special cases. */ 632848b8605Smrg index++; 633848b8605Smrg } 634848b8605Smrg 635848b8605Smrg return index; 636848b8605Smrg}; 637848b8605Smrg 638b8e80941Smrg/* we need this to get a common lds index for vs/tcs/tes input/outputs */ 639b8e80941Smrgint r600_get_lds_unique_index(unsigned semantic_name, unsigned index) 640b8e80941Smrg{ 641b8e80941Smrg switch (semantic_name) { 642b8e80941Smrg case TGSI_SEMANTIC_POSITION: 643b8e80941Smrg return 0; 644b8e80941Smrg case TGSI_SEMANTIC_PSIZE: 645b8e80941Smrg return 1; 646b8e80941Smrg case TGSI_SEMANTIC_CLIPDIST: 647b8e80941Smrg assert(index <= 1); 648b8e80941Smrg return 2 + index; 649b8e80941Smrg case TGSI_SEMANTIC_GENERIC: 650b8e80941Smrg if (index <= 63-4) 651b8e80941Smrg return 4 + index - 9; 652b8e80941Smrg else 653b8e80941Smrg /* same explanation as in the default statement, 654b8e80941Smrg * the only user hitting this is st/nine. 655b8e80941Smrg */ 656b8e80941Smrg return 0; 657b8e80941Smrg 658b8e80941Smrg /* patch indices are completely separate and thus start from 0 */ 659b8e80941Smrg case TGSI_SEMANTIC_TESSOUTER: 660b8e80941Smrg return 0; 661b8e80941Smrg case TGSI_SEMANTIC_TESSINNER: 662b8e80941Smrg return 1; 663b8e80941Smrg case TGSI_SEMANTIC_PATCH: 664b8e80941Smrg return 2 + index; 665b8e80941Smrg 666b8e80941Smrg default: 667b8e80941Smrg /* Don't fail here. The result of this function is only used 668b8e80941Smrg * for LS, TCS, TES, and GS, where legacy GL semantics can't 669b8e80941Smrg * occur, but this function is called for all vertex shaders 670b8e80941Smrg * before it's known whether LS will be compiled or not. 671b8e80941Smrg */ 672b8e80941Smrg return 0; 673b8e80941Smrg } 674b8e80941Smrg} 675b8e80941Smrg 676848b8605Smrg/* turn input into interpolate on EG */ 677848b8605Smrgstatic int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 678848b8605Smrg{ 679848b8605Smrg int r = 0; 680848b8605Smrg 681848b8605Smrg if (ctx->shader->input[index].spi_sid) { 682848b8605Smrg ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 683848b8605Smrg if (ctx->shader->input[index].interpolate > 0) { 684848b8605Smrg evergreen_interp_assign_ij_index(ctx, index); 685b8e80941Smrg r = evergreen_interp_alu(ctx, index); 686848b8605Smrg } else { 687b8e80941Smrg r = evergreen_interp_flat(ctx, index); 688848b8605Smrg } 689848b8605Smrg } 690848b8605Smrg return r; 691848b8605Smrg} 692848b8605Smrg 693848b8605Smrgstatic int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 694848b8605Smrg{ 695848b8605Smrg struct r600_bytecode_alu alu; 696848b8605Smrg int i, r; 697848b8605Smrg int gpr_front = ctx->shader->input[front].gpr; 698848b8605Smrg int gpr_back = ctx->shader->input[back].gpr; 699848b8605Smrg 700848b8605Smrg for (i = 0; i < 4; i++) { 701848b8605Smrg memset(&alu, 0, sizeof(alu)); 702848b8605Smrg alu.op = ALU_OP3_CNDGT; 703848b8605Smrg alu.is_op3 = 1; 704848b8605Smrg alu.dst.write = 1; 705848b8605Smrg alu.dst.sel = gpr_front; 706848b8605Smrg alu.src[0].sel = ctx->face_gpr; 707848b8605Smrg alu.src[1].sel = gpr_front; 708848b8605Smrg alu.src[2].sel = gpr_back; 709848b8605Smrg 710848b8605Smrg alu.dst.chan = i; 711848b8605Smrg alu.src[1].chan = i; 712848b8605Smrg alu.src[2].chan = i; 713848b8605Smrg alu.last = (i==3); 714848b8605Smrg 715848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 716848b8605Smrg return r; 717848b8605Smrg } 718848b8605Smrg 719848b8605Smrg return 0; 720848b8605Smrg} 721848b8605Smrg 722b8e80941Smrg/* execute a single slot ALU calculation */ 723b8e80941Smrgstatic int single_alu_op2(struct r600_shader_ctx *ctx, int op, 724b8e80941Smrg int dst_sel, int dst_chan, 725b8e80941Smrg int src0_sel, unsigned src0_chan_val, 726b8e80941Smrg int src1_sel, unsigned src1_chan_val) 727b8e80941Smrg{ 728b8e80941Smrg struct r600_bytecode_alu alu; 729b8e80941Smrg int r, i; 730b8e80941Smrg 731b8e80941Smrg if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) { 732b8e80941Smrg for (i = 0; i < 4; i++) { 733b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 734b8e80941Smrg alu.op = op; 735b8e80941Smrg alu.src[0].sel = src0_sel; 736b8e80941Smrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 737b8e80941Smrg alu.src[0].value = src0_chan_val; 738b8e80941Smrg else 739b8e80941Smrg alu.src[0].chan = src0_chan_val; 740b8e80941Smrg alu.src[1].sel = src1_sel; 741b8e80941Smrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 742b8e80941Smrg alu.src[1].value = src1_chan_val; 743b8e80941Smrg else 744b8e80941Smrg alu.src[1].chan = src1_chan_val; 745b8e80941Smrg alu.dst.sel = dst_sel; 746b8e80941Smrg alu.dst.chan = i; 747b8e80941Smrg alu.dst.write = i == dst_chan; 748b8e80941Smrg alu.last = (i == 3); 749b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 750b8e80941Smrg if (r) 751b8e80941Smrg return r; 752b8e80941Smrg } 753b8e80941Smrg return 0; 754b8e80941Smrg } 755b8e80941Smrg 756b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 757b8e80941Smrg alu.op = op; 758b8e80941Smrg alu.src[0].sel = src0_sel; 759b8e80941Smrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 760b8e80941Smrg alu.src[0].value = src0_chan_val; 761b8e80941Smrg else 762b8e80941Smrg alu.src[0].chan = src0_chan_val; 763b8e80941Smrg alu.src[1].sel = src1_sel; 764b8e80941Smrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 765b8e80941Smrg alu.src[1].value = src1_chan_val; 766b8e80941Smrg else 767b8e80941Smrg alu.src[1].chan = src1_chan_val; 768b8e80941Smrg alu.dst.sel = dst_sel; 769b8e80941Smrg alu.dst.chan = dst_chan; 770b8e80941Smrg alu.dst.write = 1; 771b8e80941Smrg alu.last = 1; 772b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 773b8e80941Smrg if (r) 774b8e80941Smrg return r; 775b8e80941Smrg return 0; 776b8e80941Smrg} 777b8e80941Smrg 778b8e80941Smrg/* execute a single slot ALU calculation */ 779b8e80941Smrgstatic int single_alu_op3(struct r600_shader_ctx *ctx, int op, 780b8e80941Smrg int dst_sel, int dst_chan, 781b8e80941Smrg int src0_sel, unsigned src0_chan_val, 782b8e80941Smrg int src1_sel, unsigned src1_chan_val, 783b8e80941Smrg int src2_sel, unsigned src2_chan_val) 784b8e80941Smrg{ 785b8e80941Smrg struct r600_bytecode_alu alu; 786b8e80941Smrg int r; 787b8e80941Smrg 788b8e80941Smrg /* validate this for other ops */ 789b8e80941Smrg assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT); 790b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 791b8e80941Smrg alu.op = op; 792b8e80941Smrg alu.src[0].sel = src0_sel; 793b8e80941Smrg if (src0_sel == V_SQ_ALU_SRC_LITERAL) 794b8e80941Smrg alu.src[0].value = src0_chan_val; 795b8e80941Smrg else 796b8e80941Smrg alu.src[0].chan = src0_chan_val; 797b8e80941Smrg alu.src[1].sel = src1_sel; 798b8e80941Smrg if (src1_sel == V_SQ_ALU_SRC_LITERAL) 799b8e80941Smrg alu.src[1].value = src1_chan_val; 800b8e80941Smrg else 801b8e80941Smrg alu.src[1].chan = src1_chan_val; 802b8e80941Smrg alu.src[2].sel = src2_sel; 803b8e80941Smrg if (src2_sel == V_SQ_ALU_SRC_LITERAL) 804b8e80941Smrg alu.src[2].value = src2_chan_val; 805b8e80941Smrg else 806b8e80941Smrg alu.src[2].chan = src2_chan_val; 807b8e80941Smrg alu.dst.sel = dst_sel; 808b8e80941Smrg alu.dst.chan = dst_chan; 809b8e80941Smrg alu.is_op3 = 1; 810b8e80941Smrg alu.last = 1; 811b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 812b8e80941Smrg if (r) 813b8e80941Smrg return r; 814b8e80941Smrg return 0; 815b8e80941Smrg} 816b8e80941Smrg 817b8e80941Smrg/* put it in temp_reg.x */ 818b8e80941Smrgstatic int get_lds_offset0(struct r600_shader_ctx *ctx, 819b8e80941Smrg int rel_patch_chan, 820b8e80941Smrg int temp_reg, bool is_patch_var) 821b8e80941Smrg{ 822b8e80941Smrg int r; 823b8e80941Smrg 824b8e80941Smrg /* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */ 825b8e80941Smrg /* ADD 826b8e80941Smrg Dimension - patch0_offset (input_vals.z), 827b8e80941Smrg Non-dim - patch0_data_offset (input_vals.w) 828b8e80941Smrg */ 829b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 830b8e80941Smrg temp_reg, 0, 831b8e80941Smrg ctx->tess_output_info, 0, 832b8e80941Smrg 0, rel_patch_chan, 833b8e80941Smrg ctx->tess_output_info, is_patch_var ? 3 : 2); 834b8e80941Smrg if (r) 835b8e80941Smrg return r; 836b8e80941Smrg return 0; 837b8e80941Smrg} 838b8e80941Smrg 839b8e80941Smrgstatic inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index) 840b8e80941Smrg{ 841b8e80941Smrg return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 842b8e80941Smrg} 843b8e80941Smrg 844b8e80941Smrgstatic int r600_get_temp(struct r600_shader_ctx *ctx) 845b8e80941Smrg{ 846b8e80941Smrg return ctx->temp_reg + ctx->max_driver_temp_used++; 847b8e80941Smrg} 848b8e80941Smrg 849b8e80941Smrgstatic int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 850b8e80941Smrg{ 851b8e80941Smrg int i; 852b8e80941Smrg i = ctx->shader->noutput++; 853b8e80941Smrg ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 854b8e80941Smrg ctx->shader->output[i].sid = 0; 855b8e80941Smrg ctx->shader->output[i].gpr = 0; 856b8e80941Smrg ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 857b8e80941Smrg ctx->shader->output[i].write_mask = 0x4; 858b8e80941Smrg ctx->shader->output[i].spi_sid = prim_id_sid; 859b8e80941Smrg 860b8e80941Smrg return 0; 861b8e80941Smrg} 862b8e80941Smrg 863b8e80941Smrgstatic int tgsi_barrier(struct r600_shader_ctx *ctx) 864b8e80941Smrg{ 865b8e80941Smrg struct r600_bytecode_alu alu; 866b8e80941Smrg int r; 867b8e80941Smrg 868b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 869b8e80941Smrg alu.op = ctx->inst_info->op; 870b8e80941Smrg alu.last = 1; 871b8e80941Smrg 872b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 873b8e80941Smrg if (r) 874b8e80941Smrg return r; 875b8e80941Smrg return 0; 876b8e80941Smrg} 877b8e80941Smrg 878b8e80941Smrgstatic void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, unsigned *scratch_space_needed) 879b8e80941Smrg{ 880b8e80941Smrg // pick largest array and spill it, repeat until the number of temps is under limit or we run out of arrays 881b8e80941Smrg unsigned n = ctx->info.array_max[TGSI_FILE_TEMPORARY]; 882b8e80941Smrg unsigned narrays_left = n; 883b8e80941Smrg bool *spilled = ctx->spilled_arrays; // assumed calloc:ed 884b8e80941Smrg 885b8e80941Smrg *scratch_space_needed = 0; 886b8e80941Smrg while (*regno > 124 && narrays_left) { 887b8e80941Smrg unsigned i; 888b8e80941Smrg unsigned largest = 0; 889b8e80941Smrg unsigned largest_index = 0; 890b8e80941Smrg 891b8e80941Smrg for (i = 0; i < n; i++) { 892b8e80941Smrg unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 893b8e80941Smrg if (!spilled[i] && size > largest) { 894b8e80941Smrg largest = size; 895b8e80941Smrg largest_index = i; 896b8e80941Smrg } 897b8e80941Smrg } 898b8e80941Smrg 899b8e80941Smrg spilled[largest_index] = true; 900b8e80941Smrg *regno -= largest; 901b8e80941Smrg *scratch_space_needed += largest; 902b8e80941Smrg 903b8e80941Smrg narrays_left --; 904b8e80941Smrg } 905b8e80941Smrg 906b8e80941Smrg if (narrays_left == 0) { 907b8e80941Smrg ctx->info.indirect_files &= ~(1 << TGSI_FILE_TEMPORARY); 908b8e80941Smrg } 909b8e80941Smrg} 910b8e80941Smrg 911b8e80941Smrg/* Take spilled temp arrays into account when translating tgsi register 912b8e80941Smrg * indexes into r600 gprs if spilled is false, or scratch array offset if 913b8e80941Smrg * spilled is true */ 914b8e80941Smrgstatic int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, bool *spilled) 915b8e80941Smrg{ 916b8e80941Smrg unsigned i; 917b8e80941Smrg unsigned spilled_size = 0; 918b8e80941Smrg 919b8e80941Smrg for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) { 920b8e80941Smrg if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) { 921b8e80941Smrg if (ctx->spilled_arrays[i]) { 922b8e80941Smrg /* vec4 index into spilled scratch memory */ 923b8e80941Smrg *spilled = true; 924b8e80941Smrg return tgsi_reg_index - ctx->array_infos[i].range.First + spilled_size; 925b8e80941Smrg } 926b8e80941Smrg else { 927b8e80941Smrg /* regular GPR array */ 928b8e80941Smrg *spilled = false; 929b8e80941Smrg return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY]; 930b8e80941Smrg } 931b8e80941Smrg } 932b8e80941Smrg 933b8e80941Smrg if (tgsi_reg_index < ctx->array_infos[i].range.First) 934b8e80941Smrg break; 935b8e80941Smrg if (ctx->spilled_arrays[i]) { 936b8e80941Smrg spilled_size += ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 937b8e80941Smrg } 938b8e80941Smrg } 939b8e80941Smrg 940b8e80941Smrg /* regular GPR index, minus the holes from spilled arrays */ 941b8e80941Smrg *spilled = false; 942b8e80941Smrg 943b8e80941Smrg return tgsi_reg_index - spilled_size + ctx->file_offset[TGSI_FILE_TEMPORARY]; 944b8e80941Smrg} 945b8e80941Smrg 946b8e80941Smrg/* look up spill area base offset and array size for a spilled temp array */ 947b8e80941Smrgstatic void get_spilled_array_base_and_size(struct r600_shader_ctx *ctx, unsigned tgsi_reg_index, 948b8e80941Smrg unsigned *array_base, unsigned *array_size) 949b8e80941Smrg{ 950b8e80941Smrg unsigned i; 951b8e80941Smrg unsigned offset = 0; 952b8e80941Smrg 953b8e80941Smrg for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) { 954b8e80941Smrg if (ctx->spilled_arrays[i]) { 955b8e80941Smrg unsigned size = ctx->array_infos[i].range.Last - ctx->array_infos[i].range.First + 1; 956b8e80941Smrg 957b8e80941Smrg if (tgsi_reg_index >= ctx->array_infos[i].range.First && tgsi_reg_index <= ctx->array_infos[i].range.Last) { 958b8e80941Smrg *array_base = offset; 959b8e80941Smrg *array_size = size - 1; /* hw counts from 1 */ 960b8e80941Smrg 961b8e80941Smrg return; 962b8e80941Smrg } 963b8e80941Smrg 964b8e80941Smrg offset += size; 965b8e80941Smrg } 966b8e80941Smrg } 967b8e80941Smrg} 968b8e80941Smrg 969848b8605Smrgstatic int tgsi_declaration(struct r600_shader_ctx *ctx) 970848b8605Smrg{ 971848b8605Smrg struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 972848b8605Smrg int r, i, j, count = d->Range.Last - d->Range.First + 1; 973848b8605Smrg 974848b8605Smrg switch (d->Declaration.File) { 975848b8605Smrg case TGSI_FILE_INPUT: 976b8e80941Smrg for (j = 0; j < count; j++) { 977b8e80941Smrg i = ctx->shader->ninput + j; 978b8e80941Smrg assert(i < ARRAY_SIZE(ctx->shader->input)); 979b8e80941Smrg ctx->shader->input[i].name = d->Semantic.Name; 980b8e80941Smrg ctx->shader->input[i].sid = d->Semantic.Index + j; 981b8e80941Smrg ctx->shader->input[i].interpolate = d->Interp.Interpolate; 982b8e80941Smrg ctx->shader->input[i].interpolate_location = d->Interp.Location; 983b8e80941Smrg ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 984b8e80941Smrg if (ctx->type == PIPE_SHADER_FRAGMENT) { 985b8e80941Smrg ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 986b8e80941Smrg switch (ctx->shader->input[i].name) { 987b8e80941Smrg case TGSI_SEMANTIC_FACE: 988b8e80941Smrg if (ctx->face_gpr != -1) 989b8e80941Smrg ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 990b8e80941Smrg else 991b8e80941Smrg ctx->face_gpr = ctx->shader->input[i].gpr; 992b8e80941Smrg break; 993b8e80941Smrg case TGSI_SEMANTIC_COLOR: 994b8e80941Smrg ctx->colors_used++; 995b8e80941Smrg break; 996b8e80941Smrg case TGSI_SEMANTIC_POSITION: 997b8e80941Smrg ctx->fragcoord_input = i; 998b8e80941Smrg break; 999b8e80941Smrg case TGSI_SEMANTIC_PRIMID: 1000b8e80941Smrg /* set this for now */ 1001b8e80941Smrg ctx->shader->gs_prim_id_input = true; 1002b8e80941Smrg ctx->shader->ps_prim_id_input = i; 1003b8e80941Smrg break; 1004b8e80941Smrg } 1005b8e80941Smrg if (ctx->bc->chip_class >= EVERGREEN) { 1006b8e80941Smrg if ((r = evergreen_interp_input(ctx, i))) 1007b8e80941Smrg return r; 1008b8e80941Smrg } 1009b8e80941Smrg } else if (ctx->type == PIPE_SHADER_GEOMETRY) { 1010b8e80941Smrg /* FIXME probably skip inputs if they aren't passed in the ring */ 1011b8e80941Smrg ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 1012b8e80941Smrg ctx->next_ring_offset += 16; 1013b8e80941Smrg if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 1014b8e80941Smrg ctx->shader->gs_prim_id_input = true; 1015848b8605Smrg } 1016848b8605Smrg } 1017b8e80941Smrg ctx->shader->ninput += count; 1018848b8605Smrg break; 1019848b8605Smrg case TGSI_FILE_OUTPUT: 1020b8e80941Smrg for (j = 0; j < count; j++) { 1021b8e80941Smrg i = ctx->shader->noutput + j; 1022b8e80941Smrg assert(i < ARRAY_SIZE(ctx->shader->output)); 1023b8e80941Smrg ctx->shader->output[i].name = d->Semantic.Name; 1024b8e80941Smrg ctx->shader->output[i].sid = d->Semantic.Index + j; 1025b8e80941Smrg ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 1026b8e80941Smrg ctx->shader->output[i].interpolate = d->Interp.Interpolate; 1027b8e80941Smrg ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 1028b8e80941Smrg if (ctx->type == PIPE_SHADER_VERTEX || 1029b8e80941Smrg ctx->type == PIPE_SHADER_GEOMETRY || 1030b8e80941Smrg ctx->type == PIPE_SHADER_TESS_EVAL) { 1031b8e80941Smrg ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 1032b8e80941Smrg switch (d->Semantic.Name) { 1033b8e80941Smrg case TGSI_SEMANTIC_CLIPDIST: 1034b8e80941Smrg break; 1035b8e80941Smrg case TGSI_SEMANTIC_PSIZE: 1036b8e80941Smrg ctx->shader->vs_out_misc_write = 1; 1037b8e80941Smrg ctx->shader->vs_out_point_size = 1; 1038b8e80941Smrg break; 1039b8e80941Smrg case TGSI_SEMANTIC_EDGEFLAG: 1040b8e80941Smrg ctx->shader->vs_out_misc_write = 1; 1041b8e80941Smrg ctx->shader->vs_out_edgeflag = 1; 1042b8e80941Smrg ctx->edgeflag_output = i; 1043b8e80941Smrg break; 1044b8e80941Smrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 1045b8e80941Smrg ctx->shader->vs_out_misc_write = 1; 1046b8e80941Smrg ctx->shader->vs_out_viewport = 1; 1047b8e80941Smrg break; 1048b8e80941Smrg case TGSI_SEMANTIC_LAYER: 1049b8e80941Smrg ctx->shader->vs_out_misc_write = 1; 1050b8e80941Smrg ctx->shader->vs_out_layer = 1; 1051b8e80941Smrg break; 1052b8e80941Smrg case TGSI_SEMANTIC_CLIPVERTEX: 1053b8e80941Smrg ctx->clip_vertex_write = TRUE; 1054b8e80941Smrg ctx->cv_output = i; 1055b8e80941Smrg break; 1056b8e80941Smrg } 1057b8e80941Smrg if (ctx->type == PIPE_SHADER_GEOMETRY) { 1058b8e80941Smrg ctx->gs_out_ring_offset += 16; 1059b8e80941Smrg } 1060b8e80941Smrg } else if (ctx->type == PIPE_SHADER_FRAGMENT) { 1061b8e80941Smrg switch (d->Semantic.Name) { 1062b8e80941Smrg case TGSI_SEMANTIC_COLOR: 1063b8e80941Smrg ctx->shader->nr_ps_max_color_exports++; 1064b8e80941Smrg break; 1065b8e80941Smrg } 1066848b8605Smrg } 1067848b8605Smrg } 1068b8e80941Smrg ctx->shader->noutput += count; 1069848b8605Smrg break; 1070848b8605Smrg case TGSI_FILE_TEMPORARY: 1071848b8605Smrg if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1072848b8605Smrg if (d->Array.ArrayID) { 1073b8e80941Smrg bool spilled; 1074b8e80941Smrg unsigned idx = map_tgsi_reg_index_to_r600_gpr(ctx, 1075b8e80941Smrg d->Range.First, 1076b8e80941Smrg &spilled); 1077b8e80941Smrg 1078b8e80941Smrg if (!spilled) { 1079b8e80941Smrg r600_add_gpr_array(ctx->shader, idx, 1080b8e80941Smrg d->Range.Last - d->Range.First + 1, 0x0F); 1081b8e80941Smrg } 1082848b8605Smrg } 1083848b8605Smrg } 1084848b8605Smrg break; 1085848b8605Smrg 1086848b8605Smrg case TGSI_FILE_CONSTANT: 1087848b8605Smrg case TGSI_FILE_SAMPLER: 1088b8e80941Smrg case TGSI_FILE_SAMPLER_VIEW: 1089848b8605Smrg case TGSI_FILE_ADDRESS: 1090b8e80941Smrg case TGSI_FILE_BUFFER: 1091b8e80941Smrg case TGSI_FILE_IMAGE: 1092b8e80941Smrg case TGSI_FILE_MEMORY: 1093b8e80941Smrg break; 1094b8e80941Smrg 1095b8e80941Smrg case TGSI_FILE_HW_ATOMIC: 1096b8e80941Smrg i = ctx->shader->nhwatomic_ranges; 1097b8e80941Smrg ctx->shader->atomics[i].start = d->Range.First; 1098b8e80941Smrg ctx->shader->atomics[i].end = d->Range.Last; 1099b8e80941Smrg ctx->shader->atomics[i].hw_idx = ctx->shader->atomic_base + ctx->shader->nhwatomic; 1100b8e80941Smrg ctx->shader->atomics[i].array_id = d->Array.ArrayID; 1101b8e80941Smrg ctx->shader->atomics[i].buffer_id = d->Dim.Index2D; 1102b8e80941Smrg ctx->shader->nhwatomic_ranges++; 1103b8e80941Smrg ctx->shader->nhwatomic += count; 1104848b8605Smrg break; 1105848b8605Smrg 1106848b8605Smrg case TGSI_FILE_SYSTEM_VALUE: 1107b8e80941Smrg if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 1108b8e80941Smrg d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 1109b8e80941Smrg d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 1110b8e80941Smrg break; /* Already handled from allocate_system_value_inputs */ 1111b8e80941Smrg } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 1112848b8605Smrg break; 1113b8e80941Smrg } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 1114b8e80941Smrg break; 1115b8e80941Smrg else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 1116b8e80941Smrg break; 1117b8e80941Smrg else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER || 1118b8e80941Smrg d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) { 1119b8e80941Smrg int param = r600_get_lds_unique_index(d->Semantic.Name, 0); 1120b8e80941Smrg int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2; 1121b8e80941Smrg unsigned temp_reg = r600_get_temp(ctx); 1122b8e80941Smrg 1123b8e80941Smrg r = get_lds_offset0(ctx, 2, temp_reg, true); 1124b8e80941Smrg if (r) 1125b8e80941Smrg return r; 1126b8e80941Smrg 1127b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1128b8e80941Smrg temp_reg, 0, 1129b8e80941Smrg temp_reg, 0, 1130b8e80941Smrg V_SQ_ALU_SRC_LITERAL, param * 16); 1131b8e80941Smrg if (r) 1132b8e80941Smrg return r; 1133b8e80941Smrg 1134b8e80941Smrg do_lds_fetch_values(ctx, temp_reg, dreg, 0xf); 1135848b8605Smrg } 1136b8e80941Smrg else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) { 1137b8e80941Smrg /* MOV r1.x, r0.x; 1138b8e80941Smrg MOV r1.y, r0.y; 1139b8e80941Smrg */ 1140b8e80941Smrg for (i = 0; i < 2; i++) { 1141848b8605Smrg struct r600_bytecode_alu alu; 1142848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1143b8e80941Smrg alu.op = ALU_OP1_MOV; 1144848b8605Smrg alu.src[0].sel = 0; 1145b8e80941Smrg alu.src[0].chan = 0 + i; 1146b8e80941Smrg alu.dst.sel = 1; 1147b8e80941Smrg alu.dst.chan = 0 + i; 1148848b8605Smrg alu.dst.write = 1; 1149b8e80941Smrg alu.last = (i == 1) ? 1 : 0; 1150848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1151848b8605Smrg return r; 1152848b8605Smrg } 1153b8e80941Smrg /* ADD r1.z, 1.0f, -r0.x */ 1154b8e80941Smrg struct r600_bytecode_alu alu; 1155b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1156b8e80941Smrg alu.op = ALU_OP2_ADD; 1157b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 1158b8e80941Smrg alu.src[1].sel = 1; 1159b8e80941Smrg alu.src[1].chan = 0; 1160b8e80941Smrg alu.src[1].neg = 1; 1161b8e80941Smrg alu.dst.sel = 1; 1162b8e80941Smrg alu.dst.chan = 2; 1163b8e80941Smrg alu.dst.write = 1; 1164b8e80941Smrg alu.last = 1; 1165b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1166b8e80941Smrg return r; 1167848b8605Smrg 1168b8e80941Smrg /* ADD r1.z, r1.z, -r1.y */ 1169b8e80941Smrg alu.op = ALU_OP2_ADD; 1170b8e80941Smrg alu.src[0].sel = 1; 1171b8e80941Smrg alu.src[0].chan = 2; 1172b8e80941Smrg alu.src[1].sel = 1; 1173b8e80941Smrg alu.src[1].chan = 1; 1174b8e80941Smrg alu.src[1].neg = 1; 1175b8e80941Smrg alu.dst.sel = 1; 1176b8e80941Smrg alu.dst.chan = 2; 1177b8e80941Smrg alu.dst.write = 1; 1178b8e80941Smrg alu.last = 1; 1179b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1180b8e80941Smrg return r; 1181b8e80941Smrg break; 1182b8e80941Smrg } 1183b8e80941Smrg break; 1184b8e80941Smrg default: 1185b8e80941Smrg R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 1186b8e80941Smrg return -EINVAL; 1187b8e80941Smrg } 1188b8e80941Smrg return 0; 1189b8e80941Smrg} 1190b8e80941Smrg 1191b8e80941Smrgstatic int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 1192b8e80941Smrg{ 1193b8e80941Smrg struct tgsi_parse_context parse; 1194b8e80941Smrg struct { 1195b8e80941Smrg boolean enabled; 1196b8e80941Smrg int *reg; 1197b8e80941Smrg unsigned name, alternate_name; 1198b8e80941Smrg } inputs[2] = { 1199b8e80941Smrg { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 1200b8e80941Smrg 1201b8e80941Smrg { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 1202b8e80941Smrg }; 1203b8e80941Smrg int num_regs = 0; 1204b8e80941Smrg unsigned k, i; 1205b8e80941Smrg 1206b8e80941Smrg if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1207b8e80941Smrg return 0; 1208b8e80941Smrg } 1209b8e80941Smrg 1210b8e80941Smrg /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1211b8e80941Smrg while (!tgsi_parse_end_of_tokens(&parse)) { 1212b8e80941Smrg tgsi_parse_token(&parse); 1213b8e80941Smrg 1214b8e80941Smrg if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1215b8e80941Smrg const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1216b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1217b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1218b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1219b8e80941Smrg { 1220b8e80941Smrg int interpolate, location, k; 1221b8e80941Smrg 1222b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1223b8e80941Smrg location = TGSI_INTERPOLATE_LOC_CENTER; 1224b8e80941Smrg } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1225b8e80941Smrg location = TGSI_INTERPOLATE_LOC_CENTER; 1226b8e80941Smrg /* Needs sample positions, currently those are always available */ 1227b8e80941Smrg } else { 1228b8e80941Smrg location = TGSI_INTERPOLATE_LOC_CENTROID; 1229b8e80941Smrg } 1230b8e80941Smrg 1231b8e80941Smrg interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1232b8e80941Smrg k = eg_get_interpolator_index(interpolate, location); 1233b8e80941Smrg if (k >= 0) 1234b8e80941Smrg ctx->eg_interpolators[k].enabled = true; 1235b8e80941Smrg } 1236b8e80941Smrg } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 1237b8e80941Smrg struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 1238b8e80941Smrg if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1239b8e80941Smrg for (k = 0; k < ARRAY_SIZE(inputs); k++) { 1240b8e80941Smrg if (d->Semantic.Name == inputs[k].name || 1241b8e80941Smrg d->Semantic.Name == inputs[k].alternate_name) { 1242b8e80941Smrg inputs[k].enabled = true; 1243b8e80941Smrg } 1244b8e80941Smrg } 1245b8e80941Smrg } 1246b8e80941Smrg } 1247b8e80941Smrg } 1248b8e80941Smrg 1249b8e80941Smrg tgsi_parse_free(&parse); 1250b8e80941Smrg 1251b8e80941Smrg if (ctx->info.reads_samplemask && 1252b8e80941Smrg (ctx->info.uses_linear_sample || ctx->info.uses_persp_sample)) { 1253b8e80941Smrg inputs[1].enabled = true; 1254b8e80941Smrg } 1255b8e80941Smrg 1256b8e80941Smrg if (ctx->bc->chip_class >= EVERGREEN) { 1257b8e80941Smrg int num_baryc = 0; 1258b8e80941Smrg /* assign gpr to each interpolator according to priority */ 1259b8e80941Smrg for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) { 1260b8e80941Smrg if (ctx->eg_interpolators[i].enabled) { 1261b8e80941Smrg ctx->eg_interpolators[i].ij_index = num_baryc; 1262b8e80941Smrg num_baryc++; 1263b8e80941Smrg } 1264b8e80941Smrg } 1265b8e80941Smrg num_baryc = (num_baryc + 1) >> 1; 1266b8e80941Smrg gpr_offset += num_baryc; 1267b8e80941Smrg } 1268b8e80941Smrg 1269b8e80941Smrg for (i = 0; i < ARRAY_SIZE(inputs); i++) { 1270b8e80941Smrg boolean enabled = inputs[i].enabled; 1271b8e80941Smrg int *reg = inputs[i].reg; 1272b8e80941Smrg unsigned name = inputs[i].name; 1273b8e80941Smrg 1274b8e80941Smrg if (enabled) { 1275b8e80941Smrg int gpr = gpr_offset + num_regs++; 1276b8e80941Smrg ctx->shader->nsys_inputs++; 1277b8e80941Smrg 1278b8e80941Smrg // add to inputs, allocate a gpr 1279b8e80941Smrg k = ctx->shader->ninput++; 1280b8e80941Smrg ctx->shader->input[k].name = name; 1281b8e80941Smrg ctx->shader->input[k].sid = 0; 1282b8e80941Smrg ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 1283b8e80941Smrg ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 1284b8e80941Smrg *reg = ctx->shader->input[k].gpr = gpr; 1285b8e80941Smrg } 1286b8e80941Smrg } 1287b8e80941Smrg 1288b8e80941Smrg return gpr_offset + num_regs; 1289848b8605Smrg} 1290848b8605Smrg 1291848b8605Smrg/* 1292848b8605Smrg * for evergreen we need to scan the shader to find the number of GPRs we need to 1293b8e80941Smrg * reserve for interpolation and system values 1294848b8605Smrg * 1295b8e80941Smrg * we need to know if we are going to emit any sample or centroid inputs 1296848b8605Smrg * if perspective and linear are required 1297848b8605Smrg*/ 1298848b8605Smrgstatic int evergreen_gpr_count(struct r600_shader_ctx *ctx) 1299848b8605Smrg{ 1300b8e80941Smrg unsigned i; 1301848b8605Smrg 1302b8e80941Smrg memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 1303848b8605Smrg 1304b8e80941Smrg /* 1305b8e80941Smrg * Could get this information from the shader info. But right now 1306b8e80941Smrg * we interpolate all declared inputs, whereas the shader info will 1307b8e80941Smrg * only contain the bits if the inputs are actually used, so it might 1308b8e80941Smrg * not be safe... 1309b8e80941Smrg */ 1310848b8605Smrg for (i = 0; i < ctx->info.num_inputs; i++) { 1311b8e80941Smrg int k; 1312b8e80941Smrg /* skip position/face/mask/sampleid */ 1313848b8605Smrg if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1314848b8605Smrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 1315b8e80941Smrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 1316b8e80941Smrg ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 1317848b8605Smrg continue; 1318b8e80941Smrg 1319b8e80941Smrg k = eg_get_interpolator_index( 1320b8e80941Smrg ctx->info.input_interpolate[i], 1321b8e80941Smrg ctx->info.input_interpolate_loc[i]); 1322b8e80941Smrg if (k >= 0) 1323b8e80941Smrg ctx->eg_interpolators[k].enabled = TRUE; 1324b8e80941Smrg } 1325b8e80941Smrg 1326b8e80941Smrg /* XXX PULL MODEL and LINE STIPPLE */ 1327b8e80941Smrg 1328b8e80941Smrg return allocate_system_value_inputs(ctx, 0); 1329b8e80941Smrg} 1330b8e80941Smrg 1331b8e80941Smrg/* sample_id_sel == NULL means fetch for current sample */ 1332b8e80941Smrgstatic int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1333b8e80941Smrg{ 1334b8e80941Smrg struct r600_bytecode_vtx vtx; 1335b8e80941Smrg int r, t1; 1336b8e80941Smrg 1337b8e80941Smrg t1 = r600_get_temp(ctx); 1338b8e80941Smrg 1339b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1340b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 1341b8e80941Smrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1342b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1343b8e80941Smrg if (sample_id == NULL) { 1344b8e80941Smrg assert(ctx->fixed_pt_position_gpr != -1); 1345b8e80941Smrg 1346b8e80941Smrg vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 1347b8e80941Smrg vtx.src_sel_x = 3; 1348b8e80941Smrg } 1349b8e80941Smrg else { 1350b8e80941Smrg struct r600_bytecode_alu alu; 1351b8e80941Smrg 1352b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1353b8e80941Smrg alu.op = ALU_OP1_MOV; 1354b8e80941Smrg r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 1355b8e80941Smrg alu.dst.sel = t1; 1356b8e80941Smrg alu.dst.write = 1; 1357b8e80941Smrg alu.last = 1; 1358b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1359b8e80941Smrg if (r) 1360b8e80941Smrg return r; 1361b8e80941Smrg 1362b8e80941Smrg vtx.src_gpr = t1; 1363b8e80941Smrg vtx.src_sel_x = 0; 1364848b8605Smrg } 1365b8e80941Smrg vtx.mega_fetch_count = 16; 1366b8e80941Smrg vtx.dst_gpr = t1; 1367b8e80941Smrg vtx.dst_sel_x = 0; 1368b8e80941Smrg vtx.dst_sel_y = 1; 1369b8e80941Smrg vtx.dst_sel_z = 2; 1370b8e80941Smrg vtx.dst_sel_w = 3; 1371b8e80941Smrg vtx.data_format = FMT_32_32_32_32_FLOAT; 1372b8e80941Smrg vtx.num_format_all = 2; 1373b8e80941Smrg vtx.format_comp_all = 1; 1374b8e80941Smrg vtx.use_const_fields = 0; 1375b8e80941Smrg vtx.offset = 0; 1376b8e80941Smrg vtx.endian = r600_endian_swap(32); 1377b8e80941Smrg vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1378b8e80941Smrg 1379b8e80941Smrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1380b8e80941Smrg if (r) 1381b8e80941Smrg return r; 1382b8e80941Smrg 1383b8e80941Smrg return t1; 1384b8e80941Smrg} 1385b8e80941Smrg 1386b8e80941Smrgstatic int eg_load_helper_invocation(struct r600_shader_ctx *ctx) 1387b8e80941Smrg{ 1388b8e80941Smrg int r; 1389b8e80941Smrg struct r600_bytecode_alu alu; 1390b8e80941Smrg 1391b8e80941Smrg /* do a vtx fetch with wqm set on the vtx fetch */ 1392b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1393b8e80941Smrg alu.op = ALU_OP1_MOV; 1394b8e80941Smrg alu.dst.sel = ctx->helper_invoc_reg; 1395b8e80941Smrg alu.dst.chan = 0; 1396b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 1397b8e80941Smrg alu.src[0].value = 0xffffffff; 1398b8e80941Smrg alu.dst.write = 1; 1399b8e80941Smrg alu.last = 1; 1400b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1401b8e80941Smrg if (r) 1402b8e80941Smrg return r; 1403b8e80941Smrg 1404b8e80941Smrg /* do a vtx fetch in VPM mode */ 1405b8e80941Smrg struct r600_bytecode_vtx vtx; 1406b8e80941Smrg memset(&vtx, 0, sizeof(vtx)); 1407b8e80941Smrg vtx.op = FETCH_OP_GET_BUFFER_RESINFO; 1408b8e80941Smrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1409b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1410b8e80941Smrg vtx.src_gpr = 0; 1411b8e80941Smrg vtx.mega_fetch_count = 16; /* no idea here really... */ 1412b8e80941Smrg vtx.dst_gpr = ctx->helper_invoc_reg; 1413b8e80941Smrg vtx.dst_sel_x = 4; 1414b8e80941Smrg vtx.dst_sel_y = 7; /* SEL_Y */ 1415b8e80941Smrg vtx.dst_sel_z = 7; /* SEL_Z */ 1416b8e80941Smrg vtx.dst_sel_w = 7; /* SEL_W */ 1417b8e80941Smrg vtx.data_format = FMT_32; 1418b8e80941Smrg if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) 1419b8e80941Smrg return r; 1420b8e80941Smrg ctx->bc->cf_last->vpm = 1; 1421b8e80941Smrg return 0; 1422b8e80941Smrg} 1423b8e80941Smrg 1424b8e80941Smrgstatic int cm_load_helper_invocation(struct r600_shader_ctx *ctx) 1425b8e80941Smrg{ 1426b8e80941Smrg int r; 1427b8e80941Smrg struct r600_bytecode_alu alu; 1428b8e80941Smrg 1429b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1430b8e80941Smrg alu.op = ALU_OP1_MOV; 1431b8e80941Smrg alu.dst.sel = ctx->helper_invoc_reg; 1432b8e80941Smrg alu.dst.chan = 0; 1433b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 1434b8e80941Smrg alu.src[0].value = 0xffffffff; 1435b8e80941Smrg alu.dst.write = 1; 1436b8e80941Smrg alu.last = 1; 1437b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1438b8e80941Smrg if (r) 1439b8e80941Smrg return r; 1440b8e80941Smrg 1441b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1442b8e80941Smrg alu.op = ALU_OP1_MOV; 1443b8e80941Smrg alu.dst.sel = ctx->helper_invoc_reg; 1444b8e80941Smrg alu.dst.chan = 0; 1445b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 1446b8e80941Smrg alu.dst.write = 1; 1447b8e80941Smrg alu.last = 1; 1448b8e80941Smrg r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE); 1449b8e80941Smrg if (r) 1450b8e80941Smrg return r; 1451b8e80941Smrg 1452b8e80941Smrg return ctx->helper_invoc_reg; 1453b8e80941Smrg} 1454b8e80941Smrg 1455b8e80941Smrgstatic int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block) 1456b8e80941Smrg{ 1457b8e80941Smrg struct r600_bytecode_vtx vtx; 1458b8e80941Smrg int r, t1; 1459b8e80941Smrg 1460b8e80941Smrg if (ctx->cs_block_size_loaded) 1461b8e80941Smrg return ctx->cs_block_size_reg; 1462b8e80941Smrg if (ctx->cs_grid_size_loaded) 1463b8e80941Smrg return ctx->cs_grid_size_reg; 1464b8e80941Smrg 1465b8e80941Smrg t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg; 1466b8e80941Smrg struct r600_bytecode_alu alu; 1467b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1468b8e80941Smrg alu.op = ALU_OP1_MOV; 1469b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 1470b8e80941Smrg alu.dst.sel = t1; 1471b8e80941Smrg alu.dst.write = 1; 1472b8e80941Smrg alu.last = 1; 1473b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1474b8e80941Smrg if (r) 1475b8e80941Smrg return r; 1476b8e80941Smrg 1477b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1478b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 1479b8e80941Smrg vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1480b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1481b8e80941Smrg vtx.src_gpr = t1; 1482b8e80941Smrg vtx.src_sel_x = 0; 1483848b8605Smrg 1484b8e80941Smrg vtx.mega_fetch_count = 16; 1485b8e80941Smrg vtx.dst_gpr = t1; 1486b8e80941Smrg vtx.dst_sel_x = 0; 1487b8e80941Smrg vtx.dst_sel_y = 1; 1488b8e80941Smrg vtx.dst_sel_z = 2; 1489b8e80941Smrg vtx.dst_sel_w = 7; 1490b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 1491b8e80941Smrg vtx.num_format_all = 1; 1492b8e80941Smrg vtx.format_comp_all = 0; 1493b8e80941Smrg vtx.use_const_fields = 0; 1494b8e80941Smrg vtx.offset = load_block ? 0 : 16; // first element is size of buffer 1495b8e80941Smrg vtx.endian = r600_endian_swap(32); 1496b8e80941Smrg vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1497848b8605Smrg 1498b8e80941Smrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1499b8e80941Smrg if (r) 1500b8e80941Smrg return r; 1501848b8605Smrg 1502b8e80941Smrg if (load_block) 1503b8e80941Smrg ctx->cs_block_size_loaded = true; 1504b8e80941Smrg else 1505b8e80941Smrg ctx->cs_grid_size_loaded = true; 1506b8e80941Smrg return t1; 1507848b8605Smrg} 1508848b8605Smrg 1509848b8605Smrgstatic void tgsi_src(struct r600_shader_ctx *ctx, 1510848b8605Smrg const struct tgsi_full_src_register *tgsi_src, 1511848b8605Smrg struct r600_shader_src *r600_src) 1512848b8605Smrg{ 1513848b8605Smrg memset(r600_src, 0, sizeof(*r600_src)); 1514848b8605Smrg r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1515848b8605Smrg r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1516848b8605Smrg r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1517848b8605Smrg r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1518848b8605Smrg r600_src->neg = tgsi_src->Register.Negate; 1519848b8605Smrg r600_src->abs = tgsi_src->Register.Absolute; 1520848b8605Smrg 1521b8e80941Smrg if (tgsi_src->Register.File == TGSI_FILE_TEMPORARY) { 1522b8e80941Smrg bool spilled; 1523b8e80941Smrg unsigned idx; 1524b8e80941Smrg 1525b8e80941Smrg idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_src->Register.Index, &spilled); 1526b8e80941Smrg 1527b8e80941Smrg if (spilled) { 1528b8e80941Smrg int reg = r600_get_temp(ctx); 1529b8e80941Smrg int r; 1530b8e80941Smrg 1531b8e80941Smrg r600_src->sel = reg; 1532b8e80941Smrg 1533b8e80941Smrg if (ctx->bc->chip_class < R700) { 1534b8e80941Smrg struct r600_bytecode_output cf; 1535b8e80941Smrg 1536b8e80941Smrg memset(&cf, 0, sizeof(struct r600_bytecode_output)); 1537b8e80941Smrg cf.op = CF_OP_MEM_SCRATCH; 1538b8e80941Smrg cf.elem_size = 3; 1539b8e80941Smrg cf.gpr = reg; 1540b8e80941Smrg cf.comp_mask = 0xF; 1541b8e80941Smrg cf.swizzle_x = 0; 1542b8e80941Smrg cf.swizzle_y = 1; 1543b8e80941Smrg cf.swizzle_z = 2; 1544b8e80941Smrg cf.swizzle_w = 3; 1545b8e80941Smrg cf.burst_count = 1; 1546b8e80941Smrg 1547b8e80941Smrg get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index, 1548b8e80941Smrg &cf.array_base, &cf.array_size); 1549b8e80941Smrg 1550b8e80941Smrg if (tgsi_src->Register.Indirect) { 1551b8e80941Smrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 1552b8e80941Smrg cf.index_gpr = ctx->bc->ar_reg; 1553b8e80941Smrg } 1554b8e80941Smrg else { 1555b8e80941Smrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ; 1556b8e80941Smrg cf.array_base += idx; 1557b8e80941Smrg cf.array_size = 0; 1558b8e80941Smrg } 1559b8e80941Smrg 1560b8e80941Smrg r = r600_bytecode_add_output(ctx->bc, &cf); 1561b8e80941Smrg } 1562b8e80941Smrg else { 1563b8e80941Smrg struct r600_bytecode_vtx vtx; 1564b8e80941Smrg 1565b8e80941Smrg if (r600_bytecode_get_need_wait_ack(ctx->bc)) { 1566b8e80941Smrg r600_bytecode_need_wait_ack(ctx->bc, false); 1567b8e80941Smrg r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 1568b8e80941Smrg } 1569b8e80941Smrg 1570b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1571b8e80941Smrg vtx.op = FETCH_OP_READ_SCRATCH; 1572b8e80941Smrg vtx.dst_gpr = reg; 1573b8e80941Smrg vtx.uncached = 1; // Must bypass cache since prior spill written in same invocation 1574b8e80941Smrg vtx.elem_size = 3; 1575b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 1576b8e80941Smrg vtx.num_format_all = V_038010_SQ_NUM_FORMAT_INT; 1577b8e80941Smrg vtx.dst_sel_x = tgsi_src->Register.SwizzleX; 1578b8e80941Smrg vtx.dst_sel_y = tgsi_src->Register.SwizzleY; 1579b8e80941Smrg vtx.dst_sel_z = tgsi_src->Register.SwizzleZ; 1580b8e80941Smrg vtx.dst_sel_w = tgsi_src->Register.SwizzleW; 1581b8e80941Smrg 1582b8e80941Smrg get_spilled_array_base_and_size(ctx, tgsi_src->Register.Index, 1583b8e80941Smrg &vtx.array_base, &vtx.array_size); 1584b8e80941Smrg 1585b8e80941Smrg if (tgsi_src->Register.Indirect) { 1586b8e80941Smrg vtx.indexed = 1; 1587b8e80941Smrg vtx.src_gpr = ctx->bc->ar_reg; 1588b8e80941Smrg } 1589b8e80941Smrg else { 1590b8e80941Smrg vtx.array_base += idx; 1591b8e80941Smrg vtx.array_size = 0; 1592b8e80941Smrg } 1593b8e80941Smrg 1594b8e80941Smrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1595b8e80941Smrg } 1596b8e80941Smrg 1597b8e80941Smrg if (r) 1598b8e80941Smrg return; 1599b8e80941Smrg } 1600b8e80941Smrg else { 1601b8e80941Smrg if (tgsi_src->Register.Indirect) 1602b8e80941Smrg r600_src->rel = V_SQ_REL_RELATIVE; 1603b8e80941Smrg 1604b8e80941Smrg r600_src->sel = idx; 1605b8e80941Smrg } 1606b8e80941Smrg 1607b8e80941Smrg return; 1608b8e80941Smrg } 1609b8e80941Smrg 1610848b8605Smrg if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1611848b8605Smrg int index; 1612848b8605Smrg if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1613848b8605Smrg (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1614848b8605Smrg (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1615848b8605Smrg 1616848b8605Smrg index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1617b8e80941Smrg r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); 1618848b8605Smrg if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1619848b8605Smrg return; 1620848b8605Smrg } 1621848b8605Smrg index = tgsi_src->Register.Index; 1622848b8605Smrg r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1623848b8605Smrg memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1624848b8605Smrg } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1625848b8605Smrg if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1626848b8605Smrg r600_src->swizzle[0] = 2; // Z value 1627b8e80941Smrg r600_src->swizzle[1] = 2; 1628b8e80941Smrg r600_src->swizzle[2] = 2; 1629b8e80941Smrg r600_src->swizzle[3] = 2; 1630848b8605Smrg r600_src->sel = ctx->face_gpr; 1631b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 1632b8e80941Smrg r600_src->swizzle[0] = 3; // W value 1633b8e80941Smrg r600_src->swizzle[1] = 3; 1634b8e80941Smrg r600_src->swizzle[2] = 3; 1635b8e80941Smrg r600_src->swizzle[3] = 3; 1636b8e80941Smrg r600_src->sel = ctx->fixed_pt_position_gpr; 1637b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 1638b8e80941Smrg r600_src->swizzle[0] = 0; 1639b8e80941Smrg r600_src->swizzle[1] = 1; 1640b8e80941Smrg r600_src->swizzle[2] = 4; 1641b8e80941Smrg r600_src->swizzle[3] = 4; 1642b8e80941Smrg r600_src->sel = load_sample_position(ctx, NULL, -1); 1643848b8605Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1644848b8605Smrg r600_src->swizzle[0] = 3; 1645848b8605Smrg r600_src->swizzle[1] = 3; 1646848b8605Smrg r600_src->swizzle[2] = 3; 1647848b8605Smrg r600_src->swizzle[3] = 3; 1648848b8605Smrg r600_src->sel = 0; 1649848b8605Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1650848b8605Smrg r600_src->swizzle[0] = 0; 1651848b8605Smrg r600_src->swizzle[1] = 0; 1652848b8605Smrg r600_src->swizzle[2] = 0; 1653848b8605Smrg r600_src->swizzle[3] = 0; 1654848b8605Smrg r600_src->sel = 0; 1655b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_THREAD_ID) { 1656b8e80941Smrg r600_src->sel = 0; 1657b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_ID) { 1658b8e80941Smrg r600_src->sel = 1; 1659b8e80941Smrg } else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1660b8e80941Smrg r600_src->swizzle[0] = 3; 1661b8e80941Smrg r600_src->swizzle[1] = 3; 1662b8e80941Smrg r600_src->swizzle[2] = 3; 1663b8e80941Smrg r600_src->swizzle[3] = 3; 1664b8e80941Smrg r600_src->sel = 1; 1665b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1666b8e80941Smrg r600_src->swizzle[0] = 2; 1667b8e80941Smrg r600_src->swizzle[1] = 2; 1668b8e80941Smrg r600_src->swizzle[2] = 2; 1669b8e80941Smrg r600_src->swizzle[3] = 2; 1670b8e80941Smrg r600_src->sel = 0; 1671b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) { 1672b8e80941Smrg r600_src->sel = 1; 1673b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) { 1674b8e80941Smrg r600_src->sel = 3; 1675b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { 1676b8e80941Smrg r600_src->sel = 2; 1677b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { 1678b8e80941Smrg r600_src->sel = ctx->tess_input_info; 1679b8e80941Smrg r600_src->swizzle[0] = 2; 1680b8e80941Smrg r600_src->swizzle[1] = 2; 1681b8e80941Smrg r600_src->swizzle[2] = 2; 1682b8e80941Smrg r600_src->swizzle[3] = 2; 1683b8e80941Smrg } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1684b8e80941Smrg r600_src->sel = 0; 1685b8e80941Smrg r600_src->swizzle[0] = 0; 1686b8e80941Smrg r600_src->swizzle[1] = 0; 1687b8e80941Smrg r600_src->swizzle[2] = 0; 1688b8e80941Smrg r600_src->swizzle[3] = 0; 1689b8e80941Smrg } else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1690b8e80941Smrg r600_src->sel = 0; 1691b8e80941Smrg r600_src->swizzle[0] = 3; 1692b8e80941Smrg r600_src->swizzle[1] = 3; 1693b8e80941Smrg r600_src->swizzle[2] = 3; 1694b8e80941Smrg r600_src->swizzle[3] = 3; 1695b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) { 1696b8e80941Smrg r600_src->sel = load_block_grid_size(ctx, false); 1697b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) { 1698b8e80941Smrg r600_src->sel = load_block_grid_size(ctx, true); 1699b8e80941Smrg } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) { 1700b8e80941Smrg r600_src->sel = ctx->helper_invoc_reg; 1701b8e80941Smrg r600_src->swizzle[0] = 0; 1702b8e80941Smrg r600_src->swizzle[1] = 0; 1703b8e80941Smrg r600_src->swizzle[2] = 0; 1704b8e80941Smrg r600_src->swizzle[3] = 0; 1705848b8605Smrg } 1706848b8605Smrg } else { 1707848b8605Smrg if (tgsi_src->Register.Indirect) 1708848b8605Smrg r600_src->rel = V_SQ_REL_RELATIVE; 1709848b8605Smrg r600_src->sel = tgsi_src->Register.Index; 1710848b8605Smrg r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1711848b8605Smrg } 1712848b8605Smrg if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1713848b8605Smrg if (tgsi_src->Register.Dimension) { 1714848b8605Smrg r600_src->kc_bank = tgsi_src->Dimension.Index; 1715b8e80941Smrg if (tgsi_src->Dimension.Indirect) { 1716b8e80941Smrg r600_src->kc_rel = 1; 1717b8e80941Smrg } 1718848b8605Smrg } 1719848b8605Smrg } 1720848b8605Smrg} 1721848b8605Smrg 1722848b8605Smrgstatic int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1723b8e80941Smrg unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1724848b8605Smrg unsigned int dst_reg) 1725848b8605Smrg{ 1726848b8605Smrg struct r600_bytecode_vtx vtx; 1727848b8605Smrg unsigned int ar_reg; 1728848b8605Smrg int r; 1729848b8605Smrg 1730848b8605Smrg if (offset) { 1731848b8605Smrg struct r600_bytecode_alu alu; 1732848b8605Smrg 1733848b8605Smrg memset(&alu, 0, sizeof(alu)); 1734848b8605Smrg 1735848b8605Smrg alu.op = ALU_OP2_ADD_INT; 1736848b8605Smrg alu.src[0].sel = ctx->bc->ar_reg; 1737848b8605Smrg alu.src[0].chan = ar_chan; 1738848b8605Smrg 1739848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1740848b8605Smrg alu.src[1].value = offset; 1741848b8605Smrg 1742848b8605Smrg alu.dst.sel = dst_reg; 1743848b8605Smrg alu.dst.chan = ar_chan; 1744848b8605Smrg alu.dst.write = 1; 1745848b8605Smrg alu.last = 1; 1746848b8605Smrg 1747848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1748848b8605Smrg return r; 1749848b8605Smrg 1750848b8605Smrg ar_reg = dst_reg; 1751848b8605Smrg } else { 1752848b8605Smrg ar_reg = ctx->bc->ar_reg; 1753848b8605Smrg } 1754848b8605Smrg 1755848b8605Smrg memset(&vtx, 0, sizeof(vtx)); 1756848b8605Smrg vtx.buffer_id = cb_idx; 1757b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1758848b8605Smrg vtx.src_gpr = ar_reg; 1759848b8605Smrg vtx.src_sel_x = ar_chan; 1760848b8605Smrg vtx.mega_fetch_count = 16; 1761848b8605Smrg vtx.dst_gpr = dst_reg; 1762848b8605Smrg vtx.dst_sel_x = 0; /* SEL_X */ 1763848b8605Smrg vtx.dst_sel_y = 1; /* SEL_Y */ 1764848b8605Smrg vtx.dst_sel_z = 2; /* SEL_Z */ 1765848b8605Smrg vtx.dst_sel_w = 3; /* SEL_W */ 1766848b8605Smrg vtx.data_format = FMT_32_32_32_32_FLOAT; 1767848b8605Smrg vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1768848b8605Smrg vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1769848b8605Smrg vtx.endian = r600_endian_swap(32); 1770b8e80941Smrg vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1771848b8605Smrg 1772848b8605Smrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1773848b8605Smrg return r; 1774848b8605Smrg 1775848b8605Smrg return 0; 1776848b8605Smrg} 1777848b8605Smrg 1778848b8605Smrgstatic int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1779848b8605Smrg{ 1780848b8605Smrg struct r600_bytecode_vtx vtx; 1781848b8605Smrg int r; 1782848b8605Smrg unsigned index = src->Register.Index; 1783848b8605Smrg unsigned vtx_id = src->Dimension.Index; 1784b8e80941Smrg int offset_reg = ctx->gs_rotated_input[vtx_id / 3]; 1785848b8605Smrg int offset_chan = vtx_id % 3; 1786b8e80941Smrg int t2 = 0; 1787848b8605Smrg 1788848b8605Smrg /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1789848b8605Smrg * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1790848b8605Smrg 1791b8e80941Smrg if (offset_reg == ctx->gs_rotated_input[0] && offset_chan == 2) 1792848b8605Smrg offset_chan = 3; 1793848b8605Smrg 1794b8e80941Smrg if (src->Dimension.Indirect || src->Register.Indirect) 1795b8e80941Smrg t2 = r600_get_temp(ctx); 1796b8e80941Smrg 1797848b8605Smrg if (src->Dimension.Indirect) { 1798848b8605Smrg int treg[3]; 1799848b8605Smrg struct r600_bytecode_alu alu; 1800848b8605Smrg int r, i; 1801b8e80941Smrg unsigned addr_reg; 1802b8e80941Smrg addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index); 1803b8e80941Smrg if (src->DimIndirect.Index > 0) { 1804b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 1805b8e80941Smrg ctx->bc->ar_reg, 0, 1806b8e80941Smrg addr_reg, 0, 1807b8e80941Smrg 0, 0); 1808b8e80941Smrg if (r) 1809b8e80941Smrg return r; 1810b8e80941Smrg } 1811b8e80941Smrg /* 1812848b8605Smrg we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1813848b8605Smrg at least this is what fglrx seems to do. */ 1814848b8605Smrg for (i = 0; i < 3; i++) { 1815848b8605Smrg treg[i] = r600_get_temp(ctx); 1816848b8605Smrg } 1817b8e80941Smrg r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1818b8e80941Smrg 1819848b8605Smrg for (i = 0; i < 3; i++) { 1820848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1821848b8605Smrg alu.op = ALU_OP1_MOV; 1822b8e80941Smrg alu.src[0].sel = ctx->gs_rotated_input[0]; 1823848b8605Smrg alu.src[0].chan = i == 2 ? 3 : i; 1824848b8605Smrg alu.dst.sel = treg[i]; 1825848b8605Smrg alu.dst.chan = 0; 1826848b8605Smrg alu.dst.write = 1; 1827848b8605Smrg alu.last = 1; 1828848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1829848b8605Smrg if (r) 1830848b8605Smrg return r; 1831848b8605Smrg } 1832848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1833848b8605Smrg alu.op = ALU_OP1_MOV; 1834848b8605Smrg alu.src[0].sel = treg[0]; 1835848b8605Smrg alu.src[0].rel = 1; 1836848b8605Smrg alu.dst.sel = t2; 1837848b8605Smrg alu.dst.write = 1; 1838848b8605Smrg alu.last = 1; 1839848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 1840848b8605Smrg if (r) 1841848b8605Smrg return r; 1842848b8605Smrg offset_reg = t2; 1843b8e80941Smrg offset_chan = 0; 1844848b8605Smrg } 1845848b8605Smrg 1846b8e80941Smrg if (src->Register.Indirect) { 1847b8e80941Smrg int addr_reg; 1848b8e80941Smrg unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID]; 1849b8e80941Smrg 1850b8e80941Smrg addr_reg = get_address_file_reg(ctx, src->Indirect.Index); 1851b8e80941Smrg 1852b8e80941Smrg /* pull the value from index_reg */ 1853b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1854b8e80941Smrg t2, 1, 1855b8e80941Smrg addr_reg, 0, 1856b8e80941Smrg V_SQ_ALU_SRC_LITERAL, first); 1857b8e80941Smrg if (r) 1858b8e80941Smrg return r; 1859b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1860b8e80941Smrg t2, 0, 1861b8e80941Smrg t2, 1, 1862b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 4, 1863b8e80941Smrg offset_reg, offset_chan); 1864b8e80941Smrg if (r) 1865b8e80941Smrg return r; 1866b8e80941Smrg offset_reg = t2; 1867b8e80941Smrg offset_chan = 0; 1868b8e80941Smrg index = src->Register.Index - first; 1869b8e80941Smrg } 1870848b8605Smrg 1871848b8605Smrg memset(&vtx, 0, sizeof(vtx)); 1872848b8605Smrg vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1873b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1874848b8605Smrg vtx.src_gpr = offset_reg; 1875848b8605Smrg vtx.src_sel_x = offset_chan; 1876848b8605Smrg vtx.offset = index * 16; /*bytes*/ 1877848b8605Smrg vtx.mega_fetch_count = 16; 1878848b8605Smrg vtx.dst_gpr = dst_reg; 1879848b8605Smrg vtx.dst_sel_x = 0; /* SEL_X */ 1880848b8605Smrg vtx.dst_sel_y = 1; /* SEL_Y */ 1881848b8605Smrg vtx.dst_sel_z = 2; /* SEL_Z */ 1882848b8605Smrg vtx.dst_sel_w = 3; /* SEL_W */ 1883848b8605Smrg if (ctx->bc->chip_class >= EVERGREEN) { 1884848b8605Smrg vtx.use_const_fields = 1; 1885848b8605Smrg } else { 1886848b8605Smrg vtx.data_format = FMT_32_32_32_32_FLOAT; 1887848b8605Smrg } 1888848b8605Smrg 1889848b8605Smrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1890848b8605Smrg return r; 1891848b8605Smrg 1892848b8605Smrg return 0; 1893848b8605Smrg} 1894848b8605Smrg 1895848b8605Smrgstatic int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1896848b8605Smrg{ 1897848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1898b8e80941Smrg unsigned i; 1899848b8605Smrg 1900848b8605Smrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1901848b8605Smrg struct tgsi_full_src_register *src = &inst->Src[i]; 1902848b8605Smrg 1903848b8605Smrg if (src->Register.File == TGSI_FILE_INPUT) { 1904848b8605Smrg if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1905848b8605Smrg /* primitive id is in R0.z */ 1906848b8605Smrg ctx->src[i].sel = 0; 1907848b8605Smrg ctx->src[i].swizzle[0] = 2; 1908848b8605Smrg } 1909848b8605Smrg } 1910848b8605Smrg if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1911848b8605Smrg int treg = r600_get_temp(ctx); 1912848b8605Smrg 1913848b8605Smrg fetch_gs_input(ctx, src, treg); 1914848b8605Smrg ctx->src[i].sel = treg; 1915b8e80941Smrg ctx->src[i].rel = 0; 1916848b8605Smrg } 1917848b8605Smrg } 1918848b8605Smrg return 0; 1919848b8605Smrg} 1920848b8605Smrg 1921848b8605Smrg 1922b8e80941Smrg/* Tessellation shaders pass outputs to the next shader using LDS. 1923b8e80941Smrg * 1924b8e80941Smrg * LS outputs = TCS(HS) inputs 1925b8e80941Smrg * TCS(HS) outputs = TES(DS) inputs 1926b8e80941Smrg * 1927b8e80941Smrg * The LDS layout is: 1928b8e80941Smrg * - TCS inputs for patch 0 1929b8e80941Smrg * - TCS inputs for patch 1 1930b8e80941Smrg * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 1931b8e80941Smrg * - ... 1932b8e80941Smrg * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 1933b8e80941Smrg * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 1934b8e80941Smrg * - TCS outputs for patch 1 1935b8e80941Smrg * - Per-patch TCS outputs for patch 1 1936b8e80941Smrg * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 1937b8e80941Smrg * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 1938b8e80941Smrg * - ... 1939b8e80941Smrg * 1940b8e80941Smrg * All three shaders VS(LS), TCS, TES share the same LDS space. 1941b8e80941Smrg */ 1942b8e80941Smrg/* this will return with the dw address in temp_reg.x */ 1943b8e80941Smrgstatic int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, 1944b8e80941Smrg const struct tgsi_full_dst_register *dst, 1945b8e80941Smrg const struct tgsi_full_src_register *src, 1946b8e80941Smrg int stride_bytes_reg, int stride_bytes_chan) 1947b8e80941Smrg{ 1948b8e80941Smrg struct tgsi_full_dst_register reg; 1949b8e80941Smrg ubyte *name, *index, *array_first; 1950b8e80941Smrg int r; 1951b8e80941Smrg int param; 1952b8e80941Smrg struct tgsi_shader_info *info = &ctx->info; 1953b8e80941Smrg /* Set the register description. The address computation is the same 1954b8e80941Smrg * for sources and destinations. */ 1955b8e80941Smrg if (src) { 1956b8e80941Smrg reg.Register.File = src->Register.File; 1957b8e80941Smrg reg.Register.Index = src->Register.Index; 1958b8e80941Smrg reg.Register.Indirect = src->Register.Indirect; 1959b8e80941Smrg reg.Register.Dimension = src->Register.Dimension; 1960b8e80941Smrg reg.Indirect = src->Indirect; 1961b8e80941Smrg reg.Dimension = src->Dimension; 1962b8e80941Smrg reg.DimIndirect = src->DimIndirect; 1963b8e80941Smrg } else 1964b8e80941Smrg reg = *dst; 1965b8e80941Smrg 1966b8e80941Smrg /* If the register is 2-dimensional (e.g. an array of vertices 1967b8e80941Smrg * in a primitive), calculate the base address of the vertex. */ 1968b8e80941Smrg if (reg.Register.Dimension) { 1969b8e80941Smrg int sel, chan; 1970b8e80941Smrg if (reg.Dimension.Indirect) { 1971b8e80941Smrg unsigned addr_reg; 1972b8e80941Smrg assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS); 1973b8e80941Smrg 1974b8e80941Smrg addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index); 1975b8e80941Smrg /* pull the value from index_reg */ 1976b8e80941Smrg sel = addr_reg; 1977b8e80941Smrg chan = 0; 1978b8e80941Smrg } else { 1979b8e80941Smrg sel = V_SQ_ALU_SRC_LITERAL; 1980b8e80941Smrg chan = reg.Dimension.Index; 1981848b8605Smrg } 1982848b8605Smrg 1983b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1984b8e80941Smrg temp_reg, 0, 1985b8e80941Smrg stride_bytes_reg, stride_bytes_chan, 1986b8e80941Smrg sel, chan, 1987b8e80941Smrg temp_reg, 0); 1988b8e80941Smrg if (r) 1989b8e80941Smrg return r; 1990b8e80941Smrg } 1991848b8605Smrg 1992b8e80941Smrg if (reg.Register.File == TGSI_FILE_INPUT) { 1993b8e80941Smrg name = info->input_semantic_name; 1994b8e80941Smrg index = info->input_semantic_index; 1995b8e80941Smrg array_first = info->input_array_first; 1996b8e80941Smrg } else if (reg.Register.File == TGSI_FILE_OUTPUT) { 1997b8e80941Smrg name = info->output_semantic_name; 1998b8e80941Smrg index = info->output_semantic_index; 1999b8e80941Smrg array_first = info->output_array_first; 2000b8e80941Smrg } else { 2001b8e80941Smrg assert(0); 2002b8e80941Smrg return -1; 2003b8e80941Smrg } 2004b8e80941Smrg if (reg.Register.Indirect) { 2005b8e80941Smrg int addr_reg; 2006b8e80941Smrg int first; 2007b8e80941Smrg /* Add the relative address of the element. */ 2008b8e80941Smrg if (reg.Indirect.ArrayID) 2009b8e80941Smrg first = array_first[reg.Indirect.ArrayID]; 2010b8e80941Smrg else 2011b8e80941Smrg first = reg.Register.Index; 2012b8e80941Smrg 2013b8e80941Smrg addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 2014b8e80941Smrg 2015b8e80941Smrg /* pull the value from index_reg */ 2016b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 2017b8e80941Smrg temp_reg, 0, 2018b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 16, 2019b8e80941Smrg addr_reg, 0, 2020b8e80941Smrg temp_reg, 0); 2021b8e80941Smrg if (r) 2022b8e80941Smrg return r; 2023b8e80941Smrg 2024b8e80941Smrg param = r600_get_lds_unique_index(name[first], 2025b8e80941Smrg index[first]); 2026b8e80941Smrg 2027b8e80941Smrg } else { 2028b8e80941Smrg param = r600_get_lds_unique_index(name[reg.Register.Index], 2029b8e80941Smrg index[reg.Register.Index]); 2030b8e80941Smrg } 2031b8e80941Smrg 2032b8e80941Smrg /* add to base_addr - passed in temp_reg.x */ 2033b8e80941Smrg if (param) { 2034b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2035b8e80941Smrg temp_reg, 0, 2036b8e80941Smrg temp_reg, 0, 2037b8e80941Smrg V_SQ_ALU_SRC_LITERAL, param * 16); 2038b8e80941Smrg if (r) 2039b8e80941Smrg return r; 2040b8e80941Smrg 2041b8e80941Smrg } 2042b8e80941Smrg return 0; 2043b8e80941Smrg} 2044b8e80941Smrg 2045b8e80941Smrgstatic int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 2046b8e80941Smrg unsigned dst_reg, unsigned mask) 2047b8e80941Smrg{ 2048b8e80941Smrg struct r600_bytecode_alu alu; 2049b8e80941Smrg int r, i, lasti; 2050b8e80941Smrg 2051b8e80941Smrg if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 2052b8e80941Smrg ctx->bc->force_add_cf = 1; 2053b8e80941Smrg 2054b8e80941Smrg lasti = tgsi_last_instruction(mask); 2055b8e80941Smrg for (i = 1; i <= lasti; i++) { 2056b8e80941Smrg if (!(mask & (1 << i))) 2057b8e80941Smrg continue; 2058b8e80941Smrg 2059b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2060b8e80941Smrg temp_reg, i, 2061b8e80941Smrg temp_reg, 0, 2062b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 4 * i); 2063b8e80941Smrg if (r) 2064b8e80941Smrg return r; 2065b8e80941Smrg } 2066b8e80941Smrg for (i = 0; i <= lasti; i++) { 2067b8e80941Smrg if (!(mask & (1 << i))) 2068b8e80941Smrg continue; 2069b8e80941Smrg 2070b8e80941Smrg /* emit an LDS_READ_RET */ 2071b8e80941Smrg memset(&alu, 0, sizeof(alu)); 2072b8e80941Smrg alu.op = LDS_OP1_LDS_READ_RET; 2073b8e80941Smrg alu.src[0].sel = temp_reg; 2074b8e80941Smrg alu.src[0].chan = i; 2075b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0; 2076b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_0; 2077b8e80941Smrg alu.dst.chan = 0; 2078b8e80941Smrg alu.is_lds_idx_op = true; 2079b8e80941Smrg alu.last = 1; 2080b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2081b8e80941Smrg if (r) 2082b8e80941Smrg return r; 2083b8e80941Smrg } 2084b8e80941Smrg for (i = 0; i <= lasti; i++) { 2085b8e80941Smrg if (!(mask & (1 << i))) 2086b8e80941Smrg continue; 2087b8e80941Smrg 2088b8e80941Smrg /* then read from LDS_OQ_A_POP */ 2089b8e80941Smrg memset(&alu, 0, sizeof(alu)); 2090b8e80941Smrg 2091b8e80941Smrg alu.op = ALU_OP1_MOV; 2092b8e80941Smrg alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 2093b8e80941Smrg alu.src[0].chan = 0; 2094b8e80941Smrg alu.dst.sel = dst_reg; 2095b8e80941Smrg alu.dst.chan = i; 2096b8e80941Smrg alu.dst.write = 1; 2097b8e80941Smrg alu.last = 1; 2098b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2099b8e80941Smrg if (r) 2100b8e80941Smrg return r; 2101b8e80941Smrg } 2102b8e80941Smrg return 0; 2103b8e80941Smrg} 2104b8e80941Smrg 2105b8e80941Smrgstatic int fetch_mask(struct tgsi_src_register *reg) 2106b8e80941Smrg{ 2107b8e80941Smrg int mask = 0; 2108b8e80941Smrg mask |= 1 << reg->SwizzleX; 2109b8e80941Smrg mask |= 1 << reg->SwizzleY; 2110b8e80941Smrg mask |= 1 << reg->SwizzleZ; 2111b8e80941Smrg mask |= 1 << reg->SwizzleW; 2112b8e80941Smrg return mask; 2113b8e80941Smrg} 2114b8e80941Smrg 2115b8e80941Smrgstatic int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 2116b8e80941Smrg{ 2117b8e80941Smrg int r; 2118b8e80941Smrg unsigned temp_reg = r600_get_temp(ctx); 2119b8e80941Smrg 2120b8e80941Smrg r = get_lds_offset0(ctx, 2, temp_reg, 2121b8e80941Smrg src->Register.Dimension ? false : true); 2122b8e80941Smrg if (r) 2123b8e80941Smrg return r; 2124b8e80941Smrg 2125b8e80941Smrg /* the base address is now in temp.x */ 2126b8e80941Smrg r = r600_get_byte_address(ctx, temp_reg, 2127b8e80941Smrg NULL, src, ctx->tess_output_info, 1); 2128b8e80941Smrg if (r) 2129b8e80941Smrg return r; 2130b8e80941Smrg 2131b8e80941Smrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 2132b8e80941Smrg if (r) 2133b8e80941Smrg return r; 2134b8e80941Smrg return 0; 2135b8e80941Smrg} 2136b8e80941Smrg 2137b8e80941Smrgstatic int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 2138b8e80941Smrg{ 2139b8e80941Smrg int r; 2140b8e80941Smrg unsigned temp_reg = r600_get_temp(ctx); 2141b8e80941Smrg 2142b8e80941Smrg /* t.x = ips * r0.y */ 2143b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 2144b8e80941Smrg temp_reg, 0, 2145b8e80941Smrg ctx->tess_input_info, 0, 2146b8e80941Smrg 0, 1); 2147b8e80941Smrg 2148b8e80941Smrg if (r) 2149b8e80941Smrg return r; 2150b8e80941Smrg 2151b8e80941Smrg /* the base address is now in temp.x */ 2152b8e80941Smrg r = r600_get_byte_address(ctx, temp_reg, 2153b8e80941Smrg NULL, src, ctx->tess_input_info, 1); 2154b8e80941Smrg if (r) 2155b8e80941Smrg return r; 2156b8e80941Smrg 2157b8e80941Smrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 2158b8e80941Smrg if (r) 2159b8e80941Smrg return r; 2160b8e80941Smrg return 0; 2161b8e80941Smrg} 2162b8e80941Smrg 2163b8e80941Smrgstatic int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 2164b8e80941Smrg{ 2165b8e80941Smrg int r; 2166b8e80941Smrg unsigned temp_reg = r600_get_temp(ctx); 2167b8e80941Smrg 2168b8e80941Smrg r = get_lds_offset0(ctx, 1, temp_reg, 2169b8e80941Smrg src->Register.Dimension ? false : true); 2170b8e80941Smrg if (r) 2171b8e80941Smrg return r; 2172b8e80941Smrg /* the base address is now in temp.x */ 2173b8e80941Smrg r = r600_get_byte_address(ctx, temp_reg, 2174b8e80941Smrg NULL, src, 2175b8e80941Smrg ctx->tess_output_info, 1); 2176b8e80941Smrg if (r) 2177b8e80941Smrg return r; 2178b8e80941Smrg 2179b8e80941Smrg r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); 2180b8e80941Smrg if (r) 2181b8e80941Smrg return r; 2182b8e80941Smrg return 0; 2183b8e80941Smrg} 2184b8e80941Smrg 2185b8e80941Smrgstatic int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 2186b8e80941Smrg{ 2187b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2188b8e80941Smrg unsigned i; 2189b8e80941Smrg 2190b8e80941Smrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2191b8e80941Smrg struct tgsi_full_src_register *src = &inst->Src[i]; 2192b8e80941Smrg 2193b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 2194b8e80941Smrg int treg = r600_get_temp(ctx); 2195b8e80941Smrg fetch_tes_input(ctx, src, treg); 2196b8e80941Smrg ctx->src[i].sel = treg; 2197b8e80941Smrg ctx->src[i].rel = 0; 2198b8e80941Smrg } 2199b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 2200b8e80941Smrg int treg = r600_get_temp(ctx); 2201b8e80941Smrg fetch_tcs_input(ctx, src, treg); 2202b8e80941Smrg ctx->src[i].sel = treg; 2203b8e80941Smrg ctx->src[i].rel = 0; 2204b8e80941Smrg } 2205b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 2206b8e80941Smrg int treg = r600_get_temp(ctx); 2207b8e80941Smrg fetch_tcs_output(ctx, src, treg); 2208b8e80941Smrg ctx->src[i].sel = treg; 2209b8e80941Smrg ctx->src[i].rel = 0; 2210b8e80941Smrg } 2211b8e80941Smrg } 2212b8e80941Smrg return 0; 2213b8e80941Smrg} 2214b8e80941Smrg 2215b8e80941Smrgstatic int tgsi_split_constant(struct r600_shader_ctx *ctx) 2216b8e80941Smrg{ 2217b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2218b8e80941Smrg struct r600_bytecode_alu alu; 2219b8e80941Smrg int i, j, k, nconst, r; 2220b8e80941Smrg 2221b8e80941Smrg for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 2222b8e80941Smrg if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 2223b8e80941Smrg nconst++; 2224b8e80941Smrg } 2225b8e80941Smrg tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 2226b8e80941Smrg } 2227b8e80941Smrg for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 2228b8e80941Smrg if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 2229b8e80941Smrg continue; 2230b8e80941Smrg } 2231b8e80941Smrg 2232b8e80941Smrg if (ctx->src[i].rel) { 2233b8e80941Smrg int chan = inst->Src[i].Indirect.Swizzle; 2234b8e80941Smrg int treg = r600_get_temp(ctx); 2235b8e80941Smrg if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 2236b8e80941Smrg return r; 2237b8e80941Smrg 2238b8e80941Smrg ctx->src[i].kc_bank = 0; 2239b8e80941Smrg ctx->src[i].kc_rel = 0; 2240b8e80941Smrg ctx->src[i].sel = treg; 2241b8e80941Smrg ctx->src[i].rel = 0; 2242848b8605Smrg j--; 2243848b8605Smrg } else if (j > 0) { 2244848b8605Smrg int treg = r600_get_temp(ctx); 2245848b8605Smrg for (k = 0; k < 4; k++) { 2246848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2247848b8605Smrg alu.op = ALU_OP1_MOV; 2248848b8605Smrg alu.src[0].sel = ctx->src[i].sel; 2249848b8605Smrg alu.src[0].chan = k; 2250848b8605Smrg alu.src[0].rel = ctx->src[i].rel; 2251848b8605Smrg alu.src[0].kc_bank = ctx->src[i].kc_bank; 2252b8e80941Smrg alu.src[0].kc_rel = ctx->src[i].kc_rel; 2253848b8605Smrg alu.dst.sel = treg; 2254848b8605Smrg alu.dst.chan = k; 2255848b8605Smrg alu.dst.write = 1; 2256848b8605Smrg if (k == 3) 2257848b8605Smrg alu.last = 1; 2258848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2259848b8605Smrg if (r) 2260848b8605Smrg return r; 2261848b8605Smrg } 2262848b8605Smrg ctx->src[i].sel = treg; 2263848b8605Smrg ctx->src[i].rel =0; 2264848b8605Smrg j--; 2265848b8605Smrg } 2266848b8605Smrg } 2267848b8605Smrg return 0; 2268848b8605Smrg} 2269848b8605Smrg 2270848b8605Smrg/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 2271848b8605Smrgstatic int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 2272848b8605Smrg{ 2273848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2274848b8605Smrg struct r600_bytecode_alu alu; 2275848b8605Smrg int i, j, k, nliteral, r; 2276848b8605Smrg 2277848b8605Smrg for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 2278848b8605Smrg if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 2279848b8605Smrg nliteral++; 2280848b8605Smrg } 2281848b8605Smrg } 2282848b8605Smrg for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 2283848b8605Smrg if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 2284848b8605Smrg int treg = r600_get_temp(ctx); 2285848b8605Smrg for (k = 0; k < 4; k++) { 2286848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2287848b8605Smrg alu.op = ALU_OP1_MOV; 2288848b8605Smrg alu.src[0].sel = ctx->src[i].sel; 2289848b8605Smrg alu.src[0].chan = k; 2290848b8605Smrg alu.src[0].value = ctx->src[i].value[k]; 2291848b8605Smrg alu.dst.sel = treg; 2292848b8605Smrg alu.dst.chan = k; 2293848b8605Smrg alu.dst.write = 1; 2294848b8605Smrg if (k == 3) 2295848b8605Smrg alu.last = 1; 2296848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2297848b8605Smrg if (r) 2298848b8605Smrg return r; 2299848b8605Smrg } 2300848b8605Smrg ctx->src[i].sel = treg; 2301848b8605Smrg j--; 2302848b8605Smrg } 2303848b8605Smrg } 2304848b8605Smrg return 0; 2305848b8605Smrg} 2306848b8605Smrg 2307848b8605Smrgstatic int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 2308848b8605Smrg{ 2309848b8605Smrg int i, r, count = ctx->shader->ninput; 2310848b8605Smrg 2311848b8605Smrg for (i = 0; i < count; i++) { 2312848b8605Smrg if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 2313848b8605Smrg r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 2314848b8605Smrg if (r) 2315848b8605Smrg return r; 2316848b8605Smrg } 2317848b8605Smrg } 2318848b8605Smrg return 0; 2319848b8605Smrg} 2320848b8605Smrg 2321b8e80941Smrgstatic int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 2322b8e80941Smrg int stream, unsigned *stream_item_size UNUSED) 2323848b8605Smrg{ 2324848b8605Smrg unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 2325b8e80941Smrg unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 2326b8e80941Smrg int j, r; 2327b8e80941Smrg unsigned i; 2328848b8605Smrg 2329848b8605Smrg /* Sanity checking. */ 2330b8e80941Smrg if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 2331848b8605Smrg R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 2332848b8605Smrg r = -EINVAL; 2333848b8605Smrg goto out_err; 2334848b8605Smrg } 2335848b8605Smrg for (i = 0; i < so->num_outputs; i++) { 2336848b8605Smrg if (so->output[i].output_buffer >= 4) { 2337848b8605Smrg R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 2338848b8605Smrg so->output[i].output_buffer); 2339848b8605Smrg r = -EINVAL; 2340848b8605Smrg goto out_err; 2341848b8605Smrg } 2342848b8605Smrg } 2343848b8605Smrg 2344848b8605Smrg /* Initialize locations where the outputs are stored. */ 2345848b8605Smrg for (i = 0; i < so->num_outputs; i++) { 2346848b8605Smrg 2347b8e80941Smrg so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 2348b8e80941Smrg start_comp[i] = so->output[i].start_component; 2349848b8605Smrg /* Lower outputs with dst_offset < start_component. 2350848b8605Smrg * 2351848b8605Smrg * We can only output 4D vectors with a write mask, e.g. we can 2352848b8605Smrg * only output the W component at offset 3, etc. If we want 2353848b8605Smrg * to store Y, Z, or W at buffer offset 0, we need to use MOV 2354848b8605Smrg * to move it to X and output X. */ 2355848b8605Smrg if (so->output[i].dst_offset < so->output[i].start_component) { 2356848b8605Smrg unsigned tmp = r600_get_temp(ctx); 2357848b8605Smrg 2358848b8605Smrg for (j = 0; j < so->output[i].num_components; j++) { 2359848b8605Smrg struct r600_bytecode_alu alu; 2360848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2361848b8605Smrg alu.op = ALU_OP1_MOV; 2362848b8605Smrg alu.src[0].sel = so_gpr[i]; 2363848b8605Smrg alu.src[0].chan = so->output[i].start_component + j; 2364848b8605Smrg 2365848b8605Smrg alu.dst.sel = tmp; 2366848b8605Smrg alu.dst.chan = j; 2367848b8605Smrg alu.dst.write = 1; 2368848b8605Smrg if (j == so->output[i].num_components - 1) 2369848b8605Smrg alu.last = 1; 2370848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2371848b8605Smrg if (r) 2372848b8605Smrg return r; 2373848b8605Smrg } 2374b8e80941Smrg start_comp[i] = 0; 2375848b8605Smrg so_gpr[i] = tmp; 2376848b8605Smrg } 2377848b8605Smrg } 2378848b8605Smrg 2379848b8605Smrg /* Write outputs to buffers. */ 2380848b8605Smrg for (i = 0; i < so->num_outputs; i++) { 2381848b8605Smrg struct r600_bytecode_output output; 2382848b8605Smrg 2383b8e80941Smrg if (stream != -1 && stream != so->output[i].stream) 2384b8e80941Smrg continue; 2385b8e80941Smrg 2386848b8605Smrg memset(&output, 0, sizeof(struct r600_bytecode_output)); 2387848b8605Smrg output.gpr = so_gpr[i]; 2388b8e80941Smrg output.elem_size = so->output[i].num_components - 1; 2389b8e80941Smrg if (output.elem_size == 2) 2390b8e80941Smrg output.elem_size = 3; // 3 not supported, write 4 with junk at end 2391b8e80941Smrg output.array_base = so->output[i].dst_offset - start_comp[i]; 2392848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2393848b8605Smrg output.burst_count = 1; 2394848b8605Smrg /* array_size is an upper limit for the burst_count 2395848b8605Smrg * with MEM_STREAM instructions */ 2396848b8605Smrg output.array_size = 0xFFF; 2397b8e80941Smrg output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 2398b8e80941Smrg 2399848b8605Smrg if (ctx->bc->chip_class >= EVERGREEN) { 2400848b8605Smrg switch (so->output[i].output_buffer) { 2401848b8605Smrg case 0: 2402848b8605Smrg output.op = CF_OP_MEM_STREAM0_BUF0; 2403848b8605Smrg break; 2404848b8605Smrg case 1: 2405848b8605Smrg output.op = CF_OP_MEM_STREAM0_BUF1; 2406848b8605Smrg break; 2407848b8605Smrg case 2: 2408848b8605Smrg output.op = CF_OP_MEM_STREAM0_BUF2; 2409848b8605Smrg break; 2410848b8605Smrg case 3: 2411848b8605Smrg output.op = CF_OP_MEM_STREAM0_BUF3; 2412848b8605Smrg break; 2413848b8605Smrg } 2414b8e80941Smrg output.op += so->output[i].stream * 4; 2415b8e80941Smrg assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); 2416b8e80941Smrg ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 2417848b8605Smrg } else { 2418848b8605Smrg switch (so->output[i].output_buffer) { 2419848b8605Smrg case 0: 2420848b8605Smrg output.op = CF_OP_MEM_STREAM0; 2421848b8605Smrg break; 2422848b8605Smrg case 1: 2423848b8605Smrg output.op = CF_OP_MEM_STREAM1; 2424848b8605Smrg break; 2425848b8605Smrg case 2: 2426848b8605Smrg output.op = CF_OP_MEM_STREAM2; 2427848b8605Smrg break; 2428848b8605Smrg case 3: 2429848b8605Smrg output.op = CF_OP_MEM_STREAM3; 2430848b8605Smrg break; 2431848b8605Smrg } 2432b8e80941Smrg ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 2433848b8605Smrg } 2434848b8605Smrg r = r600_bytecode_add_output(ctx->bc, &output); 2435848b8605Smrg if (r) 2436848b8605Smrg goto out_err; 2437848b8605Smrg } 2438848b8605Smrg return 0; 2439848b8605Smrgout_err: 2440848b8605Smrg return r; 2441848b8605Smrg} 2442848b8605Smrg 2443848b8605Smrgstatic void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 2444848b8605Smrg{ 2445848b8605Smrg struct r600_bytecode_alu alu; 2446848b8605Smrg unsigned reg; 2447848b8605Smrg 2448848b8605Smrg if (!ctx->shader->vs_out_edgeflag) 2449848b8605Smrg return; 2450848b8605Smrg 2451848b8605Smrg reg = ctx->shader->output[ctx->edgeflag_output].gpr; 2452848b8605Smrg 2453848b8605Smrg /* clamp(x, 0, 1) */ 2454848b8605Smrg memset(&alu, 0, sizeof(alu)); 2455848b8605Smrg alu.op = ALU_OP1_MOV; 2456848b8605Smrg alu.src[0].sel = reg; 2457848b8605Smrg alu.dst.sel = reg; 2458848b8605Smrg alu.dst.write = 1; 2459848b8605Smrg alu.dst.clamp = 1; 2460848b8605Smrg alu.last = 1; 2461848b8605Smrg r600_bytecode_add_alu(ctx->bc, &alu); 2462848b8605Smrg 2463848b8605Smrg memset(&alu, 0, sizeof(alu)); 2464848b8605Smrg alu.op = ALU_OP1_FLT_TO_INT; 2465848b8605Smrg alu.src[0].sel = reg; 2466848b8605Smrg alu.dst.sel = reg; 2467848b8605Smrg alu.dst.write = 1; 2468848b8605Smrg alu.last = 1; 2469848b8605Smrg r600_bytecode_add_alu(ctx->bc, &alu); 2470848b8605Smrg} 2471848b8605Smrg 2472848b8605Smrgstatic int generate_gs_copy_shader(struct r600_context *rctx, 2473848b8605Smrg struct r600_pipe_shader *gs, 2474848b8605Smrg struct pipe_stream_output_info *so) 2475848b8605Smrg{ 2476848b8605Smrg struct r600_shader_ctx ctx = {}; 2477848b8605Smrg struct r600_shader *gs_shader = &gs->shader; 2478848b8605Smrg struct r600_pipe_shader *cshader; 2479b8e80941Smrg unsigned ocnt = gs_shader->noutput; 2480848b8605Smrg struct r600_bytecode_alu alu; 2481848b8605Smrg struct r600_bytecode_vtx vtx; 2482848b8605Smrg struct r600_bytecode_output output; 2483848b8605Smrg struct r600_bytecode_cf *cf_jump, *cf_pop, 2484848b8605Smrg *last_exp_pos = NULL, *last_exp_param = NULL; 2485b8e80941Smrg int next_clip_pos = 61, next_param = 0; 2486b8e80941Smrg unsigned i, j; 2487b8e80941Smrg int ring; 2488b8e80941Smrg bool only_ring_0 = true; 2489848b8605Smrg cshader = calloc(1, sizeof(struct r600_pipe_shader)); 2490848b8605Smrg if (!cshader) 2491848b8605Smrg return 0; 2492848b8605Smrg 2493848b8605Smrg memcpy(cshader->shader.output, gs_shader->output, ocnt * 2494848b8605Smrg sizeof(struct r600_shader_io)); 2495848b8605Smrg 2496848b8605Smrg cshader->shader.noutput = ocnt; 2497848b8605Smrg 2498848b8605Smrg ctx.shader = &cshader->shader; 2499848b8605Smrg ctx.bc = &ctx.shader->bc; 2500b8e80941Smrg ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX; 2501848b8605Smrg 2502848b8605Smrg r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 2503848b8605Smrg rctx->screen->has_compressed_msaa_texturing); 2504848b8605Smrg 2505848b8605Smrg ctx.bc->isa = rctx->isa; 2506848b8605Smrg 2507b8e80941Smrg cf_jump = NULL; 2508b8e80941Smrg memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 2509b8e80941Smrg 2510848b8605Smrg /* R0.x = R0.x & 0x3fffffff */ 2511848b8605Smrg memset(&alu, 0, sizeof(alu)); 2512848b8605Smrg alu.op = ALU_OP2_AND_INT; 2513848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2514848b8605Smrg alu.src[1].value = 0x3fffffff; 2515848b8605Smrg alu.dst.write = 1; 2516848b8605Smrg r600_bytecode_add_alu(ctx.bc, &alu); 2517848b8605Smrg 2518848b8605Smrg /* R0.y = R0.x >> 30 */ 2519848b8605Smrg memset(&alu, 0, sizeof(alu)); 2520848b8605Smrg alu.op = ALU_OP2_LSHR_INT; 2521848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2522848b8605Smrg alu.src[1].value = 0x1e; 2523848b8605Smrg alu.dst.chan = 1; 2524848b8605Smrg alu.dst.write = 1; 2525848b8605Smrg alu.last = 1; 2526848b8605Smrg r600_bytecode_add_alu(ctx.bc, &alu); 2527848b8605Smrg 2528848b8605Smrg /* fetch vertex data from GSVS ring */ 2529848b8605Smrg for (i = 0; i < ocnt; ++i) { 2530848b8605Smrg struct r600_shader_io *out = &ctx.shader->output[i]; 2531b8e80941Smrg 2532848b8605Smrg out->gpr = i + 1; 2533848b8605Smrg out->ring_offset = i * 16; 2534848b8605Smrg 2535848b8605Smrg memset(&vtx, 0, sizeof(vtx)); 2536848b8605Smrg vtx.op = FETCH_OP_VFETCH; 2537848b8605Smrg vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 2538b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2539b8e80941Smrg vtx.mega_fetch_count = 16; 2540848b8605Smrg vtx.offset = out->ring_offset; 2541848b8605Smrg vtx.dst_gpr = out->gpr; 2542b8e80941Smrg vtx.src_gpr = 0; 2543848b8605Smrg vtx.dst_sel_x = 0; 2544848b8605Smrg vtx.dst_sel_y = 1; 2545848b8605Smrg vtx.dst_sel_z = 2; 2546848b8605Smrg vtx.dst_sel_w = 3; 2547848b8605Smrg if (rctx->b.chip_class >= EVERGREEN) { 2548848b8605Smrg vtx.use_const_fields = 1; 2549848b8605Smrg } else { 2550848b8605Smrg vtx.data_format = FMT_32_32_32_32_FLOAT; 2551848b8605Smrg } 2552848b8605Smrg 2553848b8605Smrg r600_bytecode_add_vtx(ctx.bc, &vtx); 2554848b8605Smrg } 2555b8e80941Smrg ctx.temp_reg = i + 1; 2556b8e80941Smrg for (ring = 3; ring >= 0; --ring) { 2557b8e80941Smrg bool enabled = false; 2558b8e80941Smrg for (i = 0; i < so->num_outputs; i++) { 2559b8e80941Smrg if (so->output[i].stream == ring) { 2560b8e80941Smrg enabled = true; 2561b8e80941Smrg if (ring > 0) 2562b8e80941Smrg only_ring_0 = false; 2563b8e80941Smrg break; 2564b8e80941Smrg } 2565b8e80941Smrg } 2566b8e80941Smrg if (ring != 0 && !enabled) { 2567b8e80941Smrg cshader->shader.ring_item_sizes[ring] = 0; 2568b8e80941Smrg continue; 2569b8e80941Smrg } 2570b8e80941Smrg 2571b8e80941Smrg if (cf_jump) { 2572b8e80941Smrg // Patch up jump label 2573b8e80941Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2574b8e80941Smrg cf_pop = ctx.bc->cf_last; 2575b8e80941Smrg 2576b8e80941Smrg cf_jump->cf_addr = cf_pop->id + 2; 2577b8e80941Smrg cf_jump->pop_count = 1; 2578b8e80941Smrg cf_pop->cf_addr = cf_pop->id + 2; 2579b8e80941Smrg cf_pop->pop_count = 1; 2580b8e80941Smrg } 2581b8e80941Smrg 2582b8e80941Smrg /* PRED_SETE_INT __, R0.y, ring */ 2583b8e80941Smrg memset(&alu, 0, sizeof(alu)); 2584b8e80941Smrg alu.op = ALU_OP2_PRED_SETE_INT; 2585b8e80941Smrg alu.src[0].chan = 1; 2586b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2587b8e80941Smrg alu.src[1].value = ring; 2588b8e80941Smrg alu.execute_mask = 1; 2589b8e80941Smrg alu.update_pred = 1; 2590b8e80941Smrg alu.last = 1; 2591b8e80941Smrg r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 2592b8e80941Smrg 2593b8e80941Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 2594b8e80941Smrg cf_jump = ctx.bc->cf_last; 2595b8e80941Smrg 2596b8e80941Smrg if (enabled) 2597b8e80941Smrg emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]); 2598b8e80941Smrg cshader->shader.ring_item_sizes[ring] = ocnt * 16; 2599b8e80941Smrg } 2600b8e80941Smrg 2601b8e80941Smrg /* bc adds nops - copy it */ 2602b8e80941Smrg if (ctx.bc->chip_class == R600) { 2603b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2604b8e80941Smrg alu.op = ALU_OP0_NOP; 2605b8e80941Smrg alu.last = 1; 2606b8e80941Smrg r600_bytecode_add_alu(ctx.bc, &alu); 2607848b8605Smrg 2608b8e80941Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2609b8e80941Smrg } 2610848b8605Smrg 2611848b8605Smrg /* export vertex data */ 2612848b8605Smrg /* XXX factor out common code with r600_shader_from_tgsi ? */ 2613848b8605Smrg for (i = 0; i < ocnt; ++i) { 2614848b8605Smrg struct r600_shader_io *out = &ctx.shader->output[i]; 2615b8e80941Smrg bool instream0 = true; 2616848b8605Smrg if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 2617848b8605Smrg continue; 2618848b8605Smrg 2619b8e80941Smrg for (j = 0; j < so->num_outputs; j++) { 2620b8e80941Smrg if (so->output[j].register_index == i) { 2621b8e80941Smrg if (so->output[j].stream == 0) 2622b8e80941Smrg break; 2623b8e80941Smrg if (so->output[j].stream > 0) 2624b8e80941Smrg instream0 = false; 2625b8e80941Smrg } 2626b8e80941Smrg } 2627b8e80941Smrg if (!instream0) 2628b8e80941Smrg continue; 2629848b8605Smrg memset(&output, 0, sizeof(output)); 2630848b8605Smrg output.gpr = out->gpr; 2631848b8605Smrg output.elem_size = 3; 2632848b8605Smrg output.swizzle_x = 0; 2633848b8605Smrg output.swizzle_y = 1; 2634848b8605Smrg output.swizzle_z = 2; 2635848b8605Smrg output.swizzle_w = 3; 2636848b8605Smrg output.burst_count = 1; 2637848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2638848b8605Smrg output.op = CF_OP_EXPORT; 2639848b8605Smrg switch (out->name) { 2640848b8605Smrg case TGSI_SEMANTIC_POSITION: 2641848b8605Smrg output.array_base = 60; 2642848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2643848b8605Smrg break; 2644848b8605Smrg 2645848b8605Smrg case TGSI_SEMANTIC_PSIZE: 2646848b8605Smrg output.array_base = 61; 2647848b8605Smrg if (next_clip_pos == 61) 2648848b8605Smrg next_clip_pos = 62; 2649848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2650848b8605Smrg output.swizzle_y = 7; 2651848b8605Smrg output.swizzle_z = 7; 2652848b8605Smrg output.swizzle_w = 7; 2653848b8605Smrg ctx.shader->vs_out_misc_write = 1; 2654848b8605Smrg ctx.shader->vs_out_point_size = 1; 2655848b8605Smrg break; 2656848b8605Smrg case TGSI_SEMANTIC_LAYER: 2657848b8605Smrg if (out->spi_sid) { 2658848b8605Smrg /* duplicate it as PARAM to pass to the pixel shader */ 2659848b8605Smrg output.array_base = next_param++; 2660848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2661848b8605Smrg last_exp_param = ctx.bc->cf_last; 2662848b8605Smrg } 2663848b8605Smrg output.array_base = 61; 2664848b8605Smrg if (next_clip_pos == 61) 2665848b8605Smrg next_clip_pos = 62; 2666848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2667848b8605Smrg output.swizzle_x = 7; 2668848b8605Smrg output.swizzle_y = 7; 2669848b8605Smrg output.swizzle_z = 0; 2670848b8605Smrg output.swizzle_w = 7; 2671848b8605Smrg ctx.shader->vs_out_misc_write = 1; 2672848b8605Smrg ctx.shader->vs_out_layer = 1; 2673848b8605Smrg break; 2674848b8605Smrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 2675848b8605Smrg if (out->spi_sid) { 2676848b8605Smrg /* duplicate it as PARAM to pass to the pixel shader */ 2677848b8605Smrg output.array_base = next_param++; 2678848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2679848b8605Smrg last_exp_param = ctx.bc->cf_last; 2680848b8605Smrg } 2681848b8605Smrg output.array_base = 61; 2682848b8605Smrg if (next_clip_pos == 61) 2683848b8605Smrg next_clip_pos = 62; 2684848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2685848b8605Smrg ctx.shader->vs_out_misc_write = 1; 2686848b8605Smrg ctx.shader->vs_out_viewport = 1; 2687848b8605Smrg output.swizzle_x = 7; 2688848b8605Smrg output.swizzle_y = 7; 2689848b8605Smrg output.swizzle_z = 7; 2690848b8605Smrg output.swizzle_w = 0; 2691848b8605Smrg break; 2692848b8605Smrg case TGSI_SEMANTIC_CLIPDIST: 2693848b8605Smrg /* spi_sid is 0 for clipdistance outputs that were generated 2694848b8605Smrg * for clipvertex - we don't need to pass them to PS */ 2695848b8605Smrg ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 2696b8e80941Smrg ctx.shader->cull_dist_write = gs->shader.cull_dist_write; 2697b8e80941Smrg ctx.shader->cc_dist_mask = gs->shader.cc_dist_mask; 2698848b8605Smrg if (out->spi_sid) { 2699848b8605Smrg /* duplicate it as PARAM to pass to the pixel shader */ 2700848b8605Smrg output.array_base = next_param++; 2701848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2702848b8605Smrg last_exp_param = ctx.bc->cf_last; 2703848b8605Smrg } 2704848b8605Smrg output.array_base = next_clip_pos++; 2705848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2706848b8605Smrg break; 2707848b8605Smrg case TGSI_SEMANTIC_FOG: 2708848b8605Smrg output.swizzle_y = 4; /* 0 */ 2709848b8605Smrg output.swizzle_z = 4; /* 0 */ 2710848b8605Smrg output.swizzle_w = 5; /* 1 */ 2711848b8605Smrg break; 2712848b8605Smrg default: 2713848b8605Smrg output.array_base = next_param++; 2714848b8605Smrg break; 2715848b8605Smrg } 2716848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2717848b8605Smrg if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 2718848b8605Smrg last_exp_param = ctx.bc->cf_last; 2719848b8605Smrg else 2720848b8605Smrg last_exp_pos = ctx.bc->cf_last; 2721848b8605Smrg } 2722848b8605Smrg 2723848b8605Smrg if (!last_exp_pos) { 2724848b8605Smrg memset(&output, 0, sizeof(output)); 2725848b8605Smrg output.gpr = 0; 2726848b8605Smrg output.elem_size = 3; 2727848b8605Smrg output.swizzle_x = 7; 2728848b8605Smrg output.swizzle_y = 7; 2729848b8605Smrg output.swizzle_z = 7; 2730848b8605Smrg output.swizzle_w = 7; 2731848b8605Smrg output.burst_count = 1; 2732848b8605Smrg output.type = 2; 2733848b8605Smrg output.op = CF_OP_EXPORT; 2734848b8605Smrg output.array_base = 60; 2735848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2736848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2737848b8605Smrg last_exp_pos = ctx.bc->cf_last; 2738848b8605Smrg } 2739848b8605Smrg 2740848b8605Smrg if (!last_exp_param) { 2741848b8605Smrg memset(&output, 0, sizeof(output)); 2742848b8605Smrg output.gpr = 0; 2743848b8605Smrg output.elem_size = 3; 2744848b8605Smrg output.swizzle_x = 7; 2745848b8605Smrg output.swizzle_y = 7; 2746848b8605Smrg output.swizzle_z = 7; 2747848b8605Smrg output.swizzle_w = 7; 2748848b8605Smrg output.burst_count = 1; 2749848b8605Smrg output.type = 2; 2750848b8605Smrg output.op = CF_OP_EXPORT; 2751848b8605Smrg output.array_base = next_param++; 2752848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2753848b8605Smrg r600_bytecode_add_output(ctx.bc, &output); 2754848b8605Smrg last_exp_param = ctx.bc->cf_last; 2755848b8605Smrg } 2756848b8605Smrg 2757848b8605Smrg last_exp_pos->op = CF_OP_EXPORT_DONE; 2758848b8605Smrg last_exp_param->op = CF_OP_EXPORT_DONE; 2759848b8605Smrg 2760848b8605Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2761848b8605Smrg cf_pop = ctx.bc->cf_last; 2762848b8605Smrg 2763848b8605Smrg cf_jump->cf_addr = cf_pop->id + 2; 2764848b8605Smrg cf_jump->pop_count = 1; 2765848b8605Smrg cf_pop->cf_addr = cf_pop->id + 2; 2766848b8605Smrg cf_pop->pop_count = 1; 2767848b8605Smrg 2768848b8605Smrg if (ctx.bc->chip_class == CAYMAN) 2769848b8605Smrg cm_bytecode_add_cf_end(ctx.bc); 2770848b8605Smrg else { 2771848b8605Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2772848b8605Smrg ctx.bc->cf_last->end_of_program = 1; 2773848b8605Smrg } 2774848b8605Smrg 2775848b8605Smrg gs->gs_copy_shader = cshader; 2776b8e80941Smrg cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2777848b8605Smrg 2778848b8605Smrg ctx.bc->nstack = 1; 2779848b8605Smrg 2780848b8605Smrg return r600_bytecode_build(ctx.bc); 2781848b8605Smrg} 2782848b8605Smrg 2783b8e80941Smrgstatic int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 2784b8e80941Smrg{ 2785b8e80941Smrg if (ind) { 2786b8e80941Smrg struct r600_bytecode_alu alu; 2787b8e80941Smrg int r; 2788b8e80941Smrg 2789b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2790b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 2791b8e80941Smrg alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 2792b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2793b8e80941Smrg alu.src[1].value = ctx->gs_out_ring_offset >> 4; 2794b8e80941Smrg alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 2795b8e80941Smrg alu.dst.write = 1; 2796b8e80941Smrg alu.last = 1; 2797b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 2798b8e80941Smrg if (r) 2799b8e80941Smrg return r; 2800b8e80941Smrg } 2801b8e80941Smrg return 0; 2802b8e80941Smrg} 2803b8e80941Smrg 2804b8e80941Smrgstatic int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so UNUSED, int stream, bool ind) 2805848b8605Smrg{ 2806848b8605Smrg struct r600_bytecode_output output; 2807b8e80941Smrg int ring_offset; 2808b8e80941Smrg unsigned i, k; 2809b8e80941Smrg int effective_stream = stream == -1 ? 0 : stream; 2810b8e80941Smrg int idx = 0; 2811848b8605Smrg 2812848b8605Smrg for (i = 0; i < ctx->shader->noutput; i++) { 2813848b8605Smrg if (ctx->gs_for_vs) { 2814848b8605Smrg /* for ES we need to lookup corresponding ring offset expected by GS 2815848b8605Smrg * (map this output to GS input by name and sid) */ 2816848b8605Smrg /* FIXME precompute offsets */ 2817848b8605Smrg ring_offset = -1; 2818848b8605Smrg for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 2819848b8605Smrg struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 2820848b8605Smrg struct r600_shader_io *out = &ctx->shader->output[i]; 2821848b8605Smrg if (in->name == out->name && in->sid == out->sid) 2822848b8605Smrg ring_offset = in->ring_offset; 2823848b8605Smrg } 2824848b8605Smrg 2825848b8605Smrg if (ring_offset == -1) 2826848b8605Smrg continue; 2827b8e80941Smrg } else { 2828b8e80941Smrg ring_offset = idx * 16; 2829b8e80941Smrg idx++; 2830b8e80941Smrg } 2831848b8605Smrg 2832b8e80941Smrg if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) 2833b8e80941Smrg continue; 2834848b8605Smrg /* next_ring_offset after parsing input decls contains total size of 2835848b8605Smrg * single vertex data, gs_next_vertex - current vertex index */ 2836848b8605Smrg if (!ind) 2837848b8605Smrg ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 2838848b8605Smrg 2839848b8605Smrg memset(&output, 0, sizeof(struct r600_bytecode_output)); 2840848b8605Smrg output.gpr = ctx->shader->output[i].gpr; 2841848b8605Smrg output.elem_size = 3; 2842848b8605Smrg output.comp_mask = 0xF; 2843848b8605Smrg output.burst_count = 1; 2844848b8605Smrg 2845848b8605Smrg if (ind) 2846848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 2847848b8605Smrg else 2848848b8605Smrg output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2849848b8605Smrg 2850b8e80941Smrg switch (stream) { 2851b8e80941Smrg default: 2852b8e80941Smrg case 0: 2853b8e80941Smrg output.op = CF_OP_MEM_RING; break; 2854b8e80941Smrg case 1: 2855b8e80941Smrg output.op = CF_OP_MEM_RING1; break; 2856b8e80941Smrg case 2: 2857b8e80941Smrg output.op = CF_OP_MEM_RING2; break; 2858b8e80941Smrg case 3: 2859b8e80941Smrg output.op = CF_OP_MEM_RING3; break; 2860b8e80941Smrg } 2861848b8605Smrg 2862848b8605Smrg if (ind) { 2863848b8605Smrg output.array_base = ring_offset >> 2; /* in dwords */ 2864848b8605Smrg output.array_size = 0xfff; 2865b8e80941Smrg output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream]; 2866848b8605Smrg } else 2867848b8605Smrg output.array_base = ring_offset >> 2; /* in dwords */ 2868848b8605Smrg r600_bytecode_add_output(ctx->bc, &output); 2869848b8605Smrg } 2870848b8605Smrg 2871848b8605Smrg ++ctx->gs_next_vertex; 2872848b8605Smrg return 0; 2873848b8605Smrg} 2874848b8605Smrg 2875b8e80941Smrg 2876b8e80941Smrgstatic int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx) 2877848b8605Smrg{ 2878b8e80941Smrg int r; 2879b8e80941Smrg struct r600_bytecode_vtx vtx; 2880b8e80941Smrg int temp_val = ctx->temp_reg; 2881b8e80941Smrg /* need to store the TCS output somewhere */ 2882b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 2883b8e80941Smrg temp_val, 0, 2884b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 0, 2885b8e80941Smrg 0, 0); 2886b8e80941Smrg if (r) 2887b8e80941Smrg return r; 2888848b8605Smrg 2889b8e80941Smrg /* used by VS/TCS */ 2890b8e80941Smrg if (ctx->tess_input_info) { 2891b8e80941Smrg /* fetch tcs input values into resv space */ 2892b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2893b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 2894b8e80941Smrg vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2895b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2896b8e80941Smrg vtx.mega_fetch_count = 16; 2897b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 2898b8e80941Smrg vtx.num_format_all = 2; 2899b8e80941Smrg vtx.format_comp_all = 1; 2900b8e80941Smrg vtx.use_const_fields = 0; 2901b8e80941Smrg vtx.endian = r600_endian_swap(32); 2902b8e80941Smrg vtx.srf_mode_all = 1; 2903b8e80941Smrg vtx.offset = 0; 2904b8e80941Smrg vtx.dst_gpr = ctx->tess_input_info; 2905b8e80941Smrg vtx.dst_sel_x = 0; 2906b8e80941Smrg vtx.dst_sel_y = 1; 2907b8e80941Smrg vtx.dst_sel_z = 2; 2908b8e80941Smrg vtx.dst_sel_w = 3; 2909b8e80941Smrg vtx.src_gpr = temp_val; 2910b8e80941Smrg vtx.src_sel_x = 0; 2911b8e80941Smrg 2912b8e80941Smrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2913b8e80941Smrg if (r) 2914b8e80941Smrg return r; 2915b8e80941Smrg } 2916b8e80941Smrg 2917b8e80941Smrg /* used by TCS/TES */ 2918b8e80941Smrg if (ctx->tess_output_info) { 2919b8e80941Smrg /* fetch tcs output values into resv space */ 2920b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2921b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 2922b8e80941Smrg vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2923b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2924b8e80941Smrg vtx.mega_fetch_count = 16; 2925b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 2926b8e80941Smrg vtx.num_format_all = 2; 2927b8e80941Smrg vtx.format_comp_all = 1; 2928b8e80941Smrg vtx.use_const_fields = 0; 2929b8e80941Smrg vtx.endian = r600_endian_swap(32); 2930b8e80941Smrg vtx.srf_mode_all = 1; 2931b8e80941Smrg vtx.offset = 16; 2932b8e80941Smrg vtx.dst_gpr = ctx->tess_output_info; 2933b8e80941Smrg vtx.dst_sel_x = 0; 2934b8e80941Smrg vtx.dst_sel_y = 1; 2935b8e80941Smrg vtx.dst_sel_z = 2; 2936b8e80941Smrg vtx.dst_sel_w = 3; 2937b8e80941Smrg vtx.src_gpr = temp_val; 2938b8e80941Smrg vtx.src_sel_x = 0; 2939b8e80941Smrg 2940b8e80941Smrg r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2941b8e80941Smrg if (r) 2942b8e80941Smrg return r; 2943b8e80941Smrg } 2944b8e80941Smrg return 0; 2945b8e80941Smrg} 2946b8e80941Smrg 2947b8e80941Smrgstatic int emit_lds_vs_writes(struct r600_shader_ctx *ctx) 2948b8e80941Smrg{ 2949b8e80941Smrg int j, r; 2950b8e80941Smrg int temp_reg; 2951b8e80941Smrg unsigned i; 2952b8e80941Smrg 2953b8e80941Smrg /* fetch tcs input values into input_vals */ 2954b8e80941Smrg ctx->tess_input_info = r600_get_temp(ctx); 2955b8e80941Smrg ctx->tess_output_info = 0; 2956b8e80941Smrg r = r600_fetch_tess_io_info(ctx); 2957b8e80941Smrg if (r) 2958b8e80941Smrg return r; 2959b8e80941Smrg 2960b8e80941Smrg temp_reg = r600_get_temp(ctx); 2961b8e80941Smrg /* dst reg contains LDS address stride * idx */ 2962b8e80941Smrg /* MUL vertexID, vertex_dw_stride */ 2963b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 2964b8e80941Smrg temp_reg, 0, 2965b8e80941Smrg ctx->tess_input_info, 1, 2966b8e80941Smrg 0, 1); /* rel id in r0.y? */ 2967b8e80941Smrg if (r) 2968b8e80941Smrg return r; 2969b8e80941Smrg 2970b8e80941Smrg for (i = 0; i < ctx->shader->noutput; i++) { 2971b8e80941Smrg struct r600_bytecode_alu alu; 2972b8e80941Smrg int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid); 2973b8e80941Smrg 2974b8e80941Smrg if (param) { 2975b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2976b8e80941Smrg temp_reg, 1, 2977b8e80941Smrg temp_reg, 0, 2978b8e80941Smrg V_SQ_ALU_SRC_LITERAL, param * 16); 2979b8e80941Smrg if (r) 2980b8e80941Smrg return r; 2981b8e80941Smrg } 2982b8e80941Smrg 2983b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2984b8e80941Smrg temp_reg, 2, 2985b8e80941Smrg temp_reg, param ? 1 : 0, 2986b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 8); 2987b8e80941Smrg if (r) 2988b8e80941Smrg return r; 2989b8e80941Smrg 2990b8e80941Smrg 2991b8e80941Smrg for (j = 0; j < 2; j++) { 2992b8e80941Smrg int chan = (j == 1) ? 2 : (param ? 1 : 0); 2993b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2994b8e80941Smrg alu.op = LDS_OP3_LDS_WRITE_REL; 2995b8e80941Smrg alu.src[0].sel = temp_reg; 2996b8e80941Smrg alu.src[0].chan = chan; 2997b8e80941Smrg alu.src[1].sel = ctx->shader->output[i].gpr; 2998b8e80941Smrg alu.src[1].chan = j * 2; 2999b8e80941Smrg alu.src[2].sel = ctx->shader->output[i].gpr; 3000b8e80941Smrg alu.src[2].chan = (j * 2) + 1; 3001b8e80941Smrg alu.last = 1; 3002b8e80941Smrg alu.dst.chan = 0; 3003b8e80941Smrg alu.lds_idx = 1; 3004b8e80941Smrg alu.is_lds_idx_op = true; 3005b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3006b8e80941Smrg if (r) 3007b8e80941Smrg return r; 3008b8e80941Smrg } 3009b8e80941Smrg } 3010b8e80941Smrg return 0; 3011b8e80941Smrg} 3012b8e80941Smrg 3013b8e80941Smrgstatic int r600_store_tcs_output(struct r600_shader_ctx *ctx) 3014b8e80941Smrg{ 3015b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3016b8e80941Smrg const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 3017b8e80941Smrg int i, r, lasti; 3018b8e80941Smrg int temp_reg = r600_get_temp(ctx); 3019b8e80941Smrg struct r600_bytecode_alu alu; 3020b8e80941Smrg unsigned write_mask = dst->Register.WriteMask; 3021b8e80941Smrg 3022b8e80941Smrg if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 3023b8e80941Smrg return 0; 3024b8e80941Smrg 3025b8e80941Smrg r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true); 3026b8e80941Smrg if (r) 3027b8e80941Smrg return r; 3028b8e80941Smrg 3029b8e80941Smrg /* the base address is now in temp.x */ 3030b8e80941Smrg r = r600_get_byte_address(ctx, temp_reg, 3031b8e80941Smrg &inst->Dst[0], NULL, ctx->tess_output_info, 1); 3032b8e80941Smrg if (r) 3033b8e80941Smrg return r; 3034b8e80941Smrg 3035b8e80941Smrg /* LDS write */ 3036b8e80941Smrg lasti = tgsi_last_instruction(write_mask); 3037b8e80941Smrg for (i = 1; i <= lasti; i++) { 3038b8e80941Smrg 3039b8e80941Smrg if (!(write_mask & (1 << i))) 3040b8e80941Smrg continue; 3041b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 3042b8e80941Smrg temp_reg, i, 3043b8e80941Smrg temp_reg, 0, 3044b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 4 * i); 3045b8e80941Smrg if (r) 3046b8e80941Smrg return r; 3047b8e80941Smrg } 3048b8e80941Smrg 3049b8e80941Smrg for (i = 0; i <= lasti; i++) { 3050b8e80941Smrg if (!(write_mask & (1 << i))) 3051b8e80941Smrg continue; 3052b8e80941Smrg 3053b8e80941Smrg if ((i == 0 && ((write_mask & 3) == 3)) || 3054b8e80941Smrg (i == 2 && ((write_mask & 0xc) == 0xc))) { 3055b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3056b8e80941Smrg alu.op = LDS_OP3_LDS_WRITE_REL; 3057b8e80941Smrg alu.src[0].sel = temp_reg; 3058b8e80941Smrg alu.src[0].chan = i; 3059b8e80941Smrg 3060b8e80941Smrg alu.src[1].sel = dst->Register.Index; 3061b8e80941Smrg alu.src[1].sel += ctx->file_offset[dst->Register.File]; 3062b8e80941Smrg alu.src[1].chan = i; 3063b8e80941Smrg 3064b8e80941Smrg alu.src[2].sel = dst->Register.Index; 3065b8e80941Smrg alu.src[2].sel += ctx->file_offset[dst->Register.File]; 3066b8e80941Smrg alu.src[2].chan = i + 1; 3067b8e80941Smrg alu.lds_idx = 1; 3068b8e80941Smrg alu.dst.chan = 0; 3069b8e80941Smrg alu.last = 1; 3070b8e80941Smrg alu.is_lds_idx_op = true; 3071b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3072b8e80941Smrg if (r) 3073b8e80941Smrg return r; 3074b8e80941Smrg i += 1; 3075b8e80941Smrg continue; 3076b8e80941Smrg } 3077b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3078b8e80941Smrg alu.op = LDS_OP2_LDS_WRITE; 3079b8e80941Smrg alu.src[0].sel = temp_reg; 3080b8e80941Smrg alu.src[0].chan = i; 3081b8e80941Smrg 3082b8e80941Smrg alu.src[1].sel = dst->Register.Index; 3083b8e80941Smrg alu.src[1].sel += ctx->file_offset[dst->Register.File]; 3084b8e80941Smrg alu.src[1].chan = i; 3085b8e80941Smrg 3086b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_0; 3087b8e80941Smrg alu.dst.chan = 0; 3088b8e80941Smrg alu.last = 1; 3089b8e80941Smrg alu.is_lds_idx_op = true; 3090b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3091b8e80941Smrg if (r) 3092b8e80941Smrg return r; 3093b8e80941Smrg } 3094b8e80941Smrg return 0; 3095b8e80941Smrg} 3096b8e80941Smrg 3097b8e80941Smrgstatic int r600_tess_factor_read(struct r600_shader_ctx *ctx, 3098b8e80941Smrg int output_idx, int nc) 3099b8e80941Smrg{ 3100b8e80941Smrg int param; 3101b8e80941Smrg unsigned temp_reg = r600_get_temp(ctx); 3102b8e80941Smrg unsigned name = ctx->shader->output[output_idx].name; 3103b8e80941Smrg int dreg = ctx->shader->output[output_idx].gpr; 3104b8e80941Smrg int r; 3105b8e80941Smrg 3106b8e80941Smrg param = r600_get_lds_unique_index(name, 0); 3107b8e80941Smrg r = get_lds_offset0(ctx, 1, temp_reg, true); 3108b8e80941Smrg if (r) 3109b8e80941Smrg return r; 3110b8e80941Smrg 3111b8e80941Smrg if (param) { 3112b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 3113b8e80941Smrg temp_reg, 0, 3114b8e80941Smrg temp_reg, 0, 3115b8e80941Smrg V_SQ_ALU_SRC_LITERAL, param * 16); 3116b8e80941Smrg if (r) 3117b8e80941Smrg return r; 3118b8e80941Smrg } 3119b8e80941Smrg 3120b8e80941Smrg do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1)); 3121b8e80941Smrg return 0; 3122b8e80941Smrg} 3123b8e80941Smrg 3124b8e80941Smrgstatic int r600_emit_tess_factor(struct r600_shader_ctx *ctx) 3125b8e80941Smrg{ 3126b8e80941Smrg int stride, outer_comps, inner_comps; 3127b8e80941Smrg int tessinner_idx = -1, tessouter_idx = -1; 3128b8e80941Smrg int i, r; 3129b8e80941Smrg unsigned j; 3130b8e80941Smrg int temp_reg = r600_get_temp(ctx); 3131b8e80941Smrg int treg[3] = {-1, -1, -1}; 3132b8e80941Smrg struct r600_bytecode_alu alu; 3133b8e80941Smrg struct r600_bytecode_cf *cf_jump, *cf_pop; 3134b8e80941Smrg 3135b8e80941Smrg /* only execute factor emission for invocation 0 */ 3136b8e80941Smrg /* PRED_SETE_INT __, R0.x, 0 */ 3137b8e80941Smrg memset(&alu, 0, sizeof(alu)); 3138b8e80941Smrg alu.op = ALU_OP2_PRED_SETE_INT; 3139b8e80941Smrg alu.src[0].chan = 2; 3140b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3141b8e80941Smrg alu.execute_mask = 1; 3142b8e80941Smrg alu.update_pred = 1; 3143b8e80941Smrg alu.last = 1; 3144b8e80941Smrg r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE); 3145b8e80941Smrg 3146b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 3147b8e80941Smrg cf_jump = ctx->bc->cf_last; 3148b8e80941Smrg 3149b8e80941Smrg treg[0] = r600_get_temp(ctx); 3150b8e80941Smrg switch (ctx->shader->tcs_prim_mode) { 3151b8e80941Smrg case PIPE_PRIM_LINES: 3152b8e80941Smrg stride = 8; /* 2 dwords, 1 vec2 store */ 3153b8e80941Smrg outer_comps = 2; 3154b8e80941Smrg inner_comps = 0; 3155b8e80941Smrg break; 3156b8e80941Smrg case PIPE_PRIM_TRIANGLES: 3157b8e80941Smrg stride = 16; /* 4 dwords, 1 vec4 store */ 3158b8e80941Smrg outer_comps = 3; 3159b8e80941Smrg inner_comps = 1; 3160b8e80941Smrg treg[1] = r600_get_temp(ctx); 3161b8e80941Smrg break; 3162b8e80941Smrg case PIPE_PRIM_QUADS: 3163b8e80941Smrg stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */ 3164b8e80941Smrg outer_comps = 4; 3165b8e80941Smrg inner_comps = 2; 3166b8e80941Smrg treg[1] = r600_get_temp(ctx); 3167b8e80941Smrg treg[2] = r600_get_temp(ctx); 3168b8e80941Smrg break; 3169b8e80941Smrg default: 3170b8e80941Smrg assert(0); 3171b8e80941Smrg return -1; 3172b8e80941Smrg } 3173b8e80941Smrg 3174b8e80941Smrg /* R0 is InvocationID, RelPatchID, PatchID, tf_base */ 3175b8e80941Smrg /* TF_WRITE takes index in R.x, value in R.y */ 3176b8e80941Smrg for (j = 0; j < ctx->shader->noutput; j++) { 3177b8e80941Smrg if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSINNER) 3178b8e80941Smrg tessinner_idx = j; 3179b8e80941Smrg if (ctx->shader->output[j].name == TGSI_SEMANTIC_TESSOUTER) 3180b8e80941Smrg tessouter_idx = j; 3181b8e80941Smrg } 3182b8e80941Smrg 3183b8e80941Smrg if (tessouter_idx == -1) 3184b8e80941Smrg return -1; 3185b8e80941Smrg 3186b8e80941Smrg if (tessinner_idx == -1 && inner_comps) 3187b8e80941Smrg return -1; 3188b8e80941Smrg 3189b8e80941Smrg if (tessouter_idx != -1) { 3190b8e80941Smrg r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps); 3191b8e80941Smrg if (r) 3192b8e80941Smrg return r; 3193b8e80941Smrg } 3194b8e80941Smrg 3195b8e80941Smrg if (tessinner_idx != -1) { 3196b8e80941Smrg r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps); 3197b8e80941Smrg if (r) 3198b8e80941Smrg return r; 3199b8e80941Smrg } 3200b8e80941Smrg 3201b8e80941Smrg /* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */ 3202b8e80941Smrg /* r.x = relpatchid(r0.y) * tf_stride */ 3203b8e80941Smrg 3204b8e80941Smrg /* multiply incoming r0.y * stride - t.x = r0.y * stride */ 3205b8e80941Smrg /* add incoming r0.w to it: t.x = t.x + r0.w */ 3206b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 3207b8e80941Smrg temp_reg, 0, 3208b8e80941Smrg 0, 1, 3209b8e80941Smrg V_SQ_ALU_SRC_LITERAL, stride, 3210b8e80941Smrg 0, 3); 3211b8e80941Smrg if (r) 3212b8e80941Smrg return r; 3213b8e80941Smrg 3214b8e80941Smrg for (i = 0; i < outer_comps + inner_comps; i++) { 3215b8e80941Smrg int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx; 3216b8e80941Smrg int out_comp = i >= outer_comps ? i - outer_comps : i; 3217b8e80941Smrg 3218b8e80941Smrg if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) { 3219b8e80941Smrg if (out_comp == 1) 3220b8e80941Smrg out_comp = 0; 3221b8e80941Smrg else if (out_comp == 0) 3222b8e80941Smrg out_comp = 1; 3223b8e80941Smrg } 3224b8e80941Smrg 3225b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 3226b8e80941Smrg treg[i / 2], (2 * (i % 2)), 3227b8e80941Smrg temp_reg, 0, 3228b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 4 * i); 3229b8e80941Smrg if (r) 3230b8e80941Smrg return r; 3231b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 3232b8e80941Smrg treg[i / 2], 1 + (2 * (i%2)), 3233b8e80941Smrg ctx->shader->output[out_idx].gpr, out_comp, 3234b8e80941Smrg 0, 0); 3235b8e80941Smrg if (r) 3236b8e80941Smrg return r; 3237b8e80941Smrg } 3238b8e80941Smrg for (i = 0; i < outer_comps + inner_comps; i++) { 3239b8e80941Smrg struct r600_bytecode_gds gds; 3240b8e80941Smrg 3241b8e80941Smrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 3242b8e80941Smrg gds.src_gpr = treg[i / 2]; 3243b8e80941Smrg gds.src_sel_x = 2 * (i % 2); 3244b8e80941Smrg gds.src_sel_y = 1 + (2 * (i % 2)); 3245b8e80941Smrg gds.src_sel_z = 4; 3246b8e80941Smrg gds.dst_sel_x = 7; 3247b8e80941Smrg gds.dst_sel_y = 7; 3248b8e80941Smrg gds.dst_sel_z = 7; 3249b8e80941Smrg gds.dst_sel_w = 7; 3250b8e80941Smrg gds.op = FETCH_OP_TF_WRITE; 3251b8e80941Smrg r = r600_bytecode_add_gds(ctx->bc, &gds); 3252b8e80941Smrg if (r) 3253b8e80941Smrg return r; 3254b8e80941Smrg } 3255b8e80941Smrg 3256b8e80941Smrg // Patch up jump label 3257b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 3258b8e80941Smrg cf_pop = ctx->bc->cf_last; 3259b8e80941Smrg 3260b8e80941Smrg cf_jump->cf_addr = cf_pop->id + 2; 3261b8e80941Smrg cf_jump->pop_count = 1; 3262b8e80941Smrg cf_pop->cf_addr = cf_pop->id + 2; 3263b8e80941Smrg cf_pop->pop_count = 1; 3264b8e80941Smrg 3265b8e80941Smrg return 0; 3266b8e80941Smrg} 3267b8e80941Smrg 3268b8e80941Smrg/* 3269b8e80941Smrg * We have to work out the thread ID for load and atomic 3270b8e80941Smrg * operations, which store the returned value to an index 3271b8e80941Smrg * in an intermediate buffer. 3272b8e80941Smrg * The index is calculated by taking the thread id, 3273b8e80941Smrg * calculated from the MBCNT instructions. 3274b8e80941Smrg * Then the shader engine ID is multiplied by 256, 3275b8e80941Smrg * and the wave id is added. 3276b8e80941Smrg * Then the result is multipled by 64 and thread id is 3277b8e80941Smrg * added. 3278b8e80941Smrg */ 3279b8e80941Smrgstatic int load_thread_id_gpr(struct r600_shader_ctx *ctx) 3280b8e80941Smrg{ 3281b8e80941Smrg struct r600_bytecode_alu alu; 3282b8e80941Smrg int r; 3283b8e80941Smrg 3284b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3285b8e80941Smrg alu.op = ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT; 3286b8e80941Smrg alu.dst.sel = ctx->temp_reg; 3287b8e80941Smrg alu.dst.chan = 0; 3288b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3289b8e80941Smrg alu.src[0].value = 0xffffffff; 3290b8e80941Smrg alu.dst.write = 1; 3291b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3292b8e80941Smrg if (r) 3293b8e80941Smrg return r; 3294b8e80941Smrg 3295b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3296b8e80941Smrg alu.op = ALU_OP1_MBCNT_32HI_INT; 3297b8e80941Smrg alu.dst.sel = ctx->temp_reg; 3298b8e80941Smrg alu.dst.chan = 1; 3299b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3300b8e80941Smrg alu.src[0].value = 0xffffffff; 3301b8e80941Smrg alu.dst.write = 1; 3302b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3303b8e80941Smrg if (r) 3304b8e80941Smrg return r; 3305b8e80941Smrg 3306b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3307b8e80941Smrg alu.op = ALU_OP3_MULADD_UINT24; 3308b8e80941Smrg alu.dst.sel = ctx->temp_reg; 3309b8e80941Smrg alu.dst.chan = 2; 3310b8e80941Smrg alu.src[0].sel = EG_V_SQ_ALU_SRC_SE_ID; 3311b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3312b8e80941Smrg alu.src[1].value = 256; 3313b8e80941Smrg alu.src[2].sel = EG_V_SQ_ALU_SRC_HW_WAVE_ID; 3314b8e80941Smrg alu.dst.write = 1; 3315b8e80941Smrg alu.is_op3 = 1; 3316b8e80941Smrg alu.last = 1; 3317b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 3318b8e80941Smrg if (r) 3319b8e80941Smrg return r; 3320b8e80941Smrg 3321b8e80941Smrg r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 3322b8e80941Smrg ctx->thread_id_gpr, 1, 3323b8e80941Smrg ctx->temp_reg, 2, 3324b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 0x40, 3325b8e80941Smrg ctx->temp_reg, 0); 3326b8e80941Smrg if (r) 3327b8e80941Smrg return r; 3328b8e80941Smrg return 0; 3329b8e80941Smrg} 3330b8e80941Smrg 3331b8e80941Smrgstatic int r600_shader_from_tgsi(struct r600_context *rctx, 3332b8e80941Smrg struct r600_pipe_shader *pipeshader, 3333b8e80941Smrg union r600_shader_key key) 3334b8e80941Smrg{ 3335b8e80941Smrg struct r600_screen *rscreen = rctx->screen; 3336b8e80941Smrg struct r600_shader *shader = &pipeshader->shader; 3337b8e80941Smrg struct tgsi_token *tokens = pipeshader->selector->tokens; 3338b8e80941Smrg struct pipe_stream_output_info so = pipeshader->selector->so; 3339b8e80941Smrg struct tgsi_full_immediate *immediate; 3340b8e80941Smrg struct r600_shader_ctx ctx; 3341b8e80941Smrg struct r600_bytecode_output output[ARRAY_SIZE(shader->output)]; 3342b8e80941Smrg unsigned output_done, noutput; 3343b8e80941Smrg unsigned opcode; 3344b8e80941Smrg int j, k, r = 0; 3345b8e80941Smrg unsigned i; 3346b8e80941Smrg int next_param_base = 0, next_clip_base; 3347b8e80941Smrg int max_color_exports = MAX2(key.ps.nr_cbufs, 1); 3348b8e80941Smrg bool indirect_gprs; 3349b8e80941Smrg bool ring_outputs = false; 3350b8e80941Smrg bool lds_outputs = false; 3351b8e80941Smrg bool lds_inputs = false; 3352b8e80941Smrg bool pos_emitted = false; 3353848b8605Smrg 3354b8e80941Smrg ctx.bc = &shader->bc; 3355b8e80941Smrg ctx.shader = shader; 3356848b8605Smrg 3357848b8605Smrg r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 3358848b8605Smrg rscreen->has_compressed_msaa_texturing); 3359848b8605Smrg ctx.tokens = tokens; 3360848b8605Smrg tgsi_scan_shader(tokens, &ctx.info); 3361848b8605Smrg shader->indirect_files = ctx.info.indirect_files; 3362b8e80941Smrg 3363b8e80941Smrg int narrays = ctx.info.array_max[TGSI_FILE_TEMPORARY]; 3364b8e80941Smrg ctx.array_infos = calloc(narrays, sizeof(*ctx.array_infos)); 3365b8e80941Smrg ctx.spilled_arrays = calloc(narrays, sizeof(bool)); 3366b8e80941Smrg tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, narrays, ctx.array_infos); 3367b8e80941Smrg 3368b8e80941Smrg shader->uses_helper_invocation = false; 3369b8e80941Smrg shader->uses_doubles = ctx.info.uses_doubles; 3370b8e80941Smrg shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC]; 3371b8e80941Smrg shader->nsys_inputs = 0; 3372b8e80941Smrg 3373b8e80941Smrg shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 || 3374b8e80941Smrg ctx.info.file_count[TGSI_FILE_BUFFER] > 0; 3375b8e80941Smrg indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 3376848b8605Smrg tgsi_parse_init(&ctx.parse, tokens); 3377b8e80941Smrg ctx.type = ctx.info.processor; 3378848b8605Smrg shader->processor_type = ctx.type; 3379848b8605Smrg ctx.bc->type = shader->processor_type; 3380848b8605Smrg 3381b8e80941Smrg switch (ctx.type) { 3382b8e80941Smrg case PIPE_SHADER_VERTEX: 3383b8e80941Smrg shader->vs_as_gs_a = key.vs.as_gs_a; 3384b8e80941Smrg shader->vs_as_es = key.vs.as_es; 3385b8e80941Smrg shader->vs_as_ls = key.vs.as_ls; 3386b8e80941Smrg shader->atomic_base = key.vs.first_atomic_counter; 3387b8e80941Smrg if (shader->vs_as_es) 3388b8e80941Smrg ring_outputs = true; 3389b8e80941Smrg if (shader->vs_as_ls) 3390b8e80941Smrg lds_outputs = true; 3391b8e80941Smrg break; 3392b8e80941Smrg case PIPE_SHADER_GEOMETRY: 3393b8e80941Smrg ring_outputs = true; 3394b8e80941Smrg shader->atomic_base = key.gs.first_atomic_counter; 3395b8e80941Smrg shader->gs_tri_strip_adj_fix = key.gs.tri_strip_adj_fix; 3396b8e80941Smrg break; 3397b8e80941Smrg case PIPE_SHADER_TESS_CTRL: 3398b8e80941Smrg shader->tcs_prim_mode = key.tcs.prim_mode; 3399b8e80941Smrg shader->atomic_base = key.tcs.first_atomic_counter; 3400b8e80941Smrg lds_outputs = true; 3401b8e80941Smrg lds_inputs = true; 3402b8e80941Smrg break; 3403b8e80941Smrg case PIPE_SHADER_TESS_EVAL: 3404b8e80941Smrg shader->tes_as_es = key.tes.as_es; 3405b8e80941Smrg shader->atomic_base = key.tes.first_atomic_counter; 3406b8e80941Smrg lds_inputs = true; 3407b8e80941Smrg if (shader->tes_as_es) 3408b8e80941Smrg ring_outputs = true; 3409b8e80941Smrg break; 3410b8e80941Smrg case PIPE_SHADER_FRAGMENT: 3411b8e80941Smrg shader->two_side = key.ps.color_two_side; 3412b8e80941Smrg shader->atomic_base = key.ps.first_atomic_counter; 3413b8e80941Smrg shader->rat_base = key.ps.nr_cbufs; 3414b8e80941Smrg shader->image_size_const_offset = key.ps.image_size_const_offset; 3415b8e80941Smrg break; 3416b8e80941Smrg case PIPE_SHADER_COMPUTE: 3417b8e80941Smrg shader->rat_base = 0; 3418b8e80941Smrg shader->image_size_const_offset = ctx.info.file_count[TGSI_FILE_SAMPLER]; 3419b8e80941Smrg break; 3420b8e80941Smrg default: 3421b8e80941Smrg break; 3422b8e80941Smrg } 3423848b8605Smrg 3424b8e80941Smrg if (shader->vs_as_es || shader->tes_as_es) { 3425848b8605Smrg ctx.gs_for_vs = &rctx->gs_shader->current->shader; 3426848b8605Smrg } else { 3427848b8605Smrg ctx.gs_for_vs = NULL; 3428848b8605Smrg } 3429848b8605Smrg 3430848b8605Smrg ctx.next_ring_offset = 0; 3431848b8605Smrg ctx.gs_out_ring_offset = 0; 3432848b8605Smrg ctx.gs_next_vertex = 0; 3433b8e80941Smrg ctx.gs_stream_output_info = &so; 3434848b8605Smrg 3435b8e80941Smrg ctx.thread_id_gpr = -1; 3436848b8605Smrg ctx.face_gpr = -1; 3437b8e80941Smrg ctx.fixed_pt_position_gpr = -1; 3438848b8605Smrg ctx.fragcoord_input = -1; 3439848b8605Smrg ctx.colors_used = 0; 3440848b8605Smrg ctx.clip_vertex_write = 0; 3441848b8605Smrg 3442b8e80941Smrg ctx.helper_invoc_reg = -1; 3443b8e80941Smrg ctx.cs_block_size_reg = -1; 3444b8e80941Smrg ctx.cs_grid_size_reg = -1; 3445b8e80941Smrg ctx.cs_block_size_loaded = false; 3446b8e80941Smrg ctx.cs_grid_size_loaded = false; 3447b8e80941Smrg 3448848b8605Smrg shader->nr_ps_color_exports = 0; 3449848b8605Smrg shader->nr_ps_max_color_exports = 0; 3450848b8605Smrg 3451848b8605Smrg 3452848b8605Smrg /* register allocations */ 3453848b8605Smrg /* Values [0,127] correspond to GPR[0..127]. 3454848b8605Smrg * Values [128,159] correspond to constant buffer bank 0 3455848b8605Smrg * Values [160,191] correspond to constant buffer bank 1 3456848b8605Smrg * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 3457848b8605Smrg * Values [256,287] correspond to constant buffer bank 2 (EG) 3458848b8605Smrg * Values [288,319] correspond to constant buffer bank 3 (EG) 3459848b8605Smrg * Other special values are shown in the list below. 3460848b8605Smrg * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 3461848b8605Smrg * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 3462848b8605Smrg * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 3463848b8605Smrg * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 3464848b8605Smrg * 248 SQ_ALU_SRC_0: special constant 0.0. 3465848b8605Smrg * 249 SQ_ALU_SRC_1: special constant 1.0 float. 3466848b8605Smrg * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 3467848b8605Smrg * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 3468848b8605Smrg * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 3469848b8605Smrg * 253 SQ_ALU_SRC_LITERAL: literal constant. 3470848b8605Smrg * 254 SQ_ALU_SRC_PV: previous vector result. 3471848b8605Smrg * 255 SQ_ALU_SRC_PS: previous scalar result. 3472848b8605Smrg */ 3473848b8605Smrg for (i = 0; i < TGSI_FILE_COUNT; i++) { 3474848b8605Smrg ctx.file_offset[i] = 0; 3475848b8605Smrg } 3476848b8605Smrg 3477b8e80941Smrg if (ctx.type == PIPE_SHADER_VERTEX) { 3478b8e80941Smrg 3479848b8605Smrg ctx.file_offset[TGSI_FILE_INPUT] = 1; 3480b8e80941Smrg if (ctx.info.num_inputs) 3481848b8605Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 3482848b8605Smrg } 3483b8e80941Smrg if (ctx.type == PIPE_SHADER_FRAGMENT) { 3484b8e80941Smrg if (ctx.bc->chip_class >= EVERGREEN) 3485b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 3486b8e80941Smrg else 3487b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 3488b8e80941Smrg 3489b8e80941Smrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3490b8e80941Smrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) { 3491b8e80941Smrg ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 3492b8e80941Smrg shader->uses_helper_invocation = true; 3493b8e80941Smrg } 3494b8e80941Smrg } 3495848b8605Smrg } 3496b8e80941Smrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 3497848b8605Smrg /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 3498848b8605Smrg ctx.file_offset[TGSI_FILE_INPUT] = 2; 3499848b8605Smrg } 3500b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 3501b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT] = 1; 3502b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_EVAL) { 3503b8e80941Smrg bool add_tesscoord = false, add_tess_inout = false; 3504b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT] = 1; 3505b8e80941Smrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3506b8e80941Smrg /* if we have tesscoord save one reg */ 3507b8e80941Smrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD) 3508b8e80941Smrg add_tesscoord = true; 3509b8e80941Smrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER || 3510b8e80941Smrg ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER) 3511b8e80941Smrg add_tess_inout = true; 3512b8e80941Smrg } 3513b8e80941Smrg if (add_tesscoord || add_tess_inout) 3514b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT]++; 3515b8e80941Smrg if (add_tess_inout) 3516b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT]+=2; 3517b8e80941Smrg } 3518b8e80941Smrg if (ctx.type == PIPE_SHADER_COMPUTE) { 3519b8e80941Smrg ctx.file_offset[TGSI_FILE_INPUT] = 2; 3520b8e80941Smrg for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3521b8e80941Smrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE) 3522b8e80941Smrg ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 3523b8e80941Smrg if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE) 3524b8e80941Smrg ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++; 3525b8e80941Smrg } 3526b8e80941Smrg } 3527848b8605Smrg 3528b8e80941Smrg ctx.file_offset[TGSI_FILE_OUTPUT] = 3529848b8605Smrg ctx.file_offset[TGSI_FILE_INPUT] + 3530848b8605Smrg ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3531848b8605Smrg ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 3532848b8605Smrg ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 3533848b8605Smrg 3534848b8605Smrg /* Outside the GPR range. This will be translated to one of the 3535848b8605Smrg * kcache banks later. */ 3536848b8605Smrg ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 3537848b8605Smrg ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3538848b8605Smrg 3539b8e80941Smrg pipeshader->scratch_space_needed = 0; 3540b8e80941Smrg int regno = ctx.file_offset[TGSI_FILE_TEMPORARY] + 3541b8e80941Smrg ctx.info.file_max[TGSI_FILE_TEMPORARY]; 3542b8e80941Smrg if (regno > 124) { 3543b8e80941Smrg choose_spill_arrays(&ctx, ®no, &pipeshader->scratch_space_needed); 3544b8e80941Smrg shader->indirect_files = ctx.info.indirect_files; 3545b8e80941Smrg } 3546b8e80941Smrg shader->needs_scratch_space = pipeshader->scratch_space_needed != 0; 3547b8e80941Smrg 3548b8e80941Smrg ctx.bc->ar_reg = ++regno; 3549b8e80941Smrg ctx.bc->index_reg[0] = ++regno; 3550b8e80941Smrg ctx.bc->index_reg[1] = ++regno; 3551b8e80941Smrg 3552b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3553b8e80941Smrg ctx.tess_input_info = ++regno; 3554b8e80941Smrg ctx.tess_output_info = ++regno; 3555b8e80941Smrg } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { 3556b8e80941Smrg ctx.tess_input_info = ++regno; 3557b8e80941Smrg ctx.tess_output_info = ++regno; 3558b8e80941Smrg } else if (ctx.type == PIPE_SHADER_GEOMETRY) { 3559b8e80941Smrg ctx.gs_export_gpr_tregs[0] = ++regno; 3560b8e80941Smrg ctx.gs_export_gpr_tregs[1] = ++regno; 3561b8e80941Smrg ctx.gs_export_gpr_tregs[2] = ++regno; 3562b8e80941Smrg ctx.gs_export_gpr_tregs[3] = ++regno; 3563b8e80941Smrg if (ctx.shader->gs_tri_strip_adj_fix) { 3564b8e80941Smrg ctx.gs_rotated_input[0] = ++regno; 3565b8e80941Smrg ctx.gs_rotated_input[1] = ++regno; 3566b8e80941Smrg } else { 3567b8e80941Smrg ctx.gs_rotated_input[0] = 0; 3568b8e80941Smrg ctx.gs_rotated_input[1] = 1; 3569b8e80941Smrg } 3570b8e80941Smrg } 3571b8e80941Smrg 3572b8e80941Smrg if (shader->uses_images) { 3573b8e80941Smrg ctx.thread_id_gpr = ++regno; 3574b8e80941Smrg } 3575b8e80941Smrg ctx.temp_reg = ++regno; 3576b8e80941Smrg 3577b8e80941Smrg shader->max_arrays = 0; 3578b8e80941Smrg shader->num_arrays = 0; 3579848b8605Smrg if (indirect_gprs) { 3580848b8605Smrg 3581848b8605Smrg if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3582848b8605Smrg r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3583848b8605Smrg ctx.file_offset[TGSI_FILE_OUTPUT] - 3584848b8605Smrg ctx.file_offset[TGSI_FILE_INPUT], 3585848b8605Smrg 0x0F); 3586848b8605Smrg } 3587848b8605Smrg if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3588848b8605Smrg r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3589848b8605Smrg ctx.file_offset[TGSI_FILE_TEMPORARY] - 3590848b8605Smrg ctx.file_offset[TGSI_FILE_OUTPUT], 3591848b8605Smrg 0x0F); 3592848b8605Smrg } 3593848b8605Smrg } 3594848b8605Smrg 3595848b8605Smrg ctx.nliterals = 0; 3596848b8605Smrg ctx.literals = NULL; 3597b8e80941Smrg ctx.max_driver_temp_used = 0; 3598b8e80941Smrg 3599b8e80941Smrg shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 3600b8e80941Smrg ctx.info.colors_written == 1; 3601b8e80941Smrg shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 3602b8e80941Smrg shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 3603b8e80941Smrg 3604b8e80941Smrg if (ctx.type == PIPE_SHADER_VERTEX || 3605b8e80941Smrg ctx.type == PIPE_SHADER_GEOMETRY || 3606b8e80941Smrg ctx.type == PIPE_SHADER_TESS_EVAL) { 3607b8e80941Smrg shader->cc_dist_mask = (1 << (ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED] + 3608b8e80941Smrg ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED])) - 1; 3609b8e80941Smrg shader->clip_dist_write = (1 << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]) - 1; 3610b8e80941Smrg shader->cull_dist_write = ((1 << ctx.info.properties[TGSI_PROPERTY_NUM_CULLDIST_ENABLED]) - 1) << ctx.info.properties[TGSI_PROPERTY_NUM_CLIPDIST_ENABLED]; 3611b8e80941Smrg } 3612b8e80941Smrg 3613b8e80941Smrg if (shader->vs_as_gs_a) 3614b8e80941Smrg vs_add_primid_output(&ctx, key.vs.prim_id_out); 3615b8e80941Smrg 3616b8e80941Smrg if (ctx.thread_id_gpr != -1) { 3617b8e80941Smrg r = load_thread_id_gpr(&ctx); 3618b8e80941Smrg if (r) 3619b8e80941Smrg return r; 3620b8e80941Smrg } 3621b8e80941Smrg 3622b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_EVAL) 3623b8e80941Smrg r600_fetch_tess_io_info(&ctx); 3624b8e80941Smrg 3625848b8605Smrg while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3626848b8605Smrg tgsi_parse_token(&ctx.parse); 3627848b8605Smrg switch (ctx.parse.FullToken.Token.Type) { 3628848b8605Smrg case TGSI_TOKEN_TYPE_IMMEDIATE: 3629848b8605Smrg immediate = &ctx.parse.FullToken.FullImmediate; 3630848b8605Smrg ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 3631848b8605Smrg if(ctx.literals == NULL) { 3632848b8605Smrg r = -ENOMEM; 3633848b8605Smrg goto out_err; 3634848b8605Smrg } 3635848b8605Smrg ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 3636848b8605Smrg ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 3637848b8605Smrg ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 3638848b8605Smrg ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 3639848b8605Smrg ctx.nliterals++; 3640848b8605Smrg break; 3641848b8605Smrg case TGSI_TOKEN_TYPE_DECLARATION: 3642848b8605Smrg r = tgsi_declaration(&ctx); 3643848b8605Smrg if (r) 3644848b8605Smrg goto out_err; 3645848b8605Smrg break; 3646848b8605Smrg case TGSI_TOKEN_TYPE_INSTRUCTION: 3647848b8605Smrg case TGSI_TOKEN_TYPE_PROPERTY: 3648848b8605Smrg break; 3649848b8605Smrg default: 3650848b8605Smrg R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 3651848b8605Smrg r = -EINVAL; 3652848b8605Smrg goto out_err; 3653848b8605Smrg } 3654848b8605Smrg } 3655848b8605Smrg 3656b8e80941Smrg shader->ring_item_sizes[0] = ctx.next_ring_offset; 3657b8e80941Smrg shader->ring_item_sizes[1] = 0; 3658b8e80941Smrg shader->ring_item_sizes[2] = 0; 3659b8e80941Smrg shader->ring_item_sizes[3] = 0; 3660848b8605Smrg 3661848b8605Smrg /* Process two side if needed */ 3662848b8605Smrg if (shader->two_side && ctx.colors_used) { 3663848b8605Smrg int i, count = ctx.shader->ninput; 3664848b8605Smrg unsigned next_lds_loc = ctx.shader->nlds; 3665848b8605Smrg 3666848b8605Smrg /* additional inputs will be allocated right after the existing inputs, 3667848b8605Smrg * we won't need them after the color selection, so we don't need to 3668848b8605Smrg * reserve these gprs for the rest of the shader code and to adjust 3669848b8605Smrg * output offsets etc. */ 3670848b8605Smrg int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3671848b8605Smrg ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3672848b8605Smrg 3673b8e80941Smrg /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3674848b8605Smrg if (ctx.face_gpr == -1) { 3675848b8605Smrg i = ctx.shader->ninput++; 3676848b8605Smrg ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3677848b8605Smrg ctx.shader->input[i].spi_sid = 0; 3678848b8605Smrg ctx.shader->input[i].gpr = gpr++; 3679848b8605Smrg ctx.face_gpr = ctx.shader->input[i].gpr; 3680848b8605Smrg } 3681848b8605Smrg 3682848b8605Smrg for (i = 0; i < count; i++) { 3683848b8605Smrg if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3684848b8605Smrg int ni = ctx.shader->ninput++; 3685848b8605Smrg memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3686848b8605Smrg ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3687848b8605Smrg ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3688848b8605Smrg ctx.shader->input[ni].gpr = gpr++; 3689848b8605Smrg // TGSI to LLVM needs to know the lds position of inputs. 3690848b8605Smrg // Non LLVM path computes it later (in process_twoside_color) 3691848b8605Smrg ctx.shader->input[ni].lds_pos = next_lds_loc++; 3692848b8605Smrg ctx.shader->input[i].back_color_input = ni; 3693848b8605Smrg if (ctx.bc->chip_class >= EVERGREEN) { 3694848b8605Smrg if ((r = evergreen_interp_input(&ctx, ni))) 3695848b8605Smrg return r; 3696848b8605Smrg } 3697848b8605Smrg } 3698848b8605Smrg } 3699848b8605Smrg } 3700848b8605Smrg 3701848b8605Smrg if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3702848b8605Smrg shader->nr_ps_max_color_exports = 8; 3703848b8605Smrg 3704b8e80941Smrg if (ctx.shader->uses_helper_invocation) { 3705b8e80941Smrg if (ctx.bc->chip_class == CAYMAN) 3706b8e80941Smrg r = cm_load_helper_invocation(&ctx); 3707b8e80941Smrg else 3708b8e80941Smrg r = eg_load_helper_invocation(&ctx); 3709b8e80941Smrg if (r) 3710b8e80941Smrg return r; 3711b8e80941Smrg } 3712848b8605Smrg 3713b8e80941Smrg /* 3714b8e80941Smrg * XXX this relies on fixed_pt_position_gpr only being present when 3715b8e80941Smrg * this shader should be executed per sample. Should be the case for now... 3716b8e80941Smrg */ 3717b8e80941Smrg if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) { 3718b8e80941Smrg /* 3719b8e80941Smrg * Fix up sample mask. The hw always gives us coverage mask for 3720b8e80941Smrg * the pixel. However, for per-sample shading, we need the 3721b8e80941Smrg * coverage for the shader invocation only. 3722b8e80941Smrg * Also, with disabled msaa, only the first bit should be set 3723b8e80941Smrg * (luckily the same fixup works for both problems). 3724b8e80941Smrg * For now, we can only do it if we know this shader is always 3725b8e80941Smrg * executed per sample (due to usage of bits in the shader 3726b8e80941Smrg * forcing per-sample execution). 3727b8e80941Smrg * If the fb is not multisampled, we'd do unnecessary work but 3728b8e80941Smrg * it should still be correct. 3729b8e80941Smrg * It will however do nothing for sample shading according 3730b8e80941Smrg * to MinSampleShading. 3731b8e80941Smrg */ 3732b8e80941Smrg struct r600_bytecode_alu alu; 3733b8e80941Smrg int tmp = r600_get_temp(&ctx); 3734b8e80941Smrg assert(ctx.face_gpr != -1); 3735b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3736b8e80941Smrg 3737b8e80941Smrg alu.op = ALU_OP2_LSHL_INT; 3738b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3739b8e80941Smrg alu.src[0].value = 0x1; 3740b8e80941Smrg alu.src[1].sel = ctx.fixed_pt_position_gpr; 3741b8e80941Smrg alu.src[1].chan = 3; 3742b8e80941Smrg alu.dst.sel = tmp; 3743b8e80941Smrg alu.dst.chan = 0; 3744b8e80941Smrg alu.dst.write = 1; 3745b8e80941Smrg alu.last = 1; 3746b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3747b8e80941Smrg return r; 3748b8e80941Smrg 3749b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3750b8e80941Smrg alu.op = ALU_OP2_AND_INT; 3751b8e80941Smrg alu.src[0].sel = tmp; 3752b8e80941Smrg alu.src[1].sel = ctx.face_gpr; 3753b8e80941Smrg alu.src[1].chan = 2; 3754b8e80941Smrg alu.dst.sel = ctx.face_gpr; 3755b8e80941Smrg alu.dst.chan = 2; 3756b8e80941Smrg alu.dst.write = 1; 3757b8e80941Smrg alu.last = 1; 3758b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3759b8e80941Smrg return r; 3760b8e80941Smrg } 3761b8e80941Smrg 3762b8e80941Smrg if (ctx.fragcoord_input >= 0) { 3763b8e80941Smrg if (ctx.bc->chip_class == CAYMAN) { 3764b8e80941Smrg for (j = 0 ; j < 4; j++) { 3765848b8605Smrg struct r600_bytecode_alu alu; 3766848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3767848b8605Smrg alu.op = ALU_OP1_RECIP_IEEE; 3768848b8605Smrg alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3769848b8605Smrg alu.src[0].chan = 3; 3770848b8605Smrg 3771848b8605Smrg alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3772b8e80941Smrg alu.dst.chan = j; 3773b8e80941Smrg alu.dst.write = (j == 3); 3774b8e80941Smrg alu.last = (j == 3); 3775848b8605Smrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3776848b8605Smrg return r; 3777848b8605Smrg } 3778b8e80941Smrg } else { 3779b8e80941Smrg struct r600_bytecode_alu alu; 3780b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3781b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 3782b8e80941Smrg alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3783b8e80941Smrg alu.src[0].chan = 3; 3784b8e80941Smrg 3785b8e80941Smrg alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3786b8e80941Smrg alu.dst.chan = 3; 3787b8e80941Smrg alu.dst.write = 1; 3788b8e80941Smrg alu.last = 1; 3789b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3790b8e80941Smrg return r; 3791848b8605Smrg } 3792b8e80941Smrg } 3793848b8605Smrg 3794b8e80941Smrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 3795b8e80941Smrg struct r600_bytecode_alu alu; 3796b8e80941Smrg int r; 3797848b8605Smrg 3798b8e80941Smrg /* GS thread with no output workaround - emit a cut at start of GS */ 3799b8e80941Smrg if (ctx.bc->chip_class == R600) 3800b8e80941Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3801b8e80941Smrg 3802b8e80941Smrg for (j = 0; j < 4; j++) { 3803848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3804848b8605Smrg alu.op = ALU_OP1_MOV; 3805848b8605Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3806848b8605Smrg alu.src[0].value = 0; 3807b8e80941Smrg alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3808848b8605Smrg alu.dst.write = 1; 3809848b8605Smrg alu.last = 1; 3810848b8605Smrg r = r600_bytecode_add_alu(ctx.bc, &alu); 3811848b8605Smrg if (r) 3812848b8605Smrg return r; 3813848b8605Smrg } 3814b8e80941Smrg 3815b8e80941Smrg if (ctx.shader->gs_tri_strip_adj_fix) { 3816b8e80941Smrg r = single_alu_op2(&ctx, ALU_OP2_AND_INT, 3817b8e80941Smrg ctx.gs_rotated_input[0], 2, 3818b8e80941Smrg 0, 2, 3819b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 1); 3820b8e80941Smrg if (r) 3821848b8605Smrg return r; 3822848b8605Smrg 3823b8e80941Smrg for (i = 0; i < 6; i++) { 3824b8e80941Smrg int rotated = (i + 4) % 6; 3825b8e80941Smrg int offset_reg = i / 3; 3826b8e80941Smrg int offset_chan = i % 3; 3827b8e80941Smrg int rotated_offset_reg = rotated / 3; 3828b8e80941Smrg int rotated_offset_chan = rotated % 3; 3829b8e80941Smrg 3830b8e80941Smrg if (offset_reg == 0 && offset_chan == 2) 3831b8e80941Smrg offset_chan = 3; 3832b8e80941Smrg if (rotated_offset_reg == 0 && rotated_offset_chan == 2) 3833b8e80941Smrg rotated_offset_chan = 3; 3834b8e80941Smrg 3835b8e80941Smrg r = single_alu_op3(&ctx, ALU_OP3_CNDE_INT, 3836b8e80941Smrg ctx.gs_rotated_input[offset_reg], offset_chan, 3837b8e80941Smrg ctx.gs_rotated_input[0], 2, 3838b8e80941Smrg offset_reg, offset_chan, 3839b8e80941Smrg rotated_offset_reg, rotated_offset_chan); 3840848b8605Smrg if (r) 3841b8e80941Smrg return r; 3842b8e80941Smrg } 3843b8e80941Smrg } 3844b8e80941Smrg } 3845b8e80941Smrg 3846b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 3847b8e80941Smrg r600_fetch_tess_io_info(&ctx); 3848b8e80941Smrg 3849b8e80941Smrg if (shader->two_side && ctx.colors_used) { 3850b8e80941Smrg if ((r = process_twoside_color_inputs(&ctx))) 3851b8e80941Smrg return r; 3852b8e80941Smrg } 3853b8e80941Smrg 3854b8e80941Smrg tgsi_parse_init(&ctx.parse, tokens); 3855b8e80941Smrg while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3856b8e80941Smrg tgsi_parse_token(&ctx.parse); 3857b8e80941Smrg switch (ctx.parse.FullToken.Token.Type) { 3858b8e80941Smrg case TGSI_TOKEN_TYPE_INSTRUCTION: 3859b8e80941Smrg r = tgsi_is_supported(&ctx); 3860b8e80941Smrg if (r) 3861b8e80941Smrg goto out_err; 3862b8e80941Smrg ctx.max_driver_temp_used = 0; 3863b8e80941Smrg /* reserve first tmp for everyone */ 3864b8e80941Smrg r600_get_temp(&ctx); 3865848b8605Smrg 3866b8e80941Smrg opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3867b8e80941Smrg if ((r = tgsi_split_constant(&ctx))) 3868b8e80941Smrg goto out_err; 3869b8e80941Smrg if ((r = tgsi_split_literal_constant(&ctx))) 3870b8e80941Smrg goto out_err; 3871b8e80941Smrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 3872b8e80941Smrg if ((r = tgsi_split_gs_inputs(&ctx))) 3873848b8605Smrg goto out_err; 3874b8e80941Smrg } else if (lds_inputs) { 3875b8e80941Smrg if ((r = tgsi_split_lds_inputs(&ctx))) 3876848b8605Smrg goto out_err; 3877b8e80941Smrg } 3878b8e80941Smrg if (ctx.bc->chip_class == CAYMAN) 3879b8e80941Smrg ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3880b8e80941Smrg else if (ctx.bc->chip_class >= EVERGREEN) 3881b8e80941Smrg ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3882b8e80941Smrg else 3883b8e80941Smrg ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 3884b8e80941Smrg 3885b8e80941Smrg ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise; 3886b8e80941Smrg 3887b8e80941Smrg r = ctx.inst_info->process(&ctx); 3888b8e80941Smrg if (r) 3889b8e80941Smrg goto out_err; 3890b8e80941Smrg 3891b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3892b8e80941Smrg r = r600_store_tcs_output(&ctx); 3893848b8605Smrg if (r) 3894848b8605Smrg goto out_err; 3895848b8605Smrg } 3896b8e80941Smrg break; 3897b8e80941Smrg default: 3898b8e80941Smrg break; 3899848b8605Smrg } 3900848b8605Smrg } 3901848b8605Smrg 3902848b8605Smrg /* Reset the temporary register counter. */ 3903848b8605Smrg ctx.max_driver_temp_used = 0; 3904848b8605Smrg 3905848b8605Smrg noutput = shader->noutput; 3906848b8605Smrg 3907848b8605Smrg if (!ring_outputs && ctx.clip_vertex_write) { 3908848b8605Smrg unsigned clipdist_temp[2]; 3909848b8605Smrg 3910848b8605Smrg clipdist_temp[0] = r600_get_temp(&ctx); 3911848b8605Smrg clipdist_temp[1] = r600_get_temp(&ctx); 3912848b8605Smrg 3913848b8605Smrg /* need to convert a clipvertex write into clipdistance writes and not export 3914848b8605Smrg the clip vertex anymore */ 3915848b8605Smrg 3916848b8605Smrg memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3917848b8605Smrg shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3918848b8605Smrg shader->output[noutput].gpr = clipdist_temp[0]; 3919848b8605Smrg noutput++; 3920848b8605Smrg shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3921848b8605Smrg shader->output[noutput].gpr = clipdist_temp[1]; 3922848b8605Smrg noutput++; 3923848b8605Smrg 3924848b8605Smrg /* reset spi_sid for clipvertex output to avoid confusing spi */ 3925848b8605Smrg shader->output[ctx.cv_output].spi_sid = 0; 3926848b8605Smrg 3927848b8605Smrg shader->clip_dist_write = 0xFF; 3928b8e80941Smrg shader->cc_dist_mask = 0xFF; 3929848b8605Smrg 3930848b8605Smrg for (i = 0; i < 8; i++) { 3931848b8605Smrg int oreg = i >> 2; 3932848b8605Smrg int ochan = i & 3; 3933848b8605Smrg 3934848b8605Smrg for (j = 0; j < 4; j++) { 3935848b8605Smrg struct r600_bytecode_alu alu; 3936848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3937848b8605Smrg alu.op = ALU_OP2_DOT4; 3938848b8605Smrg alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3939848b8605Smrg alu.src[0].chan = j; 3940848b8605Smrg 3941848b8605Smrg alu.src[1].sel = 512 + i; 3942b8e80941Smrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3943848b8605Smrg alu.src[1].chan = j; 3944848b8605Smrg 3945848b8605Smrg alu.dst.sel = clipdist_temp[oreg]; 3946848b8605Smrg alu.dst.chan = j; 3947848b8605Smrg alu.dst.write = (j == ochan); 3948848b8605Smrg if (j == 3) 3949848b8605Smrg alu.last = 1; 3950b8e80941Smrg r = r600_bytecode_add_alu(ctx.bc, &alu); 3951848b8605Smrg if (r) 3952848b8605Smrg return r; 3953848b8605Smrg } 3954848b8605Smrg } 3955848b8605Smrg } 3956848b8605Smrg 3957848b8605Smrg /* Add stream outputs. */ 3958b8e80941Smrg if (so.num_outputs) { 3959b8e80941Smrg bool emit = false; 3960b8e80941Smrg if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX) 3961b8e80941Smrg emit = true; 3962b8e80941Smrg if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL) 3963b8e80941Smrg emit = true; 3964b8e80941Smrg if (emit) 3965b8e80941Smrg emit_streamout(&ctx, &so, -1, NULL); 3966b8e80941Smrg } 3967b8e80941Smrg pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3968848b8605Smrg convert_edgeflag_to_int(&ctx); 3969848b8605Smrg 3970b8e80941Smrg if (ctx.type == PIPE_SHADER_TESS_CTRL) 3971b8e80941Smrg r600_emit_tess_factor(&ctx); 3972b8e80941Smrg 3973b8e80941Smrg if (lds_outputs) { 3974b8e80941Smrg if (ctx.type == PIPE_SHADER_VERTEX) { 3975b8e80941Smrg if (ctx.shader->noutput) 3976b8e80941Smrg emit_lds_vs_writes(&ctx); 3977b8e80941Smrg } 3978b8e80941Smrg } else if (ring_outputs) { 3979b8e80941Smrg if (shader->vs_as_es || shader->tes_as_es) { 3980b8e80941Smrg ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3981b8e80941Smrg ctx.gs_export_gpr_tregs[1] = -1; 3982b8e80941Smrg ctx.gs_export_gpr_tregs[2] = -1; 3983b8e80941Smrg ctx.gs_export_gpr_tregs[3] = -1; 3984b8e80941Smrg 3985b8e80941Smrg emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3986b8e80941Smrg } 3987848b8605Smrg } else { 3988848b8605Smrg /* Export output */ 3989848b8605Smrg next_clip_base = shader->vs_out_misc_write ? 62 : 61; 3990848b8605Smrg 3991848b8605Smrg for (i = 0, j = 0; i < noutput; i++, j++) { 3992848b8605Smrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3993848b8605Smrg output[j].gpr = shader->output[i].gpr; 3994848b8605Smrg output[j].elem_size = 3; 3995848b8605Smrg output[j].swizzle_x = 0; 3996848b8605Smrg output[j].swizzle_y = 1; 3997848b8605Smrg output[j].swizzle_z = 2; 3998848b8605Smrg output[j].swizzle_w = 3; 3999848b8605Smrg output[j].burst_count = 1; 4000b8e80941Smrg output[j].type = 0xffffffff; 4001848b8605Smrg output[j].op = CF_OP_EXPORT; 4002848b8605Smrg switch (ctx.type) { 4003b8e80941Smrg case PIPE_SHADER_VERTEX: 4004b8e80941Smrg case PIPE_SHADER_TESS_EVAL: 4005848b8605Smrg switch (shader->output[i].name) { 4006848b8605Smrg case TGSI_SEMANTIC_POSITION: 4007848b8605Smrg output[j].array_base = 60; 4008848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4009848b8605Smrg pos_emitted = true; 4010848b8605Smrg break; 4011848b8605Smrg 4012848b8605Smrg case TGSI_SEMANTIC_PSIZE: 4013848b8605Smrg output[j].array_base = 61; 4014848b8605Smrg output[j].swizzle_y = 7; 4015848b8605Smrg output[j].swizzle_z = 7; 4016848b8605Smrg output[j].swizzle_w = 7; 4017848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4018848b8605Smrg pos_emitted = true; 4019848b8605Smrg break; 4020848b8605Smrg case TGSI_SEMANTIC_EDGEFLAG: 4021848b8605Smrg output[j].array_base = 61; 4022848b8605Smrg output[j].swizzle_x = 7; 4023848b8605Smrg output[j].swizzle_y = 0; 4024848b8605Smrg output[j].swizzle_z = 7; 4025848b8605Smrg output[j].swizzle_w = 7; 4026848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4027848b8605Smrg pos_emitted = true; 4028848b8605Smrg break; 4029848b8605Smrg case TGSI_SEMANTIC_LAYER: 4030848b8605Smrg /* spi_sid is 0 for outputs that are 4031848b8605Smrg * not consumed by PS */ 4032848b8605Smrg if (shader->output[i].spi_sid) { 4033848b8605Smrg output[j].array_base = next_param_base++; 4034848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4035848b8605Smrg j++; 4036848b8605Smrg memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 4037848b8605Smrg } 4038848b8605Smrg output[j].array_base = 61; 4039848b8605Smrg output[j].swizzle_x = 7; 4040848b8605Smrg output[j].swizzle_y = 7; 4041848b8605Smrg output[j].swizzle_z = 0; 4042848b8605Smrg output[j].swizzle_w = 7; 4043848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4044848b8605Smrg pos_emitted = true; 4045848b8605Smrg break; 4046848b8605Smrg case TGSI_SEMANTIC_VIEWPORT_INDEX: 4047848b8605Smrg /* spi_sid is 0 for outputs that are 4048848b8605Smrg * not consumed by PS */ 4049848b8605Smrg if (shader->output[i].spi_sid) { 4050848b8605Smrg output[j].array_base = next_param_base++; 4051848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4052848b8605Smrg j++; 4053848b8605Smrg memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 4054848b8605Smrg } 4055848b8605Smrg output[j].array_base = 61; 4056848b8605Smrg output[j].swizzle_x = 7; 4057848b8605Smrg output[j].swizzle_y = 7; 4058848b8605Smrg output[j].swizzle_z = 7; 4059848b8605Smrg output[j].swizzle_w = 0; 4060848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4061848b8605Smrg pos_emitted = true; 4062848b8605Smrg break; 4063848b8605Smrg case TGSI_SEMANTIC_CLIPVERTEX: 4064848b8605Smrg j--; 4065848b8605Smrg break; 4066848b8605Smrg case TGSI_SEMANTIC_CLIPDIST: 4067848b8605Smrg output[j].array_base = next_clip_base++; 4068848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4069848b8605Smrg pos_emitted = true; 4070848b8605Smrg /* spi_sid is 0 for clipdistance outputs that were generated 4071848b8605Smrg * for clipvertex - we don't need to pass them to PS */ 4072848b8605Smrg if (shader->output[i].spi_sid) { 4073848b8605Smrg j++; 4074848b8605Smrg /* duplicate it as PARAM to pass to the pixel shader */ 4075848b8605Smrg memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 4076848b8605Smrg output[j].array_base = next_param_base++; 4077848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4078848b8605Smrg } 4079848b8605Smrg break; 4080848b8605Smrg case TGSI_SEMANTIC_FOG: 4081848b8605Smrg output[j].swizzle_y = 4; /* 0 */ 4082848b8605Smrg output[j].swizzle_z = 4; /* 0 */ 4083848b8605Smrg output[j].swizzle_w = 5; /* 1 */ 4084848b8605Smrg break; 4085b8e80941Smrg case TGSI_SEMANTIC_PRIMID: 4086b8e80941Smrg output[j].swizzle_x = 2; 4087b8e80941Smrg output[j].swizzle_y = 4; /* 0 */ 4088b8e80941Smrg output[j].swizzle_z = 4; /* 0 */ 4089b8e80941Smrg output[j].swizzle_w = 4; /* 0 */ 4090b8e80941Smrg break; 4091848b8605Smrg } 4092b8e80941Smrg 4093848b8605Smrg break; 4094b8e80941Smrg case PIPE_SHADER_FRAGMENT: 4095848b8605Smrg if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 4096848b8605Smrg /* never export more colors than the number of CBs */ 4097848b8605Smrg if (shader->output[i].sid >= max_color_exports) { 4098848b8605Smrg /* skip export */ 4099848b8605Smrg j--; 4100848b8605Smrg continue; 4101848b8605Smrg } 4102b8e80941Smrg output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 4103848b8605Smrg output[j].array_base = shader->output[i].sid; 4104848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4105848b8605Smrg shader->nr_ps_color_exports++; 4106b8e80941Smrg shader->ps_color_export_mask |= (0xf << (shader->output[i].sid * 4)); 4107b8e80941Smrg 4108b8e80941Smrg /* If the i-th target format is set, all previous target formats must 4109b8e80941Smrg * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well. 4110b8e80941Smrg */ 4111b8e80941Smrg if (shader->output[i].sid > 0) 4112b8e80941Smrg for (unsigned x = 0; x < shader->output[i].sid; x++) 4113b8e80941Smrg shader->ps_color_export_mask |= (1 << (x*4)); 4114b8e80941Smrg 4115b8e80941Smrg if (shader->output[i].sid > shader->ps_export_highest) 4116b8e80941Smrg shader->ps_export_highest = shader->output[i].sid; 4117848b8605Smrg if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 4118848b8605Smrg for (k = 1; k < max_color_exports; k++) { 4119848b8605Smrg j++; 4120848b8605Smrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4121848b8605Smrg output[j].gpr = shader->output[i].gpr; 4122848b8605Smrg output[j].elem_size = 3; 4123848b8605Smrg output[j].swizzle_x = 0; 4124848b8605Smrg output[j].swizzle_y = 1; 4125848b8605Smrg output[j].swizzle_z = 2; 4126b8e80941Smrg output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 4127848b8605Smrg output[j].burst_count = 1; 4128848b8605Smrg output[j].array_base = k; 4129848b8605Smrg output[j].op = CF_OP_EXPORT; 4130848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4131848b8605Smrg shader->nr_ps_color_exports++; 4132b8e80941Smrg if (k > shader->ps_export_highest) 4133b8e80941Smrg shader->ps_export_highest = k; 4134b8e80941Smrg shader->ps_color_export_mask |= (0xf << (j * 4)); 4135848b8605Smrg } 4136848b8605Smrg } 4137848b8605Smrg } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 4138848b8605Smrg output[j].array_base = 61; 4139848b8605Smrg output[j].swizzle_x = 2; 4140848b8605Smrg output[j].swizzle_y = 7; 4141848b8605Smrg output[j].swizzle_z = output[j].swizzle_w = 7; 4142848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4143848b8605Smrg } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 4144848b8605Smrg output[j].array_base = 61; 4145848b8605Smrg output[j].swizzle_x = 7; 4146848b8605Smrg output[j].swizzle_y = 1; 4147848b8605Smrg output[j].swizzle_z = output[j].swizzle_w = 7; 4148848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4149b8e80941Smrg } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 4150b8e80941Smrg output[j].array_base = 61; 4151b8e80941Smrg output[j].swizzle_x = 7; 4152b8e80941Smrg output[j].swizzle_y = 7; 4153b8e80941Smrg output[j].swizzle_z = 0; 4154b8e80941Smrg output[j].swizzle_w = 7; 4155b8e80941Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4156848b8605Smrg } else { 4157848b8605Smrg R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 4158848b8605Smrg r = -EINVAL; 4159848b8605Smrg goto out_err; 4160848b8605Smrg } 4161848b8605Smrg break; 4162b8e80941Smrg case PIPE_SHADER_TESS_CTRL: 4163b8e80941Smrg break; 4164848b8605Smrg default: 4165848b8605Smrg R600_ERR("unsupported processor type %d\n", ctx.type); 4166848b8605Smrg r = -EINVAL; 4167848b8605Smrg goto out_err; 4168848b8605Smrg } 4169848b8605Smrg 4170b8e80941Smrg if (output[j].type == 0xffffffff) { 4171848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4172848b8605Smrg output[j].array_base = next_param_base++; 4173848b8605Smrg } 4174848b8605Smrg } 4175848b8605Smrg 4176848b8605Smrg /* add fake position export */ 4177b8e80941Smrg if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) { 4178848b8605Smrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4179848b8605Smrg output[j].gpr = 0; 4180848b8605Smrg output[j].elem_size = 3; 4181848b8605Smrg output[j].swizzle_x = 7; 4182848b8605Smrg output[j].swizzle_y = 7; 4183848b8605Smrg output[j].swizzle_z = 7; 4184848b8605Smrg output[j].swizzle_w = 7; 4185848b8605Smrg output[j].burst_count = 1; 4186848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 4187848b8605Smrg output[j].array_base = 60; 4188848b8605Smrg output[j].op = CF_OP_EXPORT; 4189848b8605Smrg j++; 4190848b8605Smrg } 4191848b8605Smrg 4192848b8605Smrg /* add fake param output for vertex shader if no param is exported */ 4193b8e80941Smrg if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) { 4194848b8605Smrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4195848b8605Smrg output[j].gpr = 0; 4196848b8605Smrg output[j].elem_size = 3; 4197848b8605Smrg output[j].swizzle_x = 7; 4198848b8605Smrg output[j].swizzle_y = 7; 4199848b8605Smrg output[j].swizzle_z = 7; 4200848b8605Smrg output[j].swizzle_w = 7; 4201848b8605Smrg output[j].burst_count = 1; 4202848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 4203848b8605Smrg output[j].array_base = 0; 4204848b8605Smrg output[j].op = CF_OP_EXPORT; 4205848b8605Smrg j++; 4206848b8605Smrg } 4207848b8605Smrg 4208848b8605Smrg /* add fake pixel export */ 4209b8e80941Smrg if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) { 4210848b8605Smrg memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 4211848b8605Smrg output[j].gpr = 0; 4212848b8605Smrg output[j].elem_size = 3; 4213848b8605Smrg output[j].swizzle_x = 7; 4214848b8605Smrg output[j].swizzle_y = 7; 4215848b8605Smrg output[j].swizzle_z = 7; 4216848b8605Smrg output[j].swizzle_w = 7; 4217848b8605Smrg output[j].burst_count = 1; 4218848b8605Smrg output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 4219848b8605Smrg output[j].array_base = 0; 4220848b8605Smrg output[j].op = CF_OP_EXPORT; 4221848b8605Smrg j++; 4222b8e80941Smrg shader->nr_ps_color_exports++; 4223b8e80941Smrg shader->ps_color_export_mask = 0xf; 4224848b8605Smrg } 4225848b8605Smrg 4226848b8605Smrg noutput = j; 4227848b8605Smrg 4228848b8605Smrg /* set export done on last export of each type */ 4229b8e80941Smrg for (k = noutput - 1, output_done = 0; k >= 0; k--) { 4230b8e80941Smrg if (!(output_done & (1 << output[k].type))) { 4231b8e80941Smrg output_done |= (1 << output[k].type); 4232b8e80941Smrg output[k].op = CF_OP_EXPORT_DONE; 4233848b8605Smrg } 4234848b8605Smrg } 4235848b8605Smrg /* add output to bytecode */ 4236b8e80941Smrg for (i = 0; i < noutput; i++) { 4237b8e80941Smrg r = r600_bytecode_add_output(ctx.bc, &output[i]); 4238b8e80941Smrg if (r) 4239b8e80941Smrg goto out_err; 4240848b8605Smrg } 4241848b8605Smrg } 4242848b8605Smrg 4243848b8605Smrg /* add program end */ 4244b8e80941Smrg if (ctx.bc->chip_class == CAYMAN) 4245b8e80941Smrg cm_bytecode_add_cf_end(ctx.bc); 4246b8e80941Smrg else { 4247b8e80941Smrg const struct cf_op_info *last = NULL; 4248848b8605Smrg 4249b8e80941Smrg if (ctx.bc->cf_last) 4250b8e80941Smrg last = r600_isa_cf(ctx.bc->cf_last->op); 4251848b8605Smrg 4252b8e80941Smrg /* alu clause instructions don't have EOP bit, so add NOP */ 4253b8e80941Smrg if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_POP) 4254b8e80941Smrg r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 4255848b8605Smrg 4256b8e80941Smrg ctx.bc->cf_last->end_of_program = 1; 4257848b8605Smrg } 4258848b8605Smrg 4259848b8605Smrg /* check GPR limit - we have 124 = 128 - 4 4260848b8605Smrg * (4 are reserved as alu clause temporary registers) */ 4261848b8605Smrg if (ctx.bc->ngpr > 124) { 4262848b8605Smrg R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 4263848b8605Smrg r = -ENOMEM; 4264848b8605Smrg goto out_err; 4265848b8605Smrg } 4266848b8605Smrg 4267b8e80941Smrg if (ctx.type == PIPE_SHADER_GEOMETRY) { 4268848b8605Smrg if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 4269848b8605Smrg return r; 4270848b8605Smrg } 4271848b8605Smrg 4272b8e80941Smrg free(ctx.spilled_arrays); 4273b8e80941Smrg free(ctx.array_infos); 4274848b8605Smrg free(ctx.literals); 4275848b8605Smrg tgsi_parse_free(&ctx.parse); 4276848b8605Smrg return 0; 4277848b8605Smrgout_err: 4278b8e80941Smrg free(ctx.spilled_arrays); 4279b8e80941Smrg free(ctx.array_infos); 4280848b8605Smrg free(ctx.literals); 4281848b8605Smrg tgsi_parse_free(&ctx.parse); 4282848b8605Smrg return r; 4283848b8605Smrg} 4284848b8605Smrg 4285848b8605Smrgstatic int tgsi_unsupported(struct r600_shader_ctx *ctx) 4286848b8605Smrg{ 4287b8e80941Smrg const unsigned tgsi_opcode = 4288b8e80941Smrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 4289848b8605Smrg R600_ERR("%s tgsi opcode unsupported\n", 4290b8e80941Smrg tgsi_get_opcode_name(tgsi_opcode)); 4291848b8605Smrg return -EINVAL; 4292848b8605Smrg} 4293848b8605Smrg 4294b8e80941Smrgstatic int tgsi_end(struct r600_shader_ctx *ctx UNUSED) 4295848b8605Smrg{ 4296848b8605Smrg return 0; 4297848b8605Smrg} 4298848b8605Smrg 4299848b8605Smrgstatic void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 4300848b8605Smrg const struct r600_shader_src *shader_src, 4301848b8605Smrg unsigned chan) 4302848b8605Smrg{ 4303848b8605Smrg bc_src->sel = shader_src->sel; 4304848b8605Smrg bc_src->chan = shader_src->swizzle[chan]; 4305848b8605Smrg bc_src->neg = shader_src->neg; 4306848b8605Smrg bc_src->abs = shader_src->abs; 4307848b8605Smrg bc_src->rel = shader_src->rel; 4308848b8605Smrg bc_src->value = shader_src->value[bc_src->chan]; 4309848b8605Smrg bc_src->kc_bank = shader_src->kc_bank; 4310b8e80941Smrg bc_src->kc_rel = shader_src->kc_rel; 4311848b8605Smrg} 4312848b8605Smrg 4313848b8605Smrgstatic void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 4314848b8605Smrg{ 4315848b8605Smrg bc_src->abs = 1; 4316848b8605Smrg bc_src->neg = 0; 4317848b8605Smrg} 4318848b8605Smrg 4319848b8605Smrgstatic void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 4320848b8605Smrg{ 4321848b8605Smrg bc_src->neg = !bc_src->neg; 4322848b8605Smrg} 4323848b8605Smrg 4324848b8605Smrgstatic void tgsi_dst(struct r600_shader_ctx *ctx, 4325848b8605Smrg const struct tgsi_full_dst_register *tgsi_dst, 4326848b8605Smrg unsigned swizzle, 4327848b8605Smrg struct r600_bytecode_alu_dst *r600_dst) 4328848b8605Smrg{ 4329848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4330848b8605Smrg 4331b8e80941Smrg if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) { 4332b8e80941Smrg bool spilled; 4333b8e80941Smrg unsigned idx; 4334b8e80941Smrg 4335b8e80941Smrg idx = map_tgsi_reg_index_to_r600_gpr(ctx, tgsi_dst->Register.Index, &spilled); 4336b8e80941Smrg 4337b8e80941Smrg if (spilled) { 4338b8e80941Smrg struct r600_bytecode_output cf; 4339b8e80941Smrg int reg = 0; 4340b8e80941Smrg int r; 4341b8e80941Smrg bool add_pending_output = true; 4342b8e80941Smrg 4343b8e80941Smrg memset(&cf, 0, sizeof(struct r600_bytecode_output)); 4344b8e80941Smrg get_spilled_array_base_and_size(ctx, tgsi_dst->Register.Index, 4345b8e80941Smrg &cf.array_base, &cf.array_size); 4346b8e80941Smrg 4347b8e80941Smrg /* If no component has spilled, reserve a register and add the spill code 4348b8e80941Smrg * ctx->bc->n_pending_outputs is cleared after each instruction group */ 4349b8e80941Smrg if (ctx->bc->n_pending_outputs == 0) { 4350b8e80941Smrg reg = r600_get_temp(ctx); 4351b8e80941Smrg } else { 4352b8e80941Smrg /* If we are already spilling and the output address is the same like 4353b8e80941Smrg * before then just reuse the same slot */ 4354b8e80941Smrg struct r600_bytecode_output *tmpl = &ctx->bc->pending_outputs[ctx->bc->n_pending_outputs-1]; 4355b8e80941Smrg if ((cf.array_base + idx == tmpl->array_base) || 4356b8e80941Smrg (cf.array_base == tmpl->array_base && 4357b8e80941Smrg tmpl->index_gpr == ctx->bc->ar_reg && 4358b8e80941Smrg tgsi_dst->Register.Indirect)) { 4359b8e80941Smrg reg = ctx->bc->pending_outputs[0].gpr; 4360b8e80941Smrg add_pending_output = false; 4361b8e80941Smrg } else { 4362b8e80941Smrg reg = r600_get_temp(ctx); 4363b8e80941Smrg } 4364b8e80941Smrg } 4365b8e80941Smrg 4366b8e80941Smrg r600_dst->sel = reg; 4367b8e80941Smrg r600_dst->chan = swizzle; 4368b8e80941Smrg r600_dst->write = 1; 4369b8e80941Smrg if (inst->Instruction.Saturate) { 4370b8e80941Smrg r600_dst->clamp = 1; 4371b8e80941Smrg } 4372b8e80941Smrg 4373b8e80941Smrg /* Add new outputs as pending */ 4374b8e80941Smrg if (add_pending_output) { 4375b8e80941Smrg cf.op = CF_OP_MEM_SCRATCH; 4376b8e80941Smrg cf.elem_size = 3; 4377b8e80941Smrg cf.gpr = reg; 4378b8e80941Smrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 4379b8e80941Smrg cf.mark = 1; 4380b8e80941Smrg cf.comp_mask = inst->Dst[0].Register.WriteMask; 4381b8e80941Smrg cf.swizzle_x = 0; 4382b8e80941Smrg cf.swizzle_y = 1; 4383b8e80941Smrg cf.swizzle_z = 2; 4384b8e80941Smrg cf.swizzle_w = 3; 4385b8e80941Smrg cf.burst_count = 1; 4386b8e80941Smrg 4387b8e80941Smrg if (tgsi_dst->Register.Indirect) { 4388b8e80941Smrg if (ctx->bc->chip_class < R700) 4389b8e80941Smrg cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 4390b8e80941Smrg else 4391b8e80941Smrg cf.type = 3; // V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK; 4392b8e80941Smrg cf.index_gpr = ctx->bc->ar_reg; 4393b8e80941Smrg } 4394b8e80941Smrg else { 4395b8e80941Smrg cf.array_base += idx; 4396b8e80941Smrg cf.array_size = 0; 4397b8e80941Smrg } 4398b8e80941Smrg 4399b8e80941Smrg r = r600_bytecode_add_pending_output(ctx->bc, &cf); 4400b8e80941Smrg if (r) 4401b8e80941Smrg return; 4402b8e80941Smrg 4403b8e80941Smrg if (ctx->bc->chip_class >= R700) 4404b8e80941Smrg r600_bytecode_need_wait_ack(ctx->bc, true); 4405b8e80941Smrg } 4406b8e80941Smrg return; 4407b8e80941Smrg } 4408b8e80941Smrg else { 4409b8e80941Smrg r600_dst->sel = idx; 4410b8e80941Smrg } 4411b8e80941Smrg } 4412b8e80941Smrg else { 4413b8e80941Smrg r600_dst->sel = tgsi_dst->Register.Index; 4414b8e80941Smrg r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 4415b8e80941Smrg } 4416848b8605Smrg r600_dst->chan = swizzle; 4417848b8605Smrg r600_dst->write = 1; 4418848b8605Smrg if (inst->Instruction.Saturate) { 4419848b8605Smrg r600_dst->clamp = 1; 4420848b8605Smrg } 4421b8e80941Smrg if (ctx->type == PIPE_SHADER_TESS_CTRL) { 4422b8e80941Smrg if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { 4423b8e80941Smrg return; 4424848b8605Smrg } 4425848b8605Smrg } 4426b8e80941Smrg if (tgsi_dst->Register.Indirect) 4427b8e80941Smrg r600_dst->rel = V_SQ_REL_RELATIVE; 4428b8e80941Smrg 4429848b8605Smrg} 4430848b8605Smrg 4431b8e80941Smrgstatic int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap, int dest_temp, int op_override) 4432848b8605Smrg{ 4433848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4434848b8605Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 4435b8e80941Smrg struct r600_bytecode_alu alu; 4436848b8605Smrg int i, j, r, lasti = tgsi_last_instruction(write_mask); 4437b8e80941Smrg int use_tmp = 0; 4438b8e80941Smrg int swizzle_x = inst->Src[0].Register.SwizzleX; 4439b8e80941Smrg 4440b8e80941Smrg if (singledest) { 4441b8e80941Smrg switch (write_mask) { 4442b8e80941Smrg case 0x1: 4443b8e80941Smrg if (swizzle_x == 2) { 4444b8e80941Smrg write_mask = 0xc; 4445b8e80941Smrg use_tmp = 3; 4446b8e80941Smrg } else 4447b8e80941Smrg write_mask = 0x3; 4448b8e80941Smrg break; 4449b8e80941Smrg case 0x2: 4450b8e80941Smrg if (swizzle_x == 2) { 4451b8e80941Smrg write_mask = 0xc; 4452b8e80941Smrg use_tmp = 3; 4453b8e80941Smrg } else { 4454b8e80941Smrg write_mask = 0x3; 4455b8e80941Smrg use_tmp = 1; 4456b8e80941Smrg } 4457b8e80941Smrg break; 4458b8e80941Smrg case 0x4: 4459b8e80941Smrg if (swizzle_x == 0) { 4460b8e80941Smrg write_mask = 0x3; 4461b8e80941Smrg use_tmp = 1; 4462b8e80941Smrg } else 4463b8e80941Smrg write_mask = 0xc; 4464b8e80941Smrg break; 4465b8e80941Smrg case 0x8: 4466b8e80941Smrg if (swizzle_x == 0) { 4467b8e80941Smrg write_mask = 0x3; 4468b8e80941Smrg use_tmp = 1; 4469b8e80941Smrg } else { 4470b8e80941Smrg write_mask = 0xc; 4471b8e80941Smrg use_tmp = 3; 4472b8e80941Smrg } 4473b8e80941Smrg break; 4474b8e80941Smrg } 4475b8e80941Smrg } 4476848b8605Smrg 4477b8e80941Smrg lasti = tgsi_last_instruction(write_mask); 4478848b8605Smrg for (i = 0; i <= lasti; i++) { 4479b8e80941Smrg 4480848b8605Smrg if (!(write_mask & (1 << i))) 4481848b8605Smrg continue; 4482848b8605Smrg 4483848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4484b8e80941Smrg 4485b8e80941Smrg if (singledest) { 4486b8e80941Smrg if (use_tmp || dest_temp) { 4487b8e80941Smrg alu.dst.sel = use_tmp ? ctx->temp_reg : dest_temp; 4488b8e80941Smrg alu.dst.chan = i; 4489b8e80941Smrg alu.dst.write = 1; 4490b8e80941Smrg } else { 4491b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4492b8e80941Smrg } 4493b8e80941Smrg if (i == 1 || i == 3) 4494b8e80941Smrg alu.dst.write = 0; 4495848b8605Smrg } else 4496848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4497848b8605Smrg 4498b8e80941Smrg alu.op = op_override ? op_override : ctx->inst_info->op; 4499b8e80941Smrg if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 4500b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4501b8e80941Smrg } else if (!swap) { 4502848b8605Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4503b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4504848b8605Smrg } 4505848b8605Smrg } else { 4506b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 4507b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 4508848b8605Smrg } 4509b8e80941Smrg 4510848b8605Smrg /* handle some special cases */ 4511b8e80941Smrg if (i == 1 || i == 3) { 4512b8e80941Smrg switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 4513b8e80941Smrg case TGSI_OPCODE_DABS: 4514b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 4515b8e80941Smrg break; 4516b8e80941Smrg default: 4517b8e80941Smrg break; 4518b8e80941Smrg } 4519848b8605Smrg } 4520b8e80941Smrg if (i == lasti) { 4521848b8605Smrg alu.last = 1; 4522848b8605Smrg } 4523848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4524848b8605Smrg if (r) 4525848b8605Smrg return r; 4526848b8605Smrg } 4527848b8605Smrg 4528848b8605Smrg if (use_tmp) { 4529b8e80941Smrg write_mask = inst->Dst[0].Register.WriteMask; 4530b8e80941Smrg 4531b8e80941Smrg lasti = tgsi_last_instruction(write_mask); 4532848b8605Smrg /* move result from temp to dst */ 4533848b8605Smrg for (i = 0; i <= lasti; i++) { 4534848b8605Smrg if (!(write_mask & (1 << i))) 4535848b8605Smrg continue; 4536848b8605Smrg 4537848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4538848b8605Smrg alu.op = ALU_OP1_MOV; 4539b8e80941Smrg 4540b8e80941Smrg if (dest_temp) { 4541b8e80941Smrg alu.dst.sel = dest_temp; 4542b8e80941Smrg alu.dst.chan = i; 4543b8e80941Smrg alu.dst.write = 1; 4544b8e80941Smrg } else 4545b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4546848b8605Smrg alu.src[0].sel = ctx->temp_reg; 4547b8e80941Smrg alu.src[0].chan = use_tmp - 1; 4548848b8605Smrg alu.last = (i == lasti); 4549848b8605Smrg 4550848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4551848b8605Smrg if (r) 4552848b8605Smrg return r; 4553848b8605Smrg } 4554848b8605Smrg } 4555848b8605Smrg return 0; 4556848b8605Smrg} 4557848b8605Smrg 4558b8e80941Smrgstatic int tgsi_op2_64(struct r600_shader_ctx *ctx) 4559848b8605Smrg{ 4560b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4561b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 4562b8e80941Smrg /* confirm writemasking */ 4563b8e80941Smrg if ((write_mask & 0x3) != 0x3 && 4564b8e80941Smrg (write_mask & 0xc) != 0xc) { 4565b8e80941Smrg fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 4566b8e80941Smrg return -1; 4567b8e80941Smrg } 4568b8e80941Smrg return tgsi_op2_64_params(ctx, false, false, 0, 0); 4569b8e80941Smrg} 4570b8e80941Smrg 4571b8e80941Smrgstatic int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 4572848b8605Smrg{ 4573b8e80941Smrg return tgsi_op2_64_params(ctx, true, false, 0, 0); 4574848b8605Smrg} 4575848b8605Smrg 4576b8e80941Smrgstatic int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 4577848b8605Smrg{ 4578b8e80941Smrg return tgsi_op2_64_params(ctx, true, true, 0, 0); 4579848b8605Smrg} 4580848b8605Smrg 4581b8e80941Smrgstatic int tgsi_op3_64(struct r600_shader_ctx *ctx) 4582848b8605Smrg{ 4583848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4584848b8605Smrg struct r600_bytecode_alu alu; 4585b8e80941Smrg int i, j, r; 4586b8e80941Smrg int lasti = 3; 4587b8e80941Smrg int tmp = r600_get_temp(ctx); 4588848b8605Smrg 4589848b8605Smrg for (i = 0; i < lasti + 1; i++) { 4590848b8605Smrg 4591848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4592848b8605Smrg alu.op = ctx->inst_info->op; 4593b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4594b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); 4595b8e80941Smrg } 4596848b8605Smrg 4597b8e80941Smrg if (inst->Dst[0].Register.WriteMask & (1 << i)) 4598b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4599b8e80941Smrg else 4600b8e80941Smrg alu.dst.sel = tmp; 4601848b8605Smrg 4602b8e80941Smrg alu.dst.chan = i; 4603b8e80941Smrg alu.is_op3 = 1; 4604848b8605Smrg if (i == lasti) { 4605848b8605Smrg alu.last = 1; 4606848b8605Smrg } 4607848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4608848b8605Smrg if (r) 4609848b8605Smrg return r; 4610848b8605Smrg } 4611848b8605Smrg return 0; 4612848b8605Smrg} 4613848b8605Smrg 4614b8e80941Smrgstatic int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 4615848b8605Smrg{ 4616848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4617848b8605Smrg struct r600_bytecode_alu alu; 4618b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 4619b8e80941Smrg int i, j, r, lasti = tgsi_last_instruction(write_mask); 4620b8e80941Smrg /* use temp register if trans_only and more than one dst component */ 4621b8e80941Smrg int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 4622b8e80941Smrg unsigned op = ctx->inst_info->op; 4623b8e80941Smrg 4624b8e80941Smrg if (op == ALU_OP2_MUL_IEEE && 4625b8e80941Smrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 4626b8e80941Smrg op = ALU_OP2_MUL; 4627b8e80941Smrg 4628b8e80941Smrg for (i = 0; i <= lasti; i++) { 4629b8e80941Smrg if (!(write_mask & (1 << i))) 4630b8e80941Smrg continue; 4631b8e80941Smrg 4632848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4633b8e80941Smrg if (use_tmp) { 4634b8e80941Smrg alu.dst.sel = ctx->temp_reg; 4635b8e80941Smrg alu.dst.chan = i; 4636b8e80941Smrg alu.dst.write = 1; 4637b8e80941Smrg } else 4638b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4639848b8605Smrg 4640b8e80941Smrg alu.op = op; 4641b8e80941Smrg if (!swap) { 4642b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4643b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4644848b8605Smrg } 4645b8e80941Smrg } else { 4646b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4647b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4648848b8605Smrg } 4649b8e80941Smrg if (i == lasti || trans_only) { 4650848b8605Smrg alu.last = 1; 4651b8e80941Smrg } 4652848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4653848b8605Smrg if (r) 4654848b8605Smrg return r; 4655848b8605Smrg } 4656848b8605Smrg 4657b8e80941Smrg if (use_tmp) { 4658b8e80941Smrg /* move result from temp to dst */ 4659b8e80941Smrg for (i = 0; i <= lasti; i++) { 4660b8e80941Smrg if (!(write_mask & (1 << i))) 4661b8e80941Smrg continue; 4662848b8605Smrg 4663848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4664b8e80941Smrg alu.op = ALU_OP1_MOV; 4665b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4666b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 4667b8e80941Smrg alu.src[0].chan = i; 4668b8e80941Smrg alu.last = (i == lasti); 4669b8e80941Smrg 4670848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4671848b8605Smrg if (r) 4672848b8605Smrg return r; 4673848b8605Smrg } 4674848b8605Smrg } 4675848b8605Smrg return 0; 4676848b8605Smrg} 4677848b8605Smrg 4678b8e80941Smrgstatic int tgsi_op2(struct r600_shader_ctx *ctx) 4679848b8605Smrg{ 4680b8e80941Smrg return tgsi_op2_s(ctx, 0, 0); 4681b8e80941Smrg} 4682848b8605Smrg 4683b8e80941Smrgstatic int tgsi_op2_swap(struct r600_shader_ctx *ctx) 4684b8e80941Smrg{ 4685b8e80941Smrg return tgsi_op2_s(ctx, 1, 0); 4686b8e80941Smrg} 4687848b8605Smrg 4688b8e80941Smrgstatic int tgsi_op2_trans(struct r600_shader_ctx *ctx) 4689b8e80941Smrg{ 4690b8e80941Smrg return tgsi_op2_s(ctx, 0, 1); 4691848b8605Smrg} 4692848b8605Smrg 4693b8e80941Smrgstatic int tgsi_ineg(struct r600_shader_ctx *ctx) 4694848b8605Smrg{ 4695848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4696848b8605Smrg struct r600_bytecode_alu alu; 4697848b8605Smrg int i, r; 4698b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4699848b8605Smrg 4700b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 4701848b8605Smrg 4702b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4703b8e80941Smrg continue; 4704848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4705848b8605Smrg alu.op = ctx->inst_info->op; 4706b8e80941Smrg 4707b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 4708b8e80941Smrg 4709b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4710848b8605Smrg 4711848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4712848b8605Smrg 4713b8e80941Smrg if (i == lasti) { 4714848b8605Smrg alu.last = 1; 4715b8e80941Smrg } 4716848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4717848b8605Smrg if (r) 4718848b8605Smrg return r; 4719848b8605Smrg } 4720848b8605Smrg return 0; 4721b8e80941Smrg 4722848b8605Smrg} 4723848b8605Smrg 4724b8e80941Smrgstatic int tgsi_dneg(struct r600_shader_ctx *ctx) 4725848b8605Smrg{ 4726848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4727848b8605Smrg struct r600_bytecode_alu alu; 4728848b8605Smrg int i, r; 4729848b8605Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4730848b8605Smrg 4731848b8605Smrg for (i = 0; i < lasti + 1; i++) { 4732b8e80941Smrg 4733848b8605Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4734848b8605Smrg continue; 4735848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4736848b8605Smrg alu.op = ALU_OP1_MOV; 4737848b8605Smrg 4738b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4739b8e80941Smrg 4740b8e80941Smrg if (i == 1 || i == 3) 4741b8e80941Smrg r600_bytecode_src_toggle_neg(&alu.src[0]); 4742848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4743b8e80941Smrg 4744b8e80941Smrg if (i == lasti) { 4745848b8605Smrg alu.last = 1; 4746b8e80941Smrg } 4747848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4748848b8605Smrg if (r) 4749848b8605Smrg return r; 4750848b8605Smrg } 4751848b8605Smrg return 0; 4752b8e80941Smrg 4753848b8605Smrg} 4754848b8605Smrg 4755b8e80941Smrgstatic int tgsi_dfracexp(struct r600_shader_ctx *ctx) 4756848b8605Smrg{ 4757848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4758848b8605Smrg struct r600_bytecode_alu alu; 4759b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 4760b8e80941Smrg int i, j, r; 4761848b8605Smrg 4762b8e80941Smrg for (i = 0; i <= 3; i++) { 4763b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4764b8e80941Smrg alu.op = ctx->inst_info->op; 4765848b8605Smrg 4766b8e80941Smrg alu.dst.sel = ctx->temp_reg; 4767b8e80941Smrg alu.dst.chan = i; 4768b8e80941Smrg alu.dst.write = 1; 4769b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4770b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4771b8e80941Smrg } 4772848b8605Smrg 4773b8e80941Smrg if (i == 3) 4774b8e80941Smrg alu.last = 1; 4775b8e80941Smrg 4776b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4777b8e80941Smrg if (r) 4778b8e80941Smrg return r; 4779b8e80941Smrg } 4780b8e80941Smrg 4781b8e80941Smrg /* Replicate significand result across channels. */ 4782b8e80941Smrg for (i = 0; i <= 3; i++) { 4783b8e80941Smrg if (!(write_mask & (1 << i))) 4784b8e80941Smrg continue; 4785b8e80941Smrg 4786b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4787b8e80941Smrg alu.op = ALU_OP1_MOV; 4788b8e80941Smrg alu.src[0].chan = (i & 1) + 2; 4789b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 4790b8e80941Smrg 4791b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4792b8e80941Smrg alu.dst.write = 1; 4793b8e80941Smrg alu.last = 1; 4794b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4795b8e80941Smrg if (r) 4796b8e80941Smrg return r; 4797b8e80941Smrg } 4798848b8605Smrg 4799b8e80941Smrg for (i = 0; i <= 3; i++) { 4800b8e80941Smrg if (inst->Dst[1].Register.WriteMask & (1 << i)) { 4801b8e80941Smrg /* MOV third channels to writemask dst1 */ 4802b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4803b8e80941Smrg alu.op = ALU_OP1_MOV; 4804b8e80941Smrg alu.src[0].chan = 1; 4805848b8605Smrg alu.src[0].sel = ctx->temp_reg; 4806b8e80941Smrg 4807b8e80941Smrg tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 4808848b8605Smrg alu.last = 1; 4809848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4810848b8605Smrg if (r) 4811848b8605Smrg return r; 4812b8e80941Smrg break; 4813848b8605Smrg } 4814848b8605Smrg } 4815b8e80941Smrg return 0; 4816b8e80941Smrg} 4817848b8605Smrg 4818b8e80941Smrg 4819b8e80941Smrgstatic int egcm_int_to_double(struct r600_shader_ctx *ctx) 4820b8e80941Smrg{ 4821b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4822b8e80941Smrg struct r600_bytecode_alu alu; 4823b8e80941Smrg int i, c, r; 4824b8e80941Smrg int write_mask = inst->Dst[0].Register.WriteMask; 4825b8e80941Smrg int temp_reg = r600_get_temp(ctx); 4826b8e80941Smrg 4827b8e80941Smrg assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 4828b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_U2D); 4829b8e80941Smrg 4830b8e80941Smrg for (c = 0; c < 2; c++) { 4831b8e80941Smrg int dchan = c * 2; 4832b8e80941Smrg if (write_mask & (0x3 << dchan)) { 4833b8e80941Smrg /* split into 24-bit int and 8-bit int */ 4834848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4835b8e80941Smrg alu.op = ALU_OP2_AND_INT; 4836b8e80941Smrg alu.dst.sel = temp_reg; 4837b8e80941Smrg alu.dst.chan = dchan; 4838b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], c); 4839b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4840b8e80941Smrg alu.src[1].value = 0xffffff00; 4841b8e80941Smrg alu.dst.write = 1; 4842b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4843b8e80941Smrg if (r) 4844b8e80941Smrg return r; 4845848b8605Smrg 4846b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4847b8e80941Smrg alu.op = ALU_OP2_AND_INT; 4848b8e80941Smrg alu.dst.sel = temp_reg; 4849b8e80941Smrg alu.dst.chan = dchan + 1; 4850b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], c); 4851b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4852b8e80941Smrg alu.src[1].value = 0xff; 4853b8e80941Smrg alu.dst.write = 1; 4854848b8605Smrg alu.last = 1; 4855848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4856848b8605Smrg if (r) 4857848b8605Smrg return r; 4858848b8605Smrg } 4859848b8605Smrg } 4860848b8605Smrg 4861b8e80941Smrg for (c = 0; c < 2; c++) { 4862b8e80941Smrg int dchan = c * 2; 4863b8e80941Smrg if (write_mask & (0x3 << dchan)) { 4864b8e80941Smrg for (i = dchan; i <= dchan + 1; i++) { 4865b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4866b8e80941Smrg alu.op = i == dchan ? ctx->inst_info->op : ALU_OP1_UINT_TO_FLT; 4867848b8605Smrg 4868b8e80941Smrg alu.src[0].sel = temp_reg; 4869b8e80941Smrg alu.src[0].chan = i; 4870b8e80941Smrg alu.dst.sel = temp_reg; 4871b8e80941Smrg alu.dst.chan = i; 4872b8e80941Smrg alu.dst.write = 1; 4873b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) 4874b8e80941Smrg alu.last = i == dchan + 1; 4875b8e80941Smrg else 4876b8e80941Smrg alu.last = 1; /* trans only ops on evergreen */ 4877b8e80941Smrg 4878b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4879b8e80941Smrg if (r) 4880b8e80941Smrg return r; 4881b8e80941Smrg } 4882b8e80941Smrg } 4883b8e80941Smrg } 4884848b8605Smrg 4885b8e80941Smrg for (c = 0; c < 2; c++) { 4886b8e80941Smrg int dchan = c * 2; 4887b8e80941Smrg if (write_mask & (0x3 << dchan)) { 4888b8e80941Smrg for (i = 0; i < 4; i++) { 4889b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4890b8e80941Smrg alu.op = ALU_OP1_FLT32_TO_FLT64; 4891b8e80941Smrg 4892b8e80941Smrg alu.src[0].chan = dchan + (i / 2); 4893b8e80941Smrg if (i == 0 || i == 2) 4894b8e80941Smrg alu.src[0].sel = temp_reg; 4895b8e80941Smrg else { 4896b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 4897b8e80941Smrg alu.src[0].value = 0x0; 4898b8e80941Smrg } 4899b8e80941Smrg alu.dst.sel = ctx->temp_reg; 4900b8e80941Smrg alu.dst.chan = i; 4901b8e80941Smrg alu.last = i == 3; 4902b8e80941Smrg alu.dst.write = 1; 4903848b8605Smrg 4904b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4905b8e80941Smrg if (r) 4906b8e80941Smrg return r; 4907b8e80941Smrg } 4908848b8605Smrg 4909b8e80941Smrg for (i = 0; i <= 1; i++) { 4910b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4911b8e80941Smrg alu.op = ALU_OP2_ADD_64; 4912848b8605Smrg 4913b8e80941Smrg alu.src[0].chan = fp64_switch(i); 4914b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 4915b8e80941Smrg 4916b8e80941Smrg alu.src[1].chan = fp64_switch(i + 2); 4917b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 4918b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], dchan + i, &alu.dst); 4919b8e80941Smrg alu.last = i == 1; 4920b8e80941Smrg 4921b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4922b8e80941Smrg if (r) 4923b8e80941Smrg return r; 4924b8e80941Smrg } 4925b8e80941Smrg } 4926848b8605Smrg } 4927848b8605Smrg 4928b8e80941Smrg return 0; 4929b8e80941Smrg} 4930848b8605Smrg 4931b8e80941Smrgstatic int egcm_double_to_int(struct r600_shader_ctx *ctx) 4932b8e80941Smrg{ 4933b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4934b8e80941Smrg struct r600_bytecode_alu alu; 4935b8e80941Smrg int i, r; 4936b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4937b8e80941Smrg int treg = r600_get_temp(ctx); 4938b8e80941Smrg assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 4939b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_D2U); 4940848b8605Smrg 4941b8e80941Smrg /* do a 64->32 into a temp register */ 4942b8e80941Smrg r = tgsi_op2_64_params(ctx, true, false, treg, ALU_OP1_FLT64_TO_FLT32); 4943b8e80941Smrg if (r) 4944b8e80941Smrg return r; 4945848b8605Smrg 4946b8e80941Smrg for (i = 0; i <= lasti; i++) { 4947b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4948b8e80941Smrg continue; 4949b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4950b8e80941Smrg alu.op = ctx->inst_info->op; 4951848b8605Smrg 4952b8e80941Smrg alu.src[0].chan = i; 4953b8e80941Smrg alu.src[0].sel = treg; 4954b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4955b8e80941Smrg alu.last = (i == lasti); 4956848b8605Smrg 4957848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 4958848b8605Smrg if (r) 4959848b8605Smrg return r; 4960848b8605Smrg } 4961848b8605Smrg 4962848b8605Smrg return 0; 4963848b8605Smrg} 4964848b8605Smrg 4965b8e80941Smrgstatic int cayman_emit_unary_double_raw(struct r600_bytecode *bc, 4966b8e80941Smrg unsigned op, 4967b8e80941Smrg int dst_reg, 4968b8e80941Smrg struct r600_shader_src *src, 4969b8e80941Smrg bool abs) 4970848b8605Smrg{ 4971848b8605Smrg struct r600_bytecode_alu alu; 4972b8e80941Smrg const int last_slot = 3; 4973b8e80941Smrg int r; 4974848b8605Smrg 4975b8e80941Smrg /* these have to write the result to X/Y by the looks of it */ 4976b8e80941Smrg for (int i = 0 ; i < last_slot; i++) { 4977848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4978b8e80941Smrg alu.op = op; 4979848b8605Smrg 4980b8e80941Smrg r600_bytecode_src(&alu.src[0], src, 1); 4981b8e80941Smrg r600_bytecode_src(&alu.src[1], src, 0); 4982848b8605Smrg 4983b8e80941Smrg if (abs) 4984b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[1]); 4985848b8605Smrg 4986b8e80941Smrg alu.dst.sel = dst_reg; 4987b8e80941Smrg alu.dst.chan = i; 4988b8e80941Smrg alu.dst.write = (i == 0 || i == 1); 4989b8e80941Smrg 4990b8e80941Smrg if (bc->chip_class != CAYMAN || i == last_slot - 1) 4991848b8605Smrg alu.last = 1; 4992b8e80941Smrg r = r600_bytecode_add_alu(bc, &alu); 4993848b8605Smrg if (r) 4994848b8605Smrg return r; 4995848b8605Smrg } 4996848b8605Smrg 4997848b8605Smrg return 0; 4998848b8605Smrg} 4999848b8605Smrg 5000b8e80941Smrgstatic int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 5001848b8605Smrg{ 5002848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5003b8e80941Smrg int i, r; 5004848b8605Smrg struct r600_bytecode_alu alu; 5005b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5006b8e80941Smrg int t1 = ctx->temp_reg; 5007848b8605Smrg 5008b8e80941Smrg /* should only be one src regs */ 5009b8e80941Smrg assert(inst->Instruction.NumSrcRegs == 1); 5010848b8605Smrg 5011b8e80941Smrg /* only support one double at a time */ 5012b8e80941Smrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 5013b8e80941Smrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 5014848b8605Smrg 5015b8e80941Smrg r = cayman_emit_unary_double_raw( 5016b8e80941Smrg ctx->bc, ctx->inst_info->op, t1, 5017b8e80941Smrg &ctx->src[0], 5018b8e80941Smrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 5019b8e80941Smrg ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT); 5020848b8605Smrg if (r) 5021848b8605Smrg return r; 5022848b8605Smrg 5023b8e80941Smrg for (i = 0 ; i <= lasti; i++) { 5024b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5025b8e80941Smrg continue; 5026848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5027b8e80941Smrg alu.op = ALU_OP1_MOV; 5028b8e80941Smrg alu.src[0].sel = t1; 5029b8e80941Smrg alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 5030b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5031848b8605Smrg alu.dst.write = 1; 5032b8e80941Smrg if (i == lasti) 5033b8e80941Smrg alu.last = 1; 5034848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5035848b8605Smrg if (r) 5036848b8605Smrg return r; 5037b8e80941Smrg } 5038b8e80941Smrg return 0; 5039b8e80941Smrg} 5040848b8605Smrg 5041b8e80941Smrgstatic int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 5042b8e80941Smrg{ 5043b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5044b8e80941Smrg int i, j, r; 5045b8e80941Smrg struct r600_bytecode_alu alu; 5046b8e80941Smrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 5047848b8605Smrg 5048b8e80941Smrg for (i = 0 ; i < last_slot; i++) { 5049b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5050b8e80941Smrg alu.op = ctx->inst_info->op; 5051b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 5052b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 5053848b8605Smrg 5054b8e80941Smrg /* RSQ should take the absolute value of src */ 5055b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 5056b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[j]); 5057b8e80941Smrg } 5058b8e80941Smrg } 5059b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5060b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5061848b8605Smrg 5062b8e80941Smrg if (i == last_slot - 1) 5063b8e80941Smrg alu.last = 1; 5064b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5065b8e80941Smrg if (r) 5066b8e80941Smrg return r; 5067b8e80941Smrg } 5068848b8605Smrg return 0; 5069848b8605Smrg} 5070848b8605Smrg 5071b8e80941Smrgstatic int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 5072848b8605Smrg{ 5073848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5074b8e80941Smrg int i, j, k, r; 5075848b8605Smrg struct r600_bytecode_alu alu; 5076b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5077b8e80941Smrg int t1 = ctx->temp_reg; 5078848b8605Smrg 5079b8e80941Smrg for (k = 0; k <= lasti; k++) { 5080b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 5081b8e80941Smrg continue; 5082848b8605Smrg 5083b8e80941Smrg for (i = 0 ; i < 4; i++) { 5084b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5085b8e80941Smrg alu.op = ctx->inst_info->op; 5086b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 5087b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 5088b8e80941Smrg } 5089b8e80941Smrg alu.dst.sel = t1; 5090b8e80941Smrg alu.dst.chan = i; 5091b8e80941Smrg alu.dst.write = (i == k); 5092b8e80941Smrg if (i == 3) 5093b8e80941Smrg alu.last = 1; 5094b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5095b8e80941Smrg if (r) 5096b8e80941Smrg return r; 5097b8e80941Smrg } 5098b8e80941Smrg } 5099848b8605Smrg 5100b8e80941Smrg for (i = 0 ; i <= lasti; i++) { 5101b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5102b8e80941Smrg continue; 5103b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5104b8e80941Smrg alu.op = ALU_OP1_MOV; 5105b8e80941Smrg alu.src[0].sel = t1; 5106b8e80941Smrg alu.src[0].chan = i; 5107b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5108b8e80941Smrg alu.dst.write = 1; 5109b8e80941Smrg if (i == lasti) 5110b8e80941Smrg alu.last = 1; 5111b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5112b8e80941Smrg if (r) 5113b8e80941Smrg return r; 5114848b8605Smrg } 5115b8e80941Smrg 5116b8e80941Smrg return 0; 5117848b8605Smrg} 5118848b8605Smrg 5119b8e80941Smrg 5120b8e80941Smrgstatic int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 5121848b8605Smrg{ 5122848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5123b8e80941Smrg int i, j, k, r; 5124848b8605Smrg struct r600_bytecode_alu alu; 5125b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5126b8e80941Smrg int t1 = ctx->temp_reg; 5127b8e80941Smrg 5128b8e80941Smrg /* t1 would get overwritten below if we actually tried to 5129b8e80941Smrg * multiply two pairs of doubles at a time. */ 5130b8e80941Smrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 5131b8e80941Smrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 5132b8e80941Smrg 5133b8e80941Smrg k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; 5134848b8605Smrg 5135848b8605Smrg for (i = 0; i < 4; i++) { 5136848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5137b8e80941Smrg alu.op = ctx->inst_info->op; 5138b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 5139b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); 5140b8e80941Smrg } 5141b8e80941Smrg alu.dst.sel = t1; 5142848b8605Smrg alu.dst.chan = i; 5143b8e80941Smrg alu.dst.write = 1; 5144848b8605Smrg if (i == 3) 5145848b8605Smrg alu.last = 1; 5146848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5147848b8605Smrg if (r) 5148848b8605Smrg return r; 5149848b8605Smrg } 5150b8e80941Smrg 5151b8e80941Smrg for (i = 0; i <= lasti; i++) { 5152b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5153b8e80941Smrg continue; 5154b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5155b8e80941Smrg alu.op = ALU_OP1_MOV; 5156b8e80941Smrg alu.src[0].sel = t1; 5157b8e80941Smrg alu.src[0].chan = i; 5158b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5159b8e80941Smrg alu.dst.write = 1; 5160b8e80941Smrg if (i == lasti) 5161b8e80941Smrg alu.last = 1; 5162b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5163b8e80941Smrg if (r) 5164b8e80941Smrg return r; 5165b8e80941Smrg } 5166b8e80941Smrg 5167848b8605Smrg return 0; 5168848b8605Smrg} 5169848b8605Smrg 5170b8e80941Smrg/* 5171b8e80941Smrg * Emit RECIP_64 + MUL_64 to implement division. 5172b8e80941Smrg */ 5173b8e80941Smrgstatic int cayman_ddiv_instr(struct r600_shader_ctx *ctx) 5174848b8605Smrg{ 5175848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5176b8e80941Smrg int r; 5177848b8605Smrg struct r600_bytecode_alu alu; 5178b8e80941Smrg int t1 = ctx->temp_reg; 5179b8e80941Smrg int k; 5180848b8605Smrg 5181b8e80941Smrg /* Only support one double at a time. This is the same constraint as 5182b8e80941Smrg * in DMUL lowering. */ 5183b8e80941Smrg assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 5184b8e80941Smrg inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 5185b8e80941Smrg 5186b8e80941Smrg k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; 5187b8e80941Smrg 5188b8e80941Smrg r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false); 5189848b8605Smrg if (r) 5190848b8605Smrg return r; 5191848b8605Smrg 5192b8e80941Smrg for (int i = 0; i < 4; i++) { 5193848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5194b8e80941Smrg alu.op = ALU_OP2_MUL_64; 5195b8e80941Smrg 5196b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1)); 5197b8e80941Smrg 5198b8e80941Smrg alu.src[1].sel = t1; 5199b8e80941Smrg alu.src[1].chan = (i == 3) ? 0 : 1; 5200b8e80941Smrg 5201b8e80941Smrg alu.dst.sel = t1; 5202848b8605Smrg alu.dst.chan = i; 5203848b8605Smrg alu.dst.write = 1; 5204b8e80941Smrg if (i == 3) 5205848b8605Smrg alu.last = 1; 5206848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5207848b8605Smrg if (r) 5208848b8605Smrg return r; 5209848b8605Smrg } 5210848b8605Smrg 5211b8e80941Smrg for (int i = 0; i < 2; i++) { 5212848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5213b8e80941Smrg alu.op = ALU_OP1_MOV; 5214b8e80941Smrg alu.src[0].sel = t1; 5215b8e80941Smrg alu.src[0].chan = i; 5216b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst); 5217b8e80941Smrg alu.dst.write = 1; 5218b8e80941Smrg if (i == 1) 5219848b8605Smrg alu.last = 1; 5220848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5221848b8605Smrg if (r) 5222848b8605Smrg return r; 5223848b8605Smrg } 5224848b8605Smrg return 0; 5225848b8605Smrg} 5226848b8605Smrg 5227b8e80941Smrg/* 5228b8e80941Smrg * r600 - trunc to -PI..PI range 5229b8e80941Smrg * r700 - normalize by dividing by 2PI 5230b8e80941Smrg * see fdo bug 27901 5231b8e80941Smrg */ 5232b8e80941Smrgstatic int tgsi_setup_trig(struct r600_shader_ctx *ctx) 5233848b8605Smrg{ 5234848b8605Smrg int r; 5235b8e80941Smrg struct r600_bytecode_alu alu; 5236848b8605Smrg 5237848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5238b8e80941Smrg alu.op = ALU_OP3_MULADD; 5239b8e80941Smrg alu.is_op3 = 1; 5240b8e80941Smrg 5241b8e80941Smrg alu.dst.chan = 0; 5242848b8605Smrg alu.dst.sel = ctx->temp_reg; 5243848b8605Smrg alu.dst.write = 1; 5244b8e80941Smrg 5245b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5246b8e80941Smrg 5247b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5248b8e80941Smrg alu.src[1].chan = 0; 5249b8e80941Smrg alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI); 5250b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_0_5; 5251b8e80941Smrg alu.src[2].chan = 0; 5252848b8605Smrg alu.last = 1; 5253848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5254848b8605Smrg if (r) 5255848b8605Smrg return r; 5256b8e80941Smrg 5257848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5258b8e80941Smrg alu.op = ALU_OP1_FRACT; 5259b8e80941Smrg 5260b8e80941Smrg alu.dst.chan = 0; 5261848b8605Smrg alu.dst.sel = ctx->temp_reg; 5262848b8605Smrg alu.dst.write = 1; 5263b8e80941Smrg 5264b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5265b8e80941Smrg alu.src[0].chan = 0; 5266848b8605Smrg alu.last = 1; 5267848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5268848b8605Smrg if (r) 5269848b8605Smrg return r; 5270b8e80941Smrg 5271848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5272b8e80941Smrg alu.op = ALU_OP3_MULADD; 5273b8e80941Smrg alu.is_op3 = 1; 5274b8e80941Smrg 5275b8e80941Smrg alu.dst.chan = 0; 5276848b8605Smrg alu.dst.sel = ctx->temp_reg; 5277848b8605Smrg alu.dst.write = 1; 5278b8e80941Smrg 5279b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5280b8e80941Smrg alu.src[0].chan = 0; 5281b8e80941Smrg 5282b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5283b8e80941Smrg alu.src[1].chan = 0; 5284b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 5285b8e80941Smrg alu.src[2].chan = 0; 5286b8e80941Smrg 5287b8e80941Smrg if (ctx->bc->chip_class == R600) { 5288b8e80941Smrg alu.src[1].value = u_bitcast_f2u(2.0f * M_PI); 5289b8e80941Smrg alu.src[2].value = u_bitcast_f2u(-M_PI); 5290b8e80941Smrg } else { 5291b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1; 5292b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_0_5; 5293b8e80941Smrg alu.src[2].neg = 1; 5294b8e80941Smrg } 5295b8e80941Smrg 5296848b8605Smrg alu.last = 1; 5297848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5298848b8605Smrg if (r) 5299848b8605Smrg return r; 5300b8e80941Smrg return 0; 5301848b8605Smrg} 5302848b8605Smrg 5303b8e80941Smrgstatic int cayman_trig(struct r600_shader_ctx *ctx) 5304848b8605Smrg{ 5305848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5306848b8605Smrg struct r600_bytecode_alu alu; 5307b8e80941Smrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 5308b8e80941Smrg int i, r; 5309848b8605Smrg 5310b8e80941Smrg r = tgsi_setup_trig(ctx); 5311b8e80941Smrg if (r) 5312b8e80941Smrg return r; 5313848b8605Smrg 5314848b8605Smrg 5315b8e80941Smrg for (i = 0; i < last_slot; i++) { 5316b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5317b8e80941Smrg alu.op = ctx->inst_info->op; 5318b8e80941Smrg alu.dst.chan = i; 5319848b8605Smrg 5320b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5321b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5322848b8605Smrg 5323b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5324b8e80941Smrg alu.src[0].chan = 0; 5325b8e80941Smrg if (i == last_slot - 1) 5326848b8605Smrg alu.last = 1; 5327b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5328b8e80941Smrg if (r) 5329b8e80941Smrg return r; 5330b8e80941Smrg } 5331b8e80941Smrg return 0; 5332b8e80941Smrg} 5333848b8605Smrg 5334b8e80941Smrgstatic int tgsi_trig(struct r600_shader_ctx *ctx) 5335b8e80941Smrg{ 5336b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5337b8e80941Smrg struct r600_bytecode_alu alu; 5338b8e80941Smrg int i, r; 5339b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5340848b8605Smrg 5341b8e80941Smrg r = tgsi_setup_trig(ctx); 5342b8e80941Smrg if (r) 5343b8e80941Smrg return r; 5344848b8605Smrg 5345b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5346b8e80941Smrg alu.op = ctx->inst_info->op; 5347b8e80941Smrg alu.dst.chan = 0; 5348b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5349b8e80941Smrg alu.dst.write = 1; 5350848b8605Smrg 5351b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5352b8e80941Smrg alu.src[0].chan = 0; 5353b8e80941Smrg alu.last = 1; 5354b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5355b8e80941Smrg if (r) 5356b8e80941Smrg return r; 5357848b8605Smrg 5358b8e80941Smrg /* replicate result */ 5359b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 5360b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5361b8e80941Smrg continue; 5362848b8605Smrg 5363b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5364b8e80941Smrg alu.op = ALU_OP1_MOV; 5365848b8605Smrg 5366b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5367b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5368b8e80941Smrg if (i == lasti) 5369b8e80941Smrg alu.last = 1; 5370b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5371b8e80941Smrg if (r) 5372b8e80941Smrg return r; 5373b8e80941Smrg } 5374b8e80941Smrg return 0; 5375b8e80941Smrg} 5376848b8605Smrg 5377b8e80941Smrgstatic int tgsi_kill(struct r600_shader_ctx *ctx) 5378b8e80941Smrg{ 5379b8e80941Smrg const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5380b8e80941Smrg struct r600_bytecode_alu alu; 5381b8e80941Smrg int i, r; 5382848b8605Smrg 5383b8e80941Smrg for (i = 0; i < 4; i++) { 5384b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5385b8e80941Smrg alu.op = ctx->inst_info->op; 5386848b8605Smrg 5387b8e80941Smrg alu.dst.chan = i; 5388848b8605Smrg 5389b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 5390848b8605Smrg 5391b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 5392b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1; 5393b8e80941Smrg alu.src[1].neg = 1; 5394b8e80941Smrg } else { 5395848b8605Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5396b8e80941Smrg } 5397b8e80941Smrg if (i == 3) { 5398848b8605Smrg alu.last = 1; 5399b8e80941Smrg } 5400b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5401b8e80941Smrg if (r) 5402b8e80941Smrg return r; 5403b8e80941Smrg } 5404848b8605Smrg 5405b8e80941Smrg /* kill must be last in ALU */ 5406b8e80941Smrg ctx->bc->force_add_cf = 1; 5407b8e80941Smrg ctx->shader->uses_kill = TRUE; 5408b8e80941Smrg return 0; 5409b8e80941Smrg} 5410848b8605Smrg 5411b8e80941Smrgstatic int tgsi_lit(struct r600_shader_ctx *ctx) 5412b8e80941Smrg{ 5413b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5414b8e80941Smrg struct r600_bytecode_alu alu; 5415b8e80941Smrg int r; 5416848b8605Smrg 5417b8e80941Smrg /* tmp.x = max(src.y, 0.0) */ 5418b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5419b8e80941Smrg alu.op = ALU_OP2_MAX; 5420b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 5421b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 5422b8e80941Smrg alu.src[1].chan = 1; 5423848b8605Smrg 5424b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5425b8e80941Smrg alu.dst.chan = 0; 5426b8e80941Smrg alu.dst.write = 1; 5427848b8605Smrg 5428b8e80941Smrg alu.last = 1; 5429b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5430b8e80941Smrg if (r) 5431b8e80941Smrg return r; 5432b8e80941Smrg 5433b8e80941Smrg if (inst->Dst[0].Register.WriteMask & (1 << 2)) 5434b8e80941Smrg { 5435b8e80941Smrg int chan; 5436b8e80941Smrg int sel; 5437b8e80941Smrg unsigned i; 5438848b8605Smrg 5439848b8605Smrg if (ctx->bc->chip_class == CAYMAN) { 5440b8e80941Smrg for (i = 0; i < 3; i++) { 5441b8e80941Smrg /* tmp.z = log(tmp.x) */ 5442848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5443b8e80941Smrg alu.op = ALU_OP1_LOG_CLAMPED; 5444b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5445848b8605Smrg alu.src[0].chan = 0; 5446b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5447b8e80941Smrg alu.dst.chan = i; 5448b8e80941Smrg if (i == 2) { 5449b8e80941Smrg alu.dst.write = 1; 5450848b8605Smrg alu.last = 1; 5451b8e80941Smrg } else 5452b8e80941Smrg alu.dst.write = 0; 5453b8e80941Smrg 5454b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5455b8e80941Smrg if (r) 5456848b8605Smrg return r; 5457848b8605Smrg } 5458b8e80941Smrg } else { 5459b8e80941Smrg /* tmp.z = log(tmp.x) */ 5460848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5461b8e80941Smrg alu.op = ALU_OP1_LOG_CLAMPED; 5462b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5463848b8605Smrg alu.src[0].chan = 0; 5464b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5465b8e80941Smrg alu.dst.chan = 2; 5466848b8605Smrg alu.dst.write = 1; 5467848b8605Smrg alu.last = 1; 5468848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5469848b8605Smrg if (r) 5470848b8605Smrg return r; 5471848b8605Smrg } 5472848b8605Smrg 5473b8e80941Smrg chan = alu.dst.chan; 5474b8e80941Smrg sel = alu.dst.sel; 5475848b8605Smrg 5476b8e80941Smrg /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 5477848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5478b8e80941Smrg alu.op = ALU_OP3_MUL_LIT; 5479b8e80941Smrg alu.src[0].sel = sel; 5480b8e80941Smrg alu.src[0].chan = chan; 5481b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 5482b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 5483b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5484b8e80941Smrg alu.dst.chan = 0; 5485848b8605Smrg alu.dst.write = 1; 5486b8e80941Smrg alu.is_op3 = 1; 5487848b8605Smrg alu.last = 1; 5488b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5489b8e80941Smrg if (r) 5490848b8605Smrg return r; 5491848b8605Smrg 5492848b8605Smrg if (ctx->bc->chip_class == CAYMAN) { 5493b8e80941Smrg for (i = 0; i < 3; i++) { 5494b8e80941Smrg /* dst.z = exp(tmp.x) */ 5495848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5496b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 5497b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5498848b8605Smrg alu.src[0].chan = 0; 5499b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5500b8e80941Smrg if (i == 2) { 5501b8e80941Smrg alu.dst.write = 1; 5502b8e80941Smrg alu.last = 1; 5503b8e80941Smrg } else 5504b8e80941Smrg alu.dst.write = 0; 5505b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5506b8e80941Smrg if (r) 5507848b8605Smrg return r; 5508848b8605Smrg } 5509848b8605Smrg } else { 5510b8e80941Smrg /* dst.z = exp(tmp.x) */ 5511848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5512b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 5513b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5514848b8605Smrg alu.src[0].chan = 0; 5515b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 5516848b8605Smrg alu.last = 1; 5517b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5518b8e80941Smrg if (r) 5519848b8605Smrg return r; 5520848b8605Smrg } 5521b8e80941Smrg } 5522848b8605Smrg 5523b8e80941Smrg /* dst.x, <- 1.0 */ 5524b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5525b8e80941Smrg alu.op = ALU_OP1_MOV; 5526b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 5527b8e80941Smrg alu.src[0].chan = 0; 5528b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 5529b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 5530b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5531b8e80941Smrg if (r) 5532b8e80941Smrg return r; 5533848b8605Smrg 5534b8e80941Smrg /* dst.y = max(src.x, 0.0) */ 5535b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5536b8e80941Smrg alu.op = ALU_OP2_MAX; 5537b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5538b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 5539b8e80941Smrg alu.src[1].chan = 0; 5540b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 5541b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 5542b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5543b8e80941Smrg if (r) 5544b8e80941Smrg return r; 5545848b8605Smrg 5546b8e80941Smrg /* dst.w, <- 1.0 */ 5547b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5548b8e80941Smrg alu.op = ALU_OP1_MOV; 5549b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 5550b8e80941Smrg alu.src[0].chan = 0; 5551b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 5552b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 5553b8e80941Smrg alu.last = 1; 5554b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5555b8e80941Smrg if (r) 5556b8e80941Smrg return r; 5557848b8605Smrg 5558b8e80941Smrg return 0; 5559b8e80941Smrg} 5560848b8605Smrg 5561b8e80941Smrgstatic int tgsi_rsq(struct r600_shader_ctx *ctx) 5562b8e80941Smrg{ 5563b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5564b8e80941Smrg struct r600_bytecode_alu alu; 5565b8e80941Smrg int i, r; 5566848b8605Smrg 5567b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5568848b8605Smrg 5569b8e80941Smrg alu.op = ALU_OP1_RECIPSQRT_IEEE; 5570848b8605Smrg 5571b8e80941Smrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 5572b8e80941Smrg r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 5573b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[i]); 5574b8e80941Smrg } 5575b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5576b8e80941Smrg alu.dst.write = 1; 5577b8e80941Smrg alu.last = 1; 5578b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5579b8e80941Smrg if (r) 5580b8e80941Smrg return r; 5581b8e80941Smrg /* replicate result */ 5582b8e80941Smrg return tgsi_helper_tempx_replicate(ctx); 5583b8e80941Smrg} 5584848b8605Smrg 5585b8e80941Smrgstatic int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 5586b8e80941Smrg{ 5587b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5588b8e80941Smrg struct r600_bytecode_alu alu; 5589b8e80941Smrg int i, r; 5590848b8605Smrg 5591b8e80941Smrg for (i = 0; i < 4; i++) { 5592848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5593b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5594b8e80941Smrg alu.op = ALU_OP1_MOV; 5595b8e80941Smrg alu.dst.chan = i; 5596b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5597b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5598b8e80941Smrg if (i == 3) 5599b8e80941Smrg alu.last = 1; 5600b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5601b8e80941Smrg if (r) 5602848b8605Smrg return r; 5603b8e80941Smrg } 5604b8e80941Smrg return 0; 5605b8e80941Smrg} 5606848b8605Smrg 5607b8e80941Smrgstatic int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 5608b8e80941Smrg{ 5609b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5610b8e80941Smrg struct r600_bytecode_alu alu; 5611b8e80941Smrg int i, r; 5612848b8605Smrg 5613b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5614b8e80941Smrg alu.op = ctx->inst_info->op; 5615b8e80941Smrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 5616b8e80941Smrg r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 5617b8e80941Smrg } 5618b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5619b8e80941Smrg alu.dst.write = 1; 5620b8e80941Smrg alu.last = 1; 5621b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5622b8e80941Smrg if (r) 5623b8e80941Smrg return r; 5624b8e80941Smrg /* replicate result */ 5625b8e80941Smrg return tgsi_helper_tempx_replicate(ctx); 5626b8e80941Smrg} 5627848b8605Smrg 5628b8e80941Smrgstatic int cayman_pow(struct r600_shader_ctx *ctx) 5629b8e80941Smrg{ 5630b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5631b8e80941Smrg int i, r; 5632b8e80941Smrg struct r600_bytecode_alu alu; 5633b8e80941Smrg int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 5634848b8605Smrg 5635b8e80941Smrg for (i = 0; i < 3; i++) { 5636848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5637b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 5638b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5639b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5640b8e80941Smrg alu.dst.chan = i; 5641848b8605Smrg alu.dst.write = 1; 5642b8e80941Smrg if (i == 2) 5643b8e80941Smrg alu.last = 1; 5644b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5645b8e80941Smrg if (r) 5646848b8605Smrg return r; 5647b8e80941Smrg } 5648848b8605Smrg 5649b8e80941Smrg /* b * LOG2(a) */ 5650b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5651b8e80941Smrg alu.op = ALU_OP2_MUL; 5652b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5653b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 5654b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5655b8e80941Smrg alu.dst.write = 1; 5656b8e80941Smrg alu.last = 1; 5657b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5658b8e80941Smrg if (r) 5659b8e80941Smrg return r; 5660848b8605Smrg 5661b8e80941Smrg for (i = 0; i < last_slot; i++) { 5662b8e80941Smrg /* POW(a,b) = EXP2(b * LOG2(a))*/ 5663b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5664b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 5665b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5666848b8605Smrg 5667b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5668b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5669b8e80941Smrg if (i == last_slot - 1) 5670b8e80941Smrg alu.last = 1; 5671b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5672b8e80941Smrg if (r) 5673b8e80941Smrg return r; 5674b8e80941Smrg } 5675b8e80941Smrg return 0; 5676b8e80941Smrg} 5677848b8605Smrg 5678b8e80941Smrgstatic int tgsi_pow(struct r600_shader_ctx *ctx) 5679b8e80941Smrg{ 5680b8e80941Smrg struct r600_bytecode_alu alu; 5681b8e80941Smrg int r; 5682848b8605Smrg 5683b8e80941Smrg /* LOG2(a) */ 5684b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5685b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 5686b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5687b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5688b8e80941Smrg alu.dst.write = 1; 5689b8e80941Smrg alu.last = 1; 5690b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5691b8e80941Smrg if (r) 5692b8e80941Smrg return r; 5693b8e80941Smrg /* b * LOG2(a) */ 5694b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5695b8e80941Smrg alu.op = ALU_OP2_MUL; 5696b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5697b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 5698b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5699b8e80941Smrg alu.dst.write = 1; 5700b8e80941Smrg alu.last = 1; 5701b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5702b8e80941Smrg if (r) 5703b8e80941Smrg return r; 5704b8e80941Smrg /* POW(a,b) = EXP2(b * LOG2(a))*/ 5705b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5706b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 5707b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 5708b8e80941Smrg alu.dst.sel = ctx->temp_reg; 5709b8e80941Smrg alu.dst.write = 1; 5710b8e80941Smrg alu.last = 1; 5711b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5712b8e80941Smrg if (r) 5713b8e80941Smrg return r; 5714b8e80941Smrg return tgsi_helper_tempx_replicate(ctx); 5715b8e80941Smrg} 5716848b8605Smrg 5717b8e80941Smrgstatic int emit_mul_int_op(struct r600_bytecode *bc, 5718b8e80941Smrg struct r600_bytecode_alu *alu_src) 5719b8e80941Smrg{ 5720b8e80941Smrg struct r600_bytecode_alu alu; 5721b8e80941Smrg int i, r; 5722b8e80941Smrg alu = *alu_src; 5723b8e80941Smrg if (bc->chip_class == CAYMAN) { 5724b8e80941Smrg for (i = 0; i < 4; i++) { 5725b8e80941Smrg alu.dst.chan = i; 5726b8e80941Smrg alu.dst.write = (i == alu_src->dst.chan); 5727b8e80941Smrg alu.last = (i == 3); 5728848b8605Smrg 5729b8e80941Smrg r = r600_bytecode_add_alu(bc, &alu); 5730b8e80941Smrg if (r) 5731848b8605Smrg return r; 5732848b8605Smrg } 5733b8e80941Smrg } else { 5734b8e80941Smrg alu.last = 1; 5735b8e80941Smrg r = r600_bytecode_add_alu(bc, &alu); 5736b8e80941Smrg if (r) 5737b8e80941Smrg return r; 5738b8e80941Smrg } 5739b8e80941Smrg return 0; 5740b8e80941Smrg} 5741848b8605Smrg 5742b8e80941Smrgstatic int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 5743b8e80941Smrg{ 5744b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5745b8e80941Smrg struct r600_bytecode_alu alu; 5746b8e80941Smrg int i, r, j; 5747b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 5748b8e80941Smrg int lasti = tgsi_last_instruction(write_mask); 5749b8e80941Smrg int tmp0 = ctx->temp_reg; 5750b8e80941Smrg int tmp1 = r600_get_temp(ctx); 5751b8e80941Smrg int tmp2 = r600_get_temp(ctx); 5752b8e80941Smrg int tmp3 = r600_get_temp(ctx); 5753b8e80941Smrg int tmp4 = 0; 5754848b8605Smrg 5755b8e80941Smrg /* Use additional temp if dst register and src register are the same */ 5756b8e80941Smrg if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index || 5757b8e80941Smrg inst->Src[1].Register.Index == inst->Dst[0].Register.Index) { 5758b8e80941Smrg tmp4 = r600_get_temp(ctx); 5759b8e80941Smrg } 5760848b8605Smrg 5761b8e80941Smrg /* Unsigned path: 5762b8e80941Smrg * 5763b8e80941Smrg * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 5764b8e80941Smrg * 5765b8e80941Smrg * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 5766b8e80941Smrg * 2. tmp0.z = lo (tmp0.x * src2) 5767b8e80941Smrg * 3. tmp0.w = -tmp0.z 5768b8e80941Smrg * 4. tmp0.y = hi (tmp0.x * src2) 5769b8e80941Smrg * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 5770b8e80941Smrg * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 5771b8e80941Smrg * 7. tmp1.x = tmp0.x - tmp0.w 5772b8e80941Smrg * 8. tmp1.y = tmp0.x + tmp0.w 5773b8e80941Smrg * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 5774b8e80941Smrg * 10. tmp0.z = hi(tmp0.x * src1) = q 5775b8e80941Smrg * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 5776b8e80941Smrg * 5777b8e80941Smrg * 12. tmp0.w = src1 - tmp0.y = r 5778b8e80941Smrg * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 5779b8e80941Smrg * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 5780b8e80941Smrg * 5781b8e80941Smrg * if DIV 5782b8e80941Smrg * 5783b8e80941Smrg * 15. tmp1.z = tmp0.z + 1 = q + 1 5784b8e80941Smrg * 16. tmp1.w = tmp0.z - 1 = q - 1 5785b8e80941Smrg * 5786b8e80941Smrg * else MOD 5787b8e80941Smrg * 5788b8e80941Smrg * 15. tmp1.z = tmp0.w - src2 = r - src2 5789b8e80941Smrg * 16. tmp1.w = tmp0.w + src2 = r + src2 5790b8e80941Smrg * 5791b8e80941Smrg * endif 5792b8e80941Smrg * 5793b8e80941Smrg * 17. tmp1.x = tmp1.x & tmp1.y 5794b8e80941Smrg * 5795b8e80941Smrg * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 5796b8e80941Smrg * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 5797b8e80941Smrg * 5798b8e80941Smrg * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 5799b8e80941Smrg * 20. dst = src2==0 ? MAX_UINT : tmp0.z 5800b8e80941Smrg * 5801b8e80941Smrg * Signed path: 5802b8e80941Smrg * 5803b8e80941Smrg * Same as unsigned, using abs values of the operands, 5804b8e80941Smrg * and fixing the sign of the result in the end. 5805b8e80941Smrg */ 5806848b8605Smrg 5807b8e80941Smrg for (i = 0; i < 4; i++) { 5808b8e80941Smrg if (!(write_mask & (1<<i))) 5809b8e80941Smrg continue; 5810848b8605Smrg 5811b8e80941Smrg if (signed_op) { 5812b8e80941Smrg 5813b8e80941Smrg /* tmp2.x = -src0 */ 5814848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5815b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 5816848b8605Smrg 5817b8e80941Smrg alu.dst.sel = tmp2; 5818b8e80941Smrg alu.dst.chan = 0; 5819848b8605Smrg alu.dst.write = 1; 5820848b8605Smrg 5821b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 5822b8e80941Smrg 5823b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5824848b8605Smrg 5825848b8605Smrg alu.last = 1; 5826848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5827848b8605Smrg return r; 5828848b8605Smrg 5829b8e80941Smrg /* tmp2.y = -src1 */ 5830b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5831b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 5832848b8605Smrg 5833b8e80941Smrg alu.dst.sel = tmp2; 5834b8e80941Smrg alu.dst.chan = 1; 5835b8e80941Smrg alu.dst.write = 1; 5836848b8605Smrg 5837b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 5838848b8605Smrg 5839b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5840848b8605Smrg 5841b8e80941Smrg alu.last = 1; 5842b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5843b8e80941Smrg return r; 5844848b8605Smrg 5845b8e80941Smrg /* tmp2.z sign bit is set if src0 and src2 signs are different */ 5846b8e80941Smrg /* it will be a sign of the quotient */ 5847b8e80941Smrg if (!mod) { 5848848b8605Smrg 5849b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5850b8e80941Smrg alu.op = ALU_OP2_XOR_INT; 5851848b8605Smrg 5852b8e80941Smrg alu.dst.sel = tmp2; 5853b8e80941Smrg alu.dst.chan = 2; 5854b8e80941Smrg alu.dst.write = 1; 5855848b8605Smrg 5856b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5857b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5858848b8605Smrg 5859b8e80941Smrg alu.last = 1; 5860b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5861b8e80941Smrg return r; 5862b8e80941Smrg } 5863848b8605Smrg 5864b8e80941Smrg /* tmp2.x = |src0| */ 5865b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5866b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 5867b8e80941Smrg alu.is_op3 = 1; 5868848b8605Smrg 5869b8e80941Smrg alu.dst.sel = tmp2; 5870b8e80941Smrg alu.dst.chan = 0; 5871b8e80941Smrg alu.dst.write = 1; 5872848b8605Smrg 5873b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5874b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5875b8e80941Smrg alu.src[2].sel = tmp2; 5876b8e80941Smrg alu.src[2].chan = 0; 5877848b8605Smrg 5878b8e80941Smrg alu.last = 1; 5879b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5880b8e80941Smrg return r; 5881848b8605Smrg 5882b8e80941Smrg /* tmp2.y = |src1| */ 5883848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5884b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 5885b8e80941Smrg alu.is_op3 = 1; 5886848b8605Smrg 5887b8e80941Smrg alu.dst.sel = tmp2; 5888b8e80941Smrg alu.dst.chan = 1; 5889848b8605Smrg alu.dst.write = 1; 5890848b8605Smrg 5891b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5892b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5893b8e80941Smrg alu.src[2].sel = tmp2; 5894b8e80941Smrg alu.src[2].chan = 1; 5895848b8605Smrg 5896848b8605Smrg alu.last = 1; 5897848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5898848b8605Smrg return r; 5899848b8605Smrg 5900b8e80941Smrg } 5901b8e80941Smrg 5902b8e80941Smrg /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 5903b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 5904b8e80941Smrg /* tmp3.x = u2f(src2) */ 5905848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5906b8e80941Smrg alu.op = ALU_OP1_UINT_TO_FLT; 5907848b8605Smrg 5908b8e80941Smrg alu.dst.sel = tmp3; 5909b8e80941Smrg alu.dst.chan = 0; 5910848b8605Smrg alu.dst.write = 1; 5911848b8605Smrg 5912848b8605Smrg if (signed_op) { 5913b8e80941Smrg alu.src[0].sel = tmp2; 5914b8e80941Smrg alu.src[0].chan = 1; 5915848b8605Smrg } else { 5916b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5917848b8605Smrg } 5918848b8605Smrg 5919848b8605Smrg alu.last = 1; 5920848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5921848b8605Smrg return r; 5922848b8605Smrg 5923b8e80941Smrg /* tmp0.x = recip(tmp3.x) */ 5924b8e80941Smrg for (j = 0 ; j < 3; j++) { 5925b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5926b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 5927848b8605Smrg 5928b8e80941Smrg alu.dst.sel = tmp0; 5929b8e80941Smrg alu.dst.chan = j; 5930b8e80941Smrg alu.dst.write = (j == 0); 5931848b8605Smrg 5932b8e80941Smrg alu.src[0].sel = tmp3; 5933b8e80941Smrg alu.src[0].chan = 0; 5934b8e80941Smrg 5935b8e80941Smrg if (j == 2) 5936b8e80941Smrg alu.last = 1; 5937b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5938b8e80941Smrg return r; 5939b8e80941Smrg } 5940b8e80941Smrg 5941b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5942b8e80941Smrg alu.op = ALU_OP2_MUL; 5943848b8605Smrg 5944848b8605Smrg alu.src[0].sel = tmp0; 5945b8e80941Smrg alu.src[0].chan = 0; 5946b8e80941Smrg 5947b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5948b8e80941Smrg alu.src[1].value = 0x4f800000; 5949848b8605Smrg 5950b8e80941Smrg alu.dst.sel = tmp3; 5951b8e80941Smrg alu.dst.write = 1; 5952848b8605Smrg alu.last = 1; 5953b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 5954b8e80941Smrg if (r) 5955848b8605Smrg return r; 5956848b8605Smrg 5957848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5958b8e80941Smrg alu.op = ALU_OP1_FLT_TO_UINT; 5959848b8605Smrg 5960b8e80941Smrg alu.dst.sel = tmp0; 5961b8e80941Smrg alu.dst.chan = 0; 5962848b8605Smrg alu.dst.write = 1; 5963848b8605Smrg 5964b8e80941Smrg alu.src[0].sel = tmp3; 5965b8e80941Smrg alu.src[0].chan = 0; 5966848b8605Smrg 5967848b8605Smrg alu.last = 1; 5968848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5969848b8605Smrg return r; 5970848b8605Smrg 5971b8e80941Smrg } else { 5972b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5973b8e80941Smrg alu.op = ALU_OP1_RECIP_UINT; 5974b8e80941Smrg 5975b8e80941Smrg alu.dst.sel = tmp0; 5976b8e80941Smrg alu.dst.chan = 0; 5977b8e80941Smrg alu.dst.write = 1; 5978b8e80941Smrg 5979b8e80941Smrg if (signed_op) { 5980b8e80941Smrg alu.src[0].sel = tmp2; 5981b8e80941Smrg alu.src[0].chan = 1; 5982b8e80941Smrg } else { 5983b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5984b8e80941Smrg } 5985b8e80941Smrg 5986b8e80941Smrg alu.last = 1; 5987b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5988b8e80941Smrg return r; 5989848b8605Smrg } 5990848b8605Smrg 5991b8e80941Smrg /* 2. tmp0.z = lo (tmp0.x * src2) */ 5992848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5993b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 5994848b8605Smrg 5995b8e80941Smrg alu.dst.sel = tmp0; 5996b8e80941Smrg alu.dst.chan = 2; 5997848b8605Smrg alu.dst.write = 1; 5998848b8605Smrg 5999b8e80941Smrg alu.src[0].sel = tmp0; 6000848b8605Smrg alu.src[0].chan = 0; 6001b8e80941Smrg if (signed_op) { 6002b8e80941Smrg alu.src[1].sel = tmp2; 6003b8e80941Smrg alu.src[1].chan = 1; 6004b8e80941Smrg } else { 6005b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6006b8e80941Smrg } 6007848b8605Smrg 6008b8e80941Smrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6009848b8605Smrg return r; 6010848b8605Smrg 6011b8e80941Smrg /* 3. tmp0.w = -tmp0.z */ 6012848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6013b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6014848b8605Smrg 6015848b8605Smrg alu.dst.sel = tmp0; 6016b8e80941Smrg alu.dst.chan = 3; 6017848b8605Smrg alu.dst.write = 1; 6018848b8605Smrg 6019b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 6020848b8605Smrg alu.src[1].sel = tmp0; 6021b8e80941Smrg alu.src[1].chan = 2; 6022848b8605Smrg 6023848b8605Smrg alu.last = 1; 6024848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6025848b8605Smrg return r; 6026848b8605Smrg 6027b8e80941Smrg /* 4. tmp0.y = hi (tmp0.x * src2) */ 6028848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6029b8e80941Smrg alu.op = ALU_OP2_MULHI_UINT; 6030b8e80941Smrg 6031b8e80941Smrg alu.dst.sel = tmp0; 6032b8e80941Smrg alu.dst.chan = 1; 6033b8e80941Smrg alu.dst.write = 1; 6034b8e80941Smrg 6035b8e80941Smrg alu.src[0].sel = tmp0; 6036b8e80941Smrg alu.src[0].chan = 0; 6037848b8605Smrg 6038848b8605Smrg if (signed_op) { 6039b8e80941Smrg alu.src[1].sel = tmp2; 6040b8e80941Smrg alu.src[1].chan = 1; 6041848b8605Smrg } else { 6042b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6043848b8605Smrg } 6044848b8605Smrg 6045b8e80941Smrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6046b8e80941Smrg return r; 6047b8e80941Smrg 6048b8e80941Smrg /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 6049b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6050b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 6051b8e80941Smrg alu.is_op3 = 1; 6052b8e80941Smrg 6053b8e80941Smrg alu.dst.sel = tmp0; 6054b8e80941Smrg alu.dst.chan = 2; 6055b8e80941Smrg alu.dst.write = 1; 6056b8e80941Smrg 6057b8e80941Smrg alu.src[0].sel = tmp0; 6058848b8605Smrg alu.src[0].chan = 1; 6059b8e80941Smrg alu.src[1].sel = tmp0; 6060848b8605Smrg alu.src[1].chan = 3; 6061848b8605Smrg alu.src[2].sel = tmp0; 6062848b8605Smrg alu.src[2].chan = 2; 6063848b8605Smrg 6064848b8605Smrg alu.last = 1; 6065848b8605Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6066848b8605Smrg return r; 6067848b8605Smrg 6068b8e80941Smrg /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 6069b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6070b8e80941Smrg alu.op = ALU_OP2_MULHI_UINT; 6071848b8605Smrg 6072b8e80941Smrg alu.dst.sel = tmp0; 6073b8e80941Smrg alu.dst.chan = 3; 6074b8e80941Smrg alu.dst.write = 1; 6075848b8605Smrg 6076b8e80941Smrg alu.src[0].sel = tmp0; 6077b8e80941Smrg alu.src[0].chan = 2; 6078848b8605Smrg 6079b8e80941Smrg alu.src[1].sel = tmp0; 6080b8e80941Smrg alu.src[1].chan = 0; 6081848b8605Smrg 6082b8e80941Smrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6083b8e80941Smrg return r; 6084848b8605Smrg 6085b8e80941Smrg /* 7. tmp1.x = tmp0.x - tmp0.w */ 6086b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6087b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6088848b8605Smrg 6089b8e80941Smrg alu.dst.sel = tmp1; 6090b8e80941Smrg alu.dst.chan = 0; 6091b8e80941Smrg alu.dst.write = 1; 6092848b8605Smrg 6093b8e80941Smrg alu.src[0].sel = tmp0; 6094b8e80941Smrg alu.src[0].chan = 0; 6095b8e80941Smrg alu.src[1].sel = tmp0; 6096b8e80941Smrg alu.src[1].chan = 3; 6097848b8605Smrg 6098b8e80941Smrg alu.last = 1; 6099b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6100b8e80941Smrg return r; 6101848b8605Smrg 6102b8e80941Smrg /* 8. tmp1.y = tmp0.x + tmp0.w */ 6103b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6104b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 6105848b8605Smrg 6106b8e80941Smrg alu.dst.sel = tmp1; 6107b8e80941Smrg alu.dst.chan = 1; 6108b8e80941Smrg alu.dst.write = 1; 6109848b8605Smrg 6110b8e80941Smrg alu.src[0].sel = tmp0; 6111b8e80941Smrg alu.src[0].chan = 0; 6112b8e80941Smrg alu.src[1].sel = tmp0; 6113b8e80941Smrg alu.src[1].chan = 3; 6114848b8605Smrg 6115b8e80941Smrg alu.last = 1; 6116b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6117b8e80941Smrg return r; 6118848b8605Smrg 6119b8e80941Smrg /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 6120b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6121b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 6122b8e80941Smrg alu.is_op3 = 1; 6123848b8605Smrg 6124b8e80941Smrg alu.dst.sel = tmp0; 6125b8e80941Smrg alu.dst.chan = 0; 6126b8e80941Smrg alu.dst.write = 1; 6127848b8605Smrg 6128b8e80941Smrg alu.src[0].sel = tmp0; 6129b8e80941Smrg alu.src[0].chan = 1; 6130b8e80941Smrg alu.src[1].sel = tmp1; 6131b8e80941Smrg alu.src[1].chan = 1; 6132b8e80941Smrg alu.src[2].sel = tmp1; 6133b8e80941Smrg alu.src[2].chan = 0; 6134848b8605Smrg 6135b8e80941Smrg alu.last = 1; 6136b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6137b8e80941Smrg return r; 6138848b8605Smrg 6139b8e80941Smrg /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 6140b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6141b8e80941Smrg alu.op = ALU_OP2_MULHI_UINT; 6142848b8605Smrg 6143b8e80941Smrg alu.dst.sel = tmp0; 6144b8e80941Smrg alu.dst.chan = 2; 6145b8e80941Smrg alu.dst.write = 1; 6146848b8605Smrg 6147b8e80941Smrg alu.src[0].sel = tmp0; 6148b8e80941Smrg alu.src[0].chan = 0; 6149848b8605Smrg 6150b8e80941Smrg if (signed_op) { 6151b8e80941Smrg alu.src[1].sel = tmp2; 6152b8e80941Smrg alu.src[1].chan = 0; 6153b8e80941Smrg } else { 6154b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6155b8e80941Smrg } 6156848b8605Smrg 6157b8e80941Smrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6158b8e80941Smrg return r; 6159848b8605Smrg 6160b8e80941Smrg /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 6161b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6162b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 6163848b8605Smrg 6164b8e80941Smrg alu.dst.sel = tmp0; 6165b8e80941Smrg alu.dst.chan = 1; 6166b8e80941Smrg alu.dst.write = 1; 6167848b8605Smrg 6168b8e80941Smrg if (signed_op) { 6169b8e80941Smrg alu.src[0].sel = tmp2; 6170b8e80941Smrg alu.src[0].chan = 1; 6171b8e80941Smrg } else { 6172b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6173b8e80941Smrg } 6174848b8605Smrg 6175b8e80941Smrg alu.src[1].sel = tmp0; 6176b8e80941Smrg alu.src[1].chan = 2; 6177848b8605Smrg 6178b8e80941Smrg if ((r = emit_mul_int_op(ctx->bc, &alu))) 6179b8e80941Smrg return r; 6180848b8605Smrg 6181b8e80941Smrg /* 12. tmp0.w = src1 - tmp0.y = r */ 6182848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6183b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6184848b8605Smrg 6185b8e80941Smrg alu.dst.sel = tmp0; 6186b8e80941Smrg alu.dst.chan = 3; 6187848b8605Smrg alu.dst.write = 1; 6188848b8605Smrg 6189b8e80941Smrg if (signed_op) { 6190b8e80941Smrg alu.src[0].sel = tmp2; 6191b8e80941Smrg alu.src[0].chan = 0; 6192b8e80941Smrg } else { 6193b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6194b8e80941Smrg } 6195848b8605Smrg 6196b8e80941Smrg alu.src[1].sel = tmp0; 6197b8e80941Smrg alu.src[1].chan = 1; 6198848b8605Smrg 6199b8e80941Smrg alu.last = 1; 6200b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6201848b8605Smrg return r; 6202848b8605Smrg 6203b8e80941Smrg /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */ 6204848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6205b8e80941Smrg alu.op = ALU_OP2_SETGE_UINT; 6206848b8605Smrg 6207b8e80941Smrg alu.dst.sel = tmp1; 6208b8e80941Smrg alu.dst.chan = 0; 6209848b8605Smrg alu.dst.write = 1; 6210848b8605Smrg 6211b8e80941Smrg alu.src[0].sel = tmp0; 6212b8e80941Smrg alu.src[0].chan = 3; 6213b8e80941Smrg if (signed_op) { 6214b8e80941Smrg alu.src[1].sel = tmp2; 6215b8e80941Smrg alu.src[1].chan = 1; 6216b8e80941Smrg } else { 6217b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6218b8e80941Smrg } 6219848b8605Smrg 6220b8e80941Smrg alu.last = 1; 6221b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6222848b8605Smrg return r; 6223848b8605Smrg 6224b8e80941Smrg /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 6225848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6226b8e80941Smrg alu.op = ALU_OP2_SETGE_UINT; 6227b8e80941Smrg 6228b8e80941Smrg alu.dst.sel = tmp1; 6229b8e80941Smrg alu.dst.chan = 1; 6230848b8605Smrg alu.dst.write = 1; 6231848b8605Smrg 6232b8e80941Smrg if (signed_op) { 6233b8e80941Smrg alu.src[0].sel = tmp2; 6234b8e80941Smrg alu.src[0].chan = 0; 6235b8e80941Smrg } else { 6236b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6237b8e80941Smrg } 6238848b8605Smrg 6239b8e80941Smrg alu.src[1].sel = tmp0; 6240b8e80941Smrg alu.src[1].chan = 1; 6241848b8605Smrg 6242b8e80941Smrg alu.last = 1; 6243b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6244848b8605Smrg return r; 6245848b8605Smrg 6246b8e80941Smrg if (mod) { /* UMOD */ 6247848b8605Smrg 6248b8e80941Smrg /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 6249b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6250b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6251848b8605Smrg 6252b8e80941Smrg alu.dst.sel = tmp1; 6253b8e80941Smrg alu.dst.chan = 2; 6254b8e80941Smrg alu.dst.write = 1; 6255848b8605Smrg 6256b8e80941Smrg alu.src[0].sel = tmp0; 6257b8e80941Smrg alu.src[0].chan = 3; 6258848b8605Smrg 6259b8e80941Smrg if (signed_op) { 6260b8e80941Smrg alu.src[1].sel = tmp2; 6261b8e80941Smrg alu.src[1].chan = 1; 6262b8e80941Smrg } else { 6263b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6264b8e80941Smrg } 6265848b8605Smrg 6266848b8605Smrg alu.last = 1; 6267b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6268b8e80941Smrg return r; 6269848b8605Smrg 6270b8e80941Smrg /* 16. tmp1.w = tmp0.w + src2 = r + src2 */ 6271b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6272b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 6273848b8605Smrg 6274b8e80941Smrg alu.dst.sel = tmp1; 6275b8e80941Smrg alu.dst.chan = 3; 6276b8e80941Smrg alu.dst.write = 1; 6277848b8605Smrg 6278b8e80941Smrg alu.src[0].sel = tmp0; 6279b8e80941Smrg alu.src[0].chan = 3; 6280b8e80941Smrg if (signed_op) { 6281b8e80941Smrg alu.src[1].sel = tmp2; 6282b8e80941Smrg alu.src[1].chan = 1; 6283b8e80941Smrg } else { 6284b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 6285b8e80941Smrg } 6286848b8605Smrg 6287b8e80941Smrg alu.last = 1; 6288b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6289b8e80941Smrg return r; 6290848b8605Smrg 6291b8e80941Smrg } else { /* UDIV */ 6292848b8605Smrg 6293b8e80941Smrg /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 6294b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6295b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 6296b8e80941Smrg 6297b8e80941Smrg alu.dst.sel = tmp1; 6298b8e80941Smrg alu.dst.chan = 2; 6299b8e80941Smrg alu.dst.write = 1; 6300b8e80941Smrg 6301b8e80941Smrg alu.src[0].sel = tmp0; 6302b8e80941Smrg alu.src[0].chan = 2; 6303b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6304848b8605Smrg 6305848b8605Smrg alu.last = 1; 6306b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6307b8e80941Smrg return r; 6308848b8605Smrg 6309b8e80941Smrg /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 6310b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6311b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 6312848b8605Smrg 6313b8e80941Smrg alu.dst.sel = tmp1; 6314b8e80941Smrg alu.dst.chan = 3; 6315b8e80941Smrg alu.dst.write = 1; 6316848b8605Smrg 6317b8e80941Smrg alu.src[0].sel = tmp0; 6318b8e80941Smrg alu.src[0].chan = 2; 6319b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 6320848b8605Smrg 6321b8e80941Smrg alu.last = 1; 6322b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6323b8e80941Smrg return r; 6324b8e80941Smrg 6325b8e80941Smrg } 6326b8e80941Smrg 6327b8e80941Smrg /* 17. tmp1.x = tmp1.x & tmp1.y */ 6328848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6329b8e80941Smrg alu.op = ALU_OP2_AND_INT; 6330848b8605Smrg 6331b8e80941Smrg alu.dst.sel = tmp1; 6332b8e80941Smrg alu.dst.chan = 0; 6333b8e80941Smrg alu.dst.write = 1; 6334848b8605Smrg 6335b8e80941Smrg alu.src[0].sel = tmp1; 6336b8e80941Smrg alu.src[0].chan = 0; 6337b8e80941Smrg alu.src[1].sel = tmp1; 6338b8e80941Smrg alu.src[1].chan = 1; 6339848b8605Smrg 6340b8e80941Smrg alu.last = 1; 6341b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6342848b8605Smrg return r; 6343848b8605Smrg 6344b8e80941Smrg /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 6345b8e80941Smrg /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 6346848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6347b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 6348848b8605Smrg alu.is_op3 = 1; 6349848b8605Smrg 6350b8e80941Smrg alu.dst.sel = tmp0; 6351b8e80941Smrg alu.dst.chan = 2; 6352b8e80941Smrg alu.dst.write = 1; 6353848b8605Smrg 6354b8e80941Smrg alu.src[0].sel = tmp1; 6355b8e80941Smrg alu.src[0].chan = 0; 6356b8e80941Smrg alu.src[1].sel = tmp0; 6357b8e80941Smrg alu.src[1].chan = mod ? 3 : 2; 6358b8e80941Smrg alu.src[2].sel = tmp1; 6359b8e80941Smrg alu.src[2].chan = 2; 6360848b8605Smrg 6361b8e80941Smrg alu.last = 1; 6362b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6363b8e80941Smrg return r; 6364848b8605Smrg 6365b8e80941Smrg /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 6366b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6367b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 6368b8e80941Smrg alu.is_op3 = 1; 6369b8e80941Smrg 6370b8e80941Smrg if (signed_op) { 6371b8e80941Smrg alu.dst.sel = tmp0; 6372b8e80941Smrg alu.dst.chan = 2; 6373b8e80941Smrg alu.dst.write = 1; 6374b8e80941Smrg } else { 6375b8e80941Smrg if (tmp4 > 0) { 6376b8e80941Smrg alu.dst.sel = tmp4; 6377b8e80941Smrg alu.dst.chan = i; 6378b8e80941Smrg alu.dst.write = 1; 6379b8e80941Smrg } else { 6380b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6381b8e80941Smrg } 6382b8e80941Smrg } 6383b8e80941Smrg 6384b8e80941Smrg alu.src[0].sel = tmp1; 6385b8e80941Smrg alu.src[0].chan = 1; 6386b8e80941Smrg alu.src[1].sel = tmp1; 6387b8e80941Smrg alu.src[1].chan = 3; 6388b8e80941Smrg alu.src[2].sel = tmp0; 6389b8e80941Smrg alu.src[2].chan = 2; 6390b8e80941Smrg 6391b8e80941Smrg alu.last = 1; 6392b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6393848b8605Smrg return r; 6394b8e80941Smrg 6395b8e80941Smrg if (signed_op) { 6396b8e80941Smrg 6397b8e80941Smrg /* fix the sign of the result */ 6398b8e80941Smrg 6399b8e80941Smrg if (mod) { 6400b8e80941Smrg 6401b8e80941Smrg /* tmp0.x = -tmp0.z */ 6402b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6403b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6404b8e80941Smrg 6405b8e80941Smrg alu.dst.sel = tmp0; 6406b8e80941Smrg alu.dst.chan = 0; 6407b8e80941Smrg alu.dst.write = 1; 6408b8e80941Smrg 6409b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 6410b8e80941Smrg alu.src[1].sel = tmp0; 6411b8e80941Smrg alu.src[1].chan = 2; 6412b8e80941Smrg 6413b8e80941Smrg alu.last = 1; 6414b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6415b8e80941Smrg return r; 6416b8e80941Smrg 6417b8e80941Smrg /* sign of the remainder is the same as the sign of src0 */ 6418b8e80941Smrg /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ 6419b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6420b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 6421b8e80941Smrg alu.is_op3 = 1; 6422b8e80941Smrg 6423b8e80941Smrg if (tmp4 > 0) { 6424b8e80941Smrg alu.dst.sel = tmp4; 6425b8e80941Smrg alu.dst.chan = i; 6426b8e80941Smrg alu.dst.write = 1; 6427b8e80941Smrg } else { 6428b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6429b8e80941Smrg } 6430b8e80941Smrg 6431b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6432b8e80941Smrg alu.src[1].sel = tmp0; 6433b8e80941Smrg alu.src[1].chan = 2; 6434b8e80941Smrg alu.src[2].sel = tmp0; 6435b8e80941Smrg alu.src[2].chan = 0; 6436b8e80941Smrg 6437b8e80941Smrg alu.last = 1; 6438b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6439b8e80941Smrg return r; 6440b8e80941Smrg 6441b8e80941Smrg } else { 6442b8e80941Smrg 6443b8e80941Smrg /* tmp0.x = -tmp0.z */ 6444b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6445b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6446b8e80941Smrg 6447b8e80941Smrg alu.dst.sel = tmp0; 6448b8e80941Smrg alu.dst.chan = 0; 6449b8e80941Smrg alu.dst.write = 1; 6450b8e80941Smrg 6451b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 6452b8e80941Smrg alu.src[1].sel = tmp0; 6453b8e80941Smrg alu.src[1].chan = 2; 6454b8e80941Smrg 6455b8e80941Smrg alu.last = 1; 6456b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6457b8e80941Smrg return r; 6458b8e80941Smrg 6459b8e80941Smrg /* fix the quotient sign (same as the sign of src0*src1) */ 6460b8e80941Smrg /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ 6461b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6462b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 6463b8e80941Smrg alu.is_op3 = 1; 6464b8e80941Smrg 6465b8e80941Smrg if (tmp4 > 0) { 6466b8e80941Smrg alu.dst.sel = tmp4; 6467b8e80941Smrg alu.dst.chan = i; 6468b8e80941Smrg alu.dst.write = 1; 6469b8e80941Smrg } else { 6470b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6471b8e80941Smrg } 6472b8e80941Smrg 6473b8e80941Smrg alu.src[0].sel = tmp2; 6474b8e80941Smrg alu.src[0].chan = 2; 6475b8e80941Smrg alu.src[1].sel = tmp0; 6476b8e80941Smrg alu.src[1].chan = 2; 6477b8e80941Smrg alu.src[2].sel = tmp0; 6478b8e80941Smrg alu.src[2].chan = 0; 6479b8e80941Smrg 6480b8e80941Smrg alu.last = 1; 6481b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6482b8e80941Smrg return r; 6483b8e80941Smrg } 6484b8e80941Smrg } 6485b8e80941Smrg } 6486b8e80941Smrg 6487b8e80941Smrg if (tmp4 > 0) { 6488b8e80941Smrg for (i = 0; i <= lasti; ++i) { 6489b8e80941Smrg if (!(write_mask & (1<<i))) 6490b8e80941Smrg continue; 6491b8e80941Smrg 6492b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6493b8e80941Smrg alu.op = ALU_OP1_MOV; 6494b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6495b8e80941Smrg alu.src[0].sel = tmp4; 6496b8e80941Smrg alu.src[0].chan = i; 6497b8e80941Smrg 6498b8e80941Smrg if (i == lasti) 6499b8e80941Smrg alu.last = 1; 6500b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 6501b8e80941Smrg return r; 6502b8e80941Smrg } 6503848b8605Smrg } 6504b8e80941Smrg 6505848b8605Smrg return 0; 6506848b8605Smrg} 6507848b8605Smrg 6508b8e80941Smrgstatic int tgsi_udiv(struct r600_shader_ctx *ctx) 6509848b8605Smrg{ 6510b8e80941Smrg return tgsi_divmod(ctx, 0, 0); 6511b8e80941Smrg} 6512848b8605Smrg 6513b8e80941Smrgstatic int tgsi_umod(struct r600_shader_ctx *ctx) 6514b8e80941Smrg{ 6515b8e80941Smrg return tgsi_divmod(ctx, 1, 0); 6516b8e80941Smrg} 6517848b8605Smrg 6518b8e80941Smrgstatic int tgsi_idiv(struct r600_shader_ctx *ctx) 6519b8e80941Smrg{ 6520b8e80941Smrg return tgsi_divmod(ctx, 0, 1); 6521b8e80941Smrg} 6522848b8605Smrg 6523b8e80941Smrgstatic int tgsi_imod(struct r600_shader_ctx *ctx) 6524b8e80941Smrg{ 6525b8e80941Smrg return tgsi_divmod(ctx, 1, 1); 6526b8e80941Smrg} 6527848b8605Smrg 6528848b8605Smrg 6529b8e80941Smrgstatic int tgsi_f2i(struct r600_shader_ctx *ctx) 6530b8e80941Smrg{ 6531b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6532b8e80941Smrg struct r600_bytecode_alu alu; 6533b8e80941Smrg int i, r; 6534b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6535b8e80941Smrg int last_inst = tgsi_last_instruction(write_mask); 6536848b8605Smrg 6537848b8605Smrg for (i = 0; i < 4; i++) { 6538848b8605Smrg if (!(write_mask & (1<<i))) 6539848b8605Smrg continue; 6540848b8605Smrg 6541848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6542b8e80941Smrg alu.op = ALU_OP1_TRUNC; 6543b8e80941Smrg 6544b8e80941Smrg alu.dst.sel = ctx->temp_reg; 6545848b8605Smrg alu.dst.chan = i; 6546848b8605Smrg alu.dst.write = 1; 6547848b8605Smrg 6548b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6549b8e80941Smrg if (i == last_inst) 6550b8e80941Smrg alu.last = 1; 6551848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6552848b8605Smrg if (r) 6553848b8605Smrg return r; 6554848b8605Smrg } 6555848b8605Smrg 6556848b8605Smrg for (i = 0; i < 4; i++) { 6557848b8605Smrg if (!(write_mask & (1<<i))) 6558848b8605Smrg continue; 6559848b8605Smrg 6560848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6561b8e80941Smrg alu.op = ctx->inst_info->op; 6562b8e80941Smrg 6563848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6564848b8605Smrg 6565b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 6566848b8605Smrg alu.src[0].chan = i; 6567848b8605Smrg 6568b8e80941Smrg if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 6569b8e80941Smrg alu.last = 1; 6570848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6571848b8605Smrg if (r) 6572848b8605Smrg return r; 6573848b8605Smrg } 6574848b8605Smrg 6575848b8605Smrg return 0; 6576848b8605Smrg} 6577848b8605Smrg 6578b8e80941Smrgstatic int tgsi_iabs(struct r600_shader_ctx *ctx) 6579848b8605Smrg{ 6580848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6581848b8605Smrg struct r600_bytecode_alu alu; 6582b8e80941Smrg int i, r; 6583848b8605Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6584848b8605Smrg int last_inst = tgsi_last_instruction(write_mask); 6585848b8605Smrg 6586b8e80941Smrg /* tmp = -src */ 6587848b8605Smrg for (i = 0; i < 4; i++) { 6588848b8605Smrg if (!(write_mask & (1<<i))) 6589848b8605Smrg continue; 6590848b8605Smrg 6591848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6592b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6593b8e80941Smrg 6594b8e80941Smrg alu.dst.sel = ctx->temp_reg; 6595848b8605Smrg alu.dst.chan = i; 6596848b8605Smrg alu.dst.write = 1; 6597848b8605Smrg 6598b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6599b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 6600848b8605Smrg 6601b8e80941Smrg if (i == last_inst) 6602b8e80941Smrg alu.last = 1; 6603848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6604848b8605Smrg if (r) 6605848b8605Smrg return r; 6606848b8605Smrg } 6607848b8605Smrg 6608b8e80941Smrg /* dst = (src >= 0 ? src : tmp) */ 6609848b8605Smrg for (i = 0; i < 4; i++) { 6610848b8605Smrg if (!(write_mask & (1<<i))) 6611848b8605Smrg continue; 6612848b8605Smrg 6613848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6614b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 6615b8e80941Smrg alu.is_op3 = 1; 6616848b8605Smrg alu.dst.write = 1; 6617848b8605Smrg 6618b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6619848b8605Smrg 6620b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6621b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6622b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 6623b8e80941Smrg alu.src[2].chan = i; 6624b8e80941Smrg 6625b8e80941Smrg if (i == last_inst) 6626b8e80941Smrg alu.last = 1; 6627848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6628848b8605Smrg if (r) 6629848b8605Smrg return r; 6630848b8605Smrg } 6631b8e80941Smrg return 0; 6632b8e80941Smrg} 6633b8e80941Smrg 6634b8e80941Smrgstatic int tgsi_issg(struct r600_shader_ctx *ctx) 6635b8e80941Smrg{ 6636b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6637b8e80941Smrg struct r600_bytecode_alu alu; 6638b8e80941Smrg int i, r; 6639b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6640b8e80941Smrg int last_inst = tgsi_last_instruction(write_mask); 6641848b8605Smrg 6642b8e80941Smrg /* tmp = (src >= 0 ? src : -1) */ 6643848b8605Smrg for (i = 0; i < 4; i++) { 6644848b8605Smrg if (!(write_mask & (1<<i))) 6645848b8605Smrg continue; 6646848b8605Smrg 6647848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6648848b8605Smrg alu.op = ALU_OP3_CNDGE_INT; 6649848b8605Smrg alu.is_op3 = 1; 6650b8e80941Smrg 6651b8e80941Smrg alu.dst.sel = ctx->temp_reg; 6652848b8605Smrg alu.dst.chan = i; 6653848b8605Smrg alu.dst.write = 1; 6654848b8605Smrg 6655b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6656b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6657b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 6658848b8605Smrg 6659b8e80941Smrg if (i == last_inst) 6660b8e80941Smrg alu.last = 1; 6661848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6662848b8605Smrg if (r) 6663848b8605Smrg return r; 6664848b8605Smrg } 6665848b8605Smrg 6666b8e80941Smrg /* dst = (tmp > 0 ? 1 : tmp) */ 6667848b8605Smrg for (i = 0; i < 4; i++) { 6668b8e80941Smrg if (!(write_mask & (1<<i))) 6669b8e80941Smrg continue; 6670b8e80941Smrg 6671848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6672b8e80941Smrg alu.op = ALU_OP3_CNDGT_INT; 6673b8e80941Smrg alu.is_op3 = 1; 6674b8e80941Smrg alu.dst.write = 1; 6675b8e80941Smrg 6676b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6677b8e80941Smrg 6678b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 6679b8e80941Smrg alu.src[0].chan = i; 6680b8e80941Smrg 6681b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6682b8e80941Smrg 6683b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 6684b8e80941Smrg alu.src[2].chan = i; 6685b8e80941Smrg 6686b8e80941Smrg if (i == last_inst) 6687848b8605Smrg alu.last = 1; 6688848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6689848b8605Smrg if (r) 6690848b8605Smrg return r; 6691848b8605Smrg } 6692848b8605Smrg return 0; 6693848b8605Smrg} 6694848b8605Smrg 6695b8e80941Smrg 6696b8e80941Smrg 6697b8e80941Smrgstatic int tgsi_ssg(struct r600_shader_ctx *ctx) 6698848b8605Smrg{ 6699848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6700b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6701b8e80941Smrg int last_inst = tgsi_last_instruction(write_mask); 6702848b8605Smrg struct r600_bytecode_alu alu; 6703b8e80941Smrg int i, r; 6704848b8605Smrg 6705b8e80941Smrg /* tmp = (src > 0 ? 1 : src) */ 6706b8e80941Smrg for (i = 0; i <= last_inst; i++) { 6707b8e80941Smrg if (!(write_mask & (1 << i))) 6708848b8605Smrg continue; 6709848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6710b8e80941Smrg alu.op = ALU_OP3_CNDGT; 6711b8e80941Smrg alu.is_op3 = 1; 6712848b8605Smrg 6713b8e80941Smrg alu.dst.sel = ctx->temp_reg; 6714848b8605Smrg alu.dst.chan = i; 6715b8e80941Smrg 6716b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6717b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1; 6718b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6719b8e80941Smrg 6720b8e80941Smrg if (i == last_inst) 6721848b8605Smrg alu.last = 1; 6722848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6723848b8605Smrg if (r) 6724848b8605Smrg return r; 6725848b8605Smrg } 6726848b8605Smrg 6727b8e80941Smrg /* dst = (-tmp > 0 ? -1 : tmp) */ 6728b8e80941Smrg for (i = 0; i <= last_inst; i++) { 6729b8e80941Smrg if (!(write_mask & (1 << i))) 6730b8e80941Smrg continue; 6731848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6732b8e80941Smrg alu.op = ALU_OP3_CNDGT; 6733b8e80941Smrg alu.is_op3 = 1; 6734848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6735b8e80941Smrg 6736b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 6737b8e80941Smrg alu.src[0].chan = i; 6738b8e80941Smrg alu.src[0].neg = 1; 6739b8e80941Smrg 6740b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1; 6741b8e80941Smrg alu.src[1].neg = 1; 6742b8e80941Smrg 6743b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 6744b8e80941Smrg alu.src[2].chan = i; 6745b8e80941Smrg 6746b8e80941Smrg if (i == last_inst) 6747848b8605Smrg alu.last = 1; 6748848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6749848b8605Smrg if (r) 6750848b8605Smrg return r; 6751848b8605Smrg } 6752848b8605Smrg return 0; 6753848b8605Smrg} 6754848b8605Smrg 6755b8e80941Smrgstatic int tgsi_bfi(struct r600_shader_ctx *ctx) 6756848b8605Smrg{ 6757848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6758848b8605Smrg struct r600_bytecode_alu alu; 6759b8e80941Smrg int i, r, t1, t2; 6760848b8605Smrg 6761b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6762b8e80941Smrg int last_inst = tgsi_last_instruction(write_mask); 6763848b8605Smrg 6764b8e80941Smrg t1 = r600_get_temp(ctx); 6765848b8605Smrg 6766b8e80941Smrg for (i = 0; i < 4; i++) { 6767b8e80941Smrg if (!(write_mask & (1<<i))) 6768b8e80941Smrg continue; 6769848b8605Smrg 6770b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6771b8e80941Smrg alu.op = ALU_OP2_SETGE_INT; 6772b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 6773b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6774b8e80941Smrg alu.src[1].value = 32; 6775b8e80941Smrg alu.dst.sel = ctx->temp_reg; 6776b8e80941Smrg alu.dst.chan = i; 6777b8e80941Smrg alu.dst.write = 1; 6778b8e80941Smrg alu.last = i == last_inst; 6779b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6780b8e80941Smrg if (r) 6781b8e80941Smrg return r; 6782b8e80941Smrg } 6783848b8605Smrg 6784848b8605Smrg for (i = 0; i < 4; i++) { 6785b8e80941Smrg if (!(write_mask & (1<<i))) 6786848b8605Smrg continue; 6787848b8605Smrg 6788b8e80941Smrg /* create mask tmp */ 6789848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6790b8e80941Smrg alu.op = ALU_OP2_BFM_INT; 6791b8e80941Smrg alu.dst.sel = t1; 6792848b8605Smrg alu.dst.chan = i; 6793848b8605Smrg alu.dst.write = 1; 6794b8e80941Smrg alu.last = i == last_inst; 6795848b8605Smrg 6796b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 6797b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6798848b8605Smrg 6799848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6800848b8605Smrg if (r) 6801848b8605Smrg return r; 6802848b8605Smrg } 6803848b8605Smrg 6804b8e80941Smrg t2 = r600_get_temp(ctx); 6805b8e80941Smrg 6806b8e80941Smrg for (i = 0; i < 4; i++) { 6807b8e80941Smrg if (!(write_mask & (1<<i))) 6808b8e80941Smrg continue; 6809b8e80941Smrg 6810b8e80941Smrg /* shift insert left */ 6811848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6812b8e80941Smrg alu.op = ALU_OP2_LSHL_INT; 6813b8e80941Smrg alu.dst.sel = t2; 6814b8e80941Smrg alu.dst.chan = i; 6815b8e80941Smrg alu.dst.write = 1; 6816b8e80941Smrg alu.last = i == last_inst; 6817848b8605Smrg 6818b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6819b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6820b8e80941Smrg 6821b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6822b8e80941Smrg if (r) 6823b8e80941Smrg return r; 6824b8e80941Smrg } 6825b8e80941Smrg 6826b8e80941Smrg for (i = 0; i < 4; i++) { 6827b8e80941Smrg if (!(write_mask & (1<<i))) 6828b8e80941Smrg continue; 6829b8e80941Smrg 6830b8e80941Smrg /* actual bitfield insert */ 6831b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6832b8e80941Smrg alu.op = ALU_OP3_BFI_INT; 6833b8e80941Smrg alu.is_op3 = 1; 6834b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6835b8e80941Smrg alu.dst.chan = i; 6836848b8605Smrg alu.dst.write = 1; 6837b8e80941Smrg alu.last = i == last_inst; 6838848b8605Smrg 6839b8e80941Smrg alu.src[0].sel = t1; 6840b8e80941Smrg alu.src[0].chan = i; 6841b8e80941Smrg alu.src[1].sel = t2; 6842b8e80941Smrg alu.src[1].chan = i; 6843b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6844848b8605Smrg 6845b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6846b8e80941Smrg if (r) 6847b8e80941Smrg return r; 6848b8e80941Smrg } 6849848b8605Smrg 6850b8e80941Smrg for (i = 0; i < 4; i++) { 6851b8e80941Smrg if (!(write_mask & (1<<i))) 6852b8e80941Smrg continue; 6853b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6854b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 6855b8e80941Smrg alu.is_op3 = 1; 6856b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 6857b8e80941Smrg alu.src[0].chan = i; 6858b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 6859b8e80941Smrg 6860b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6861b8e80941Smrg 6862b8e80941Smrg alu.src[1].sel = alu.dst.sel; 6863b8e80941Smrg alu.src[1].chan = i; 6864b8e80941Smrg 6865b8e80941Smrg alu.last = i == last_inst; 6866848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6867848b8605Smrg if (r) 6868848b8605Smrg return r; 6869848b8605Smrg } 6870848b8605Smrg return 0; 6871848b8605Smrg} 6872848b8605Smrg 6873b8e80941Smrgstatic int tgsi_msb(struct r600_shader_ctx *ctx) 6874848b8605Smrg{ 6875848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6876848b8605Smrg struct r600_bytecode_alu alu; 6877b8e80941Smrg int i, r, t1, t2; 6878848b8605Smrg 6879b8e80941Smrg unsigned write_mask = inst->Dst[0].Register.WriteMask; 6880b8e80941Smrg int last_inst = tgsi_last_instruction(write_mask); 6881848b8605Smrg 6882b8e80941Smrg assert(ctx->inst_info->op == ALU_OP1_FFBH_INT || 6883b8e80941Smrg ctx->inst_info->op == ALU_OP1_FFBH_UINT); 6884b8e80941Smrg 6885b8e80941Smrg t1 = ctx->temp_reg; 6886b8e80941Smrg 6887b8e80941Smrg /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ 6888b8e80941Smrg for (i = 0; i < 4; i++) { 6889b8e80941Smrg if (!(write_mask & (1<<i))) 6890b8e80941Smrg continue; 6891b8e80941Smrg 6892b8e80941Smrg /* t1 = FFBH_INT / FFBH_UINT */ 6893b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6894b8e80941Smrg alu.op = ctx->inst_info->op; 6895b8e80941Smrg alu.dst.sel = t1; 6896b8e80941Smrg alu.dst.chan = i; 6897b8e80941Smrg alu.dst.write = 1; 6898b8e80941Smrg alu.last = i == last_inst; 6899b8e80941Smrg 6900b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6901b8e80941Smrg 6902b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6903b8e80941Smrg if (r) 6904b8e80941Smrg return r; 6905848b8605Smrg } 6906848b8605Smrg 6907b8e80941Smrg t2 = r600_get_temp(ctx); 6908848b8605Smrg 6909b8e80941Smrg for (i = 0; i < 4; i++) { 6910b8e80941Smrg if (!(write_mask & (1<<i))) 6911b8e80941Smrg continue; 6912848b8605Smrg 6913b8e80941Smrg /* t2 = 31 - t1 */ 6914b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6915b8e80941Smrg alu.op = ALU_OP2_SUB_INT; 6916b8e80941Smrg alu.dst.sel = t2; 6917b8e80941Smrg alu.dst.chan = i; 6918b8e80941Smrg alu.dst.write = 1; 6919b8e80941Smrg alu.last = i == last_inst; 6920848b8605Smrg 6921b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 6922b8e80941Smrg alu.src[0].value = 31; 6923b8e80941Smrg alu.src[1].sel = t1; 6924b8e80941Smrg alu.src[1].chan = i; 6925848b8605Smrg 6926b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6927b8e80941Smrg if (r) 6928b8e80941Smrg return r; 6929b8e80941Smrg } 6930848b8605Smrg 6931b8e80941Smrg for (i = 0; i < 4; i++) { 6932b8e80941Smrg if (!(write_mask & (1<<i))) 6933b8e80941Smrg continue; 6934848b8605Smrg 6935b8e80941Smrg /* result = t1 >= 0 ? t2 : t1 */ 6936b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6937b8e80941Smrg alu.op = ALU_OP3_CNDGE_INT; 6938b8e80941Smrg alu.is_op3 = 1; 6939b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6940b8e80941Smrg alu.dst.chan = i; 6941b8e80941Smrg alu.dst.write = 1; 6942b8e80941Smrg alu.last = i == last_inst; 6943848b8605Smrg 6944b8e80941Smrg alu.src[0].sel = t1; 6945b8e80941Smrg alu.src[0].chan = i; 6946b8e80941Smrg alu.src[1].sel = t2; 6947b8e80941Smrg alu.src[1].chan = i; 6948b8e80941Smrg alu.src[2].sel = t1; 6949b8e80941Smrg alu.src[2].chan = i; 6950b8e80941Smrg 6951b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 6952b8e80941Smrg if (r) 6953b8e80941Smrg return r; 6954848b8605Smrg } 6955848b8605Smrg 6956b8e80941Smrg return 0; 6957b8e80941Smrg} 6958848b8605Smrg 6959b8e80941Smrgstatic int tgsi_interp_egcm(struct r600_shader_ctx *ctx) 6960b8e80941Smrg{ 6961b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6962b8e80941Smrg struct r600_bytecode_alu alu; 6963b8e80941Smrg int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti; 6964b8e80941Smrg unsigned location; 6965b8e80941Smrg const int input = inst->Src[0].Register.Index + ctx->shader->nsys_inputs; 6966848b8605Smrg 6967b8e80941Smrg assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); 6968848b8605Smrg 6969b8e80941Smrg /* Interpolators have been marked for use already by allocate_system_value_inputs */ 6970b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6971b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6972b8e80941Smrg location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */ 6973b8e80941Smrg } 6974b8e80941Smrg else { 6975b8e80941Smrg location = TGSI_INTERPOLATE_LOC_CENTROID; 6976b8e80941Smrg } 6977848b8605Smrg 6978b8e80941Smrg k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); 6979b8e80941Smrg if (k < 0) 6980b8e80941Smrg k = 0; 6981b8e80941Smrg interp_gpr = ctx->eg_interpolators[k].ij_index / 2; 6982b8e80941Smrg interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); 6983b8e80941Smrg 6984b8e80941Smrg /* NOTE: currently offset is not perspective correct */ 6985b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6986b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6987b8e80941Smrg int sample_gpr = -1; 6988b8e80941Smrg int gradientsH, gradientsV; 6989b8e80941Smrg struct r600_bytecode_tex tex; 6990b8e80941Smrg 6991b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6992b8e80941Smrg sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]); 6993b8e80941Smrg } 6994b8e80941Smrg 6995b8e80941Smrg gradientsH = r600_get_temp(ctx); 6996b8e80941Smrg gradientsV = r600_get_temp(ctx); 6997b8e80941Smrg for (i = 0; i < 2; i++) { 6998b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6999b8e80941Smrg tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V; 7000b8e80941Smrg tex.src_gpr = interp_gpr; 7001b8e80941Smrg tex.src_sel_x = interp_base_chan + 0; 7002b8e80941Smrg tex.src_sel_y = interp_base_chan + 1; 7003b8e80941Smrg tex.src_sel_z = 0; 7004b8e80941Smrg tex.src_sel_w = 0; 7005b8e80941Smrg tex.dst_gpr = i == 0 ? gradientsH : gradientsV; 7006b8e80941Smrg tex.dst_sel_x = 0; 7007b8e80941Smrg tex.dst_sel_y = 1; 7008b8e80941Smrg tex.dst_sel_z = 7; 7009b8e80941Smrg tex.dst_sel_w = 7; 7010b8e80941Smrg tex.inst_mod = 1; // Use per pixel gradient calculation 7011b8e80941Smrg tex.sampler_id = 0; 7012b8e80941Smrg tex.resource_id = tex.sampler_id; 7013848b8605Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 7014848b8605Smrg if (r) 7015848b8605Smrg return r; 7016848b8605Smrg } 7017848b8605Smrg 7018b8e80941Smrg for (i = 0; i < 2; i++) { 7019848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7020b8e80941Smrg alu.op = ALU_OP3_MULADD; 7021b8e80941Smrg alu.is_op3 = 1; 7022b8e80941Smrg alu.src[0].sel = gradientsH; 7023b8e80941Smrg alu.src[0].chan = i; 7024b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 7025b8e80941Smrg alu.src[1].sel = sample_gpr; 7026b8e80941Smrg alu.src[1].chan = 2; 7027b8e80941Smrg } 7028b8e80941Smrg else { 7029b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 7030b8e80941Smrg } 7031b8e80941Smrg alu.src[2].sel = interp_gpr; 7032b8e80941Smrg alu.src[2].chan = interp_base_chan + i; 7033848b8605Smrg alu.dst.sel = ctx->temp_reg; 7034b8e80941Smrg alu.dst.chan = i; 7035b8e80941Smrg alu.last = i == 1; 7036b8e80941Smrg 7037848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7038848b8605Smrg if (r) 7039848b8605Smrg return r; 7040848b8605Smrg } 7041848b8605Smrg 7042b8e80941Smrg for (i = 0; i < 2; i++) { 7043848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7044b8e80941Smrg alu.op = ALU_OP3_MULADD; 7045b8e80941Smrg alu.is_op3 = 1; 7046b8e80941Smrg alu.src[0].sel = gradientsV; 7047b8e80941Smrg alu.src[0].chan = i; 7048b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 7049b8e80941Smrg alu.src[1].sel = sample_gpr; 7050b8e80941Smrg alu.src[1].chan = 3; 7051b8e80941Smrg } 7052b8e80941Smrg else { 7053b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 7054b8e80941Smrg } 7055b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 7056b8e80941Smrg alu.src[2].chan = i; 7057848b8605Smrg alu.dst.sel = ctx->temp_reg; 7058848b8605Smrg alu.dst.chan = i; 7059b8e80941Smrg alu.last = i == 1; 7060b8e80941Smrg 7061848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7062848b8605Smrg if (r) 7063848b8605Smrg return r; 7064848b8605Smrg } 7065848b8605Smrg } 7066848b8605Smrg 7067b8e80941Smrg tmp = r600_get_temp(ctx); 7068b8e80941Smrg for (i = 0; i < 8; i++) { 7069b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7070b8e80941Smrg alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY; 7071848b8605Smrg 7072b8e80941Smrg alu.dst.sel = tmp; 7073b8e80941Smrg if ((i > 1 && i < 6)) { 7074848b8605Smrg alu.dst.write = 1; 7075848b8605Smrg } 7076b8e80941Smrg else { 7077b8e80941Smrg alu.dst.write = 0; 7078b8e80941Smrg } 7079b8e80941Smrg alu.dst.chan = i % 4; 7080848b8605Smrg 7081b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 7082b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 7083848b8605Smrg alu.src[0].sel = ctx->temp_reg; 7084b8e80941Smrg alu.src[0].chan = 1 - (i % 2); 7085b8e80941Smrg } else { 7086b8e80941Smrg alu.src[0].sel = interp_gpr; 7087b8e80941Smrg alu.src[0].chan = interp_base_chan + 1 - (i % 2); 7088848b8605Smrg } 7089b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 7090b8e80941Smrg alu.src[1].chan = 0; 7091848b8605Smrg 7092b8e80941Smrg alu.last = i % 4 == 3; 7093b8e80941Smrg alu.bank_swizzle_force = SQ_ALU_VEC_210; 7094848b8605Smrg 7095b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7096b8e80941Smrg if (r) 7097b8e80941Smrg return r; 7098b8e80941Smrg } 7099848b8605Smrg 7100b8e80941Smrg // INTERP can't swizzle dst 7101b8e80941Smrg lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7102b8e80941Smrg for (i = 0; i <= lasti; i++) { 7103b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7104b8e80941Smrg continue; 7105848b8605Smrg 7106b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7107b8e80941Smrg alu.op = ALU_OP1_MOV; 7108b8e80941Smrg alu.src[0].sel = tmp; 7109b8e80941Smrg alu.src[0].chan = ctx->src[0].swizzle[i]; 7110b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7111848b8605Smrg alu.dst.write = 1; 7112b8e80941Smrg alu.last = i == lasti; 7113848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7114848b8605Smrg if (r) 7115848b8605Smrg return r; 7116b8e80941Smrg } 7117848b8605Smrg 7118b8e80941Smrg return 0; 7119b8e80941Smrg} 7120848b8605Smrg 7121848b8605Smrg 7122b8e80941Smrgstatic int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 7123b8e80941Smrg{ 7124b8e80941Smrg struct r600_bytecode_alu alu; 7125b8e80941Smrg int i, r; 7126848b8605Smrg 7127b8e80941Smrg for (i = 0; i < 4; i++) { 7128b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7129b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 7130b8e80941Smrg alu.op = ALU_OP0_NOP; 7131b8e80941Smrg alu.dst.chan = i; 7132b8e80941Smrg } else { 7133848b8605Smrg alu.op = ALU_OP1_MOV; 7134b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7135b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7136b8e80941Smrg alu.src[0].chan = i; 7137b8e80941Smrg } 7138b8e80941Smrg if (i == 3) { 7139848b8605Smrg alu.last = 1; 7140848b8605Smrg } 7141b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7142b8e80941Smrg if (r) 7143b8e80941Smrg return r; 7144b8e80941Smrg } 7145b8e80941Smrg return 0; 7146b8e80941Smrg} 7147848b8605Smrg 7148b8e80941Smrgstatic int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, 7149b8e80941Smrg unsigned writemask, 7150b8e80941Smrg struct r600_bytecode_alu_src *bc_src, 7151b8e80941Smrg const struct r600_shader_src *shader_src) 7152b8e80941Smrg{ 7153b8e80941Smrg struct r600_bytecode_alu alu; 7154b8e80941Smrg int i, r; 7155b8e80941Smrg int lasti = tgsi_last_instruction(writemask); 7156b8e80941Smrg int temp_reg = 0; 7157848b8605Smrg 7158b8e80941Smrg r600_bytecode_src(&bc_src[0], shader_src, 0); 7159b8e80941Smrg r600_bytecode_src(&bc_src[1], shader_src, 1); 7160b8e80941Smrg r600_bytecode_src(&bc_src[2], shader_src, 2); 7161b8e80941Smrg r600_bytecode_src(&bc_src[3], shader_src, 3); 7162848b8605Smrg 7163b8e80941Smrg if (bc_src->abs) { 7164b8e80941Smrg temp_reg = r600_get_temp(ctx); 7165848b8605Smrg 7166b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 7167b8e80941Smrg if (!(writemask & (1 << i))) 7168b8e80941Smrg continue; 7169848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7170848b8605Smrg alu.op = ALU_OP1_MOV; 7171b8e80941Smrg alu.dst.sel = temp_reg; 7172b8e80941Smrg alu.dst.chan = i; 7173848b8605Smrg alu.dst.write = 1; 7174b8e80941Smrg alu.src[0] = bc_src[i]; 7175b8e80941Smrg if (i == lasti) { 7176b8e80941Smrg alu.last = 1; 7177b8e80941Smrg } 7178848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7179848b8605Smrg if (r) 7180848b8605Smrg return r; 7181b8e80941Smrg memset(&bc_src[i], 0, sizeof(*bc_src)); 7182b8e80941Smrg bc_src[i].sel = temp_reg; 7183b8e80941Smrg bc_src[i].chan = i; 7184848b8605Smrg } 7185848b8605Smrg } 7186b8e80941Smrg return 0; 7187b8e80941Smrg} 7188848b8605Smrg 7189b8e80941Smrgstatic int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst) 7190b8e80941Smrg{ 7191b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7192b8e80941Smrg struct r600_bytecode_alu alu; 7193b8e80941Smrg struct r600_bytecode_alu_src srcs[4][4]; 7194b8e80941Smrg int i, j, r; 7195b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7196b8e80941Smrg unsigned op = ctx->inst_info->op; 7197b8e80941Smrg 7198b8e80941Smrg if (op == ALU_OP3_MULADD_IEEE && 7199b8e80941Smrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 7200b8e80941Smrg op = ALU_OP3_MULADD; 7201b8e80941Smrg 7202b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7203b8e80941Smrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 7204b8e80941Smrg srcs[j], &ctx->src[j]); 7205b8e80941Smrg if (r) 7206b8e80941Smrg return r; 7207b8e80941Smrg } 7208b8e80941Smrg 7209b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 7210b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7211b8e80941Smrg continue; 7212b8e80941Smrg 7213b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7214b8e80941Smrg alu.op = op; 7215b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7216b8e80941Smrg alu.src[j] = srcs[j][i]; 7217b8e80941Smrg } 7218b8e80941Smrg 7219b8e80941Smrg if (dst == -1) { 7220b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7221b8e80941Smrg } else { 7222b8e80941Smrg alu.dst.sel = dst; 7223b8e80941Smrg } 7224b8e80941Smrg alu.dst.chan = i; 7225b8e80941Smrg alu.dst.write = 1; 7226b8e80941Smrg alu.is_op3 = 1; 7227b8e80941Smrg if (i == lasti) { 7228b8e80941Smrg alu.last = 1; 7229b8e80941Smrg } 7230b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7231b8e80941Smrg if (r) 7232b8e80941Smrg return r; 7233b8e80941Smrg } 7234b8e80941Smrg return 0; 7235b8e80941Smrg} 7236b8e80941Smrg 7237b8e80941Smrgstatic int tgsi_op3(struct r600_shader_ctx *ctx) 7238b8e80941Smrg{ 7239b8e80941Smrg return tgsi_op3_dst(ctx, -1); 7240b8e80941Smrg} 7241b8e80941Smrg 7242b8e80941Smrgstatic int tgsi_dp(struct r600_shader_ctx *ctx) 7243b8e80941Smrg{ 7244b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7245b8e80941Smrg struct r600_bytecode_alu alu; 7246b8e80941Smrg int i, j, r; 7247b8e80941Smrg unsigned op = ctx->inst_info->op; 7248b8e80941Smrg if (op == ALU_OP2_DOT4_IEEE && 7249b8e80941Smrg ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) 7250b8e80941Smrg op = ALU_OP2_DOT4; 7251b8e80941Smrg 7252b8e80941Smrg for (i = 0; i < 4; i++) { 7253b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7254b8e80941Smrg alu.op = op; 7255b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7256b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 7257b8e80941Smrg } 7258b8e80941Smrg 7259b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7260b8e80941Smrg alu.dst.chan = i; 7261b8e80941Smrg alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 7262b8e80941Smrg /* handle some special cases */ 7263b8e80941Smrg switch (inst->Instruction.Opcode) { 7264b8e80941Smrg case TGSI_OPCODE_DP2: 7265b8e80941Smrg if (i > 1) { 7266b8e80941Smrg alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 7267b8e80941Smrg alu.src[0].chan = alu.src[1].chan = 0; 7268b8e80941Smrg } 7269b8e80941Smrg break; 7270b8e80941Smrg case TGSI_OPCODE_DP3: 7271b8e80941Smrg if (i > 2) { 7272b8e80941Smrg alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 7273b8e80941Smrg alu.src[0].chan = alu.src[1].chan = 0; 7274b8e80941Smrg } 7275b8e80941Smrg break; 7276b8e80941Smrg default: 7277b8e80941Smrg break; 7278b8e80941Smrg } 7279b8e80941Smrg if (i == 3) { 7280b8e80941Smrg alu.last = 1; 7281b8e80941Smrg } 7282b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7283b8e80941Smrg if (r) 7284b8e80941Smrg return r; 7285b8e80941Smrg } 7286b8e80941Smrg return 0; 7287b8e80941Smrg} 7288b8e80941Smrg 7289b8e80941Smrgstatic inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 7290b8e80941Smrg unsigned index) 7291b8e80941Smrg{ 7292b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7293b8e80941Smrg return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 7294b8e80941Smrg inst->Src[index].Register.File != TGSI_FILE_INPUT && 7295b8e80941Smrg inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 7296b8e80941Smrg ctx->src[index].neg || ctx->src[index].abs || 7297b8e80941Smrg (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY); 7298b8e80941Smrg} 7299b8e80941Smrg 7300b8e80941Smrgstatic inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 7301b8e80941Smrg unsigned index) 7302b8e80941Smrg{ 7303b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7304b8e80941Smrg return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 7305b8e80941Smrg} 7306b8e80941Smrg 7307b8e80941Smrgstatic int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 7308b8e80941Smrg{ 7309b8e80941Smrg struct r600_bytecode_vtx vtx; 7310b8e80941Smrg struct r600_bytecode_alu alu; 7311b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7312b8e80941Smrg int src_gpr, r, i; 7313b8e80941Smrg int id = tgsi_tex_get_src_gpr(ctx, 1); 7314b8e80941Smrg int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 7315b8e80941Smrg 7316b8e80941Smrg src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 7317b8e80941Smrg if (src_requires_loading) { 7318b8e80941Smrg for (i = 0; i < 4; i++) { 7319848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7320848b8605Smrg alu.op = ALU_OP1_MOV; 7321848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7322848b8605Smrg alu.dst.sel = ctx->temp_reg; 7323848b8605Smrg alu.dst.chan = i; 7324848b8605Smrg if (i == 3) 7325848b8605Smrg alu.last = 1; 7326848b8605Smrg alu.dst.write = 1; 7327848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7328848b8605Smrg if (r) 7329848b8605Smrg return r; 7330848b8605Smrg } 7331848b8605Smrg src_gpr = ctx->temp_reg; 7332848b8605Smrg } 7333848b8605Smrg 7334b8e80941Smrg memset(&vtx, 0, sizeof(vtx)); 7335b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 7336b8e80941Smrg vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 7337b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 7338b8e80941Smrg vtx.src_gpr = src_gpr; 7339b8e80941Smrg vtx.mega_fetch_count = 16; 7340b8e80941Smrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7341b8e80941Smrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 7342b8e80941Smrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 7343b8e80941Smrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 7344b8e80941Smrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 7345b8e80941Smrg vtx.use_const_fields = 1; 7346b8e80941Smrg vtx.buffer_index_mode = sampler_index_mode; 7347848b8605Smrg 7348b8e80941Smrg if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 7349b8e80941Smrg return r; 7350848b8605Smrg 7351b8e80941Smrg if (ctx->bc->chip_class >= EVERGREEN) 7352b8e80941Smrg return 0; 7353848b8605Smrg 7354b8e80941Smrg for (i = 0; i < 4; i++) { 7355b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7356b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7357b8e80941Smrg continue; 7358848b8605Smrg 7359b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7360b8e80941Smrg alu.op = ALU_OP2_AND_INT; 7361848b8605Smrg 7362b8e80941Smrg alu.dst.chan = i; 7363b8e80941Smrg alu.dst.sel = vtx.dst_gpr; 7364b8e80941Smrg alu.dst.write = 1; 7365848b8605Smrg 7366b8e80941Smrg alu.src[0].sel = vtx.dst_gpr; 7367b8e80941Smrg alu.src[0].chan = i; 7368848b8605Smrg 7369b8e80941Smrg alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 7370b8e80941Smrg alu.src[1].sel += (id * 2); 7371b8e80941Smrg alu.src[1].chan = i % 4; 7372b8e80941Smrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7373b8e80941Smrg 7374b8e80941Smrg if (i == lasti) 7375b8e80941Smrg alu.last = 1; 7376b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7377b8e80941Smrg if (r) 7378b8e80941Smrg return r; 7379b8e80941Smrg } 7380b8e80941Smrg 7381b8e80941Smrg if (inst->Dst[0].Register.WriteMask & 3) { 7382b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7383b8e80941Smrg alu.op = ALU_OP2_OR_INT; 7384b8e80941Smrg 7385b8e80941Smrg alu.dst.chan = 3; 7386b8e80941Smrg alu.dst.sel = vtx.dst_gpr; 7387b8e80941Smrg alu.dst.write = 1; 7388b8e80941Smrg 7389b8e80941Smrg alu.src[0].sel = vtx.dst_gpr; 7390b8e80941Smrg alu.src[0].chan = 3; 7391b8e80941Smrg 7392b8e80941Smrg alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; 7393b8e80941Smrg alu.src[1].chan = 0; 7394b8e80941Smrg alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7395b8e80941Smrg 7396b8e80941Smrg alu.last = 1; 7397b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7398b8e80941Smrg if (r) 7399b8e80941Smrg return r; 7400b8e80941Smrg } 7401b8e80941Smrg return 0; 7402b8e80941Smrg} 7403b8e80941Smrg 7404b8e80941Smrgstatic int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset, int eg_buffer_base) 7405b8e80941Smrg{ 7406b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7407b8e80941Smrg int r; 7408b8e80941Smrg int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset; 7409b8e80941Smrg int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 7410b8e80941Smrg 7411b8e80941Smrg if (ctx->bc->chip_class < EVERGREEN) { 7412b8e80941Smrg struct r600_bytecode_alu alu; 7413b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7414b8e80941Smrg alu.op = ALU_OP1_MOV; 7415b8e80941Smrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7416b8e80941Smrg /* r600 we have them at channel 2 of the second dword */ 7417b8e80941Smrg alu.src[0].sel += (id * 2) + 1; 7418b8e80941Smrg alu.src[0].chan = 1; 7419b8e80941Smrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7420b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 7421b8e80941Smrg alu.last = 1; 7422b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7423b8e80941Smrg if (r) 7424b8e80941Smrg return r; 7425b8e80941Smrg return 0; 7426b8e80941Smrg } else { 7427b8e80941Smrg struct r600_bytecode_vtx vtx; 7428b8e80941Smrg memset(&vtx, 0, sizeof(vtx)); 7429b8e80941Smrg vtx.op = FETCH_OP_GET_BUFFER_RESINFO; 7430b8e80941Smrg vtx.buffer_id = id + eg_buffer_base; 7431b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 7432b8e80941Smrg vtx.src_gpr = 0; 7433b8e80941Smrg vtx.mega_fetch_count = 16; /* no idea here really... */ 7434b8e80941Smrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7435b8e80941Smrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 7436b8e80941Smrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7; /* SEL_Y */ 7437b8e80941Smrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7; /* SEL_Z */ 7438b8e80941Smrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7; /* SEL_W */ 7439b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 7440b8e80941Smrg vtx.buffer_index_mode = sampler_index_mode; 7441b8e80941Smrg 7442b8e80941Smrg if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) 7443b8e80941Smrg return r; 7444b8e80941Smrg return 0; 7445b8e80941Smrg } 7446b8e80941Smrg} 7447b8e80941Smrg 7448b8e80941Smrg 7449b8e80941Smrgstatic int tgsi_tex(struct r600_shader_ctx *ctx) 7450b8e80941Smrg{ 7451b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7452b8e80941Smrg struct r600_bytecode_tex tex; 7453b8e80941Smrg struct r600_bytecode_tex grad_offs[3]; 7454b8e80941Smrg struct r600_bytecode_alu alu; 7455b8e80941Smrg unsigned src_gpr; 7456b8e80941Smrg int r, i, j, n_grad_offs = 0; 7457b8e80941Smrg int opcode; 7458b8e80941Smrg bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 7459b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXF && 7460b8e80941Smrg (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 7461b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 7462b8e80941Smrg 7463b8e80941Smrg bool txf_add_offsets = inst->Texture.NumOffsets && 7464b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXF && 7465b8e80941Smrg inst->Texture.Texture != TGSI_TEXTURE_BUFFER; 7466b8e80941Smrg 7467b8e80941Smrg /* Texture fetch instructions can only use gprs as source. 7468b8e80941Smrg * Also they cannot negate the source or take the absolute value */ 7469b8e80941Smrg const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 7470b8e80941Smrg tgsi_tex_src_requires_loading(ctx, 0)) || 7471b8e80941Smrg read_compressed_msaa || txf_add_offsets; 7472b8e80941Smrg 7473b8e80941Smrg boolean src_loaded = FALSE; 7474b8e80941Smrg unsigned sampler_src_reg = 1; 7475b8e80941Smrg int8_t offset_x = 0, offset_y = 0, offset_z = 0; 7476b8e80941Smrg boolean has_txq_cube_array_z = false; 7477b8e80941Smrg unsigned sampler_index_mode; 7478b8e80941Smrg int array_index_offset_channel = -1; 7479b8e80941Smrg 7480b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 7481b8e80941Smrg ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7482b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 7483b8e80941Smrg if (inst->Dst[0].Register.WriteMask & 4) { 7484b8e80941Smrg ctx->shader->has_txq_cube_array_z_comp = true; 7485b8e80941Smrg has_txq_cube_array_z = true; 7486b8e80941Smrg } 7487b8e80941Smrg 7488b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 7489b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7490b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 7491b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TG4) 7492b8e80941Smrg sampler_src_reg = 2; 7493b8e80941Smrg 7494b8e80941Smrg /* TGSI moves the sampler to src reg 3 for TXD */ 7495b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 7496b8e80941Smrg sampler_src_reg = 3; 7497b8e80941Smrg 7498b8e80941Smrg sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 7499b8e80941Smrg 7500b8e80941Smrg src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 7501b8e80941Smrg 7502b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 7503b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 7504b8e80941Smrg if (ctx->bc->chip_class < EVERGREEN) 7505b8e80941Smrg ctx->shader->uses_tex_buffers = true; 7506b8e80941Smrg return r600_do_buffer_txq(ctx, 1, 0, R600_MAX_CONST_BUFFERS); 7507b8e80941Smrg } 7508b8e80941Smrg else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 7509b8e80941Smrg if (ctx->bc->chip_class < EVERGREEN) 7510b8e80941Smrg ctx->shader->uses_tex_buffers = true; 7511b8e80941Smrg return do_vtx_fetch_inst(ctx, src_requires_loading); 7512b8e80941Smrg } 7513b8e80941Smrg } 7514b8e80941Smrg 7515b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 7516b8e80941Smrg int out_chan; 7517b8e80941Smrg /* Add perspective divide */ 7518b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 7519b8e80941Smrg out_chan = 2; 7520b8e80941Smrg for (i = 0; i < 3; i++) { 7521848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7522b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 7523b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7524b8e80941Smrg 7525b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7526848b8605Smrg alu.dst.chan = i; 7527b8e80941Smrg if (i == 2) 7528848b8605Smrg alu.last = 1; 7529b8e80941Smrg if (out_chan == i) 7530b8e80941Smrg alu.dst.write = 1; 7531848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7532848b8605Smrg if (r) 7533848b8605Smrg return r; 7534848b8605Smrg } 7535b8e80941Smrg 7536848b8605Smrg } else { 7537b8e80941Smrg out_chan = 3; 7538848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7539b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 7540b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7541b8e80941Smrg 7542b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7543b8e80941Smrg alu.dst.chan = out_chan; 7544b8e80941Smrg alu.last = 1; 7545b8e80941Smrg alu.dst.write = 1; 7546b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7547b8e80941Smrg if (r) 7548b8e80941Smrg return r; 7549b8e80941Smrg } 7550b8e80941Smrg 7551b8e80941Smrg for (i = 0; i < 3; i++) { 7552b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7553b8e80941Smrg alu.op = ALU_OP2_MUL; 7554b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7555b8e80941Smrg alu.src[0].chan = out_chan; 7556b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 7557b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7558b8e80941Smrg alu.dst.chan = i; 7559b8e80941Smrg alu.dst.write = 1; 7560b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7561b8e80941Smrg if (r) 7562b8e80941Smrg return r; 7563b8e80941Smrg } 7564b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7565b8e80941Smrg alu.op = ALU_OP1_MOV; 7566b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 7567b8e80941Smrg alu.src[0].chan = 0; 7568b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7569b8e80941Smrg alu.dst.chan = 3; 7570b8e80941Smrg alu.last = 1; 7571b8e80941Smrg alu.dst.write = 1; 7572b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7573b8e80941Smrg if (r) 7574b8e80941Smrg return r; 7575b8e80941Smrg src_loaded = TRUE; 7576b8e80941Smrg src_gpr = ctx->temp_reg; 7577b8e80941Smrg } 7578b8e80941Smrg 7579b8e80941Smrg 7580b8e80941Smrg if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7581b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7582b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7583b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 7584b8e80941Smrg inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { 7585b8e80941Smrg 7586b8e80941Smrg static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 7587b8e80941Smrg static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 7588b8e80941Smrg 7589b8e80941Smrg /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 7590b8e80941Smrg for (i = 0; i < 4; i++) { 7591b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7592b8e80941Smrg alu.op = ALU_OP2_CUBE; 7593b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7594b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 7595b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7596b8e80941Smrg alu.dst.chan = i; 7597b8e80941Smrg if (i == 3) 7598b8e80941Smrg alu.last = 1; 7599b8e80941Smrg alu.dst.write = 1; 7600b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7601b8e80941Smrg if (r) 7602b8e80941Smrg return r; 7603b8e80941Smrg } 7604b8e80941Smrg 7605b8e80941Smrg /* tmp1.z = RCP_e(|tmp1.z|) */ 7606b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 7607b8e80941Smrg for (i = 0; i < 3; i++) { 7608b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7609b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 7610b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7611b8e80941Smrg alu.src[0].chan = 2; 7612b8e80941Smrg alu.src[0].abs = 1; 7613b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7614b8e80941Smrg alu.dst.chan = i; 7615b8e80941Smrg if (i == 2) 7616b8e80941Smrg alu.dst.write = 1; 7617b8e80941Smrg if (i == 2) 7618b8e80941Smrg alu.last = 1; 7619b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7620b8e80941Smrg if (r) 7621b8e80941Smrg return r; 7622b8e80941Smrg } 7623b8e80941Smrg } else { 7624b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7625b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 7626b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7627b8e80941Smrg alu.src[0].chan = 2; 7628b8e80941Smrg alu.src[0].abs = 1; 7629b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7630b8e80941Smrg alu.dst.chan = 2; 7631848b8605Smrg alu.dst.write = 1; 7632848b8605Smrg alu.last = 1; 7633848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7634848b8605Smrg if (r) 7635848b8605Smrg return r; 7636848b8605Smrg } 7637848b8605Smrg 7638b8e80941Smrg /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 7639b8e80941Smrg * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 7640b8e80941Smrg * muladd has no writemask, have to use another temp 7641b8e80941Smrg */ 7642b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7643b8e80941Smrg alu.op = ALU_OP3_MULADD; 7644b8e80941Smrg alu.is_op3 = 1; 7645b8e80941Smrg 7646b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7647b8e80941Smrg alu.src[0].chan = 0; 7648b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 7649b8e80941Smrg alu.src[1].chan = 2; 7650b8e80941Smrg 7651b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 7652b8e80941Smrg alu.src[2].chan = 0; 7653b8e80941Smrg alu.src[2].value = u_bitcast_f2u(1.5f); 7654b8e80941Smrg 7655b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7656b8e80941Smrg alu.dst.chan = 0; 7657b8e80941Smrg alu.dst.write = 1; 7658b8e80941Smrg 7659b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7660b8e80941Smrg if (r) 7661b8e80941Smrg return r; 7662b8e80941Smrg 7663b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7664b8e80941Smrg alu.op = ALU_OP3_MULADD; 7665b8e80941Smrg alu.is_op3 = 1; 7666b8e80941Smrg 7667b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7668b8e80941Smrg alu.src[0].chan = 1; 7669b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 7670b8e80941Smrg alu.src[1].chan = 2; 7671b8e80941Smrg 7672b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 7673b8e80941Smrg alu.src[2].chan = 0; 7674b8e80941Smrg alu.src[2].value = u_bitcast_f2u(1.5f); 7675b8e80941Smrg 7676b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7677b8e80941Smrg alu.dst.chan = 1; 7678b8e80941Smrg alu.dst.write = 1; 7679b8e80941Smrg 7680b8e80941Smrg alu.last = 1; 7681b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7682b8e80941Smrg if (r) 7683b8e80941Smrg return r; 7684b8e80941Smrg /* write initial compare value into Z component 7685b8e80941Smrg - W src 0 for shadow cube 7686b8e80941Smrg - X src 1 for shadow cube array */ 7687b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7688b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7689b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7690b8e80941Smrg alu.op = ALU_OP1_MOV; 7691b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 7692b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7693b8e80941Smrg else 7694b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7695b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7696b8e80941Smrg alu.dst.chan = 2; 7697b8e80941Smrg alu.dst.write = 1; 7698b8e80941Smrg alu.last = 1; 7699b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7700b8e80941Smrg if (r) 7701b8e80941Smrg return r; 7702b8e80941Smrg } 7703b8e80941Smrg 7704b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7705b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7706b8e80941Smrg if (ctx->bc->chip_class >= EVERGREEN) { 7707b8e80941Smrg int mytmp = r600_get_temp(ctx); 7708b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7709b8e80941Smrg alu.op = ALU_OP1_MOV; 7710b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7711b8e80941Smrg alu.src[0].chan = 3; 7712b8e80941Smrg alu.dst.sel = mytmp; 7713b8e80941Smrg alu.dst.chan = 0; 7714b8e80941Smrg alu.dst.write = 1; 7715b8e80941Smrg alu.last = 1; 7716b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7717b8e80941Smrg if (r) 7718b8e80941Smrg return r; 7719b8e80941Smrg 7720b8e80941Smrg /* Evaluate the array index according to floor(idx + 0.5). This 7721b8e80941Smrg * needs to be done before merging the face select value, because 7722b8e80941Smrg * otherwise the fractional part of the array index will interfere 7723b8e80941Smrg * with the face select value */ 7724b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7725b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7726b8e80941Smrg alu.op = ALU_OP1_RNDNE; 7727b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7728b8e80941Smrg alu.dst.chan = 3; 7729b8e80941Smrg alu.dst.write = 1; 7730b8e80941Smrg alu.last = 1; 7731b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7732b8e80941Smrg if (r) 7733b8e80941Smrg return r; 7734b8e80941Smrg 7735b8e80941Smrg /* Because the array slice index and the cube face index are merged 7736b8e80941Smrg * into one value we have to make sure the array slice index is >= 0, 7737b8e80941Smrg * otherwise the face selection will fail */ 7738b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7739b8e80941Smrg alu.op = ALU_OP2_MAX; 7740b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7741b8e80941Smrg alu.src[0].chan = 3; 7742b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0; 7743b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7744b8e80941Smrg alu.dst.chan = 3; 7745b8e80941Smrg alu.dst.write = 1; 7746b8e80941Smrg alu.last = 1; 7747b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7748b8e80941Smrg if (r) 7749b8e80941Smrg return r; 7750b8e80941Smrg 7751b8e80941Smrg /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 7752b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7753b8e80941Smrg alu.op = ALU_OP3_MULADD; 7754b8e80941Smrg alu.is_op3 = 1; 7755b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7756b8e80941Smrg alu.src[0].chan = 3; 7757b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7758b8e80941Smrg alu.src[1].chan = 0; 7759b8e80941Smrg alu.src[1].value = u_bitcast_f2u(8.0f); 7760b8e80941Smrg alu.src[2].sel = mytmp; 7761b8e80941Smrg alu.src[2].chan = 0; 7762b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7763b8e80941Smrg alu.dst.chan = 3; 7764b8e80941Smrg alu.dst.write = 1; 7765b8e80941Smrg alu.last = 1; 7766b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7767b8e80941Smrg if (r) 7768b8e80941Smrg return r; 7769b8e80941Smrg } else if (ctx->bc->chip_class < EVERGREEN) { 7770b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7771b8e80941Smrg tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 7772b8e80941Smrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7773b8e80941Smrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7774b8e80941Smrg tex.src_gpr = r600_get_temp(ctx); 7775b8e80941Smrg tex.src_sel_x = 0; 7776b8e80941Smrg tex.src_sel_y = 0; 7777b8e80941Smrg tex.src_sel_z = 0; 7778b8e80941Smrg tex.src_sel_w = 0; 7779b8e80941Smrg tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7780b8e80941Smrg tex.coord_type_x = 1; 7781b8e80941Smrg tex.coord_type_y = 1; 7782b8e80941Smrg tex.coord_type_z = 1; 7783b8e80941Smrg tex.coord_type_w = 1; 7784b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7785b8e80941Smrg alu.op = ALU_OP1_MOV; 7786b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7787b8e80941Smrg alu.dst.sel = tex.src_gpr; 7788b8e80941Smrg alu.dst.chan = 0; 7789b8e80941Smrg alu.last = 1; 7790b8e80941Smrg alu.dst.write = 1; 7791b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7792b8e80941Smrg if (r) 7793b8e80941Smrg return r; 7794b8e80941Smrg 7795b8e80941Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 7796b8e80941Smrg if (r) 7797b8e80941Smrg return r; 7798b8e80941Smrg } 7799b8e80941Smrg 7800b8e80941Smrg } 7801b8e80941Smrg 7802b8e80941Smrg /* for cube forms of lod and bias we need to route things */ 7803b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 7804b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXL || 7805b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7806b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 7807b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7808b8e80941Smrg alu.op = ALU_OP1_MOV; 7809b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7810b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 7811b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7812b8e80941Smrg else 7813b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7814b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7815b8e80941Smrg alu.dst.chan = 2; 7816b8e80941Smrg alu.last = 1; 7817b8e80941Smrg alu.dst.write = 1; 7818b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7819b8e80941Smrg if (r) 7820b8e80941Smrg return r; 7821b8e80941Smrg } 7822b8e80941Smrg 7823b8e80941Smrg src_loaded = TRUE; 7824b8e80941Smrg src_gpr = ctx->temp_reg; 7825b8e80941Smrg } 7826b8e80941Smrg 7827b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 7828b8e80941Smrg int temp_h = 0, temp_v = 0; 7829b8e80941Smrg int start_val = 0; 7830b8e80941Smrg 7831b8e80941Smrg /* if we've already loaded the src (i.e. CUBE don't reload it). */ 7832b8e80941Smrg if (src_loaded == TRUE) 7833b8e80941Smrg start_val = 1; 7834b8e80941Smrg else 7835b8e80941Smrg src_loaded = TRUE; 7836b8e80941Smrg for (i = start_val; i < 3; i++) { 7837b8e80941Smrg int treg = r600_get_temp(ctx); 7838b8e80941Smrg 7839b8e80941Smrg if (i == 0) 7840b8e80941Smrg src_gpr = treg; 7841b8e80941Smrg else if (i == 1) 7842b8e80941Smrg temp_h = treg; 7843b8e80941Smrg else 7844b8e80941Smrg temp_v = treg; 7845b8e80941Smrg 7846b8e80941Smrg for (j = 0; j < 4; j++) { 7847b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7848b8e80941Smrg alu.op = ALU_OP1_MOV; 7849b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 7850b8e80941Smrg alu.dst.sel = treg; 7851b8e80941Smrg alu.dst.chan = j; 7852b8e80941Smrg if (j == 3) 7853b8e80941Smrg alu.last = 1; 7854b8e80941Smrg alu.dst.write = 1; 7855b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7856b8e80941Smrg if (r) 7857b8e80941Smrg return r; 7858b8e80941Smrg } 7859b8e80941Smrg } 7860b8e80941Smrg for (i = 1; i < 3; i++) { 7861b8e80941Smrg /* set gradients h/v */ 7862b8e80941Smrg struct r600_bytecode_tex *t = &grad_offs[n_grad_offs++]; 7863b8e80941Smrg memset(t, 0, sizeof(struct r600_bytecode_tex)); 7864b8e80941Smrg t->op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 7865b8e80941Smrg FETCH_OP_SET_GRADIENTS_V; 7866b8e80941Smrg t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7867b8e80941Smrg t->sampler_index_mode = sampler_index_mode; 7868b8e80941Smrg t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS; 7869b8e80941Smrg t->resource_index_mode = sampler_index_mode; 7870b8e80941Smrg 7871b8e80941Smrg t->src_gpr = (i == 1) ? temp_h : temp_v; 7872b8e80941Smrg t->src_sel_x = 0; 7873b8e80941Smrg t->src_sel_y = 1; 7874b8e80941Smrg t->src_sel_z = 2; 7875b8e80941Smrg t->src_sel_w = 3; 7876b8e80941Smrg 7877b8e80941Smrg t->dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 7878b8e80941Smrg t->dst_sel_x = t->dst_sel_y = t->dst_sel_z = t->dst_sel_w = 7; 7879b8e80941Smrg if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 7880b8e80941Smrg t->coord_type_x = 1; 7881b8e80941Smrg t->coord_type_y = 1; 7882b8e80941Smrg t->coord_type_z = 1; 7883b8e80941Smrg t->coord_type_w = 1; 7884b8e80941Smrg } 7885b8e80941Smrg } 7886b8e80941Smrg } 7887b8e80941Smrg 7888b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7889b8e80941Smrg /* Gather4 should follow the same rules as bilinear filtering, but the hardware 7890b8e80941Smrg * incorrectly forces nearest filtering if the texture format is integer. 7891b8e80941Smrg * The only effect it has on Gather4, which always returns 4 texels for 7892b8e80941Smrg * bilinear filtering, is that the final coordinates are off by 0.5 of 7893b8e80941Smrg * the texel size. 7894b8e80941Smrg * 7895b8e80941Smrg * The workaround is to subtract 0.5 from the unnormalized coordinates, 7896b8e80941Smrg * or (0.5 / size) from the normalized coordinates. 7897b8e80941Smrg */ 7898b8e80941Smrg if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT || 7899b8e80941Smrg inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) { 7900b8e80941Smrg int treg = r600_get_temp(ctx); 7901b8e80941Smrg 7902b8e80941Smrg /* mov array and comparison oordinate to temp_reg if needed */ 7903b8e80941Smrg if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7904b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7905b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) && !src_loaded) { 7906b8e80941Smrg int end = inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ? 3 : 2; 7907b8e80941Smrg for (i = 2; i <= end; i++) { 7908b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7909b8e80941Smrg alu.op = ALU_OP1_MOV; 7910b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7911b8e80941Smrg alu.dst.chan = i; 7912b8e80941Smrg alu.dst.write = 1; 7913b8e80941Smrg alu.last = (i == end); 7914b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7915b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7916b8e80941Smrg if (r) 7917b8e80941Smrg return r; 7918b8e80941Smrg } 7919b8e80941Smrg } 7920b8e80941Smrg 7921b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_RECT || 7922b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) { 7923b8e80941Smrg for (i = 0; i < 2; i++) { 7924b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7925b8e80941Smrg alu.op = ALU_OP2_ADD; 7926b8e80941Smrg alu.dst.sel = ctx->temp_reg; 7927b8e80941Smrg alu.dst.chan = i; 7928b8e80941Smrg alu.dst.write = 1; 7929b8e80941Smrg alu.last = i == 1; 7930b8e80941Smrg if (src_loaded) { 7931b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 7932b8e80941Smrg alu.src[0].chan = i; 7933b8e80941Smrg } else 7934b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7935b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0_5; 7936b8e80941Smrg alu.src[1].neg = 1; 7937b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7938b8e80941Smrg if (r) 7939b8e80941Smrg return r; 7940b8e80941Smrg } 7941b8e80941Smrg } else { 7942b8e80941Smrg /* execute a TXQ */ 7943b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7944b8e80941Smrg tex.op = FETCH_OP_GET_TEXTURE_RESINFO; 7945b8e80941Smrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7946b8e80941Smrg tex.sampler_index_mode = sampler_index_mode; 7947b8e80941Smrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7948b8e80941Smrg tex.resource_index_mode = sampler_index_mode; 7949b8e80941Smrg tex.dst_gpr = treg; 7950b8e80941Smrg tex.src_sel_x = 4; 7951b8e80941Smrg tex.src_sel_y = 4; 7952b8e80941Smrg tex.src_sel_z = 4; 7953b8e80941Smrg tex.src_sel_w = 4; 7954b8e80941Smrg tex.dst_sel_x = 0; 7955b8e80941Smrg tex.dst_sel_y = 1; 7956b8e80941Smrg tex.dst_sel_z = 7; 7957b8e80941Smrg tex.dst_sel_w = 7; 7958b8e80941Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 7959b8e80941Smrg if (r) 7960b8e80941Smrg return r; 7961b8e80941Smrg 7962b8e80941Smrg /* coord.xy = -0.5 * (1.0/int_to_flt(size)) + coord.xy */ 7963b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 7964b8e80941Smrg /* */ 7965b8e80941Smrg for (i = 0; i < 2; i++) { 7966b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7967b8e80941Smrg alu.op = ALU_OP1_INT_TO_FLT; 7968b8e80941Smrg alu.dst.sel = treg; 7969b8e80941Smrg alu.dst.chan = i; 7970b8e80941Smrg alu.dst.write = 1; 7971b8e80941Smrg alu.src[0].sel = treg; 7972b8e80941Smrg alu.src[0].chan = i; 7973b8e80941Smrg alu.last = (i == 1) ? 1 : 0; 7974b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7975b8e80941Smrg if (r) 7976b8e80941Smrg return r; 7977b8e80941Smrg } 7978b8e80941Smrg for (j = 0; j < 2; j++) { 7979b8e80941Smrg for (i = 0; i < 3; i++) { 7980b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7981b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 7982b8e80941Smrg alu.src[0].sel = treg; 7983b8e80941Smrg alu.src[0].chan = j; 7984b8e80941Smrg alu.dst.sel = treg; 7985b8e80941Smrg alu.dst.chan = i; 7986b8e80941Smrg if (i == 2) 7987b8e80941Smrg alu.last = 1; 7988b8e80941Smrg if (i == j) 7989b8e80941Smrg alu.dst.write = 1; 7990b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 7991b8e80941Smrg if (r) 7992b8e80941Smrg return r; 7993b8e80941Smrg } 7994b8e80941Smrg } 7995b8e80941Smrg } else { 7996b8e80941Smrg for (i = 0; i < 2; i++) { 7997b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7998b8e80941Smrg alu.op = ALU_OP1_INT_TO_FLT; 7999b8e80941Smrg alu.dst.sel = treg; 8000b8e80941Smrg alu.dst.chan = i; 8001b8e80941Smrg alu.dst.write = 1; 8002b8e80941Smrg alu.src[0].sel = treg; 8003b8e80941Smrg alu.src[0].chan = i; 8004b8e80941Smrg alu.last = 1; 8005b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8006b8e80941Smrg if (r) 8007b8e80941Smrg return r; 8008b8e80941Smrg } 8009b8e80941Smrg for (i = 0; i < 2; i++) { 8010b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8011b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 8012b8e80941Smrg alu.src[0].sel = treg; 8013b8e80941Smrg alu.src[0].chan = i; 8014b8e80941Smrg alu.dst.sel = treg; 8015b8e80941Smrg alu.dst.chan = i; 8016b8e80941Smrg alu.last = 1; 8017b8e80941Smrg alu.dst.write = 1; 8018b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8019b8e80941Smrg if (r) 8020b8e80941Smrg return r; 8021b8e80941Smrg } 8022b8e80941Smrg } 8023b8e80941Smrg for (i = 0; i < 2; i++) { 8024b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8025b8e80941Smrg alu.op = ALU_OP3_MULADD; 8026b8e80941Smrg alu.is_op3 = 1; 8027b8e80941Smrg alu.dst.sel = ctx->temp_reg; 8028b8e80941Smrg alu.dst.chan = i; 8029b8e80941Smrg alu.dst.write = 1; 8030b8e80941Smrg alu.last = i == 1; 8031b8e80941Smrg alu.src[0].sel = treg; 8032b8e80941Smrg alu.src[0].chan = i; 8033b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0_5; 8034b8e80941Smrg alu.src[1].neg = 1; 8035b8e80941Smrg if (src_loaded) { 8036b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 8037b8e80941Smrg alu.src[2].chan = i; 8038b8e80941Smrg } else 8039b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 8040b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8041b8e80941Smrg if (r) 8042b8e80941Smrg return r; 8043b8e80941Smrg } 8044b8e80941Smrg } 8045b8e80941Smrg src_loaded = TRUE; 8046b8e80941Smrg src_gpr = ctx->temp_reg; 8047b8e80941Smrg } 8048b8e80941Smrg } 8049b8e80941Smrg 8050b8e80941Smrg if (src_requires_loading && !src_loaded) { 8051b8e80941Smrg for (i = 0; i < 4; i++) { 8052b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8053b8e80941Smrg alu.op = ALU_OP1_MOV; 8054b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8055b8e80941Smrg alu.dst.sel = ctx->temp_reg; 8056b8e80941Smrg alu.dst.chan = i; 8057b8e80941Smrg if (i == 3) 8058b8e80941Smrg alu.last = 1; 8059b8e80941Smrg alu.dst.write = 1; 8060b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8061b8e80941Smrg if (r) 8062b8e80941Smrg return r; 8063b8e80941Smrg } 8064b8e80941Smrg src_loaded = TRUE; 8065b8e80941Smrg src_gpr = ctx->temp_reg; 8066b8e80941Smrg } 8067b8e80941Smrg 8068b8e80941Smrg /* get offset values */ 8069b8e80941Smrg if (inst->Texture.NumOffsets) { 8070b8e80941Smrg assert(inst->Texture.NumOffsets == 1); 8071b8e80941Smrg 8072b8e80941Smrg /* The texture offset feature doesn't work with the TXF instruction 8073b8e80941Smrg * and must be emulated by adding the offset to the texture coordinates. */ 8074b8e80941Smrg if (txf_add_offsets) { 8075b8e80941Smrg const struct tgsi_texture_offset *off = inst->TexOffsets; 8076b8e80941Smrg 8077b8e80941Smrg switch (inst->Texture.Texture) { 8078b8e80941Smrg case TGSI_TEXTURE_3D: 8079b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8080b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 8081b8e80941Smrg alu.src[0].sel = src_gpr; 8082b8e80941Smrg alu.src[0].chan = 2; 8083b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8084b8e80941Smrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 8085b8e80941Smrg alu.dst.sel = src_gpr; 8086b8e80941Smrg alu.dst.chan = 2; 8087b8e80941Smrg alu.dst.write = 1; 8088b8e80941Smrg alu.last = 1; 8089b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8090b8e80941Smrg if (r) 8091b8e80941Smrg return r; 8092b8e80941Smrg /* fall through */ 8093b8e80941Smrg 8094b8e80941Smrg case TGSI_TEXTURE_2D: 8095b8e80941Smrg case TGSI_TEXTURE_SHADOW2D: 8096b8e80941Smrg case TGSI_TEXTURE_RECT: 8097b8e80941Smrg case TGSI_TEXTURE_SHADOWRECT: 8098b8e80941Smrg case TGSI_TEXTURE_2D_ARRAY: 8099b8e80941Smrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 8100b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8101b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 8102b8e80941Smrg alu.src[0].sel = src_gpr; 8103b8e80941Smrg alu.src[0].chan = 1; 8104b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8105b8e80941Smrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 8106b8e80941Smrg alu.dst.sel = src_gpr; 8107b8e80941Smrg alu.dst.chan = 1; 8108b8e80941Smrg alu.dst.write = 1; 8109b8e80941Smrg alu.last = 1; 8110b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8111b8e80941Smrg if (r) 8112b8e80941Smrg return r; 8113b8e80941Smrg /* fall through */ 8114b8e80941Smrg 8115b8e80941Smrg case TGSI_TEXTURE_1D: 8116b8e80941Smrg case TGSI_TEXTURE_SHADOW1D: 8117b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 8118b8e80941Smrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 8119b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8120b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 8121b8e80941Smrg alu.src[0].sel = src_gpr; 8122b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8123b8e80941Smrg alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 8124b8e80941Smrg alu.dst.sel = src_gpr; 8125b8e80941Smrg alu.dst.write = 1; 8126b8e80941Smrg alu.last = 1; 8127b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8128b8e80941Smrg if (r) 8129b8e80941Smrg return r; 8130b8e80941Smrg break; 8131b8e80941Smrg /* texture offsets do not apply to other texture targets */ 8132b8e80941Smrg } 8133b8e80941Smrg } else { 8134b8e80941Smrg switch (inst->Texture.Texture) { 8135b8e80941Smrg case TGSI_TEXTURE_3D: 8136b8e80941Smrg offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 8137b8e80941Smrg /* fallthrough */ 8138b8e80941Smrg case TGSI_TEXTURE_2D: 8139b8e80941Smrg case TGSI_TEXTURE_SHADOW2D: 8140b8e80941Smrg case TGSI_TEXTURE_RECT: 8141b8e80941Smrg case TGSI_TEXTURE_SHADOWRECT: 8142b8e80941Smrg case TGSI_TEXTURE_2D_ARRAY: 8143b8e80941Smrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 8144b8e80941Smrg offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 8145b8e80941Smrg /* fallthrough */ 8146b8e80941Smrg case TGSI_TEXTURE_1D: 8147b8e80941Smrg case TGSI_TEXTURE_SHADOW1D: 8148b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 8149b8e80941Smrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 8150b8e80941Smrg offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 8151b8e80941Smrg } 8152b8e80941Smrg } 8153b8e80941Smrg } 8154b8e80941Smrg 8155b8e80941Smrg /* Obtain the sample index for reading a compressed MSAA color texture. 8156b8e80941Smrg * To read the FMASK, we use the ldfptr instruction, which tells us 8157b8e80941Smrg * where the samples are stored. 8158b8e80941Smrg * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 8159b8e80941Smrg * which is the identity mapping. Each nibble says which physical sample 8160b8e80941Smrg * should be fetched to get that sample. 8161b8e80941Smrg * 8162b8e80941Smrg * Assume src.z contains the sample index. It should be modified like this: 8163b8e80941Smrg * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 8164b8e80941Smrg * Then fetch the texel with src. 8165b8e80941Smrg */ 8166b8e80941Smrg if (read_compressed_msaa) { 8167b8e80941Smrg unsigned sample_chan = 3; 8168b8e80941Smrg unsigned temp = r600_get_temp(ctx); 8169b8e80941Smrg assert(src_loaded); 8170b8e80941Smrg 8171b8e80941Smrg /* temp.w = ldfptr() */ 8172b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 8173b8e80941Smrg tex.op = FETCH_OP_LD; 8174b8e80941Smrg tex.inst_mod = 1; /* to indicate this is ldfptr */ 8175b8e80941Smrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 8176b8e80941Smrg tex.sampler_index_mode = sampler_index_mode; 8177b8e80941Smrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 8178b8e80941Smrg tex.resource_index_mode = sampler_index_mode; 8179b8e80941Smrg tex.src_gpr = src_gpr; 8180b8e80941Smrg tex.dst_gpr = temp; 8181b8e80941Smrg tex.dst_sel_x = 7; /* mask out these components */ 8182b8e80941Smrg tex.dst_sel_y = 7; 8183b8e80941Smrg tex.dst_sel_z = 7; 8184b8e80941Smrg tex.dst_sel_w = 0; /* store X */ 8185b8e80941Smrg tex.src_sel_x = 0; 8186b8e80941Smrg tex.src_sel_y = 1; 8187b8e80941Smrg tex.src_sel_z = 2; 8188b8e80941Smrg tex.src_sel_w = 3; 8189b8e80941Smrg tex.offset_x = offset_x; 8190b8e80941Smrg tex.offset_y = offset_y; 8191b8e80941Smrg tex.offset_z = offset_z; 8192b8e80941Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 8193b8e80941Smrg if (r) 8194b8e80941Smrg return r; 8195b8e80941Smrg 8196b8e80941Smrg /* temp.x = sample_index*4 */ 8197b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8198b8e80941Smrg alu.op = ALU_OP2_MULLO_INT; 8199b8e80941Smrg alu.src[0].sel = src_gpr; 8200b8e80941Smrg alu.src[0].chan = sample_chan; 8201b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8202b8e80941Smrg alu.src[1].value = 4; 8203b8e80941Smrg alu.dst.sel = temp; 8204b8e80941Smrg alu.dst.chan = 0; 8205b8e80941Smrg alu.dst.write = 1; 8206b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 8207b8e80941Smrg if (r) 8208b8e80941Smrg return r; 8209b8e80941Smrg 8210b8e80941Smrg /* sample_index = temp.w >> temp.x */ 8211b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8212b8e80941Smrg alu.op = ALU_OP2_LSHR_INT; 8213b8e80941Smrg alu.src[0].sel = temp; 8214b8e80941Smrg alu.src[0].chan = 3; 8215b8e80941Smrg alu.src[1].sel = temp; 8216b8e80941Smrg alu.src[1].chan = 0; 8217b8e80941Smrg alu.dst.sel = src_gpr; 8218b8e80941Smrg alu.dst.chan = sample_chan; 8219b8e80941Smrg alu.dst.write = 1; 8220b8e80941Smrg alu.last = 1; 8221b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8222b8e80941Smrg if (r) 8223b8e80941Smrg return r; 8224b8e80941Smrg 8225b8e80941Smrg /* sample_index & 0xF */ 8226b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8227b8e80941Smrg alu.op = ALU_OP2_AND_INT; 8228b8e80941Smrg alu.src[0].sel = src_gpr; 8229b8e80941Smrg alu.src[0].chan = sample_chan; 8230b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8231b8e80941Smrg alu.src[1].value = 0xF; 8232b8e80941Smrg alu.dst.sel = src_gpr; 8233b8e80941Smrg alu.dst.chan = sample_chan; 8234b8e80941Smrg alu.dst.write = 1; 8235b8e80941Smrg alu.last = 1; 8236b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8237b8e80941Smrg if (r) 8238b8e80941Smrg return r; 8239b8e80941Smrg#if 0 8240b8e80941Smrg /* visualize the FMASK */ 8241b8e80941Smrg for (i = 0; i < 4; i++) { 8242b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8243b8e80941Smrg alu.op = ALU_OP1_INT_TO_FLT; 8244b8e80941Smrg alu.src[0].sel = src_gpr; 8245b8e80941Smrg alu.src[0].chan = sample_chan; 8246b8e80941Smrg alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 8247b8e80941Smrg alu.dst.chan = i; 8248b8e80941Smrg alu.dst.write = 1; 8249b8e80941Smrg alu.last = 1; 8250b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8251b8e80941Smrg if (r) 8252b8e80941Smrg return r; 8253b8e80941Smrg } 8254b8e80941Smrg return 0; 8255b8e80941Smrg#endif 8256b8e80941Smrg } 8257b8e80941Smrg 8258b8e80941Smrg /* does this shader want a num layers from TXQ for a cube array? */ 8259b8e80941Smrg if (has_txq_cube_array_z) { 8260b8e80941Smrg int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 8261b8e80941Smrg 8262b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8263b8e80941Smrg alu.op = ALU_OP1_MOV; 8264b8e80941Smrg 8265b8e80941Smrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 8266b8e80941Smrg if (ctx->bc->chip_class >= EVERGREEN) { 8267b8e80941Smrg /* with eg each dword is number of cubes */ 8268b8e80941Smrg alu.src[0].sel += id / 4; 8269b8e80941Smrg alu.src[0].chan = id % 4; 8270b8e80941Smrg } else { 8271b8e80941Smrg /* r600 we have them at channel 2 of the second dword */ 8272b8e80941Smrg alu.src[0].sel += (id * 2) + 1; 8273b8e80941Smrg alu.src[0].chan = 2; 8274b8e80941Smrg } 8275b8e80941Smrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 8276b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 8277b8e80941Smrg alu.last = 1; 8278b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8279b8e80941Smrg if (r) 8280b8e80941Smrg return r; 8281b8e80941Smrg /* disable writemask from texture instruction */ 8282b8e80941Smrg inst->Dst[0].Register.WriteMask &= ~4; 8283b8e80941Smrg } 8284b8e80941Smrg 8285b8e80941Smrg opcode = ctx->inst_info->op; 8286b8e80941Smrg if (opcode == FETCH_OP_GATHER4 && 8287b8e80941Smrg inst->TexOffsets[0].File != TGSI_FILE_NULL && 8288b8e80941Smrg inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 8289b8e80941Smrg struct r600_bytecode_tex *t; 8290b8e80941Smrg opcode = FETCH_OP_GATHER4_O; 8291b8e80941Smrg 8292b8e80941Smrg /* GATHER4_O/GATHER4_C_O use offset values loaded by 8293b8e80941Smrg SET_TEXTURE_OFFSETS instruction. The immediate offset values 8294b8e80941Smrg encoded in the instruction are ignored. */ 8295b8e80941Smrg t = &grad_offs[n_grad_offs++]; 8296b8e80941Smrg memset(t, 0, sizeof(struct r600_bytecode_tex)); 8297b8e80941Smrg t->op = FETCH_OP_SET_TEXTURE_OFFSETS; 8298b8e80941Smrg t->sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 8299b8e80941Smrg t->sampler_index_mode = sampler_index_mode; 8300b8e80941Smrg t->resource_id = t->sampler_id + R600_MAX_CONST_BUFFERS; 8301b8e80941Smrg t->resource_index_mode = sampler_index_mode; 8302b8e80941Smrg 8303b8e80941Smrg t->src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 8304b8e80941Smrg t->src_sel_x = inst->TexOffsets[0].SwizzleX; 8305b8e80941Smrg t->src_sel_y = inst->TexOffsets[0].SwizzleY; 8306b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 8307b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 8308b8e80941Smrg /* make sure array index selector is 0, this is just a safety 8309b8e80941Smrg * precausion because TGSI seems to emit something strange here */ 8310b8e80941Smrg t->src_sel_z = 4; 8311b8e80941Smrg else 8312b8e80941Smrg t->src_sel_z = inst->TexOffsets[0].SwizzleZ; 8313b8e80941Smrg 8314b8e80941Smrg t->src_sel_w = 4; 8315b8e80941Smrg 8316b8e80941Smrg t->dst_sel_x = 7; 8317b8e80941Smrg t->dst_sel_y = 7; 8318b8e80941Smrg t->dst_sel_z = 7; 8319b8e80941Smrg t->dst_sel_w = 7; 8320b8e80941Smrg } 8321b8e80941Smrg 8322b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 8323b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 8324b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 8325b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 8326b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 8327b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 8328b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 8329b8e80941Smrg switch (opcode) { 8330b8e80941Smrg case FETCH_OP_SAMPLE: 8331b8e80941Smrg opcode = FETCH_OP_SAMPLE_C; 8332b8e80941Smrg break; 8333b8e80941Smrg case FETCH_OP_SAMPLE_L: 8334b8e80941Smrg opcode = FETCH_OP_SAMPLE_C_L; 8335b8e80941Smrg break; 8336b8e80941Smrg case FETCH_OP_SAMPLE_LB: 8337b8e80941Smrg opcode = FETCH_OP_SAMPLE_C_LB; 8338b8e80941Smrg break; 8339b8e80941Smrg case FETCH_OP_SAMPLE_G: 8340b8e80941Smrg opcode = FETCH_OP_SAMPLE_C_G; 8341b8e80941Smrg break; 8342b8e80941Smrg /* Texture gather variants */ 8343b8e80941Smrg case FETCH_OP_GATHER4: 8344b8e80941Smrg opcode = FETCH_OP_GATHER4_C; 8345b8e80941Smrg break; 8346b8e80941Smrg case FETCH_OP_GATHER4_O: 8347b8e80941Smrg opcode = FETCH_OP_GATHER4_C_O; 8348b8e80941Smrg break; 8349b8e80941Smrg } 8350b8e80941Smrg } 8351b8e80941Smrg 8352b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 8353b8e80941Smrg tex.op = opcode; 8354b8e80941Smrg 8355b8e80941Smrg tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 8356b8e80941Smrg tex.sampler_index_mode = sampler_index_mode; 8357b8e80941Smrg tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 8358b8e80941Smrg tex.resource_index_mode = sampler_index_mode; 8359b8e80941Smrg tex.src_gpr = src_gpr; 8360b8e80941Smrg tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 8361b8e80941Smrg 8362b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 8363b8e80941Smrg inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 8364b8e80941Smrg tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 8365b8e80941Smrg } 8366b8e80941Smrg 8367b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 8368b8e80941Smrg int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 8369b8e80941Smrg tex.inst_mod = texture_component_select; 8370b8e80941Smrg 8371b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 8372b8e80941Smrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 8373b8e80941Smrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 8374b8e80941Smrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 8375b8e80941Smrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 8376b8e80941Smrg } else { 8377b8e80941Smrg /* GATHER4 result order is different from TGSI TG4 */ 8378b8e80941Smrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 1 : 7; 8379b8e80941Smrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 2 : 7; 8380b8e80941Smrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 0 : 7; 8381b8e80941Smrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 8382b8e80941Smrg } 8383b8e80941Smrg } 8384b8e80941Smrg else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 8385b8e80941Smrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 8386b8e80941Smrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 8387b8e80941Smrg tex.dst_sel_z = 7; 8388b8e80941Smrg tex.dst_sel_w = 7; 8389b8e80941Smrg } 8390b8e80941Smrg else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 8391b8e80941Smrg tex.dst_sel_x = 3; 8392b8e80941Smrg tex.dst_sel_y = 7; 8393b8e80941Smrg tex.dst_sel_z = 7; 8394b8e80941Smrg tex.dst_sel_w = 7; 8395b8e80941Smrg } 8396b8e80941Smrg else { 8397b8e80941Smrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 8398b8e80941Smrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 8399b8e80941Smrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 8400b8e80941Smrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 8401b8e80941Smrg } 8402b8e80941Smrg 8403b8e80941Smrg 8404b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 8405b8e80941Smrg tex.src_sel_x = 4; 8406b8e80941Smrg tex.src_sel_y = 4; 8407b8e80941Smrg tex.src_sel_z = 4; 8408b8e80941Smrg tex.src_sel_w = 4; 8409b8e80941Smrg } else if (src_loaded) { 8410b8e80941Smrg tex.src_sel_x = 0; 8411b8e80941Smrg tex.src_sel_y = 1; 8412b8e80941Smrg tex.src_sel_z = 2; 8413b8e80941Smrg tex.src_sel_w = 3; 8414b8e80941Smrg } else { 8415b8e80941Smrg tex.src_sel_x = ctx->src[0].swizzle[0]; 8416b8e80941Smrg tex.src_sel_y = ctx->src[0].swizzle[1]; 8417b8e80941Smrg tex.src_sel_z = ctx->src[0].swizzle[2]; 8418b8e80941Smrg tex.src_sel_w = ctx->src[0].swizzle[3]; 8419b8e80941Smrg tex.src_rel = ctx->src[0].rel; 8420b8e80941Smrg } 8421b8e80941Smrg 8422b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 8423b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 8424b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 8425b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 8426b8e80941Smrg tex.src_sel_x = 1; 8427b8e80941Smrg tex.src_sel_y = 0; 8428b8e80941Smrg tex.src_sel_z = 3; 8429b8e80941Smrg tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 8430b8e80941Smrg } 8431b8e80941Smrg 8432b8e80941Smrg if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 8433b8e80941Smrg inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 8434b8e80941Smrg tex.coord_type_x = 1; 8435b8e80941Smrg tex.coord_type_y = 1; 8436b8e80941Smrg } 8437b8e80941Smrg tex.coord_type_z = 1; 8438b8e80941Smrg tex.coord_type_w = 1; 8439b8e80941Smrg 8440b8e80941Smrg tex.offset_x = offset_x; 8441b8e80941Smrg tex.offset_y = offset_y; 8442b8e80941Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 8443b8e80941Smrg (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 8444b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 8445b8e80941Smrg tex.offset_z = 0; 8446b8e80941Smrg } 8447b8e80941Smrg else { 8448b8e80941Smrg tex.offset_z = offset_z; 8449b8e80941Smrg } 8450b8e80941Smrg 8451b8e80941Smrg /* Put the depth for comparison in W. 8452b8e80941Smrg * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 8453b8e80941Smrg * Some instructions expect the depth in Z. */ 8454b8e80941Smrg if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 8455b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 8456b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 8457b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 8458b8e80941Smrg opcode != FETCH_OP_SAMPLE_C_L && 8459b8e80941Smrg opcode != FETCH_OP_SAMPLE_C_LB) { 8460b8e80941Smrg tex.src_sel_w = tex.src_sel_z; 8461b8e80941Smrg } 8462b8e80941Smrg 8463b8e80941Smrg if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 8464b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 8465b8e80941Smrg if (opcode == FETCH_OP_SAMPLE_C_L || 8466b8e80941Smrg opcode == FETCH_OP_SAMPLE_C_LB) { 8467b8e80941Smrg /* the array index is read from Y */ 8468b8e80941Smrg tex.coord_type_y = 0; 8469b8e80941Smrg array_index_offset_channel = tex.src_sel_y; 8470b8e80941Smrg } else { 8471b8e80941Smrg /* the array index is read from Z */ 8472b8e80941Smrg tex.coord_type_z = 0; 8473b8e80941Smrg tex.src_sel_z = tex.src_sel_y; 8474b8e80941Smrg array_index_offset_channel = tex.src_sel_z; 8475b8e80941Smrg } 8476b8e80941Smrg } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 8477b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 8478b8e80941Smrg tex.coord_type_z = 0; 8479b8e80941Smrg array_index_offset_channel = tex.src_sel_z; 8480b8e80941Smrg } else if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 8481b8e80941Smrg inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 8482b8e80941Smrg (ctx->bc->chip_class >= EVERGREEN)) 8483b8e80941Smrg /* the array index is read from Z, coordinate will be corrected elsewhere */ 8484b8e80941Smrg tex.coord_type_z = 0; 8485b8e80941Smrg 8486b8e80941Smrg /* We have array access to 1D or 2D ARRAY, the coordinates are not int -> 8487b8e80941Smrg * evaluate the array index */ 8488b8e80941Smrg if (array_index_offset_channel >= 0 && 8489b8e80941Smrg opcode != FETCH_OP_LD && 8490b8e80941Smrg opcode != FETCH_OP_GET_TEXTURE_RESINFO) { 8491b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8492b8e80941Smrg alu.src[0].sel = tex.src_gpr; 8493b8e80941Smrg alu.src[0].chan = array_index_offset_channel; 8494b8e80941Smrg alu.src[0].rel = tex.src_rel; 8495b8e80941Smrg alu.op = ALU_OP1_RNDNE; 8496b8e80941Smrg alu.dst.sel = tex.src_gpr; 8497b8e80941Smrg alu.dst.chan = array_index_offset_channel; 8498b8e80941Smrg alu.dst.rel = tex.src_rel; 8499b8e80941Smrg alu.dst.write = 1; 8500b8e80941Smrg alu.last = 1; 8501b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8502b8e80941Smrg if (r) 8503b8e80941Smrg return r; 8504b8e80941Smrg } 8505b8e80941Smrg 8506b8e80941Smrg /* mask unused source components */ 8507b8e80941Smrg if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 8508b8e80941Smrg switch (inst->Texture.Texture) { 8509b8e80941Smrg case TGSI_TEXTURE_2D: 8510b8e80941Smrg case TGSI_TEXTURE_RECT: 8511b8e80941Smrg tex.src_sel_z = 7; 8512b8e80941Smrg tex.src_sel_w = 7; 8513b8e80941Smrg break; 8514b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 8515b8e80941Smrg tex.src_sel_y = 7; 8516b8e80941Smrg tex.src_sel_w = 7; 8517b8e80941Smrg break; 8518b8e80941Smrg case TGSI_TEXTURE_1D: 8519b8e80941Smrg tex.src_sel_y = 7; 8520b8e80941Smrg tex.src_sel_z = 7; 8521b8e80941Smrg tex.src_sel_w = 7; 8522b8e80941Smrg break; 8523b8e80941Smrg } 8524b8e80941Smrg } 8525b8e80941Smrg 8526b8e80941Smrg /* Emit set gradient and offset instructions. */ 8527b8e80941Smrg for (i = 0; i < n_grad_offs; ++i) { 8528b8e80941Smrg r = r600_bytecode_add_tex(ctx->bc, &grad_offs[i]); 8529b8e80941Smrg if (r) 8530b8e80941Smrg return r; 8531b8e80941Smrg } 8532b8e80941Smrg 8533b8e80941Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 8534b8e80941Smrg if (r) 8535b8e80941Smrg return r; 8536b8e80941Smrg 8537b8e80941Smrg /* add shadow ambient support - gallium doesn't do it yet */ 8538b8e80941Smrg return 0; 8539b8e80941Smrg} 8540b8e80941Smrg 8541b8e80941Smrgstatic int find_hw_atomic_counter(struct r600_shader_ctx *ctx, 8542b8e80941Smrg struct tgsi_full_src_register *src) 8543b8e80941Smrg{ 8544b8e80941Smrg unsigned i; 8545b8e80941Smrg 8546b8e80941Smrg if (src->Register.Indirect) { 8547b8e80941Smrg for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) { 8548b8e80941Smrg if (src->Indirect.ArrayID == ctx->shader->atomics[i].array_id) 8549b8e80941Smrg return ctx->shader->atomics[i].hw_idx; 8550b8e80941Smrg } 8551b8e80941Smrg } else { 8552b8e80941Smrg uint32_t index = src->Register.Index; 8553b8e80941Smrg for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) { 8554b8e80941Smrg if (ctx->shader->atomics[i].buffer_id != (unsigned)src->Dimension.Index) 8555b8e80941Smrg continue; 8556b8e80941Smrg if (index > ctx->shader->atomics[i].end) 8557b8e80941Smrg continue; 8558b8e80941Smrg if (index < ctx->shader->atomics[i].start) 8559b8e80941Smrg continue; 8560b8e80941Smrg uint32_t offset = (index - ctx->shader->atomics[i].start); 8561b8e80941Smrg return ctx->shader->atomics[i].hw_idx + offset; 8562b8e80941Smrg } 8563b8e80941Smrg } 8564b8e80941Smrg assert(0); 8565b8e80941Smrg return -1; 8566b8e80941Smrg} 8567b8e80941Smrg 8568b8e80941Smrgstatic int tgsi_set_gds_temp(struct r600_shader_ctx *ctx, 8569b8e80941Smrg int *uav_id_p, int *uav_index_mode_p) 8570b8e80941Smrg{ 8571b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8572b8e80941Smrg int uav_id, uav_index_mode = 0; 8573b8e80941Smrg int r; 8574b8e80941Smrg bool is_cm = (ctx->bc->chip_class == CAYMAN); 8575b8e80941Smrg 8576b8e80941Smrg uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]); 8577b8e80941Smrg 8578b8e80941Smrg if (inst->Src[0].Register.Indirect) { 8579b8e80941Smrg if (is_cm) { 8580b8e80941Smrg struct r600_bytecode_alu alu; 8581b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8582b8e80941Smrg alu.op = ALU_OP2_LSHL_INT; 8583b8e80941Smrg alu.src[0].sel = get_address_file_reg(ctx, inst->Src[0].Indirect.Index); 8584b8e80941Smrg alu.src[0].chan = 0; 8585b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8586b8e80941Smrg alu.src[1].value = 2; 8587b8e80941Smrg alu.dst.sel = ctx->temp_reg; 8588b8e80941Smrg alu.dst.chan = 0; 8589b8e80941Smrg alu.dst.write = 1; 8590b8e80941Smrg alu.last = 1; 8591b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8592b8e80941Smrg if (r) 8593b8e80941Smrg return r; 8594b8e80941Smrg 8595b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 8596b8e80941Smrg ctx->temp_reg, 0, 8597b8e80941Smrg ctx->temp_reg, 0, 8598b8e80941Smrg V_SQ_ALU_SRC_LITERAL, uav_id * 4); 8599b8e80941Smrg if (r) 8600b8e80941Smrg return r; 8601b8e80941Smrg } else 8602b8e80941Smrg uav_index_mode = 2; 8603b8e80941Smrg } else if (is_cm) { 8604b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 8605b8e80941Smrg ctx->temp_reg, 0, 8606b8e80941Smrg V_SQ_ALU_SRC_LITERAL, uav_id * 4, 8607b8e80941Smrg 0, 0); 8608b8e80941Smrg if (r) 8609b8e80941Smrg return r; 8610b8e80941Smrg } 8611b8e80941Smrg *uav_id_p = uav_id; 8612b8e80941Smrg *uav_index_mode_p = uav_index_mode; 8613b8e80941Smrg return 0; 8614b8e80941Smrg} 8615b8e80941Smrg 8616b8e80941Smrgstatic int tgsi_load_gds(struct r600_shader_ctx *ctx) 8617b8e80941Smrg{ 8618b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8619b8e80941Smrg int r; 8620b8e80941Smrg struct r600_bytecode_gds gds; 8621b8e80941Smrg int uav_id = 0; 8622b8e80941Smrg int uav_index_mode = 0; 8623b8e80941Smrg bool is_cm = (ctx->bc->chip_class == CAYMAN); 8624b8e80941Smrg 8625b8e80941Smrg r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode); 8626b8e80941Smrg if (r) 8627b8e80941Smrg return r; 8628b8e80941Smrg 8629b8e80941Smrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 8630b8e80941Smrg gds.op = FETCH_OP_GDS_READ_RET; 8631b8e80941Smrg gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 8632b8e80941Smrg gds.uav_id = is_cm ? 0 : uav_id; 8633b8e80941Smrg gds.uav_index_mode = is_cm ? 0 : uav_index_mode; 8634b8e80941Smrg gds.src_gpr = ctx->temp_reg; 8635b8e80941Smrg gds.src_sel_x = (is_cm) ? 0 : 4; 8636b8e80941Smrg gds.src_sel_y = 4; 8637b8e80941Smrg gds.src_sel_z = 4; 8638b8e80941Smrg gds.dst_sel_x = 0; 8639b8e80941Smrg gds.dst_sel_y = 7; 8640b8e80941Smrg gds.dst_sel_z = 7; 8641b8e80941Smrg gds.dst_sel_w = 7; 8642b8e80941Smrg gds.src_gpr2 = 0; 8643b8e80941Smrg gds.alloc_consume = !is_cm; 8644b8e80941Smrg r = r600_bytecode_add_gds(ctx->bc, &gds); 8645b8e80941Smrg if (r) 8646b8e80941Smrg return r; 8647b8e80941Smrg 8648b8e80941Smrg ctx->bc->cf_last->vpm = 1; 8649b8e80941Smrg return 0; 8650b8e80941Smrg} 8651b8e80941Smrg 8652b8e80941Smrg/* this fixes up 1D arrays properly */ 8653b8e80941Smrgstatic int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr) 8654b8e80941Smrg{ 8655b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8656b8e80941Smrg int r, i; 8657b8e80941Smrg struct r600_bytecode_alu alu; 8658b8e80941Smrg int temp_reg = r600_get_temp(ctx); 8659b8e80941Smrg 8660b8e80941Smrg for (i = 0; i < 4; i++) { 8661b8e80941Smrg bool def_val = true, write_zero = false; 8662b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8663b8e80941Smrg alu.op = ALU_OP1_MOV; 8664b8e80941Smrg alu.dst.sel = temp_reg; 8665b8e80941Smrg alu.dst.chan = i; 8666b8e80941Smrg 8667b8e80941Smrg switch (inst->Memory.Texture) { 8668b8e80941Smrg case TGSI_TEXTURE_BUFFER: 8669b8e80941Smrg case TGSI_TEXTURE_1D: 8670b8e80941Smrg if (i == 1 || i == 2 || i == 3) { 8671b8e80941Smrg write_zero = true; 8672b8e80941Smrg } 8673b8e80941Smrg break; 8674b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 8675b8e80941Smrg if (i == 1 || i == 3) 8676b8e80941Smrg write_zero = true; 8677b8e80941Smrg else if (i == 2) { 8678b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1); 8679b8e80941Smrg def_val = false; 8680b8e80941Smrg } 8681b8e80941Smrg break; 8682b8e80941Smrg case TGSI_TEXTURE_2D: 8683b8e80941Smrg if (i == 2 || i == 3) 8684b8e80941Smrg write_zero = true; 8685b8e80941Smrg break; 8686b8e80941Smrg default: 8687b8e80941Smrg if (i == 3) 8688b8e80941Smrg write_zero = true; 8689b8e80941Smrg break; 8690b8e80941Smrg } 8691b8e80941Smrg 8692b8e80941Smrg if (write_zero) { 8693b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 8694b8e80941Smrg alu.src[0].value = 0; 8695b8e80941Smrg } else if (def_val) { 8696b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i); 8697b8e80941Smrg } 8698b8e80941Smrg 8699b8e80941Smrg if (i == 3) 8700b8e80941Smrg alu.last = 1; 8701b8e80941Smrg alu.dst.write = 1; 8702b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8703b8e80941Smrg if (r) 8704b8e80941Smrg return r; 8705b8e80941Smrg } 8706b8e80941Smrg *idx_gpr = temp_reg; 8707b8e80941Smrg return 0; 8708b8e80941Smrg} 8709b8e80941Smrg 8710b8e80941Smrgstatic int load_buffer_coord(struct r600_shader_ctx *ctx, int src_idx, 8711b8e80941Smrg int temp_reg) 8712b8e80941Smrg{ 8713b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8714b8e80941Smrg int r; 8715b8e80941Smrg if (inst->Src[src_idx].Register.File == TGSI_FILE_IMMEDIATE) { 8716b8e80941Smrg int value = (ctx->literals[4 * inst->Src[src_idx].Register.Index + inst->Src[src_idx].Register.SwizzleX]); 8717b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 8718b8e80941Smrg temp_reg, 0, 8719b8e80941Smrg V_SQ_ALU_SRC_LITERAL, value >> 2, 8720b8e80941Smrg 0, 0); 8721b8e80941Smrg if (r) 8722b8e80941Smrg return r; 8723b8e80941Smrg } else { 8724b8e80941Smrg struct r600_bytecode_alu alu; 8725b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8726b8e80941Smrg alu.op = ALU_OP2_LSHR_INT; 8727b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[src_idx], 0); 8728b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 8729b8e80941Smrg alu.src[1].value = 2; 8730b8e80941Smrg alu.dst.sel = temp_reg; 8731b8e80941Smrg alu.dst.write = 1; 8732b8e80941Smrg alu.last = 1; 8733b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8734b8e80941Smrg if (r) 8735b8e80941Smrg return r; 8736b8e80941Smrg } 8737b8e80941Smrg return 0; 8738b8e80941Smrg} 8739b8e80941Smrg 8740b8e80941Smrgstatic int tgsi_load_buffer(struct r600_shader_ctx *ctx) 8741b8e80941Smrg{ 8742b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8743b8e80941Smrg /* have to work out the offset into the RAT immediate return buffer */ 8744b8e80941Smrg struct r600_bytecode_vtx vtx; 8745b8e80941Smrg struct r600_bytecode_cf *cf; 8746b8e80941Smrg int r; 8747b8e80941Smrg int temp_reg = r600_get_temp(ctx); 8748b8e80941Smrg unsigned rat_index_mode; 8749b8e80941Smrg unsigned base; 8750b8e80941Smrg 8751b8e80941Smrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 8752b8e80941Smrg base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE]; 8753b8e80941Smrg 8754b8e80941Smrg r = load_buffer_coord(ctx, 1, temp_reg); 8755b8e80941Smrg if (r) 8756b8e80941Smrg return r; 8757b8e80941Smrg ctx->bc->cf_last->barrier = 1; 8758b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 8759b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 8760b8e80941Smrg vtx.buffer_id = inst->Src[0].Register.Index + base; 8761b8e80941Smrg vtx.buffer_index_mode = rat_index_mode; 8762b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 8763b8e80941Smrg vtx.src_gpr = temp_reg; 8764b8e80941Smrg vtx.src_sel_x = 0; 8765b8e80941Smrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 8766b8e80941Smrg vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 8767b8e80941Smrg vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 8768b8e80941Smrg vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 8769b8e80941Smrg vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 8770b8e80941Smrg vtx.num_format_all = 1; 8771b8e80941Smrg vtx.format_comp_all = 1; 8772b8e80941Smrg vtx.srf_mode_all = 0; 8773b8e80941Smrg 8774b8e80941Smrg if (inst->Dst[0].Register.WriteMask & 8) { 8775b8e80941Smrg vtx.data_format = FMT_32_32_32_32; 8776b8e80941Smrg vtx.use_const_fields = 0; 8777b8e80941Smrg } else if (inst->Dst[0].Register.WriteMask & 4) { 8778b8e80941Smrg vtx.data_format = FMT_32_32_32; 8779b8e80941Smrg vtx.use_const_fields = 0; 8780b8e80941Smrg } else if (inst->Dst[0].Register.WriteMask & 2) { 8781b8e80941Smrg vtx.data_format = FMT_32_32; 8782b8e80941Smrg vtx.use_const_fields = 0; 8783b8e80941Smrg } else { 8784b8e80941Smrg vtx.data_format = FMT_32; 8785b8e80941Smrg vtx.use_const_fields = 0; 8786b8e80941Smrg } 8787b8e80941Smrg 8788b8e80941Smrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 8789b8e80941Smrg if (r) 8790b8e80941Smrg return r; 8791b8e80941Smrg cf = ctx->bc->cf_last; 8792b8e80941Smrg cf->barrier = 1; 8793b8e80941Smrg return 0; 8794b8e80941Smrg} 8795b8e80941Smrg 8796b8e80941Smrgstatic int tgsi_load_rat(struct r600_shader_ctx *ctx) 8797b8e80941Smrg{ 8798b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8799b8e80941Smrg /* have to work out the offset into the RAT immediate return buffer */ 8800b8e80941Smrg struct r600_bytecode_vtx vtx; 8801b8e80941Smrg struct r600_bytecode_cf *cf; 8802b8e80941Smrg int r; 8803b8e80941Smrg int idx_gpr; 8804b8e80941Smrg unsigned format, num_format, format_comp, endian; 8805b8e80941Smrg const struct util_format_description *desc; 8806b8e80941Smrg unsigned rat_index_mode; 8807b8e80941Smrg unsigned immed_base; 8808b8e80941Smrg 8809b8e80941Smrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 8810b8e80941Smrg 8811b8e80941Smrg immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET; 8812b8e80941Smrg r = load_index_src(ctx, 1, &idx_gpr); 8813b8e80941Smrg if (r) 8814b8e80941Smrg return r; 8815b8e80941Smrg 8816b8e80941Smrg if (rat_index_mode) 8817b8e80941Smrg egcm_load_index_reg(ctx->bc, 1, false); 8818b8e80941Smrg 8819b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 8820b8e80941Smrg cf = ctx->bc->cf_last; 8821b8e80941Smrg 8822b8e80941Smrg cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index; 8823b8e80941Smrg cf->rat.inst = V_RAT_INST_NOP_RTN; 8824b8e80941Smrg cf->rat.index_mode = rat_index_mode; 8825b8e80941Smrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 8826b8e80941Smrg cf->output.gpr = ctx->thread_id_gpr; 8827b8e80941Smrg cf->output.index_gpr = idx_gpr; 8828b8e80941Smrg cf->output.comp_mask = 0xf; 8829b8e80941Smrg cf->output.burst_count = 1; 8830b8e80941Smrg cf->vpm = 1; 8831b8e80941Smrg cf->barrier = 1; 8832b8e80941Smrg cf->mark = 1; 8833b8e80941Smrg cf->output.elem_size = 0; 8834b8e80941Smrg 8835b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 8836b8e80941Smrg cf = ctx->bc->cf_last; 8837b8e80941Smrg cf->barrier = 1; 8838b8e80941Smrg 8839b8e80941Smrg desc = util_format_description(inst->Memory.Format); 8840b8e80941Smrg r600_vertex_data_type(inst->Memory.Format, 8841b8e80941Smrg &format, &num_format, &format_comp, &endian); 8842b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 8843b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 8844b8e80941Smrg vtx.buffer_id = immed_base + inst->Src[0].Register.Index; 8845b8e80941Smrg vtx.buffer_index_mode = rat_index_mode; 8846b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 8847b8e80941Smrg vtx.src_gpr = ctx->thread_id_gpr; 8848b8e80941Smrg vtx.src_sel_x = 1; 8849b8e80941Smrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 8850b8e80941Smrg vtx.dst_sel_x = desc->swizzle[0]; 8851b8e80941Smrg vtx.dst_sel_y = desc->swizzle[1]; 8852b8e80941Smrg vtx.dst_sel_z = desc->swizzle[2]; 8853b8e80941Smrg vtx.dst_sel_w = desc->swizzle[3]; 8854b8e80941Smrg vtx.srf_mode_all = 1; 8855b8e80941Smrg vtx.data_format = format; 8856b8e80941Smrg vtx.num_format_all = num_format; 8857b8e80941Smrg vtx.format_comp_all = format_comp; 8858b8e80941Smrg vtx.endian = endian; 8859b8e80941Smrg vtx.offset = 0; 8860b8e80941Smrg vtx.mega_fetch_count = 3; 8861b8e80941Smrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 8862b8e80941Smrg if (r) 8863b8e80941Smrg return r; 8864b8e80941Smrg cf = ctx->bc->cf_last; 8865b8e80941Smrg cf->barrier = 1; 8866b8e80941Smrg return 0; 8867b8e80941Smrg} 8868b8e80941Smrg 8869b8e80941Smrgstatic int tgsi_load_lds(struct r600_shader_ctx *ctx) 8870b8e80941Smrg{ 8871b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8872b8e80941Smrg struct r600_bytecode_alu alu; 8873b8e80941Smrg int r; 8874b8e80941Smrg int temp_reg = r600_get_temp(ctx); 8875b8e80941Smrg 8876b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8877b8e80941Smrg alu.op = ALU_OP1_MOV; 8878b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 8879b8e80941Smrg alu.dst.sel = temp_reg; 8880b8e80941Smrg alu.dst.write = 1; 8881b8e80941Smrg alu.last = 1; 8882b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8883b8e80941Smrg if (r) 8884b8e80941Smrg return r; 8885b8e80941Smrg 8886b8e80941Smrg r = do_lds_fetch_values(ctx, temp_reg, 8887b8e80941Smrg ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index, inst->Dst[0].Register.WriteMask); 8888b8e80941Smrg if (r) 8889b8e80941Smrg return r; 8890b8e80941Smrg return 0; 8891b8e80941Smrg} 8892b8e80941Smrg 8893b8e80941Smrgstatic int tgsi_load(struct r600_shader_ctx *ctx) 8894b8e80941Smrg{ 8895b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8896b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 8897b8e80941Smrg return tgsi_load_rat(ctx); 8898b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) 8899b8e80941Smrg return tgsi_load_gds(ctx); 8900b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 8901b8e80941Smrg return tgsi_load_buffer(ctx); 8902b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 8903b8e80941Smrg return tgsi_load_lds(ctx); 8904b8e80941Smrg return 0; 8905b8e80941Smrg} 8906b8e80941Smrg 8907b8e80941Smrgstatic int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx) 8908b8e80941Smrg{ 8909b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8910b8e80941Smrg struct r600_bytecode_cf *cf; 8911b8e80941Smrg int r, i; 8912b8e80941Smrg unsigned rat_index_mode; 8913b8e80941Smrg int lasti; 8914b8e80941Smrg int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx); 8915b8e80941Smrg 8916b8e80941Smrg r = load_buffer_coord(ctx, 0, treg2); 8917b8e80941Smrg if (r) 8918b8e80941Smrg return r; 8919b8e80941Smrg 8920b8e80941Smrg rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 8921b8e80941Smrg if (rat_index_mode) 8922b8e80941Smrg egcm_load_index_reg(ctx->bc, 1, false); 8923b8e80941Smrg 8924b8e80941Smrg for (i = 0; i <= 3; i++) { 8925b8e80941Smrg struct r600_bytecode_alu alu; 8926b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8927b8e80941Smrg alu.op = ALU_OP1_MOV; 8928b8e80941Smrg alu.dst.sel = temp_reg; 8929b8e80941Smrg alu.dst.chan = i; 8930b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_0; 8931b8e80941Smrg alu.last = (i == 3); 8932b8e80941Smrg alu.dst.write = 1; 8933b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8934b8e80941Smrg if (r) 8935b8e80941Smrg return r; 8936b8e80941Smrg } 8937b8e80941Smrg 8938b8e80941Smrg lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 8939b8e80941Smrg for (i = 0; i <= lasti; i++) { 8940b8e80941Smrg struct r600_bytecode_alu alu; 8941b8e80941Smrg if (!((1 << i) & inst->Dst[0].Register.WriteMask)) 8942b8e80941Smrg continue; 8943b8e80941Smrg 8944b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 8945b8e80941Smrg temp_reg, 0, 8946b8e80941Smrg treg2, 0, 8947b8e80941Smrg V_SQ_ALU_SRC_LITERAL, i); 8948b8e80941Smrg if (r) 8949b8e80941Smrg return r; 8950b8e80941Smrg 8951b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8952b8e80941Smrg alu.op = ALU_OP1_MOV; 8953b8e80941Smrg alu.dst.sel = ctx->temp_reg; 8954b8e80941Smrg alu.dst.chan = 0; 8955b8e80941Smrg 8956b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 8957b8e80941Smrg alu.last = 1; 8958b8e80941Smrg alu.dst.write = 1; 8959b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 8960b8e80941Smrg if (r) 8961b8e80941Smrg return r; 8962b8e80941Smrg 8963b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 8964b8e80941Smrg cf = ctx->bc->cf_last; 8965b8e80941Smrg 8966b8e80941Smrg cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE]; 8967b8e80941Smrg cf->rat.inst = V_RAT_INST_STORE_TYPED; 8968b8e80941Smrg cf->rat.index_mode = rat_index_mode; 8969b8e80941Smrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 8970b8e80941Smrg cf->output.gpr = ctx->temp_reg; 8971b8e80941Smrg cf->output.index_gpr = temp_reg; 8972b8e80941Smrg cf->output.comp_mask = 1; 8973b8e80941Smrg cf->output.burst_count = 1; 8974b8e80941Smrg cf->vpm = 1; 8975b8e80941Smrg cf->barrier = 1; 8976b8e80941Smrg cf->output.elem_size = 0; 8977b8e80941Smrg } 8978b8e80941Smrg return 0; 8979b8e80941Smrg} 8980b8e80941Smrg 8981b8e80941Smrgstatic int tgsi_store_rat(struct r600_shader_ctx *ctx) 8982b8e80941Smrg{ 8983b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8984b8e80941Smrg struct r600_bytecode_cf *cf; 8985b8e80941Smrg bool src_requires_loading = false; 8986b8e80941Smrg int val_gpr, idx_gpr; 8987b8e80941Smrg int r, i; 8988b8e80941Smrg unsigned rat_index_mode; 8989b8e80941Smrg 8990b8e80941Smrg rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 8991b8e80941Smrg 8992b8e80941Smrg r = load_index_src(ctx, 0, &idx_gpr); 8993b8e80941Smrg if (r) 8994b8e80941Smrg return r; 8995b8e80941Smrg 8996b8e80941Smrg if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY) 8997b8e80941Smrg src_requires_loading = true; 8998b8e80941Smrg 8999b8e80941Smrg if (src_requires_loading) { 9000b8e80941Smrg struct r600_bytecode_alu alu; 9001b8e80941Smrg for (i = 0; i < 4; i++) { 9002b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9003b8e80941Smrg alu.op = ALU_OP1_MOV; 9004b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9005b8e80941Smrg alu.dst.chan = i; 9006b8e80941Smrg 9007b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 9008b8e80941Smrg if (i == 3) 9009b8e80941Smrg alu.last = 1; 9010b8e80941Smrg alu.dst.write = 1; 9011b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9012b8e80941Smrg if (r) 9013b8e80941Smrg return r; 9014b8e80941Smrg } 9015b8e80941Smrg val_gpr = ctx->temp_reg; 9016b8e80941Smrg } else 9017b8e80941Smrg val_gpr = tgsi_tex_get_src_gpr(ctx, 1); 9018b8e80941Smrg if (rat_index_mode) 9019b8e80941Smrg egcm_load_index_reg(ctx->bc, 1, false); 9020b8e80941Smrg 9021b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 9022b8e80941Smrg cf = ctx->bc->cf_last; 9023b8e80941Smrg 9024b8e80941Smrg cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index; 9025b8e80941Smrg cf->rat.inst = V_RAT_INST_STORE_TYPED; 9026b8e80941Smrg cf->rat.index_mode = rat_index_mode; 9027b8e80941Smrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 9028b8e80941Smrg cf->output.gpr = val_gpr; 9029b8e80941Smrg cf->output.index_gpr = idx_gpr; 9030b8e80941Smrg cf->output.comp_mask = 0xf; 9031b8e80941Smrg cf->output.burst_count = 1; 9032b8e80941Smrg cf->vpm = 1; 9033b8e80941Smrg cf->barrier = 1; 9034b8e80941Smrg cf->output.elem_size = 0; 9035b8e80941Smrg return 0; 9036b8e80941Smrg} 9037b8e80941Smrg 9038b8e80941Smrgstatic int tgsi_store_lds(struct r600_shader_ctx *ctx) 9039b8e80941Smrg{ 9040b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9041b8e80941Smrg struct r600_bytecode_alu alu; 9042b8e80941Smrg int r, i, lasti; 9043b8e80941Smrg int write_mask = inst->Dst[0].Register.WriteMask; 9044b8e80941Smrg int temp_reg = r600_get_temp(ctx); 9045b8e80941Smrg 9046b8e80941Smrg /* LDS write */ 9047b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9048b8e80941Smrg alu.op = ALU_OP1_MOV; 9049b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9050b8e80941Smrg alu.dst.sel = temp_reg; 9051b8e80941Smrg alu.dst.write = 1; 9052b8e80941Smrg alu.last = 1; 9053b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9054b8e80941Smrg if (r) 9055b8e80941Smrg return r; 9056b8e80941Smrg 9057b8e80941Smrg lasti = tgsi_last_instruction(write_mask); 9058b8e80941Smrg for (i = 1; i <= lasti; i++) { 9059b8e80941Smrg if (!(write_mask & (1 << i))) 9060b8e80941Smrg continue; 9061b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 9062b8e80941Smrg temp_reg, i, 9063b8e80941Smrg temp_reg, 0, 9064b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 4 * i); 9065b8e80941Smrg if (r) 9066b8e80941Smrg return r; 9067b8e80941Smrg } 9068b8e80941Smrg for (i = 0; i <= lasti; i++) { 9069b8e80941Smrg if (!(write_mask & (1 << i))) 9070b8e80941Smrg continue; 9071b8e80941Smrg 9072b8e80941Smrg if ((i == 0 && ((write_mask & 3) == 3)) || 9073b8e80941Smrg (i == 2 && ((write_mask & 0xc) == 0xc))) { 9074b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9075b8e80941Smrg alu.op = LDS_OP3_LDS_WRITE_REL; 9076b8e80941Smrg 9077b8e80941Smrg alu.src[0].sel = temp_reg; 9078b8e80941Smrg alu.src[0].chan = i; 9079b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 9080b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i + 1); 9081b8e80941Smrg alu.last = 1; 9082b8e80941Smrg alu.is_lds_idx_op = true; 9083b8e80941Smrg alu.lds_idx = 1; 9084b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9085b8e80941Smrg if (r) 9086b8e80941Smrg return r; 9087b8e80941Smrg i += 1; 9088b8e80941Smrg continue; 9089b8e80941Smrg } 9090b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9091b8e80941Smrg alu.op = LDS_OP2_LDS_WRITE; 9092b8e80941Smrg 9093b8e80941Smrg alu.src[0].sel = temp_reg; 9094b8e80941Smrg alu.src[0].chan = i; 9095b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 9096b8e80941Smrg 9097b8e80941Smrg alu.last = 1; 9098b8e80941Smrg alu.is_lds_idx_op = true; 9099b8e80941Smrg 9100b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9101b8e80941Smrg if (r) 9102b8e80941Smrg return r; 9103b8e80941Smrg } 9104b8e80941Smrg return 0; 9105b8e80941Smrg} 9106b8e80941Smrg 9107b8e80941Smrgstatic int tgsi_store(struct r600_shader_ctx *ctx) 9108b8e80941Smrg{ 9109b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9110b8e80941Smrg if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 9111b8e80941Smrg return tgsi_store_buffer_rat(ctx); 9112b8e80941Smrg else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 9113b8e80941Smrg return tgsi_store_lds(ctx); 9114b8e80941Smrg else 9115b8e80941Smrg return tgsi_store_rat(ctx); 9116b8e80941Smrg} 9117b8e80941Smrg 9118b8e80941Smrgstatic int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx) 9119b8e80941Smrg{ 9120b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9121b8e80941Smrg /* have to work out the offset into the RAT immediate return buffer */ 9122b8e80941Smrg struct r600_bytecode_alu alu; 9123b8e80941Smrg struct r600_bytecode_vtx vtx; 9124b8e80941Smrg struct r600_bytecode_cf *cf; 9125b8e80941Smrg int r; 9126b8e80941Smrg int idx_gpr; 9127b8e80941Smrg unsigned format, num_format, format_comp, endian; 9128b8e80941Smrg const struct util_format_description *desc; 9129b8e80941Smrg unsigned rat_index_mode; 9130b8e80941Smrg unsigned immed_base; 9131b8e80941Smrg unsigned rat_base; 9132b8e80941Smrg 9133b8e80941Smrg immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET; 9134b8e80941Smrg rat_base = ctx->shader->rat_base; 9135b8e80941Smrg 9136b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 9137b8e80941Smrg immed_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 9138b8e80941Smrg rat_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 9139b8e80941Smrg 9140b8e80941Smrg r = load_buffer_coord(ctx, 1, ctx->temp_reg); 9141b8e80941Smrg if (r) 9142b8e80941Smrg return r; 9143b8e80941Smrg idx_gpr = ctx->temp_reg; 9144b8e80941Smrg } else { 9145b8e80941Smrg r = load_index_src(ctx, 1, &idx_gpr); 9146b8e80941Smrg if (r) 9147b8e80941Smrg return r; 9148b8e80941Smrg } 9149b8e80941Smrg 9150b8e80941Smrg rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 9151b8e80941Smrg 9152b8e80941Smrg if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) { 9153848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9154b8e80941Smrg alu.op = ALU_OP1_MOV; 9155b8e80941Smrg alu.dst.sel = ctx->thread_id_gpr; 9156b8e80941Smrg alu.dst.chan = 0; 9157848b8605Smrg alu.dst.write = 1; 9158b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[3], 0); 9159848b8605Smrg alu.last = 1; 9160848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9161848b8605Smrg if (r) 9162848b8605Smrg return r; 9163848b8605Smrg 9164848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9165b8e80941Smrg alu.op = ALU_OP1_MOV; 9166b8e80941Smrg alu.dst.sel = ctx->thread_id_gpr; 9167b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) 9168b8e80941Smrg alu.dst.chan = 2; 9169b8e80941Smrg else 9170b8e80941Smrg alu.dst.chan = 3; 9171848b8605Smrg alu.dst.write = 1; 9172b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 9173848b8605Smrg alu.last = 1; 9174848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9175848b8605Smrg if (r) 9176848b8605Smrg return r; 9177b8e80941Smrg } else { 9178b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9179b8e80941Smrg alu.op = ALU_OP1_MOV; 9180b8e80941Smrg alu.dst.sel = ctx->thread_id_gpr; 9181b8e80941Smrg alu.dst.chan = 0; 9182b8e80941Smrg alu.dst.write = 1; 9183b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 9184b8e80941Smrg alu.last = 1; 9185b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9186b8e80941Smrg if (r) 9187b8e80941Smrg return r; 9188b8e80941Smrg } 9189b8e80941Smrg 9190b8e80941Smrg if (rat_index_mode) 9191b8e80941Smrg egcm_load_index_reg(ctx->bc, 1, false); 9192b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); 9193b8e80941Smrg cf = ctx->bc->cf_last; 9194b8e80941Smrg 9195b8e80941Smrg cf->rat.id = rat_base + inst->Src[0].Register.Index; 9196b8e80941Smrg cf->rat.inst = ctx->inst_info->op; 9197b8e80941Smrg cf->rat.index_mode = rat_index_mode; 9198b8e80941Smrg cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; 9199b8e80941Smrg cf->output.gpr = ctx->thread_id_gpr; 9200b8e80941Smrg cf->output.index_gpr = idx_gpr; 9201b8e80941Smrg cf->output.comp_mask = 0xf; 9202b8e80941Smrg cf->output.burst_count = 1; 9203b8e80941Smrg cf->vpm = 1; 9204b8e80941Smrg cf->barrier = 1; 9205b8e80941Smrg cf->mark = 1; 9206b8e80941Smrg cf->output.elem_size = 0; 9207b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK); 9208b8e80941Smrg cf = ctx->bc->cf_last; 9209b8e80941Smrg cf->barrier = 1; 9210b8e80941Smrg cf->cf_addr = 1; 9211b8e80941Smrg 9212b8e80941Smrg memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 9213b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) { 9214b8e80941Smrg desc = util_format_description(inst->Memory.Format); 9215b8e80941Smrg r600_vertex_data_type(inst->Memory.Format, 9216b8e80941Smrg &format, &num_format, &format_comp, &endian); 9217b8e80941Smrg vtx.dst_sel_x = desc->swizzle[0]; 9218b8e80941Smrg } else { 9219b8e80941Smrg format = FMT_32; 9220b8e80941Smrg num_format = 1; 9221b8e80941Smrg format_comp = 0; 9222b8e80941Smrg endian = 0; 9223b8e80941Smrg vtx.dst_sel_x = 0; 9224b8e80941Smrg } 9225b8e80941Smrg vtx.op = FETCH_OP_VFETCH; 9226b8e80941Smrg vtx.buffer_id = immed_base + inst->Src[0].Register.Index; 9227b8e80941Smrg vtx.buffer_index_mode = rat_index_mode; 9228b8e80941Smrg vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 9229b8e80941Smrg vtx.src_gpr = ctx->thread_id_gpr; 9230b8e80941Smrg vtx.src_sel_x = 1; 9231b8e80941Smrg vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 9232b8e80941Smrg vtx.dst_sel_y = 7; 9233b8e80941Smrg vtx.dst_sel_z = 7; 9234b8e80941Smrg vtx.dst_sel_w = 7; 9235b8e80941Smrg vtx.use_const_fields = 0; 9236b8e80941Smrg vtx.srf_mode_all = 1; 9237b8e80941Smrg vtx.data_format = format; 9238b8e80941Smrg vtx.num_format_all = num_format; 9239b8e80941Smrg vtx.format_comp_all = format_comp; 9240b8e80941Smrg vtx.endian = endian; 9241b8e80941Smrg vtx.offset = 0; 9242b8e80941Smrg vtx.mega_fetch_count = 0xf; 9243b8e80941Smrg r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); 9244b8e80941Smrg if (r) 9245b8e80941Smrg return r; 9246b8e80941Smrg cf = ctx->bc->cf_last; 9247b8e80941Smrg cf->vpm = 1; 9248b8e80941Smrg cf->barrier = 1; 9249b8e80941Smrg return 0; 9250b8e80941Smrg} 9251b8e80941Smrg 9252b8e80941Smrgstatic int get_gds_op(int opcode) 9253b8e80941Smrg{ 9254b8e80941Smrg switch (opcode) { 9255b8e80941Smrg case TGSI_OPCODE_ATOMUADD: 9256b8e80941Smrg return FETCH_OP_GDS_ADD_RET; 9257b8e80941Smrg case TGSI_OPCODE_ATOMAND: 9258b8e80941Smrg return FETCH_OP_GDS_AND_RET; 9259b8e80941Smrg case TGSI_OPCODE_ATOMOR: 9260b8e80941Smrg return FETCH_OP_GDS_OR_RET; 9261b8e80941Smrg case TGSI_OPCODE_ATOMXOR: 9262b8e80941Smrg return FETCH_OP_GDS_XOR_RET; 9263b8e80941Smrg case TGSI_OPCODE_ATOMUMIN: 9264b8e80941Smrg return FETCH_OP_GDS_MIN_UINT_RET; 9265b8e80941Smrg case TGSI_OPCODE_ATOMUMAX: 9266b8e80941Smrg return FETCH_OP_GDS_MAX_UINT_RET; 9267b8e80941Smrg case TGSI_OPCODE_ATOMXCHG: 9268b8e80941Smrg return FETCH_OP_GDS_XCHG_RET; 9269b8e80941Smrg case TGSI_OPCODE_ATOMCAS: 9270b8e80941Smrg return FETCH_OP_GDS_CMP_XCHG_RET; 9271b8e80941Smrg default: 9272b8e80941Smrg return -1; 9273b8e80941Smrg } 9274b8e80941Smrg} 9275b8e80941Smrg 9276b8e80941Smrgstatic int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx) 9277b8e80941Smrg{ 9278b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9279b8e80941Smrg struct r600_bytecode_gds gds; 9280b8e80941Smrg struct r600_bytecode_alu alu; 9281b8e80941Smrg int gds_op = get_gds_op(inst->Instruction.Opcode); 9282b8e80941Smrg int r; 9283b8e80941Smrg int uav_id = 0; 9284b8e80941Smrg int uav_index_mode = 0; 9285b8e80941Smrg bool is_cm = (ctx->bc->chip_class == CAYMAN); 9286b8e80941Smrg 9287b8e80941Smrg if (gds_op == -1) { 9288b8e80941Smrg fprintf(stderr, "unknown GDS op for opcode %d\n", inst->Instruction.Opcode); 9289b8e80941Smrg return -1; 9290b8e80941Smrg } 9291b8e80941Smrg 9292b8e80941Smrg r = tgsi_set_gds_temp(ctx, &uav_id, &uav_index_mode); 9293b8e80941Smrg if (r) 9294b8e80941Smrg return r; 9295b8e80941Smrg 9296b8e80941Smrg if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) { 9297b8e80941Smrg if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) { 9298b8e80941Smrg int value = (ctx->literals[4 * inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]); 9299848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9300b8e80941Smrg alu.op = ALU_OP1_MOV; 9301b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9302b8e80941Smrg alu.dst.chan = is_cm ? 2 : 1; 9303b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 9304b8e80941Smrg alu.src[0].value = value; 9305b8e80941Smrg alu.last = 1; 9306848b8605Smrg alu.dst.write = 1; 9307b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9308b8e80941Smrg if (r) 9309b8e80941Smrg return r; 9310b8e80941Smrg } else { 9311b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9312b8e80941Smrg alu.op = ALU_OP1_MOV; 9313b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9314b8e80941Smrg alu.dst.chan = is_cm ? 2 : 1; 9315b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[3], 0); 9316848b8605Smrg alu.last = 1; 9317b8e80941Smrg alu.dst.write = 1; 9318848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9319848b8605Smrg if (r) 9320848b8605Smrg return r; 9321848b8605Smrg } 9322848b8605Smrg } 9323b8e80941Smrg if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) { 9324b8e80941Smrg int value = (ctx->literals[4 * inst->Src[2].Register.Index + inst->Src[2].Register.SwizzleX]); 9325b8e80941Smrg int abs_value = abs(value); 9326b8e80941Smrg if (abs_value != value && gds_op == FETCH_OP_GDS_ADD_RET) 9327b8e80941Smrg gds_op = FETCH_OP_GDS_SUB_RET; 9328848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9329848b8605Smrg alu.op = ALU_OP1_MOV; 9330b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9331b8e80941Smrg alu.dst.chan = is_cm ? 1 : 0; 9332b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 9333b8e80941Smrg alu.src[0].value = abs_value; 9334848b8605Smrg alu.last = 1; 9335b8e80941Smrg alu.dst.write = 1; 9336b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9337b8e80941Smrg if (r) 9338b8e80941Smrg return r; 9339b8e80941Smrg } else { 9340b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9341b8e80941Smrg alu.op = ALU_OP1_MOV; 9342b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9343b8e80941Smrg alu.dst.chan = is_cm ? 1 : 0; 9344b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[2], 0); 9345b8e80941Smrg alu.last = 1; 9346b8e80941Smrg alu.dst.write = 1; 9347848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9348848b8605Smrg if (r) 9349848b8605Smrg return r; 9350848b8605Smrg } 9351848b8605Smrg 9352848b8605Smrg 9353b8e80941Smrg memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 9354b8e80941Smrg gds.op = gds_op; 9355b8e80941Smrg gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 9356b8e80941Smrg gds.uav_id = is_cm ? 0 : uav_id; 9357b8e80941Smrg gds.uav_index_mode = is_cm ? 0 : uav_index_mode; 9358b8e80941Smrg gds.src_gpr = ctx->temp_reg; 9359b8e80941Smrg gds.src_gpr2 = 0; 9360b8e80941Smrg gds.src_sel_x = is_cm ? 0 : 4; 9361b8e80941Smrg gds.src_sel_y = is_cm ? 1 : 0; 9362b8e80941Smrg if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) 9363b8e80941Smrg gds.src_sel_z = is_cm ? 2 : 1; 9364b8e80941Smrg else 9365b8e80941Smrg gds.src_sel_z = 7; 9366b8e80941Smrg gds.dst_sel_x = 0; 9367b8e80941Smrg gds.dst_sel_y = 7; 9368b8e80941Smrg gds.dst_sel_z = 7; 9369b8e80941Smrg gds.dst_sel_w = 7; 9370b8e80941Smrg gds.alloc_consume = !is_cm; 9371b8e80941Smrg 9372b8e80941Smrg r = r600_bytecode_add_gds(ctx->bc, &gds); 9373b8e80941Smrg if (r) 9374b8e80941Smrg return r; 9375b8e80941Smrg ctx->bc->cf_last->vpm = 1; 9376b8e80941Smrg return 0; 9377b8e80941Smrg} 9378848b8605Smrg 9379b8e80941Smrgstatic int get_lds_op(int opcode) 9380b8e80941Smrg{ 9381b8e80941Smrg switch (opcode) { 9382b8e80941Smrg case TGSI_OPCODE_ATOMUADD: 9383b8e80941Smrg return LDS_OP2_LDS_ADD_RET; 9384b8e80941Smrg case TGSI_OPCODE_ATOMAND: 9385b8e80941Smrg return LDS_OP2_LDS_AND_RET; 9386b8e80941Smrg case TGSI_OPCODE_ATOMOR: 9387b8e80941Smrg return LDS_OP2_LDS_OR_RET; 9388b8e80941Smrg case TGSI_OPCODE_ATOMXOR: 9389b8e80941Smrg return LDS_OP2_LDS_XOR_RET; 9390b8e80941Smrg case TGSI_OPCODE_ATOMUMIN: 9391b8e80941Smrg return LDS_OP2_LDS_MIN_UINT_RET; 9392b8e80941Smrg case TGSI_OPCODE_ATOMUMAX: 9393b8e80941Smrg return LDS_OP2_LDS_MAX_UINT_RET; 9394b8e80941Smrg case TGSI_OPCODE_ATOMIMIN: 9395b8e80941Smrg return LDS_OP2_LDS_MIN_INT_RET; 9396b8e80941Smrg case TGSI_OPCODE_ATOMIMAX: 9397b8e80941Smrg return LDS_OP2_LDS_MAX_INT_RET; 9398b8e80941Smrg case TGSI_OPCODE_ATOMXCHG: 9399b8e80941Smrg return LDS_OP2_LDS_XCHG_RET; 9400b8e80941Smrg case TGSI_OPCODE_ATOMCAS: 9401b8e80941Smrg return LDS_OP3_LDS_CMP_XCHG_RET; 9402b8e80941Smrg default: 9403b8e80941Smrg return -1; 9404848b8605Smrg } 9405b8e80941Smrg} 9406848b8605Smrg 9407b8e80941Smrgstatic int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx) 9408b8e80941Smrg{ 9409b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9410b8e80941Smrg int lds_op = get_lds_op(inst->Instruction.Opcode); 9411b8e80941Smrg int r; 9412848b8605Smrg 9413b8e80941Smrg struct r600_bytecode_alu alu; 9414b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9415b8e80941Smrg alu.op = lds_op; 9416b8e80941Smrg alu.is_lds_idx_op = true; 9417b8e80941Smrg alu.last = 1; 9418b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 9419b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], 0); 9420b8e80941Smrg if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET) 9421b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[3], 0); 9422b8e80941Smrg else 9423b8e80941Smrg alu.src[2].sel = V_SQ_ALU_SRC_0; 9424b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9425b8e80941Smrg if (r) 9426b8e80941Smrg return r; 9427848b8605Smrg 9428b8e80941Smrg /* then read from LDS_OQ_A_POP */ 9429b8e80941Smrg memset(&alu, 0, sizeof(alu)); 9430848b8605Smrg 9431b8e80941Smrg alu.op = ALU_OP1_MOV; 9432b8e80941Smrg alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 9433b8e80941Smrg alu.src[0].chan = 0; 9434b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 9435b8e80941Smrg alu.dst.write = 1; 9436b8e80941Smrg alu.last = 1; 9437b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9438b8e80941Smrg if (r) 9439b8e80941Smrg return r; 9440848b8605Smrg 9441b8e80941Smrg return 0; 9442b8e80941Smrg} 9443848b8605Smrg 9444b8e80941Smrgstatic int tgsi_atomic_op(struct r600_shader_ctx *ctx) 9445b8e80941Smrg{ 9446b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9447b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 9448b8e80941Smrg return tgsi_atomic_op_rat(ctx); 9449b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) 9450b8e80941Smrg return tgsi_atomic_op_gds(ctx); 9451b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 9452b8e80941Smrg return tgsi_atomic_op_rat(ctx); 9453b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 9454b8e80941Smrg return tgsi_atomic_op_lds(ctx); 9455b8e80941Smrg return 0; 9456b8e80941Smrg} 9457848b8605Smrg 9458b8e80941Smrgstatic int tgsi_resq(struct r600_shader_ctx *ctx) 9459b8e80941Smrg{ 9460b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9461b8e80941Smrg unsigned sampler_index_mode; 9462b8e80941Smrg struct r600_bytecode_tex tex; 9463b8e80941Smrg int r; 9464b8e80941Smrg boolean has_txq_cube_array_z = false; 9465848b8605Smrg 9466b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || 9467b8e80941Smrg (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { 9468b8e80941Smrg if (ctx->bc->chip_class < EVERGREEN) 9469b8e80941Smrg ctx->shader->uses_tex_buffers = true; 9470b8e80941Smrg unsigned eg_buffer_base = 0; 9471b8e80941Smrg eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET; 9472b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 9473b8e80941Smrg eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE]; 9474b8e80941Smrg return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset, eg_buffer_base); 9475848b8605Smrg } 9476848b8605Smrg 9477b8e80941Smrg if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY && 9478b8e80941Smrg inst->Dst[0].Register.WriteMask & 4) { 9479b8e80941Smrg ctx->shader->has_txq_cube_array_z_comp = true; 9480b8e80941Smrg has_txq_cube_array_z = true; 9481848b8605Smrg } 9482848b8605Smrg 9483b8e80941Smrg sampler_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 9484b8e80941Smrg if (sampler_index_mode) 9485b8e80941Smrg egcm_load_index_reg(ctx->bc, 1, false); 9486848b8605Smrg 9487848b8605Smrg 9488b8e80941Smrg /* does this shader want a num layers from TXQ for a cube array? */ 9489b8e80941Smrg if (has_txq_cube_array_z) { 9490b8e80941Smrg int id = tgsi_tex_get_src_gpr(ctx, 0) + ctx->shader->image_size_const_offset; 9491b8e80941Smrg struct r600_bytecode_alu alu; 9492848b8605Smrg 9493b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9494b8e80941Smrg alu.op = ALU_OP1_MOV; 9495848b8605Smrg 9496b8e80941Smrg alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 9497b8e80941Smrg /* with eg each dword is either number of cubes */ 9498b8e80941Smrg alu.src[0].sel += id / 4; 9499b8e80941Smrg alu.src[0].chan = id % 4; 9500b8e80941Smrg alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 9501b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 9502b8e80941Smrg alu.last = 1; 9503b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9504b8e80941Smrg if (r) 9505b8e80941Smrg return r; 9506b8e80941Smrg /* disable writemask from texture instruction */ 9507b8e80941Smrg inst->Dst[0].Register.WriteMask &= ~4; 9508848b8605Smrg } 9509b8e80941Smrg memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 9510b8e80941Smrg tex.op = ctx->inst_info->op; 9511b8e80941Smrg tex.sampler_id = R600_IMAGE_REAL_RESOURCE_OFFSET + inst->Src[0].Register.Index; 9512b8e80941Smrg tex.sampler_index_mode = sampler_index_mode; 9513b8e80941Smrg tex.resource_id = tex.sampler_id; 9514b8e80941Smrg tex.resource_index_mode = sampler_index_mode; 9515b8e80941Smrg tex.src_sel_x = 4; 9516b8e80941Smrg tex.src_sel_y = 4; 9517b8e80941Smrg tex.src_sel_z = 4; 9518b8e80941Smrg tex.src_sel_w = 4; 9519b8e80941Smrg tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 9520b8e80941Smrg tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 9521b8e80941Smrg tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 9522b8e80941Smrg tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 9523b8e80941Smrg tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 9524848b8605Smrg r = r600_bytecode_add_tex(ctx->bc, &tex); 9525848b8605Smrg if (r) 9526848b8605Smrg return r; 9527848b8605Smrg 9528848b8605Smrg return 0; 9529848b8605Smrg} 9530848b8605Smrg 9531848b8605Smrgstatic int tgsi_lrp(struct r600_shader_ctx *ctx) 9532848b8605Smrg{ 9533848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9534848b8605Smrg struct r600_bytecode_alu alu; 9535b8e80941Smrg unsigned lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 9536b8e80941Smrg struct r600_bytecode_alu_src srcs[2][4]; 9537848b8605Smrg unsigned i; 9538848b8605Smrg int r; 9539848b8605Smrg 9540848b8605Smrg /* optimize if it's just an equal balance */ 9541848b8605Smrg if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 9542848b8605Smrg for (i = 0; i < lasti + 1; i++) { 9543848b8605Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9544848b8605Smrg continue; 9545848b8605Smrg 9546848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9547848b8605Smrg alu.op = ALU_OP2_ADD; 9548848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 9549848b8605Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 9550848b8605Smrg alu.omod = 3; 9551848b8605Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 9552848b8605Smrg alu.dst.chan = i; 9553848b8605Smrg if (i == lasti) { 9554848b8605Smrg alu.last = 1; 9555848b8605Smrg } 9556848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9557848b8605Smrg if (r) 9558848b8605Smrg return r; 9559848b8605Smrg } 9560848b8605Smrg return 0; 9561848b8605Smrg } 9562848b8605Smrg 9563848b8605Smrg /* 1 - src0 */ 9564848b8605Smrg for (i = 0; i < lasti + 1; i++) { 9565848b8605Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9566848b8605Smrg continue; 9567848b8605Smrg 9568848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9569848b8605Smrg alu.op = ALU_OP2_ADD; 9570848b8605Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 9571848b8605Smrg alu.src[0].chan = 0; 9572848b8605Smrg r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 9573848b8605Smrg r600_bytecode_src_toggle_neg(&alu.src[1]); 9574848b8605Smrg alu.dst.sel = ctx->temp_reg; 9575848b8605Smrg alu.dst.chan = i; 9576848b8605Smrg if (i == lasti) { 9577848b8605Smrg alu.last = 1; 9578848b8605Smrg } 9579848b8605Smrg alu.dst.write = 1; 9580848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9581848b8605Smrg if (r) 9582848b8605Smrg return r; 9583848b8605Smrg } 9584848b8605Smrg 9585848b8605Smrg /* (1 - src0) * src2 */ 9586848b8605Smrg for (i = 0; i < lasti + 1; i++) { 9587848b8605Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9588848b8605Smrg continue; 9589848b8605Smrg 9590848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9591848b8605Smrg alu.op = ALU_OP2_MUL; 9592848b8605Smrg alu.src[0].sel = ctx->temp_reg; 9593848b8605Smrg alu.src[0].chan = i; 9594848b8605Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 9595848b8605Smrg alu.dst.sel = ctx->temp_reg; 9596848b8605Smrg alu.dst.chan = i; 9597848b8605Smrg if (i == lasti) { 9598848b8605Smrg alu.last = 1; 9599848b8605Smrg } 9600848b8605Smrg alu.dst.write = 1; 9601848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9602848b8605Smrg if (r) 9603848b8605Smrg return r; 9604848b8605Smrg } 9605848b8605Smrg 9606848b8605Smrg /* src0 * src1 + (1 - src0) * src2 */ 9607b8e80941Smrg 9608b8e80941Smrg for (i = 0; i < 2; i++) { 9609b8e80941Smrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 9610b8e80941Smrg srcs[i], &ctx->src[i]); 9611b8e80941Smrg if (r) 9612b8e80941Smrg return r; 9613b8e80941Smrg } 9614b8e80941Smrg 9615b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 9616b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9617b8e80941Smrg continue; 9618b8e80941Smrg 9619b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9620b8e80941Smrg alu.op = ALU_OP3_MULADD; 9621b8e80941Smrg alu.is_op3 = 1; 9622b8e80941Smrg alu.src[0] = srcs[0][i]; 9623b8e80941Smrg alu.src[1] = srcs[1][i]; 9624b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 9625b8e80941Smrg alu.src[2].chan = i; 9626b8e80941Smrg 9627b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 9628b8e80941Smrg alu.dst.chan = i; 9629b8e80941Smrg if (i == lasti) { 9630b8e80941Smrg alu.last = 1; 9631b8e80941Smrg } 9632b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9633b8e80941Smrg if (r) 9634b8e80941Smrg return r; 9635b8e80941Smrg } 9636b8e80941Smrg return 0; 9637b8e80941Smrg} 9638b8e80941Smrg 9639b8e80941Smrgstatic int tgsi_cmp(struct r600_shader_ctx *ctx) 9640b8e80941Smrg{ 9641b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9642b8e80941Smrg struct r600_bytecode_alu alu; 9643b8e80941Smrg int i, r, j; 9644b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 9645b8e80941Smrg struct r600_bytecode_alu_src srcs[3][4]; 9646b8e80941Smrg 9647b8e80941Smrg unsigned op; 9648b8e80941Smrg 9649b8e80941Smrg if (ctx->src[0].abs && ctx->src[0].neg) { 9650b8e80941Smrg op = ALU_OP3_CNDE; 9651b8e80941Smrg ctx->src[0].abs = 0; 9652b8e80941Smrg ctx->src[0].neg = 0; 9653b8e80941Smrg } else { 9654b8e80941Smrg op = ALU_OP3_CNDGE; 9655b8e80941Smrg } 9656b8e80941Smrg 9657b8e80941Smrg for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 9658b8e80941Smrg r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, 9659b8e80941Smrg srcs[j], &ctx->src[j]); 9660b8e80941Smrg if (r) 9661b8e80941Smrg return r; 9662b8e80941Smrg } 9663b8e80941Smrg 9664b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 9665b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9666b8e80941Smrg continue; 9667b8e80941Smrg 9668b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9669b8e80941Smrg alu.op = op; 9670b8e80941Smrg alu.src[0] = srcs[0][i]; 9671b8e80941Smrg alu.src[1] = srcs[2][i]; 9672b8e80941Smrg alu.src[2] = srcs[1][i]; 9673b8e80941Smrg 9674b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 9675b8e80941Smrg alu.dst.chan = i; 9676b8e80941Smrg alu.dst.write = 1; 9677b8e80941Smrg alu.is_op3 = 1; 9678b8e80941Smrg if (i == lasti) 9679b8e80941Smrg alu.last = 1; 9680b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9681b8e80941Smrg if (r) 9682b8e80941Smrg return r; 9683b8e80941Smrg } 9684b8e80941Smrg return 0; 9685b8e80941Smrg} 9686b8e80941Smrg 9687b8e80941Smrgstatic int tgsi_ucmp(struct r600_shader_ctx *ctx) 9688b8e80941Smrg{ 9689b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9690b8e80941Smrg struct r600_bytecode_alu alu; 9691b8e80941Smrg int i, r; 9692b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 9693b8e80941Smrg 9694848b8605Smrg for (i = 0; i < lasti + 1; i++) { 9695848b8605Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 9696848b8605Smrg continue; 9697848b8605Smrg 9698b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9699b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 9700b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 9701b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 9702b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 9703b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 9704b8e80941Smrg alu.dst.chan = i; 9705b8e80941Smrg alu.dst.write = 1; 9706b8e80941Smrg alu.is_op3 = 1; 9707b8e80941Smrg if (i == lasti) 9708b8e80941Smrg alu.last = 1; 9709b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9710b8e80941Smrg if (r) 9711b8e80941Smrg return r; 9712b8e80941Smrg } 9713b8e80941Smrg return 0; 9714b8e80941Smrg} 9715b8e80941Smrg 9716b8e80941Smrgstatic int tgsi_exp(struct r600_shader_ctx *ctx) 9717b8e80941Smrg{ 9718b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9719b8e80941Smrg struct r600_bytecode_alu alu; 9720b8e80941Smrg int r; 9721b8e80941Smrg unsigned i; 9722b8e80941Smrg 9723b8e80941Smrg /* result.x = 2^floor(src); */ 9724b8e80941Smrg if (inst->Dst[0].Register.WriteMask & 1) { 9725b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9726b8e80941Smrg 9727b8e80941Smrg alu.op = ALU_OP1_FLOOR; 9728b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9729b8e80941Smrg 9730b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9731b8e80941Smrg alu.dst.chan = 0; 9732b8e80941Smrg alu.dst.write = 1; 9733b8e80941Smrg alu.last = 1; 9734b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9735b8e80941Smrg if (r) 9736b8e80941Smrg return r; 9737b8e80941Smrg 9738b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 9739b8e80941Smrg for (i = 0; i < 3; i++) { 9740b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9741b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9742b8e80941Smrg alu.src[0].chan = 0; 9743b8e80941Smrg 9744b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9745b8e80941Smrg alu.dst.chan = i; 9746b8e80941Smrg alu.dst.write = i == 0; 9747b8e80941Smrg alu.last = i == 2; 9748b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9749b8e80941Smrg if (r) 9750b8e80941Smrg return r; 9751b8e80941Smrg } 9752b8e80941Smrg } else { 9753b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9754b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9755b8e80941Smrg alu.src[0].chan = 0; 9756b8e80941Smrg 9757b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9758b8e80941Smrg alu.dst.chan = 0; 9759b8e80941Smrg alu.dst.write = 1; 9760b8e80941Smrg alu.last = 1; 9761b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9762b8e80941Smrg if (r) 9763b8e80941Smrg return r; 9764b8e80941Smrg } 9765b8e80941Smrg } 9766b8e80941Smrg 9767b8e80941Smrg /* result.y = tmp - floor(tmp); */ 9768b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 9769b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9770b8e80941Smrg 9771b8e80941Smrg alu.op = ALU_OP1_FRACT; 9772b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9773b8e80941Smrg 9774b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9775b8e80941Smrg#if 0 9776b8e80941Smrg r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 9777b8e80941Smrg if (r) 9778b8e80941Smrg return r; 9779b8e80941Smrg#endif 9780b8e80941Smrg alu.dst.write = 1; 9781b8e80941Smrg alu.dst.chan = 1; 9782b8e80941Smrg 9783b8e80941Smrg alu.last = 1; 9784848b8605Smrg 9785848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9786848b8605Smrg if (r) 9787848b8605Smrg return r; 9788848b8605Smrg } 9789848b8605Smrg 9790b8e80941Smrg /* result.z = RoughApprox2ToX(tmp);*/ 9791b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 9792b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 9793b8e80941Smrg for (i = 0; i < 3; i++) { 9794b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9795b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9796b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9797848b8605Smrg 9798b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9799b8e80941Smrg alu.dst.chan = i; 9800b8e80941Smrg if (i == 2) { 9801b8e80941Smrg alu.dst.write = 1; 9802b8e80941Smrg alu.last = 1; 9803b8e80941Smrg } 9804b8e80941Smrg 9805b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9806b8e80941Smrg if (r) 9807b8e80941Smrg return r; 9808b8e80941Smrg } 9809b8e80941Smrg } else { 9810b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9811b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9812b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9813b8e80941Smrg 9814b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9815b8e80941Smrg alu.dst.write = 1; 9816b8e80941Smrg alu.dst.chan = 2; 9817848b8605Smrg 9818848b8605Smrg alu.last = 1; 9819b8e80941Smrg 9820b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9821b8e80941Smrg if (r) 9822b8e80941Smrg return r; 9823b8e80941Smrg } 9824848b8605Smrg } 9825848b8605Smrg 9826b8e80941Smrg /* result.w = 1.0;*/ 9827b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 9828b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9829848b8605Smrg 9830b8e80941Smrg alu.op = ALU_OP1_MOV; 9831b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 9832b8e80941Smrg alu.src[0].chan = 0; 9833848b8605Smrg 9834b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9835b8e80941Smrg alu.dst.chan = 3; 9836848b8605Smrg alu.dst.write = 1; 9837b8e80941Smrg alu.last = 1; 9838848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9839848b8605Smrg if (r) 9840848b8605Smrg return r; 9841848b8605Smrg } 9842b8e80941Smrg return tgsi_helper_copy(ctx, inst); 9843848b8605Smrg} 9844848b8605Smrg 9845b8e80941Smrgstatic int tgsi_log(struct r600_shader_ctx *ctx) 9846848b8605Smrg{ 9847848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 9848848b8605Smrg struct r600_bytecode_alu alu; 9849b8e80941Smrg int r; 9850b8e80941Smrg unsigned i; 9851848b8605Smrg 9852b8e80941Smrg /* result.x = floor(log2(|src|)); */ 9853b8e80941Smrg if (inst->Dst[0].Register.WriteMask & 1) { 9854b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 9855b8e80941Smrg for (i = 0; i < 3; i++) { 9856b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9857b8e80941Smrg 9858b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 9859b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9860b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 9861b8e80941Smrg 9862b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9863b8e80941Smrg alu.dst.chan = i; 9864b8e80941Smrg if (i == 0) 9865b8e80941Smrg alu.dst.write = 1; 9866b8e80941Smrg if (i == 2) 9867b8e80941Smrg alu.last = 1; 9868b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9869b8e80941Smrg if (r) 9870b8e80941Smrg return r; 9871b8e80941Smrg } 9872848b8605Smrg 9873848b8605Smrg } else { 9874b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9875b8e80941Smrg 9876b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 9877b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9878b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 9879b8e80941Smrg 9880b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9881b8e80941Smrg alu.dst.chan = 0; 9882b8e80941Smrg alu.dst.write = 1; 9883b8e80941Smrg alu.last = 1; 9884b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9885b8e80941Smrg if (r) 9886b8e80941Smrg return r; 9887848b8605Smrg } 9888848b8605Smrg 9889b8e80941Smrg alu.op = ALU_OP1_FLOOR; 9890b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9891b8e80941Smrg alu.src[0].chan = 0; 9892b8e80941Smrg 9893848b8605Smrg alu.dst.sel = ctx->temp_reg; 9894b8e80941Smrg alu.dst.chan = 0; 9895848b8605Smrg alu.dst.write = 1; 9896b8e80941Smrg alu.last = 1; 9897848b8605Smrg 9898848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9899848b8605Smrg if (r) 9900848b8605Smrg return r; 9901848b8605Smrg } 9902848b8605Smrg 9903b8e80941Smrg /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 9904b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 9905848b8605Smrg 9906b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 9907b8e80941Smrg for (i = 0; i < 3; i++) { 9908b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9909b8e80941Smrg 9910b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 9911b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9912b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 9913b8e80941Smrg 9914b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9915b8e80941Smrg alu.dst.chan = i; 9916b8e80941Smrg if (i == 1) 9917b8e80941Smrg alu.dst.write = 1; 9918b8e80941Smrg if (i == 2) 9919b8e80941Smrg alu.last = 1; 9920b8e80941Smrg 9921b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9922b8e80941Smrg if (r) 9923b8e80941Smrg return r; 9924b8e80941Smrg } 9925848b8605Smrg } else { 9926b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9927848b8605Smrg 9928b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 9929b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 9930b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 9931848b8605Smrg 9932848b8605Smrg alu.dst.sel = ctx->temp_reg; 9933b8e80941Smrg alu.dst.chan = 1; 9934b8e80941Smrg alu.dst.write = 1; 9935848b8605Smrg alu.last = 1; 9936848b8605Smrg 9937b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9938b8e80941Smrg if (r) 9939b8e80941Smrg return r; 9940b8e80941Smrg } 9941848b8605Smrg 9942848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9943848b8605Smrg 9944848b8605Smrg alu.op = ALU_OP1_FLOOR; 9945b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9946b8e80941Smrg alu.src[0].chan = 1; 9947848b8605Smrg 9948848b8605Smrg alu.dst.sel = ctx->temp_reg; 9949b8e80941Smrg alu.dst.chan = 1; 9950848b8605Smrg alu.dst.write = 1; 9951848b8605Smrg alu.last = 1; 9952b8e80941Smrg 9953848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9954848b8605Smrg if (r) 9955848b8605Smrg return r; 9956848b8605Smrg 9957848b8605Smrg if (ctx->bc->chip_class == CAYMAN) { 9958848b8605Smrg for (i = 0; i < 3; i++) { 9959b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9960b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9961b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9962b8e80941Smrg alu.src[0].chan = 1; 9963b8e80941Smrg 9964b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9965b8e80941Smrg alu.dst.chan = i; 9966b8e80941Smrg if (i == 1) 9967b8e80941Smrg alu.dst.write = 1; 9968b8e80941Smrg if (i == 2) 9969b8e80941Smrg alu.last = 1; 9970b8e80941Smrg 9971b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9972b8e80941Smrg if (r) 9973b8e80941Smrg return r; 9974b8e80941Smrg } 9975b8e80941Smrg } else { 9976b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9977b8e80941Smrg alu.op = ALU_OP1_EXP_IEEE; 9978b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 9979b8e80941Smrg alu.src[0].chan = 1; 9980b8e80941Smrg 9981b8e80941Smrg alu.dst.sel = ctx->temp_reg; 9982b8e80941Smrg alu.dst.chan = 1; 9983b8e80941Smrg alu.dst.write = 1; 9984b8e80941Smrg alu.last = 1; 9985b8e80941Smrg 9986b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 9987b8e80941Smrg if (r) 9988b8e80941Smrg return r; 9989b8e80941Smrg } 9990b8e80941Smrg 9991b8e80941Smrg if (ctx->bc->chip_class == CAYMAN) { 9992b8e80941Smrg for (i = 0; i < 3; i++) { 9993b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 9994b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 9995848b8605Smrg alu.src[0].sel = ctx->temp_reg; 9996b8e80941Smrg alu.src[0].chan = 1; 9997848b8605Smrg 9998848b8605Smrg alu.dst.sel = ctx->temp_reg; 9999848b8605Smrg alu.dst.chan = i; 10000b8e80941Smrg if (i == 1) 10001b8e80941Smrg alu.dst.write = 1; 10002b8e80941Smrg if (i == 2) 10003b8e80941Smrg alu.last = 1; 10004b8e80941Smrg 10005848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10006848b8605Smrg if (r) 10007848b8605Smrg return r; 10008848b8605Smrg } 10009848b8605Smrg } else { 10010b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10011b8e80941Smrg alu.op = ALU_OP1_RECIP_IEEE; 10012848b8605Smrg alu.src[0].sel = ctx->temp_reg; 10013b8e80941Smrg alu.src[0].chan = 1; 10014848b8605Smrg 10015848b8605Smrg alu.dst.sel = ctx->temp_reg; 10016b8e80941Smrg alu.dst.chan = 1; 10017848b8605Smrg alu.dst.write = 1; 10018848b8605Smrg alu.last = 1; 10019b8e80941Smrg 10020848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10021848b8605Smrg if (r) 10022848b8605Smrg return r; 10023848b8605Smrg } 10024848b8605Smrg 10025848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10026848b8605Smrg 10027b8e80941Smrg alu.op = ALU_OP2_MUL; 10028b8e80941Smrg 10029848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10030b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 10031b8e80941Smrg 10032b8e80941Smrg alu.src[1].sel = ctx->temp_reg; 10033b8e80941Smrg alu.src[1].chan = 1; 10034848b8605Smrg 10035848b8605Smrg alu.dst.sel = ctx->temp_reg; 10036848b8605Smrg alu.dst.chan = 1; 10037b8e80941Smrg alu.dst.write = 1; 10038848b8605Smrg alu.last = 1; 10039848b8605Smrg 10040848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10041848b8605Smrg if (r) 10042848b8605Smrg return r; 10043848b8605Smrg } 10044848b8605Smrg 10045b8e80941Smrg /* result.z = log2(|src|);*/ 10046b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 10047848b8605Smrg if (ctx->bc->chip_class == CAYMAN) { 10048848b8605Smrg for (i = 0; i < 3; i++) { 10049848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10050b8e80941Smrg 10051b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 10052848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10053b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 10054848b8605Smrg 10055848b8605Smrg alu.dst.sel = ctx->temp_reg; 10056b8e80941Smrg if (i == 2) 10057848b8605Smrg alu.dst.write = 1; 10058b8e80941Smrg alu.dst.chan = i; 10059b8e80941Smrg if (i == 2) 10060848b8605Smrg alu.last = 1; 10061848b8605Smrg 10062848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10063848b8605Smrg if (r) 10064848b8605Smrg return r; 10065848b8605Smrg } 10066848b8605Smrg } else { 10067848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10068b8e80941Smrg 10069b8e80941Smrg alu.op = ALU_OP1_LOG_IEEE; 10070848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10071b8e80941Smrg r600_bytecode_src_set_abs(&alu.src[0]); 10072848b8605Smrg 10073848b8605Smrg alu.dst.sel = ctx->temp_reg; 10074848b8605Smrg alu.dst.write = 1; 10075848b8605Smrg alu.dst.chan = 2; 10076848b8605Smrg alu.last = 1; 10077848b8605Smrg 10078848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10079848b8605Smrg if (r) 10080848b8605Smrg return r; 10081848b8605Smrg } 10082848b8605Smrg } 10083848b8605Smrg 10084b8e80941Smrg /* result.w = 1.0; */ 10085b8e80941Smrg if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 10086848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10087848b8605Smrg 10088848b8605Smrg alu.op = ALU_OP1_MOV; 10089848b8605Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 10090848b8605Smrg alu.src[0].chan = 0; 10091848b8605Smrg 10092848b8605Smrg alu.dst.sel = ctx->temp_reg; 10093848b8605Smrg alu.dst.chan = 3; 10094848b8605Smrg alu.dst.write = 1; 10095848b8605Smrg alu.last = 1; 10096b8e80941Smrg 10097848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10098848b8605Smrg if (r) 10099848b8605Smrg return r; 10100848b8605Smrg } 10101b8e80941Smrg 10102848b8605Smrg return tgsi_helper_copy(ctx, inst); 10103848b8605Smrg} 10104848b8605Smrg 10105b8e80941Smrgstatic int tgsi_eg_arl(struct r600_shader_ctx *ctx) 10106848b8605Smrg{ 10107848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10108848b8605Smrg struct r600_bytecode_alu alu; 10109848b8605Smrg int r; 10110b8e80941Smrg int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10111b8e80941Smrg unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index); 10112848b8605Smrg 10113b8e80941Smrg assert(inst->Dst[0].Register.Index < 3); 10114b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10115848b8605Smrg 10116b8e80941Smrg switch (inst->Instruction.Opcode) { 10117b8e80941Smrg case TGSI_OPCODE_ARL: 10118b8e80941Smrg alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 10119b8e80941Smrg break; 10120b8e80941Smrg case TGSI_OPCODE_ARR: 10121b8e80941Smrg alu.op = ALU_OP1_FLT_TO_INT; 10122b8e80941Smrg break; 10123b8e80941Smrg case TGSI_OPCODE_UARL: 10124b8e80941Smrg alu.op = ALU_OP1_MOV; 10125b8e80941Smrg break; 10126b8e80941Smrg default: 10127b8e80941Smrg assert(0); 10128b8e80941Smrg return -1; 10129b8e80941Smrg } 10130b8e80941Smrg 10131b8e80941Smrg for (i = 0; i <= lasti; ++i) { 10132b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 10133b8e80941Smrg continue; 10134b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 10135b8e80941Smrg alu.last = i == lasti; 10136b8e80941Smrg alu.dst.sel = reg; 10137b8e80941Smrg alu.dst.chan = i; 10138b8e80941Smrg alu.dst.write = 1; 10139b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10140b8e80941Smrg if (r) 10141b8e80941Smrg return r; 10142b8e80941Smrg } 10143b8e80941Smrg 10144b8e80941Smrg if (inst->Dst[0].Register.Index > 0) 10145b8e80941Smrg ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 10146b8e80941Smrg else 10147b8e80941Smrg ctx->bc->ar_loaded = 0; 10148b8e80941Smrg 10149b8e80941Smrg return 0; 10150b8e80941Smrg} 10151b8e80941Smrgstatic int tgsi_r600_arl(struct r600_shader_ctx *ctx) 10152b8e80941Smrg{ 10153b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10154b8e80941Smrg struct r600_bytecode_alu alu; 10155b8e80941Smrg int r; 10156b8e80941Smrg int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10157b8e80941Smrg 10158b8e80941Smrg switch (inst->Instruction.Opcode) { 10159b8e80941Smrg case TGSI_OPCODE_ARL: 10160b8e80941Smrg memset(&alu, 0, sizeof(alu)); 10161b8e80941Smrg alu.op = ALU_OP1_FLOOR; 10162b8e80941Smrg alu.dst.sel = ctx->bc->ar_reg; 10163b8e80941Smrg alu.dst.write = 1; 10164b8e80941Smrg for (i = 0; i <= lasti; ++i) { 10165b8e80941Smrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 10166848b8605Smrg alu.dst.chan = i; 10167b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 10168b8e80941Smrg alu.last = i == lasti; 10169b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 10170b8e80941Smrg return r; 10171b8e80941Smrg } 10172b8e80941Smrg } 10173b8e80941Smrg 10174b8e80941Smrg memset(&alu, 0, sizeof(alu)); 10175b8e80941Smrg alu.op = ALU_OP1_FLT_TO_INT; 10176b8e80941Smrg alu.src[0].sel = ctx->bc->ar_reg; 10177b8e80941Smrg alu.dst.sel = ctx->bc->ar_reg; 10178b8e80941Smrg alu.dst.write = 1; 10179b8e80941Smrg /* FLT_TO_INT is trans-only on r600/r700 */ 10180b8e80941Smrg alu.last = TRUE; 10181b8e80941Smrg for (i = 0; i <= lasti; ++i) { 10182b8e80941Smrg alu.dst.chan = i; 10183b8e80941Smrg alu.src[0].chan = i; 10184b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 10185b8e80941Smrg return r; 10186b8e80941Smrg } 10187b8e80941Smrg break; 10188b8e80941Smrg case TGSI_OPCODE_ARR: 10189b8e80941Smrg memset(&alu, 0, sizeof(alu)); 10190b8e80941Smrg alu.op = ALU_OP1_FLT_TO_INT; 10191b8e80941Smrg alu.dst.sel = ctx->bc->ar_reg; 10192b8e80941Smrg alu.dst.write = 1; 10193b8e80941Smrg /* FLT_TO_INT is trans-only on r600/r700 */ 10194b8e80941Smrg alu.last = TRUE; 10195b8e80941Smrg for (i = 0; i <= lasti; ++i) { 10196b8e80941Smrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 10197b8e80941Smrg alu.dst.chan = i; 10198b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 10199b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 10200848b8605Smrg return r; 10201848b8605Smrg } 10202b8e80941Smrg } 10203b8e80941Smrg break; 10204b8e80941Smrg case TGSI_OPCODE_UARL: 10205b8e80941Smrg memset(&alu, 0, sizeof(alu)); 10206b8e80941Smrg alu.op = ALU_OP1_MOV; 10207b8e80941Smrg alu.dst.sel = ctx->bc->ar_reg; 10208b8e80941Smrg alu.dst.write = 1; 10209b8e80941Smrg for (i = 0; i <= lasti; ++i) { 10210b8e80941Smrg if (inst->Dst[0].Register.WriteMask & (1 << i)) { 10211b8e80941Smrg alu.dst.chan = i; 10212b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 10213b8e80941Smrg alu.last = i == lasti; 10214b8e80941Smrg if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 10215b8e80941Smrg return r; 10216b8e80941Smrg } 10217b8e80941Smrg } 10218b8e80941Smrg break; 10219b8e80941Smrg default: 10220b8e80941Smrg assert(0); 10221b8e80941Smrg return -1; 10222b8e80941Smrg } 10223b8e80941Smrg 10224b8e80941Smrg ctx->bc->ar_loaded = 0; 10225b8e80941Smrg return 0; 10226b8e80941Smrg} 10227b8e80941Smrg 10228b8e80941Smrgstatic int tgsi_opdst(struct r600_shader_ctx *ctx) 10229b8e80941Smrg{ 10230b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10231b8e80941Smrg struct r600_bytecode_alu alu; 10232b8e80941Smrg int i, r = 0; 10233b8e80941Smrg 10234b8e80941Smrg for (i = 0; i < 4; i++) { 10235b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10236b8e80941Smrg 10237b8e80941Smrg alu.op = ALU_OP2_MUL; 10238b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 10239b8e80941Smrg 10240b8e80941Smrg if (i == 0 || i == 3) { 10241b8e80941Smrg alu.src[0].sel = V_SQ_ALU_SRC_1; 10242b8e80941Smrg } else { 10243b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 10244b8e80941Smrg } 10245b8e80941Smrg 10246b8e80941Smrg if (i == 0 || i == 2) { 10247b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_1; 10248b8e80941Smrg } else { 10249b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 10250b8e80941Smrg } 10251b8e80941Smrg if (i == 3) 10252b8e80941Smrg alu.last = 1; 10253b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10254b8e80941Smrg if (r) 10255b8e80941Smrg return r; 10256b8e80941Smrg } 10257b8e80941Smrg return 0; 10258b8e80941Smrg} 10259b8e80941Smrg 10260b8e80941Smrgstatic int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type, 10261b8e80941Smrg struct r600_bytecode_alu_src *src) 10262b8e80941Smrg{ 10263b8e80941Smrg struct r600_bytecode_alu alu; 10264b8e80941Smrg int r; 10265b8e80941Smrg 10266b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10267b8e80941Smrg alu.op = opcode; 10268b8e80941Smrg alu.execute_mask = 1; 10269b8e80941Smrg alu.update_pred = 1; 10270b8e80941Smrg 10271b8e80941Smrg alu.dst.sel = ctx->temp_reg; 10272b8e80941Smrg alu.dst.write = 1; 10273b8e80941Smrg alu.dst.chan = 0; 10274b8e80941Smrg 10275b8e80941Smrg alu.src[0] = *src; 10276b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_0; 10277b8e80941Smrg alu.src[1].chan = 0; 10278b8e80941Smrg 10279b8e80941Smrg alu.last = 1; 10280b8e80941Smrg 10281b8e80941Smrg r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 10282b8e80941Smrg if (r) 10283b8e80941Smrg return r; 10284b8e80941Smrg return 0; 10285b8e80941Smrg} 10286b8e80941Smrg 10287b8e80941Smrgstatic int pops(struct r600_shader_ctx *ctx, int pops) 10288b8e80941Smrg{ 10289b8e80941Smrg unsigned force_pop = ctx->bc->force_add_cf; 10290848b8605Smrg 10291b8e80941Smrg if (!force_pop) { 10292b8e80941Smrg int alu_pop = 3; 10293b8e80941Smrg if (ctx->bc->cf_last) { 10294b8e80941Smrg if (ctx->bc->cf_last->op == CF_OP_ALU) 10295b8e80941Smrg alu_pop = 0; 10296b8e80941Smrg else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 10297b8e80941Smrg alu_pop = 1; 10298b8e80941Smrg } 10299b8e80941Smrg alu_pop += pops; 10300b8e80941Smrg if (alu_pop == 1) { 10301b8e80941Smrg ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 10302b8e80941Smrg ctx->bc->force_add_cf = 1; 10303b8e80941Smrg } else if (alu_pop == 2) { 10304b8e80941Smrg ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 10305b8e80941Smrg ctx->bc->force_add_cf = 1; 10306848b8605Smrg } else { 10307b8e80941Smrg force_pop = 1; 10308848b8605Smrg } 10309b8e80941Smrg } 10310848b8605Smrg 10311b8e80941Smrg if (force_pop) { 10312b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 10313b8e80941Smrg ctx->bc->cf_last->pop_count = pops; 10314b8e80941Smrg ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 10315848b8605Smrg } 10316848b8605Smrg 10317b8e80941Smrg return 0; 10318b8e80941Smrg} 10319848b8605Smrg 10320b8e80941Smrgstatic inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, 10321b8e80941Smrg unsigned reason) 10322b8e80941Smrg{ 10323b8e80941Smrg struct r600_stack_info *stack = &ctx->bc->stack; 10324b8e80941Smrg unsigned elements; 10325b8e80941Smrg int entries; 10326848b8605Smrg 10327b8e80941Smrg unsigned entry_size = stack->entry_size; 10328848b8605Smrg 10329b8e80941Smrg elements = (stack->loop + stack->push_wqm ) * entry_size; 10330b8e80941Smrg elements += stack->push; 10331848b8605Smrg 10332b8e80941Smrg switch (ctx->bc->chip_class) { 10333b8e80941Smrg case R600: 10334b8e80941Smrg case R700: 10335b8e80941Smrg /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 10336b8e80941Smrg * the stack must be reserved to hold the current active/continue 10337b8e80941Smrg * masks */ 10338b8e80941Smrg if (reason == FC_PUSH_VPM || stack->push > 0) { 10339b8e80941Smrg elements += 2; 10340b8e80941Smrg } 10341b8e80941Smrg break; 10342848b8605Smrg 10343b8e80941Smrg case CAYMAN: 10344b8e80941Smrg /* r9xx: any stack operation on empty stack consumes 2 additional 10345b8e80941Smrg * elements */ 10346b8e80941Smrg elements += 2; 10347848b8605Smrg 10348b8e80941Smrg /* fallthrough */ 10349b8e80941Smrg /* FIXME: do the two elements added above cover the cases for the 10350b8e80941Smrg * r8xx+ below? */ 10351b8e80941Smrg 10352b8e80941Smrg case EVERGREEN: 10353b8e80941Smrg /* r8xx+: 2 extra elements are not always required, but one extra 10354b8e80941Smrg * element must be added for each of the following cases: 10355b8e80941Smrg * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 10356b8e80941Smrg * stack usage. 10357b8e80941Smrg * (Currently we don't use ALU_ELSE_AFTER.) 10358b8e80941Smrg * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 10359b8e80941Smrg * PUSH instruction executed. 10360b8e80941Smrg * 10361b8e80941Smrg * NOTE: it seems we also need to reserve additional element in some 10362b8e80941Smrg * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, 10363b8e80941Smrg * then STACK_SIZE should be 2 instead of 1 */ 10364b8e80941Smrg if (reason == FC_PUSH_VPM || stack->push > 0) { 10365b8e80941Smrg elements += 1; 10366848b8605Smrg } 10367b8e80941Smrg break; 10368848b8605Smrg 10369b8e80941Smrg default: 10370b8e80941Smrg assert(0); 10371b8e80941Smrg break; 10372b8e80941Smrg } 10373848b8605Smrg 10374b8e80941Smrg /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 10375b8e80941Smrg * for all chips, so we use 4 in the final formula, not the real entry_size 10376b8e80941Smrg * for the chip */ 10377b8e80941Smrg entry_size = 4; 10378848b8605Smrg 10379b8e80941Smrg entries = (elements + (entry_size - 1)) / entry_size; 10380848b8605Smrg 10381b8e80941Smrg if (entries > stack->max_entries) 10382b8e80941Smrg stack->max_entries = entries; 10383b8e80941Smrg return elements; 10384b8e80941Smrg} 10385848b8605Smrg 10386b8e80941Smrgstatic inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 10387b8e80941Smrg{ 10388b8e80941Smrg switch(reason) { 10389b8e80941Smrg case FC_PUSH_VPM: 10390b8e80941Smrg --ctx->bc->stack.push; 10391b8e80941Smrg assert(ctx->bc->stack.push >= 0); 10392b8e80941Smrg break; 10393b8e80941Smrg case FC_PUSH_WQM: 10394b8e80941Smrg --ctx->bc->stack.push_wqm; 10395b8e80941Smrg assert(ctx->bc->stack.push_wqm >= 0); 10396b8e80941Smrg break; 10397b8e80941Smrg case FC_LOOP: 10398b8e80941Smrg --ctx->bc->stack.loop; 10399b8e80941Smrg assert(ctx->bc->stack.loop >= 0); 10400b8e80941Smrg break; 10401b8e80941Smrg default: 10402b8e80941Smrg assert(0); 10403b8e80941Smrg break; 10404b8e80941Smrg } 10405b8e80941Smrg} 10406848b8605Smrg 10407b8e80941Smrgstatic inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 10408b8e80941Smrg{ 10409b8e80941Smrg switch (reason) { 10410b8e80941Smrg case FC_PUSH_VPM: 10411b8e80941Smrg ++ctx->bc->stack.push; 10412b8e80941Smrg break; 10413b8e80941Smrg case FC_PUSH_WQM: 10414b8e80941Smrg ++ctx->bc->stack.push_wqm; 10415b8e80941Smrg break; 10416b8e80941Smrg case FC_LOOP: 10417b8e80941Smrg ++ctx->bc->stack.loop; 10418b8e80941Smrg break; 10419b8e80941Smrg default: 10420b8e80941Smrg assert(0); 10421b8e80941Smrg } 10422848b8605Smrg 10423b8e80941Smrg return callstack_update_max_depth(ctx, reason); 10424b8e80941Smrg} 10425848b8605Smrg 10426b8e80941Smrgstatic void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 10427b8e80941Smrg{ 10428b8e80941Smrg struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 10429848b8605Smrg 10430b8e80941Smrg sp->mid = realloc((void *)sp->mid, 10431b8e80941Smrg sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 10432b8e80941Smrg sp->mid[sp->num_mid] = ctx->bc->cf_last; 10433b8e80941Smrg sp->num_mid++; 10434b8e80941Smrg} 10435848b8605Smrg 10436b8e80941Smrgstatic void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 10437b8e80941Smrg{ 10438b8e80941Smrg assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack)); 10439b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 10440b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 10441b8e80941Smrg ctx->bc->fc_sp++; 10442b8e80941Smrg} 10443848b8605Smrg 10444b8e80941Smrgstatic void fc_poplevel(struct r600_shader_ctx *ctx) 10445b8e80941Smrg{ 10446b8e80941Smrg struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1]; 10447b8e80941Smrg free(sp->mid); 10448b8e80941Smrg sp->mid = NULL; 10449b8e80941Smrg sp->num_mid = 0; 10450b8e80941Smrg sp->start = NULL; 10451b8e80941Smrg sp->type = 0; 10452b8e80941Smrg ctx->bc->fc_sp--; 10453b8e80941Smrg} 10454848b8605Smrg 10455b8e80941Smrg#if 0 10456b8e80941Smrgstatic int emit_return(struct r600_shader_ctx *ctx) 10457b8e80941Smrg{ 10458b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 10459b8e80941Smrg return 0; 10460b8e80941Smrg} 10461848b8605Smrg 10462b8e80941Smrgstatic int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 10463b8e80941Smrg{ 10464b8e80941Smrg 10465b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 10466b8e80941Smrg ctx->bc->cf_last->pop_count = pops; 10467b8e80941Smrg /* XXX work out offset */ 10468b8e80941Smrg return 0; 10469b8e80941Smrg} 10470848b8605Smrg 10471b8e80941Smrgstatic int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 10472b8e80941Smrg{ 10473b8e80941Smrg return 0; 10474b8e80941Smrg} 10475848b8605Smrg 10476b8e80941Smrgstatic void emit_testflag(struct r600_shader_ctx *ctx) 10477b8e80941Smrg{ 10478848b8605Smrg 10479b8e80941Smrg} 10480848b8605Smrg 10481b8e80941Smrgstatic void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 10482b8e80941Smrg{ 10483b8e80941Smrg emit_testflag(ctx); 10484b8e80941Smrg emit_jump_to_offset(ctx, 1, 4); 10485b8e80941Smrg emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 10486b8e80941Smrg pops(ctx, ifidx + 1); 10487b8e80941Smrg emit_return(ctx); 10488b8e80941Smrg} 10489848b8605Smrg 10490b8e80941Smrgstatic void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 10491b8e80941Smrg{ 10492b8e80941Smrg emit_testflag(ctx); 10493848b8605Smrg 10494b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 10495b8e80941Smrg ctx->bc->cf_last->pop_count = 1; 10496848b8605Smrg 10497b8e80941Smrg fc_set_mid(ctx, fc_sp); 10498848b8605Smrg 10499b8e80941Smrg pops(ctx, 1); 10500b8e80941Smrg} 10501b8e80941Smrg#endif 10502848b8605Smrg 10503b8e80941Smrgstatic int emit_if(struct r600_shader_ctx *ctx, int opcode, 10504b8e80941Smrg struct r600_bytecode_alu_src *src) 10505b8e80941Smrg{ 10506b8e80941Smrg int alu_type = CF_OP_ALU_PUSH_BEFORE; 10507b8e80941Smrg bool needs_workaround = false; 10508b8e80941Smrg int elems = callstack_push(ctx, FC_PUSH_VPM); 10509848b8605Smrg 10510b8e80941Smrg if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) 10511b8e80941Smrg needs_workaround = true; 10512848b8605Smrg 10513b8e80941Smrg if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) { 10514b8e80941Smrg unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size; 10515b8e80941Smrg unsigned dmod2 = (elems) % ctx->bc->stack.entry_size; 10516848b8605Smrg 10517b8e80941Smrg if (elems && (!dmod1 || !dmod2)) 10518b8e80941Smrg needs_workaround = true; 10519b8e80941Smrg } 10520848b8605Smrg 10521b8e80941Smrg /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 10522b8e80941Smrg * LOOP_STARTxxx for nested loops may put the branch stack into a state 10523b8e80941Smrg * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this 10524b8e80941Smrg * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 10525b8e80941Smrg if (needs_workaround) { 10526b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 10527b8e80941Smrg ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 10528b8e80941Smrg alu_type = CF_OP_ALU; 10529848b8605Smrg } 10530848b8605Smrg 10531b8e80941Smrg emit_logic_pred(ctx, opcode, alu_type, src); 10532848b8605Smrg 10533b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 10534848b8605Smrg 10535b8e80941Smrg fc_pushlevel(ctx, FC_IF); 10536848b8605Smrg 10537b8e80941Smrg return 0; 10538b8e80941Smrg} 10539848b8605Smrg 10540b8e80941Smrgstatic int tgsi_if(struct r600_shader_ctx *ctx) 10541b8e80941Smrg{ 10542b8e80941Smrg struct r600_bytecode_alu_src alu_src; 10543b8e80941Smrg r600_bytecode_src(&alu_src, &ctx->src[0], 0); 10544b8e80941Smrg 10545b8e80941Smrg return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src); 10546848b8605Smrg} 10547848b8605Smrg 10548b8e80941Smrgstatic int tgsi_uif(struct r600_shader_ctx *ctx) 10549848b8605Smrg{ 10550b8e80941Smrg struct r600_bytecode_alu_src alu_src; 10551b8e80941Smrg r600_bytecode_src(&alu_src, &ctx->src[0], 0); 10552b8e80941Smrg return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 10553b8e80941Smrg} 10554848b8605Smrg 10555b8e80941Smrgstatic int tgsi_else(struct r600_shader_ctx *ctx) 10556b8e80941Smrg{ 10557b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 10558b8e80941Smrg ctx->bc->cf_last->pop_count = 1; 10559848b8605Smrg 10560b8e80941Smrg fc_set_mid(ctx, ctx->bc->fc_sp - 1); 10561b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id; 10562b8e80941Smrg return 0; 10563b8e80941Smrg} 10564b8e80941Smrg 10565b8e80941Smrgstatic int tgsi_endif(struct r600_shader_ctx *ctx) 10566b8e80941Smrg{ 10567b8e80941Smrg int offset = 2; 10568b8e80941Smrg pops(ctx, 1); 10569b8e80941Smrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) { 10570b8e80941Smrg R600_ERR("if/endif unbalanced in shader\n"); 10571848b8605Smrg return -1; 10572848b8605Smrg } 10573848b8605Smrg 10574b8e80941Smrg /* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */ 10575b8e80941Smrg if (ctx->bc->cf_last->eg_alu_extended) 10576b8e80941Smrg offset += 2; 10577b8e80941Smrg 10578b8e80941Smrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) { 10579b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + offset; 10580b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1; 10581b8e80941Smrg } else { 10582b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + offset; 10583848b8605Smrg } 10584b8e80941Smrg fc_poplevel(ctx); 10585848b8605Smrg 10586b8e80941Smrg callstack_pop(ctx, FC_PUSH_VPM); 10587848b8605Smrg return 0; 10588848b8605Smrg} 10589b8e80941Smrg 10590b8e80941Smrgstatic int tgsi_bgnloop(struct r600_shader_ctx *ctx) 10591848b8605Smrg{ 10592b8e80941Smrg /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 10593b8e80941Smrg * limited to 4096 iterations, like the other LOOP_* instructions. */ 10594b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 10595848b8605Smrg 10596b8e80941Smrg fc_pushlevel(ctx, FC_LOOP); 10597848b8605Smrg 10598b8e80941Smrg /* check stack depth */ 10599b8e80941Smrg callstack_push(ctx, FC_LOOP); 10600b8e80941Smrg return 0; 10601b8e80941Smrg} 10602b8e80941Smrg 10603b8e80941Smrgstatic int tgsi_endloop(struct r600_shader_ctx *ctx) 10604b8e80941Smrg{ 10605b8e80941Smrg int i; 10606b8e80941Smrg 10607b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 10608b8e80941Smrg 10609b8e80941Smrg if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) { 10610b8e80941Smrg R600_ERR("loop/endloop in shader code are not paired.\n"); 10611b8e80941Smrg return -EINVAL; 10612b8e80941Smrg } 10613b8e80941Smrg 10614b8e80941Smrg /* fixup loop pointers - from r600isa 10615b8e80941Smrg LOOP END points to CF after LOOP START, 10616b8e80941Smrg LOOP START point to CF after LOOP END 10617b8e80941Smrg BRK/CONT point to LOOP END CF 10618b8e80941Smrg */ 10619b8e80941Smrg ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2; 10620b8e80941Smrg 10621b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2; 10622b8e80941Smrg 10623b8e80941Smrg for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) { 10624b8e80941Smrg ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id; 10625b8e80941Smrg } 10626b8e80941Smrg /* XXX add LOOPRET support */ 10627b8e80941Smrg fc_poplevel(ctx); 10628b8e80941Smrg callstack_pop(ctx, FC_LOOP); 10629b8e80941Smrg return 0; 10630b8e80941Smrg} 10631b8e80941Smrg 10632b8e80941Smrgstatic int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 10633b8e80941Smrg{ 10634b8e80941Smrg unsigned int fscp; 10635b8e80941Smrg 10636b8e80941Smrg for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 10637b8e80941Smrg { 10638b8e80941Smrg if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type) 10639b8e80941Smrg break; 10640848b8605Smrg } 10641848b8605Smrg 10642b8e80941Smrg if (fscp == 0) { 10643b8e80941Smrg R600_ERR("Break not inside loop/endloop pair\n"); 10644b8e80941Smrg return -EINVAL; 10645b8e80941Smrg } 10646b8e80941Smrg 10647b8e80941Smrg r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 10648b8e80941Smrg 10649b8e80941Smrg fc_set_mid(ctx, fscp - 1); 10650b8e80941Smrg 10651848b8605Smrg return 0; 10652848b8605Smrg} 10653848b8605Smrg 10654b8e80941Smrgstatic int tgsi_gs_emit(struct r600_shader_ctx *ctx) 10655b8e80941Smrg{ 10656b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10657b8e80941Smrg int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX]; 10658b8e80941Smrg int r; 10659b8e80941Smrg 10660b8e80941Smrg if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 10661b8e80941Smrg emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE); 10662b8e80941Smrg 10663b8e80941Smrg r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 10664b8e80941Smrg if (!r) { 10665b8e80941Smrg ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream 10666b8e80941Smrg if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 10667b8e80941Smrg return emit_inc_ring_offset(ctx, stream, TRUE); 10668b8e80941Smrg } 10669b8e80941Smrg return r; 10670b8e80941Smrg} 10671b8e80941Smrg 10672b8e80941Smrgstatic int tgsi_umad(struct r600_shader_ctx *ctx) 10673848b8605Smrg{ 10674848b8605Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10675848b8605Smrg struct r600_bytecode_alu alu; 10676b8e80941Smrg int i, j, r; 10677b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10678b8e80941Smrg 10679b8e80941Smrg /* src0 * src1 */ 10680b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10681b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 10682b8e80941Smrg continue; 10683848b8605Smrg 10684848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10685848b8605Smrg 10686b8e80941Smrg alu.dst.chan = i; 10687b8e80941Smrg alu.dst.sel = ctx->temp_reg; 10688b8e80941Smrg alu.dst.write = 1; 10689848b8605Smrg 10690b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 10691b8e80941Smrg for (j = 0; j < 2; j++) { 10692b8e80941Smrg r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 10693848b8605Smrg } 10694848b8605Smrg 10695b8e80941Smrg alu.last = 1; 10696b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 10697b8e80941Smrg if (r) 10698b8e80941Smrg return r; 10699b8e80941Smrg } 10700b8e80941Smrg 10701b8e80941Smrg 10702b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10703b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 10704b8e80941Smrg continue; 10705b8e80941Smrg 10706b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10707b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 10708b8e80941Smrg 10709b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 10710b8e80941Smrg 10711b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 10712b8e80941Smrg alu.src[0].chan = i; 10713b8e80941Smrg 10714b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 10715b8e80941Smrg if (i == lasti) { 10716848b8605Smrg alu.last = 1; 10717b8e80941Smrg } 10718848b8605Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10719848b8605Smrg if (r) 10720848b8605Smrg return r; 10721848b8605Smrg } 10722848b8605Smrg return 0; 10723848b8605Smrg} 10724848b8605Smrg 10725b8e80941Smrgstatic int tgsi_pk2h(struct r600_shader_ctx *ctx) 10726848b8605Smrg{ 10727b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10728848b8605Smrg struct r600_bytecode_alu alu; 10729b8e80941Smrg int r, i; 10730b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10731848b8605Smrg 10732b8e80941Smrg /* temp.xy = f32_to_f16(src) */ 10733848b8605Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10734b8e80941Smrg alu.op = ALU_OP1_FLT32_TO_FLT16; 10735b8e80941Smrg alu.dst.chan = 0; 10736848b8605Smrg alu.dst.sel = ctx->temp_reg; 10737848b8605Smrg alu.dst.write = 1; 10738848b8605Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10739b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10740b8e80941Smrg if (r) 10741b8e80941Smrg return r; 10742b8e80941Smrg alu.dst.chan = 1; 10743b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 10744848b8605Smrg alu.last = 1; 10745b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10746848b8605Smrg if (r) 10747848b8605Smrg return r; 10748848b8605Smrg 10749b8e80941Smrg /* dst.x = temp.y * 0x10000 + temp.x */ 10750b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10751b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 10752b8e80941Smrg continue; 10753848b8605Smrg 10754b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10755b8e80941Smrg alu.op = ALU_OP3_MULADD_UINT24; 10756b8e80941Smrg alu.is_op3 = 1; 10757b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 10758b8e80941Smrg alu.last = i == lasti; 10759b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 10760b8e80941Smrg alu.src[0].chan = 1; 10761b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 10762b8e80941Smrg alu.src[1].value = 0x10000; 10763b8e80941Smrg alu.src[2].sel = ctx->temp_reg; 10764b8e80941Smrg alu.src[2].chan = 0; 10765b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10766b8e80941Smrg if (r) 10767b8e80941Smrg return r; 10768848b8605Smrg } 10769848b8605Smrg 10770848b8605Smrg return 0; 10771848b8605Smrg} 10772848b8605Smrg 10773b8e80941Smrgstatic int tgsi_up2h(struct r600_shader_ctx *ctx) 10774848b8605Smrg{ 10775b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10776b8e80941Smrg struct r600_bytecode_alu alu; 10777b8e80941Smrg int r, i; 10778b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10779848b8605Smrg 10780b8e80941Smrg /* temp.x = src.x */ 10781b8e80941Smrg /* note: no need to mask out the high bits */ 10782b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10783b8e80941Smrg alu.op = ALU_OP1_MOV; 10784b8e80941Smrg alu.dst.chan = 0; 10785b8e80941Smrg alu.dst.sel = ctx->temp_reg; 10786b8e80941Smrg alu.dst.write = 1; 10787b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10788b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10789b8e80941Smrg if (r) 10790b8e80941Smrg return r; 10791848b8605Smrg 10792b8e80941Smrg /* temp.y = src.x >> 16 */ 10793b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10794b8e80941Smrg alu.op = ALU_OP2_LSHR_INT; 10795b8e80941Smrg alu.dst.chan = 1; 10796b8e80941Smrg alu.dst.sel = ctx->temp_reg; 10797b8e80941Smrg alu.dst.write = 1; 10798b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10799b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 10800b8e80941Smrg alu.src[1].value = 16; 10801b8e80941Smrg alu.last = 1; 10802b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10803b8e80941Smrg if (r) 10804b8e80941Smrg return r; 10805848b8605Smrg 10806b8e80941Smrg /* dst.wz = dst.xy = f16_to_f32(temp.xy) */ 10807b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10808b8e80941Smrg if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 10809b8e80941Smrg continue; 10810b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10811b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 10812b8e80941Smrg alu.op = ALU_OP1_FLT16_TO_FLT32; 10813b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 10814b8e80941Smrg alu.src[0].chan = i % 2; 10815b8e80941Smrg alu.last = i == lasti; 10816b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10817b8e80941Smrg if (r) 10818b8e80941Smrg return r; 10819848b8605Smrg } 10820848b8605Smrg 10821b8e80941Smrg return 0; 10822848b8605Smrg} 10823848b8605Smrg 10824b8e80941Smrgstatic int tgsi_bfe(struct r600_shader_ctx *ctx) 10825848b8605Smrg{ 10826b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10827b8e80941Smrg struct r600_bytecode_alu alu; 10828b8e80941Smrg int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 10829b8e80941Smrg int r, i; 10830b8e80941Smrg int dst = -1; 10831848b8605Smrg 10832b8e80941Smrg if ((inst->Src[0].Register.File == inst->Dst[0].Register.File && 10833b8e80941Smrg inst->Src[0].Register.Index == inst->Dst[0].Register.Index) || 10834b8e80941Smrg (inst->Src[2].Register.File == inst->Dst[0].Register.File && 10835b8e80941Smrg inst->Src[2].Register.Index == inst->Dst[0].Register.Index)) 10836b8e80941Smrg dst = r600_get_temp(ctx); 10837848b8605Smrg 10838b8e80941Smrg r = tgsi_op3_dst(ctx, dst); 10839b8e80941Smrg if (r) 10840b8e80941Smrg return r; 10841848b8605Smrg 10842b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10843b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10844b8e80941Smrg alu.op = ALU_OP2_SETGE_INT; 10845b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[2], i); 10846b8e80941Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 10847b8e80941Smrg alu.src[1].value = 32; 10848b8e80941Smrg alu.dst.sel = ctx->temp_reg; 10849b8e80941Smrg alu.dst.chan = i; 10850b8e80941Smrg alu.dst.write = 1; 10851b8e80941Smrg if (i == lasti) 10852b8e80941Smrg alu.last = 1; 10853b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10854b8e80941Smrg if (r) 10855b8e80941Smrg return r; 10856b8e80941Smrg } 10857848b8605Smrg 10858b8e80941Smrg for (i = 0; i < lasti + 1; i++) { 10859b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10860b8e80941Smrg alu.op = ALU_OP3_CNDE_INT; 10861b8e80941Smrg alu.is_op3 = 1; 10862b8e80941Smrg alu.src[0].sel = ctx->temp_reg; 10863b8e80941Smrg alu.src[0].chan = i; 10864848b8605Smrg 10865b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 10866b8e80941Smrg if (dst != -1) 10867b8e80941Smrg alu.src[1].sel = dst; 10868b8e80941Smrg else 10869b8e80941Smrg alu.src[1].sel = alu.dst.sel; 10870b8e80941Smrg alu.src[1].chan = i; 10871b8e80941Smrg r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 10872b8e80941Smrg alu.dst.write = 1; 10873b8e80941Smrg if (i == lasti) 10874b8e80941Smrg alu.last = 1; 10875b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10876b8e80941Smrg if (r) 10877b8e80941Smrg return r; 10878b8e80941Smrg } 10879848b8605Smrg 10880848b8605Smrg return 0; 10881848b8605Smrg} 10882848b8605Smrg 10883b8e80941Smrgstatic int tgsi_clock(struct r600_shader_ctx *ctx) 10884848b8605Smrg{ 10885b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10886b8e80941Smrg struct r600_bytecode_alu alu; 10887b8e80941Smrg int r; 10888b8e80941Smrg 10889b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10890b8e80941Smrg alu.op = ALU_OP1_MOV; 10891b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 10892b8e80941Smrg alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO; 10893b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10894b8e80941Smrg if (r) 10895b8e80941Smrg return r; 10896b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10897b8e80941Smrg alu.op = ALU_OP1_MOV; 10898b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 10899b8e80941Smrg alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI; 10900b8e80941Smrg alu.last = 1; 10901b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10902b8e80941Smrg if (r) 10903b8e80941Smrg return r; 10904848b8605Smrg return 0; 10905848b8605Smrg} 10906848b8605Smrg 10907b8e80941Smrgstatic int emit_u64add(struct r600_shader_ctx *ctx, int op, 10908b8e80941Smrg int treg, 10909b8e80941Smrg int src0_sel, int src0_chan, 10910b8e80941Smrg int src1_sel, int src1_chan) 10911848b8605Smrg{ 10912b8e80941Smrg struct r600_bytecode_alu alu; 10913b8e80941Smrg int r; 10914b8e80941Smrg int opc; 10915848b8605Smrg 10916b8e80941Smrg if (op == ALU_OP2_ADD_INT) 10917b8e80941Smrg opc = ALU_OP2_ADDC_UINT; 10918b8e80941Smrg else 10919b8e80941Smrg opc = ALU_OP2_SUBB_UINT; 10920848b8605Smrg 10921b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10922b8e80941Smrg alu.op = op; ; 10923b8e80941Smrg alu.dst.sel = treg; 10924b8e80941Smrg alu.dst.chan = 0; 10925b8e80941Smrg alu.dst.write = 1; 10926b8e80941Smrg alu.src[0].sel = src0_sel; 10927b8e80941Smrg alu.src[0].chan = src0_chan + 0; 10928b8e80941Smrg alu.src[1].sel = src1_sel; 10929b8e80941Smrg alu.src[1].chan = src1_chan + 0; 10930b8e80941Smrg alu.src[1].neg = 0; 10931b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10932b8e80941Smrg if (r) 10933b8e80941Smrg return r; 10934848b8605Smrg 10935b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10936b8e80941Smrg alu.op = op; 10937b8e80941Smrg alu.dst.sel = treg; 10938b8e80941Smrg alu.dst.chan = 1; 10939b8e80941Smrg alu.dst.write = 1; 10940b8e80941Smrg alu.src[0].sel = src0_sel; 10941b8e80941Smrg alu.src[0].chan = src0_chan + 1; 10942b8e80941Smrg alu.src[1].sel = src1_sel; 10943b8e80941Smrg alu.src[1].chan = src1_chan + 1; 10944b8e80941Smrg alu.src[1].neg = 0; 10945b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10946b8e80941Smrg if (r) 10947b8e80941Smrg return r; 10948848b8605Smrg 10949b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10950b8e80941Smrg alu.op = opc; 10951b8e80941Smrg alu.dst.sel = treg; 10952b8e80941Smrg alu.dst.chan = 2; 10953b8e80941Smrg alu.dst.write = 1; 10954b8e80941Smrg alu.last = 1; 10955b8e80941Smrg alu.src[0].sel = src0_sel; 10956b8e80941Smrg alu.src[0].chan = src0_chan + 0; 10957b8e80941Smrg alu.src[1].sel = src1_sel; 10958b8e80941Smrg alu.src[1].chan = src1_chan + 0; 10959b8e80941Smrg alu.src[1].neg = 0; 10960b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10961b8e80941Smrg if (r) 10962b8e80941Smrg return r; 10963848b8605Smrg 10964b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10965b8e80941Smrg alu.op = op; 10966b8e80941Smrg alu.dst.sel = treg; 10967b8e80941Smrg alu.dst.chan = 1; 10968b8e80941Smrg alu.dst.write = 1; 10969b8e80941Smrg alu.src[0].sel = treg; 10970b8e80941Smrg alu.src[0].chan = 1; 10971b8e80941Smrg alu.src[1].sel = treg; 10972b8e80941Smrg alu.src[1].chan = 2; 10973b8e80941Smrg alu.last = 1; 10974b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 10975b8e80941Smrg if (r) 10976b8e80941Smrg return r; 10977b8e80941Smrg return 0; 10978848b8605Smrg} 10979848b8605Smrg 10980b8e80941Smrgstatic int egcm_u64add(struct r600_shader_ctx *ctx) 10981848b8605Smrg{ 10982b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 10983b8e80941Smrg struct r600_bytecode_alu alu; 10984b8e80941Smrg int r; 10985b8e80941Smrg int treg = ctx->temp_reg; 10986b8e80941Smrg int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT; 10987848b8605Smrg 10988b8e80941Smrg if (ctx->src[1].neg) { 10989b8e80941Smrg op = ALU_OP2_SUB_INT; 10990b8e80941Smrg opc = ALU_OP2_SUBB_UINT; 10991848b8605Smrg } 10992b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 10993b8e80941Smrg alu.op = op; ; 10994b8e80941Smrg alu.dst.sel = treg; 10995b8e80941Smrg alu.dst.chan = 0; 10996b8e80941Smrg alu.dst.write = 1; 10997b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 10998b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 10999b8e80941Smrg alu.src[1].neg = 0; 11000b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11001b8e80941Smrg if (r) 11002b8e80941Smrg return r; 11003848b8605Smrg 11004b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11005b8e80941Smrg alu.op = op; 11006b8e80941Smrg alu.dst.sel = treg; 11007b8e80941Smrg alu.dst.chan = 1; 11008b8e80941Smrg alu.dst.write = 1; 11009b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 11010b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 11011b8e80941Smrg alu.src[1].neg = 0; 11012b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11013b8e80941Smrg if (r) 11014b8e80941Smrg return r; 11015848b8605Smrg 11016b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11017b8e80941Smrg alu.op = opc ; 11018b8e80941Smrg alu.dst.sel = treg; 11019b8e80941Smrg alu.dst.chan = 2; 11020b8e80941Smrg alu.dst.write = 1; 11021b8e80941Smrg alu.last = 1; 11022b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 11023b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 11024b8e80941Smrg alu.src[1].neg = 0; 11025b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11026b8e80941Smrg if (r) 11027b8e80941Smrg return r; 11028848b8605Smrg 11029b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11030b8e80941Smrg alu.op = op; 11031b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 11032b8e80941Smrg alu.src[0].sel = treg; 11033b8e80941Smrg alu.src[0].chan = 1; 11034b8e80941Smrg alu.src[1].sel = treg; 11035b8e80941Smrg alu.src[1].chan = 2; 11036b8e80941Smrg alu.last = 1; 11037b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11038b8e80941Smrg if (r) 11039b8e80941Smrg return r; 11040b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11041b8e80941Smrg alu.op = ALU_OP1_MOV; 11042b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 11043b8e80941Smrg alu.src[0].sel = treg; 11044b8e80941Smrg alu.src[0].chan = 0; 11045b8e80941Smrg alu.last = 1; 11046b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11047b8e80941Smrg if (r) 11048b8e80941Smrg return r; 11049848b8605Smrg return 0; 11050848b8605Smrg} 11051848b8605Smrg 11052b8e80941Smrg/* result.y = mul_high a, b 11053b8e80941Smrg result.x = mul a,b 11054b8e80941Smrg result.y += a.x * b.y + a.y * b.x; 11055b8e80941Smrg*/ 11056b8e80941Smrgstatic int egcm_u64mul(struct r600_shader_ctx *ctx) 11057848b8605Smrg{ 11058b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 11059b8e80941Smrg struct r600_bytecode_alu alu; 11060b8e80941Smrg int r; 11061b8e80941Smrg int treg = ctx->temp_reg; 11062848b8605Smrg 11063b8e80941Smrg /* temp.x = mul_lo a.x, b.x */ 11064b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11065b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 11066b8e80941Smrg alu.dst.sel = treg; 11067b8e80941Smrg alu.dst.chan = 0; 11068b8e80941Smrg alu.dst.write = 1; 11069b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 11070b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 11071b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 11072b8e80941Smrg if (r) 11073b8e80941Smrg return r; 11074848b8605Smrg 11075b8e80941Smrg /* temp.y = mul_hi a.x, b.x */ 11076b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11077b8e80941Smrg alu.op = ALU_OP2_MULHI_UINT; 11078b8e80941Smrg alu.dst.sel = treg; 11079b8e80941Smrg alu.dst.chan = 1; 11080b8e80941Smrg alu.dst.write = 1; 11081b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 11082b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 11083b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 11084b8e80941Smrg if (r) 11085b8e80941Smrg return r; 11086848b8605Smrg 11087b8e80941Smrg /* temp.z = mul a.x, b.y */ 11088b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11089b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 11090b8e80941Smrg alu.dst.sel = treg; 11091b8e80941Smrg alu.dst.chan = 2; 11092b8e80941Smrg alu.dst.write = 1; 11093b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 11094b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 11095b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 11096b8e80941Smrg if (r) 11097b8e80941Smrg return r; 11098848b8605Smrg 11099b8e80941Smrg /* temp.w = mul a.y, b.x */ 11100b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11101b8e80941Smrg alu.op = ALU_OP2_MULLO_UINT; 11102b8e80941Smrg alu.dst.sel = treg; 11103b8e80941Smrg alu.dst.chan = 3; 11104b8e80941Smrg alu.dst.write = 1; 11105b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 11106b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 11107b8e80941Smrg r = emit_mul_int_op(ctx->bc, &alu); 11108b8e80941Smrg if (r) 11109b8e80941Smrg return r; 11110848b8605Smrg 11111b8e80941Smrg /* temp.z = temp.z + temp.w */ 11112b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11113b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 11114b8e80941Smrg alu.dst.sel = treg; 11115b8e80941Smrg alu.dst.chan = 2; 11116b8e80941Smrg alu.dst.write = 1; 11117b8e80941Smrg alu.src[0].sel = treg; 11118b8e80941Smrg alu.src[0].chan = 2; 11119b8e80941Smrg alu.src[1].sel = treg; 11120b8e80941Smrg alu.src[1].chan = 3; 11121b8e80941Smrg alu.last = 1; 11122b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11123b8e80941Smrg if (r) 11124b8e80941Smrg return r; 11125b8e80941Smrg 11126b8e80941Smrg /* temp.y = temp.y + temp.z */ 11127b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11128b8e80941Smrg alu.op = ALU_OP2_ADD_INT; 11129b8e80941Smrg alu.dst.sel = treg; 11130b8e80941Smrg alu.dst.chan = 1; 11131b8e80941Smrg alu.dst.write = 1; 11132b8e80941Smrg alu.src[0].sel = treg; 11133b8e80941Smrg alu.src[0].chan = 1; 11134b8e80941Smrg alu.src[1].sel = treg; 11135b8e80941Smrg alu.src[1].chan = 2; 11136b8e80941Smrg alu.last = 1; 11137b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11138b8e80941Smrg if (r) 11139b8e80941Smrg return r; 11140b8e80941Smrg 11141b8e80941Smrg /* dst.x = temp.x */ 11142b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11143b8e80941Smrg alu.op = ALU_OP1_MOV; 11144b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 11145b8e80941Smrg alu.src[0].sel = treg; 11146b8e80941Smrg alu.src[0].chan = 0; 11147b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11148b8e80941Smrg if (r) 11149b8e80941Smrg return r; 11150b8e80941Smrg 11151b8e80941Smrg /* dst.y = temp.y */ 11152b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11153b8e80941Smrg alu.op = ALU_OP1_MOV; 11154b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 11155b8e80941Smrg alu.src[0].sel = treg; 11156b8e80941Smrg alu.src[0].chan = 1; 11157b8e80941Smrg alu.last = 1; 11158b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11159b8e80941Smrg if (r) 11160b8e80941Smrg return r; 11161848b8605Smrg 11162848b8605Smrg return 0; 11163848b8605Smrg} 11164848b8605Smrg 11165b8e80941Smrgstatic int emit_u64sge(struct r600_shader_ctx *ctx, 11166b8e80941Smrg int treg, 11167b8e80941Smrg int src0_sel, int src0_base_chan, 11168b8e80941Smrg int src1_sel, int src1_base_chan) 11169848b8605Smrg{ 11170b8e80941Smrg int r; 11171b8e80941Smrg /* for 64-bit sge */ 11172b8e80941Smrg /* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */ 11173b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT, 11174b8e80941Smrg treg, 1, 11175b8e80941Smrg src0_sel, src0_base_chan + 1, 11176b8e80941Smrg src1_sel, src1_base_chan + 1); 11177b8e80941Smrg if (r) 11178b8e80941Smrg return r; 11179848b8605Smrg 11180b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 11181b8e80941Smrg treg, 0, 11182b8e80941Smrg src0_sel, src0_base_chan, 11183b8e80941Smrg src1_sel, src1_base_chan); 11184b8e80941Smrg if (r) 11185b8e80941Smrg return r; 11186848b8605Smrg 11187b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETE_INT, 11188b8e80941Smrg treg, 2, 11189b8e80941Smrg src0_sel, src0_base_chan + 1, 11190b8e80941Smrg src1_sel, src1_base_chan + 1); 11191b8e80941Smrg if (r) 11192b8e80941Smrg return r; 11193b8e80941Smrg 11194b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 11195b8e80941Smrg treg, 0, 11196b8e80941Smrg treg, 0, 11197b8e80941Smrg treg, 2); 11198b8e80941Smrg if (r) 11199b8e80941Smrg return r; 11200b8e80941Smrg 11201b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 11202b8e80941Smrg treg, 0, 11203b8e80941Smrg treg, 0, 11204b8e80941Smrg treg, 1); 11205b8e80941Smrg if (r) 11206b8e80941Smrg return r; 11207848b8605Smrg return 0; 11208848b8605Smrg} 11209848b8605Smrg 11210b8e80941Smrg/* this isn't a complete div it's just enough for qbo shader to work */ 11211b8e80941Smrgstatic int egcm_u64div(struct r600_shader_ctx *ctx) 11212848b8605Smrg{ 11213b8e80941Smrg struct r600_bytecode_alu alu; 11214b8e80941Smrg struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src; 11215b8e80941Smrg int r, i; 11216b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 11217848b8605Smrg 11218b8e80941Smrg /* make sure we are dividing my a const with 0 in the high bits */ 11219b8e80941Smrg if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL) 11220b8e80941Smrg return -1; 11221b8e80941Smrg if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0) 11222b8e80941Smrg return -1; 11223b8e80941Smrg /* make sure we are doing one division */ 11224b8e80941Smrg if (inst->Dst[0].Register.WriteMask != 0x3) 11225b8e80941Smrg return -1; 11226848b8605Smrg 11227b8e80941Smrg /* emit_if uses ctx->temp_reg so we can't */ 11228b8e80941Smrg int treg = r600_get_temp(ctx); 11229b8e80941Smrg int tmp_num = r600_get_temp(ctx); 11230b8e80941Smrg int sub_tmp = r600_get_temp(ctx); 11231b8e80941Smrg 11232b8e80941Smrg /* tmp quot are tmp_num.zw */ 11233b8e80941Smrg r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0); 11234b8e80941Smrg r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1); 11235b8e80941Smrg r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0); 11236b8e80941Smrg r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1); 11237b8e80941Smrg 11238b8e80941Smrg /* MOV tmp_num.xy, numerator */ 11239b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11240b8e80941Smrg tmp_num, 0, 11241b8e80941Smrg alu_num_lo.sel, alu_num_lo.chan, 11242b8e80941Smrg 0, 0); 11243b8e80941Smrg if (r) 11244b8e80941Smrg return r; 11245b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11246b8e80941Smrg tmp_num, 1, 11247b8e80941Smrg alu_num_hi.sel, alu_num_hi.chan, 11248b8e80941Smrg 0, 0); 11249b8e80941Smrg if (r) 11250b8e80941Smrg return r; 11251848b8605Smrg 11252b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11253b8e80941Smrg tmp_num, 2, 11254b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 0, 11255b8e80941Smrg 0, 0); 11256b8e80941Smrg if (r) 11257b8e80941Smrg return r; 11258848b8605Smrg 11259b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11260b8e80941Smrg tmp_num, 3, 11261b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 0, 11262b8e80941Smrg 0, 0); 11263b8e80941Smrg if (r) 11264b8e80941Smrg return r; 11265848b8605Smrg 11266b8e80941Smrg /* treg 0 is log2_denom */ 11267b8e80941Smrg /* normally this gets the MSB for the denom high value 11268b8e80941Smrg - however we know this will always be 0 here. */ 11269b8e80941Smrg r = single_alu_op2(ctx, 11270b8e80941Smrg ALU_OP1_MOV, 11271b8e80941Smrg treg, 0, 11272b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 32, 11273b8e80941Smrg 0, 0); 11274b8e80941Smrg if (r) 11275b8e80941Smrg return r; 11276848b8605Smrg 11277b8e80941Smrg /* normally check demon hi for 0, but we know it is already */ 11278b8e80941Smrg /* t0.z = num_hi >= denom_lo */ 11279b8e80941Smrg r = single_alu_op2(ctx, 11280b8e80941Smrg ALU_OP2_SETGE_UINT, 11281b8e80941Smrg treg, 1, 11282b8e80941Smrg alu_num_hi.sel, alu_num_hi.chan, 11283b8e80941Smrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 11284b8e80941Smrg if (r) 11285b8e80941Smrg return r; 11286848b8605Smrg 11287b8e80941Smrg memset(&alu_src, 0, sizeof(alu_src)); 11288b8e80941Smrg alu_src.sel = treg; 11289b8e80941Smrg alu_src.chan = 1; 11290b8e80941Smrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 11291b8e80941Smrg if (r) 11292b8e80941Smrg return r; 11293b8e80941Smrg 11294b8e80941Smrg /* for loops in here */ 11295b8e80941Smrg /* get msb t0.x = msb(src[1].x) first */ 11296b8e80941Smrg int msb_lo = util_last_bit(alu_denom_lo.value); 11297b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11298b8e80941Smrg treg, 0, 11299b8e80941Smrg V_SQ_ALU_SRC_LITERAL, msb_lo, 11300b8e80941Smrg 0, 0); 11301b8e80941Smrg if (r) 11302b8e80941Smrg return r; 11303848b8605Smrg 11304b8e80941Smrg /* unroll the asm here */ 11305b8e80941Smrg for (i = 0; i < 31; i++) { 11306b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 11307b8e80941Smrg treg, 2, 11308b8e80941Smrg V_SQ_ALU_SRC_LITERAL, i, 11309b8e80941Smrg treg, 0); 11310848b8605Smrg if (r) 11311848b8605Smrg return r; 11312848b8605Smrg 11313b8e80941Smrg /* we can do this on the CPU */ 11314b8e80941Smrg uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i); 11315b8e80941Smrg /* t0.z = tmp_num.y >= t0.z */ 11316b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 11317b8e80941Smrg treg, 1, 11318b8e80941Smrg tmp_num, 1, 11319b8e80941Smrg V_SQ_ALU_SRC_LITERAL, denom_lo_shl); 11320848b8605Smrg if (r) 11321848b8605Smrg return r; 11322848b8605Smrg 11323b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 11324b8e80941Smrg treg, 1, 11325b8e80941Smrg treg, 1, 11326b8e80941Smrg treg, 2); 11327848b8605Smrg if (r) 11328848b8605Smrg return r; 11329848b8605Smrg 11330b8e80941Smrg memset(&alu_src, 0, sizeof(alu_src)); 11331b8e80941Smrg alu_src.sel = treg; 11332b8e80941Smrg alu_src.chan = 1; 11333b8e80941Smrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 11334b8e80941Smrg if (r) 11335b8e80941Smrg return r; 11336848b8605Smrg 11337b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SUB_INT, 11338b8e80941Smrg tmp_num, 1, 11339b8e80941Smrg tmp_num, 1, 11340b8e80941Smrg V_SQ_ALU_SRC_LITERAL, denom_lo_shl); 11341b8e80941Smrg if (r) 11342b8e80941Smrg return r; 11343848b8605Smrg 11344b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 11345b8e80941Smrg tmp_num, 3, 11346b8e80941Smrg tmp_num, 3, 11347b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); 11348b8e80941Smrg if (r) 11349b8e80941Smrg return r; 11350848b8605Smrg 11351b8e80941Smrg r = tgsi_endif(ctx); 11352b8e80941Smrg if (r) 11353b8e80941Smrg return r; 11354848b8605Smrg } 11355848b8605Smrg 11356b8e80941Smrg /* log2_denom is always <= 31, so manually peel the last loop 11357b8e80941Smrg * iteration. 11358b8e80941Smrg */ 11359b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 11360b8e80941Smrg treg, 1, 11361b8e80941Smrg tmp_num, 1, 11362b8e80941Smrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 11363b8e80941Smrg if (r) 11364b8e80941Smrg return r; 11365848b8605Smrg 11366b8e80941Smrg memset(&alu_src, 0, sizeof(alu_src)); 11367b8e80941Smrg alu_src.sel = treg; 11368b8e80941Smrg alu_src.chan = 1; 11369b8e80941Smrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 11370b8e80941Smrg if (r) 11371b8e80941Smrg return r; 11372848b8605Smrg 11373b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SUB_INT, 11374b8e80941Smrg tmp_num, 1, 11375b8e80941Smrg tmp_num, 1, 11376b8e80941Smrg V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); 11377b8e80941Smrg if (r) 11378b8e80941Smrg return r; 11379848b8605Smrg 11380b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 11381b8e80941Smrg tmp_num, 3, 11382b8e80941Smrg tmp_num, 3, 11383b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 1U); 11384b8e80941Smrg if (r) 11385b8e80941Smrg return r; 11386b8e80941Smrg r = tgsi_endif(ctx); 11387b8e80941Smrg if (r) 11388b8e80941Smrg return r; 11389848b8605Smrg 11390b8e80941Smrg r = tgsi_endif(ctx); 11391b8e80941Smrg if (r) 11392b8e80941Smrg return r; 11393848b8605Smrg 11394b8e80941Smrg /* onto the second loop to unroll */ 11395b8e80941Smrg for (i = 0; i < 31; i++) { 11396b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, 11397b8e80941Smrg treg, 1, 11398b8e80941Smrg V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)), 11399b8e80941Smrg treg, 0); 11400b8e80941Smrg if (r) 11401b8e80941Smrg return r; 11402848b8605Smrg 11403b8e80941Smrg uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i); 11404b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11405b8e80941Smrg treg, 2, 11406b8e80941Smrg V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), 11407b8e80941Smrg 0, 0); 11408b8e80941Smrg if (r) 11409b8e80941Smrg return r; 11410848b8605Smrg 11411b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11412b8e80941Smrg treg, 3, 11413b8e80941Smrg V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), 11414b8e80941Smrg 0, 0); 11415b8e80941Smrg if (r) 11416b8e80941Smrg return r; 11417848b8605Smrg 11418b8e80941Smrg r = emit_u64sge(ctx, sub_tmp, 11419b8e80941Smrg tmp_num, 0, 11420b8e80941Smrg treg, 2); 11421b8e80941Smrg if (r) 11422b8e80941Smrg return r; 11423848b8605Smrg 11424b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_AND_INT, 11425b8e80941Smrg treg, 1, 11426b8e80941Smrg treg, 1, 11427b8e80941Smrg sub_tmp, 0); 11428b8e80941Smrg if (r) 11429b8e80941Smrg return r; 11430848b8605Smrg 11431b8e80941Smrg memset(&alu_src, 0, sizeof(alu_src)); 11432b8e80941Smrg alu_src.sel = treg; 11433b8e80941Smrg alu_src.chan = 1; 11434b8e80941Smrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 11435b8e80941Smrg if (r) 11436b8e80941Smrg return r; 11437848b8605Smrg 11438848b8605Smrg 11439b8e80941Smrg r = emit_u64add(ctx, ALU_OP2_SUB_INT, 11440b8e80941Smrg sub_tmp, 11441b8e80941Smrg tmp_num, 0, 11442b8e80941Smrg treg, 2); 11443b8e80941Smrg if (r) 11444b8e80941Smrg return r; 11445848b8605Smrg 11446b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11447b8e80941Smrg tmp_num, 0, 11448b8e80941Smrg sub_tmp, 0, 11449b8e80941Smrg 0, 0); 11450b8e80941Smrg if (r) 11451b8e80941Smrg return r; 11452848b8605Smrg 11453b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11454b8e80941Smrg tmp_num, 1, 11455b8e80941Smrg sub_tmp, 1, 11456b8e80941Smrg 0, 0); 11457b8e80941Smrg if (r) 11458b8e80941Smrg return r; 11459848b8605Smrg 11460b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 11461b8e80941Smrg tmp_num, 2, 11462b8e80941Smrg tmp_num, 2, 11463b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); 11464b8e80941Smrg if (r) 11465b8e80941Smrg return r; 11466848b8605Smrg 11467b8e80941Smrg r = tgsi_endif(ctx); 11468848b8605Smrg if (r) 11469848b8605Smrg return r; 11470848b8605Smrg } 11471b8e80941Smrg 11472b8e80941Smrg /* log2_denom is always <= 63, so manually peel the last loop 11473b8e80941Smrg * iteration. 11474b8e80941Smrg */ 11475b8e80941Smrg uint64_t denom_shl = (uint64_t)alu_denom_lo.value; 11476b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11477b8e80941Smrg treg, 2, 11478b8e80941Smrg V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), 11479b8e80941Smrg 0, 0); 11480b8e80941Smrg if (r) 11481b8e80941Smrg return r; 11482b8e80941Smrg 11483b8e80941Smrg r = single_alu_op2(ctx, ALU_OP1_MOV, 11484b8e80941Smrg treg, 3, 11485b8e80941Smrg V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), 11486b8e80941Smrg 0, 0); 11487b8e80941Smrg if (r) 11488b8e80941Smrg return r; 11489b8e80941Smrg 11490b8e80941Smrg r = emit_u64sge(ctx, sub_tmp, 11491b8e80941Smrg tmp_num, 0, 11492b8e80941Smrg treg, 2); 11493b8e80941Smrg if (r) 11494b8e80941Smrg return r; 11495b8e80941Smrg 11496b8e80941Smrg memset(&alu_src, 0, sizeof(alu_src)); 11497b8e80941Smrg alu_src.sel = sub_tmp; 11498b8e80941Smrg alu_src.chan = 0; 11499b8e80941Smrg r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); 11500b8e80941Smrg if (r) 11501b8e80941Smrg return r; 11502b8e80941Smrg 11503b8e80941Smrg r = emit_u64add(ctx, ALU_OP2_SUB_INT, 11504b8e80941Smrg sub_tmp, 11505b8e80941Smrg tmp_num, 0, 11506b8e80941Smrg treg, 2); 11507b8e80941Smrg if (r) 11508b8e80941Smrg return r; 11509b8e80941Smrg 11510b8e80941Smrg r = single_alu_op2(ctx, ALU_OP2_OR_INT, 11511b8e80941Smrg tmp_num, 2, 11512b8e80941Smrg tmp_num, 2, 11513b8e80941Smrg V_SQ_ALU_SRC_LITERAL, 1U); 11514b8e80941Smrg if (r) 11515b8e80941Smrg return r; 11516b8e80941Smrg r = tgsi_endif(ctx); 11517b8e80941Smrg if (r) 11518b8e80941Smrg return r; 11519b8e80941Smrg 11520b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11521b8e80941Smrg alu.op = ALU_OP1_MOV; 11522b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 11523b8e80941Smrg alu.src[0].sel = tmp_num; 11524b8e80941Smrg alu.src[0].chan = 2; 11525b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11526b8e80941Smrg if (r) 11527b8e80941Smrg return r; 11528b8e80941Smrg 11529b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11530b8e80941Smrg alu.op = ALU_OP1_MOV; 11531b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 11532b8e80941Smrg alu.src[0].sel = tmp_num; 11533b8e80941Smrg alu.src[0].chan = 3; 11534b8e80941Smrg alu.last = 1; 11535b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11536b8e80941Smrg if (r) 11537b8e80941Smrg return r; 11538848b8605Smrg return 0; 11539848b8605Smrg} 11540848b8605Smrg 11541b8e80941Smrgstatic int egcm_u64sne(struct r600_shader_ctx *ctx) 11542b8e80941Smrg{ 11543b8e80941Smrg struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 11544b8e80941Smrg struct r600_bytecode_alu alu; 11545b8e80941Smrg int r; 11546b8e80941Smrg int treg = ctx->temp_reg; 11547b8e80941Smrg 11548b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11549b8e80941Smrg alu.op = ALU_OP2_SETNE_INT; 11550b8e80941Smrg alu.dst.sel = treg; 11551b8e80941Smrg alu.dst.chan = 0; 11552b8e80941Smrg alu.dst.write = 1; 11553b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 11554b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 11555b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11556b8e80941Smrg if (r) 11557b8e80941Smrg return r; 11558848b8605Smrg 11559b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11560b8e80941Smrg alu.op = ALU_OP2_SETNE_INT; 11561b8e80941Smrg alu.dst.sel = treg; 11562b8e80941Smrg alu.dst.chan = 1; 11563b8e80941Smrg alu.dst.write = 1; 11564b8e80941Smrg r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 11565b8e80941Smrg r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 11566b8e80941Smrg alu.last = 1; 11567b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11568b8e80941Smrg if (r) 11569b8e80941Smrg return r; 11570b8e80941Smrg 11571b8e80941Smrg memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 11572b8e80941Smrg alu.op = ALU_OP2_OR_INT; 11573b8e80941Smrg tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 11574b8e80941Smrg alu.src[0].sel = treg; 11575b8e80941Smrg alu.src[0].chan = 0; 11576b8e80941Smrg alu.src[1].sel = treg; 11577b8e80941Smrg alu.src[1].chan = 1; 11578b8e80941Smrg alu.last = 1; 11579b8e80941Smrg r = r600_bytecode_add_alu(ctx->bc, &alu); 11580b8e80941Smrg if (r) 11581b8e80941Smrg return r; 11582b8e80941Smrg return 0; 11583b8e80941Smrg} 11584b8e80941Smrg 11585b8e80941Smrgstatic const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 11586b8e80941Smrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 11587b8e80941Smrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 11588b8e80941Smrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 11589b8e80941Smrg 11590b8e80941Smrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 11591b8e80941Smrg 11592b8e80941Smrg [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 11593b8e80941Smrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 11594b8e80941Smrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 11595b8e80941Smrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 11596b8e80941Smrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 11597b8e80941Smrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11598b8e80941Smrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11599b8e80941Smrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 11600b8e80941Smrg /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */ 11601b8e80941Smrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 11602b8e80941Smrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 11603b8e80941Smrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 11604b8e80941Smrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 11605b8e80941Smrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 11606b8e80941Smrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 11607b8e80941Smrg [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 11608b8e80941Smrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 11609b8e80941Smrg [21] = { ALU_OP0_NOP, tgsi_unsupported}, 11610b8e80941Smrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 11611b8e80941Smrg [23] = { ALU_OP0_NOP, tgsi_unsupported}, 11612b8e80941Smrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 11613b8e80941Smrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 11614b8e80941Smrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 11615b8e80941Smrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 11616b8e80941Smrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 11617b8e80941Smrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 11618b8e80941Smrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 11619b8e80941Smrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 11620b8e80941Smrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 11621b8e80941Smrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_unsupported}, 11622b8e80941Smrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 11623b8e80941Smrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 11624b8e80941Smrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 11625b8e80941Smrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 11626b8e80941Smrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 11627b8e80941Smrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 11628b8e80941Smrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 11629b8e80941Smrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 11630b8e80941Smrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 11631b8e80941Smrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 11632b8e80941Smrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 11633b8e80941Smrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 11634b8e80941Smrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 11635b8e80941Smrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 11636b8e80941Smrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 11637b8e80941Smrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 11638b8e80941Smrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 11639b8e80941Smrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 11640b8e80941Smrg [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 11641b8e80941Smrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 11642b8e80941Smrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 11643b8e80941Smrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 11644b8e80941Smrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 11645b8e80941Smrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 11646b8e80941Smrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 11647b8e80941Smrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 11648b8e80941Smrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 11649b8e80941Smrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 11650b8e80941Smrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 11651b8e80941Smrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 11652b8e80941Smrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 11653b8e80941Smrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 11654b8e80941Smrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 11655b8e80941Smrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 11656b8e80941Smrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 11657b8e80941Smrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 11658b8e80941Smrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 11659b8e80941Smrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11660b8e80941Smrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 11661b8e80941Smrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 11662b8e80941Smrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 11663b8e80941Smrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 11664b8e80941Smrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 11665b8e80941Smrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 11666b8e80941Smrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 11667b8e80941Smrg [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 11668b8e80941Smrg [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 11669b8e80941Smrg [81] = { ALU_OP0_NOP, tgsi_unsupported}, 11670b8e80941Smrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 11671b8e80941Smrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 11672b8e80941Smrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 11673b8e80941Smrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 11674b8e80941Smrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 11675b8e80941Smrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 11676b8e80941Smrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 11677b8e80941Smrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 11678b8e80941Smrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 11679b8e80941Smrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 11680b8e80941Smrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 11681b8e80941Smrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 11682b8e80941Smrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 11683b8e80941Smrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 11684b8e80941Smrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 11685b8e80941Smrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 11686b8e80941Smrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 11687b8e80941Smrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 11688b8e80941Smrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 11689b8e80941Smrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 11690b8e80941Smrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 11691b8e80941Smrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 11692b8e80941Smrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 11693b8e80941Smrg [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported}, 11694b8e80941Smrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 11695b8e80941Smrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 11696b8e80941Smrg [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 11697b8e80941Smrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 11698b8e80941Smrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 11699b8e80941Smrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 11700b8e80941Smrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported}, 11701b8e80941Smrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 11702b8e80941Smrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 11703b8e80941Smrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 11704b8e80941Smrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 11705b8e80941Smrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 11706b8e80941Smrg [TGSI_OPCODE_DFMA] = { ALU_OP0_NOP, tgsi_unsupported}, 11707b8e80941Smrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 11708b8e80941Smrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 11709b8e80941Smrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 11710b8e80941Smrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 11711b8e80941Smrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 11712b8e80941Smrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 11713b8e80941Smrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 11714b8e80941Smrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 11715b8e80941Smrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 11716b8e80941Smrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 11717b8e80941Smrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 11718b8e80941Smrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 11719b8e80941Smrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 11720b8e80941Smrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 11721b8e80941Smrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 11722b8e80941Smrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 11723b8e80941Smrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 11724b8e80941Smrg [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 11725b8e80941Smrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 11726b8e80941Smrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 11727b8e80941Smrg [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 11728b8e80941Smrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 11729b8e80941Smrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 11730b8e80941Smrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 11731b8e80941Smrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 11732b8e80941Smrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 11733b8e80941Smrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 11734b8e80941Smrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 11735b8e80941Smrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 11736b8e80941Smrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 11737b8e80941Smrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 11738b8e80941Smrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 11739b8e80941Smrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 11740b8e80941Smrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 11741b8e80941Smrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 11742b8e80941Smrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 11743b8e80941Smrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 11744b8e80941Smrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 11745b8e80941Smrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 11746b8e80941Smrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 11747b8e80941Smrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 11748b8e80941Smrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 11749b8e80941Smrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 11750b8e80941Smrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 11751b8e80941Smrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 11752b8e80941Smrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 11753b8e80941Smrg [165] = { ALU_OP0_NOP, tgsi_unsupported}, 11754b8e80941Smrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 11755b8e80941Smrg [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 11756b8e80941Smrg [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 11757b8e80941Smrg [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 11758b8e80941Smrg [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 11759b8e80941Smrg [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 11760b8e80941Smrg [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 11761b8e80941Smrg [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 11762b8e80941Smrg [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 11763b8e80941Smrg [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 11764b8e80941Smrg [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 11765b8e80941Smrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 11766b8e80941Smrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 11767b8e80941Smrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 11768b8e80941Smrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 11769b8e80941Smrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 11770b8e80941Smrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 11771b8e80941Smrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 11772b8e80941Smrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 11773b8e80941Smrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 11774b8e80941Smrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 11775b8e80941Smrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 11776b8e80941Smrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 11777b8e80941Smrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 11778b8e80941Smrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 11779b8e80941Smrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 11780b8e80941Smrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 11781b8e80941Smrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 11782b8e80941Smrg [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 11783b8e80941Smrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 11784848b8605Smrg}; 11785848b8605Smrg 11786b8e80941Smrgstatic const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 11787b8e80941Smrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 11788b8e80941Smrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 11789b8e80941Smrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 11790b8e80941Smrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 11791b8e80941Smrg [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 11792b8e80941Smrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 11793b8e80941Smrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 11794b8e80941Smrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 11795b8e80941Smrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 11796b8e80941Smrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11797b8e80941Smrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11798b8e80941Smrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 11799b8e80941Smrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 11800b8e80941Smrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 11801b8e80941Smrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 11802b8e80941Smrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 11803b8e80941Smrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 11804b8e80941Smrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 11805b8e80941Smrg [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, 11806b8e80941Smrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 11807b8e80941Smrg [21] = { ALU_OP0_NOP, tgsi_unsupported}, 11808b8e80941Smrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 11809b8e80941Smrg [23] = { ALU_OP0_NOP, tgsi_unsupported}, 11810b8e80941Smrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 11811b8e80941Smrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 11812b8e80941Smrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 11813b8e80941Smrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 11814b8e80941Smrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 11815b8e80941Smrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 11816b8e80941Smrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 11817b8e80941Smrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 11818b8e80941Smrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 11819b8e80941Smrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock}, 11820b8e80941Smrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 11821b8e80941Smrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 11822b8e80941Smrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 11823b8e80941Smrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 11824b8e80941Smrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 11825b8e80941Smrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 11826b8e80941Smrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h}, 11827b8e80941Smrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 11828b8e80941Smrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 11829b8e80941Smrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 11830b8e80941Smrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 11831b8e80941Smrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 11832b8e80941Smrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 11833b8e80941Smrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 11834b8e80941Smrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 11835b8e80941Smrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 11836b8e80941Smrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 11837b8e80941Smrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 11838b8e80941Smrg [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 11839b8e80941Smrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 11840b8e80941Smrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 11841b8e80941Smrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h}, 11842b8e80941Smrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 11843b8e80941Smrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 11844b8e80941Smrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 11845b8e80941Smrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 11846b8e80941Smrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 11847b8e80941Smrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 11848b8e80941Smrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 11849b8e80941Smrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 11850b8e80941Smrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 11851b8e80941Smrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 11852b8e80941Smrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 11853b8e80941Smrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 11854b8e80941Smrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 11855b8e80941Smrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 11856b8e80941Smrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 11857b8e80941Smrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 11858b8e80941Smrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 11859b8e80941Smrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 11860b8e80941Smrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 11861b8e80941Smrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 11862b8e80941Smrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 11863b8e80941Smrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 11864b8e80941Smrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 11865b8e80941Smrg [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 11866b8e80941Smrg [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 11867b8e80941Smrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 11868b8e80941Smrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 11869b8e80941Smrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 11870b8e80941Smrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 11871b8e80941Smrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 11872b8e80941Smrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 11873b8e80941Smrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 11874b8e80941Smrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 11875b8e80941Smrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 11876b8e80941Smrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 11877b8e80941Smrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 11878b8e80941Smrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 11879b8e80941Smrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 11880b8e80941Smrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 11881b8e80941Smrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 11882b8e80941Smrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 11883b8e80941Smrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 11884b8e80941Smrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 11885b8e80941Smrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 11886b8e80941Smrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 11887b8e80941Smrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 11888b8e80941Smrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 11889b8e80941Smrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 11890b8e80941Smrg [TGSI_OPCODE_RESQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq}, 11891b8e80941Smrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 11892b8e80941Smrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 11893b8e80941Smrg [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 11894b8e80941Smrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 11895b8e80941Smrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 11896b8e80941Smrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 11897b8e80941Smrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 11898b8e80941Smrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 11899b8e80941Smrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 11900b8e80941Smrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 11901b8e80941Smrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 11902b8e80941Smrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 11903b8e80941Smrg /* Refer below for TGSI_OPCODE_DFMA */ 11904b8e80941Smrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 11905b8e80941Smrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 11906b8e80941Smrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 11907b8e80941Smrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 11908b8e80941Smrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 11909b8e80941Smrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 11910b8e80941Smrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 11911b8e80941Smrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 11912b8e80941Smrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 11913b8e80941Smrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 11914b8e80941Smrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 11915b8e80941Smrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 11916b8e80941Smrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 11917b8e80941Smrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 11918b8e80941Smrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 11919b8e80941Smrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 11920b8e80941Smrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 11921b8e80941Smrg [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 11922b8e80941Smrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 11923b8e80941Smrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 11924b8e80941Smrg [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 11925b8e80941Smrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 11926b8e80941Smrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 11927b8e80941Smrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 11928b8e80941Smrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 11929b8e80941Smrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 11930b8e80941Smrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 11931b8e80941Smrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 11932b8e80941Smrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 11933b8e80941Smrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 11934b8e80941Smrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 11935b8e80941Smrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 11936b8e80941Smrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 11937b8e80941Smrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 11938b8e80941Smrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 11939b8e80941Smrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 11940b8e80941Smrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 11941b8e80941Smrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 11942b8e80941Smrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 11943b8e80941Smrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 11944b8e80941Smrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 11945b8e80941Smrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 11946b8e80941Smrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load}, 11947b8e80941Smrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store}, 11948b8e80941Smrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 11949b8e80941Smrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 11950b8e80941Smrg [165] = { ALU_OP0_NOP, tgsi_unsupported}, 11951b8e80941Smrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 11952b8e80941Smrg [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op}, 11953b8e80941Smrg [TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op}, 11954b8e80941Smrg [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op}, 11955b8e80941Smrg [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op}, 11956b8e80941Smrg [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op}, 11957b8e80941Smrg [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op}, 11958b8e80941Smrg [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op}, 11959b8e80941Smrg [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op}, 11960b8e80941Smrg [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op}, 11961b8e80941Smrg [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op}, 11962b8e80941Smrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 11963b8e80941Smrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 11964b8e80941Smrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 11965b8e80941Smrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 11966b8e80941Smrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 11967b8e80941Smrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 11968b8e80941Smrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 11969b8e80941Smrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_bfe}, 11970b8e80941Smrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_bfe}, 11971b8e80941Smrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 11972b8e80941Smrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 11973b8e80941Smrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 11974b8e80941Smrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 11975b8e80941Smrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 11976b8e80941Smrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 11977b8e80941Smrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 11978b8e80941Smrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 11979b8e80941Smrg [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 11980b8e80941Smrg [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 11981b8e80941Smrg [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 11982b8e80941Smrg [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 11983b8e80941Smrg [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 11984b8e80941Smrg [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 11985b8e80941Smrg [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 11986b8e80941Smrg [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr }, 11987b8e80941Smrg [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 11988b8e80941Smrg [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 11989b8e80941Smrg [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 11990b8e80941Smrg [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 11991b8e80941Smrg [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 11992b8e80941Smrg [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 11993b8e80941Smrg [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 11994b8e80941Smrg [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 11995b8e80941Smrg [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 11996b8e80941Smrg [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64}, 11997b8e80941Smrg [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 11998b8e80941Smrg [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 11999b8e80941Smrg [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 12000b8e80941Smrg [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 12001b8e80941Smrg [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 12002b8e80941Smrg [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 12003b8e80941Smrg [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 12004b8e80941Smrg [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 12005b8e80941Smrg [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, 12006b8e80941Smrg [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, 12007b8e80941Smrg [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, 12008b8e80941Smrg [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, 12009b8e80941Smrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 12010848b8605Smrg}; 12011848b8605Smrg 12012b8e80941Smrgstatic const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 12013b8e80941Smrg [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 12014b8e80941Smrg [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 12015b8e80941Smrg [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 12016b8e80941Smrg [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 12017b8e80941Smrg [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 12018b8e80941Smrg [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 12019b8e80941Smrg [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 12020b8e80941Smrg [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, 12021b8e80941Smrg [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 12022b8e80941Smrg [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 12023b8e80941Smrg [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 12024b8e80941Smrg [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 12025b8e80941Smrg [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, 12026b8e80941Smrg [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, 12027b8e80941Smrg [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 12028b8e80941Smrg [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 12029b8e80941Smrg [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, 12030b8e80941Smrg [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 12031b8e80941Smrg [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, 12032b8e80941Smrg [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 12033b8e80941Smrg [21] = { ALU_OP0_NOP, tgsi_unsupported}, 12034b8e80941Smrg [22] = { ALU_OP0_NOP, tgsi_unsupported}, 12035b8e80941Smrg [23] = { ALU_OP0_NOP, tgsi_unsupported}, 12036b8e80941Smrg [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 12037b8e80941Smrg [25] = { ALU_OP0_NOP, tgsi_unsupported}, 12038b8e80941Smrg [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 12039b8e80941Smrg [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 12040b8e80941Smrg [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 12041b8e80941Smrg [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 12042b8e80941Smrg [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 12043b8e80941Smrg [31] = { ALU_OP0_NOP, tgsi_unsupported}, 12044b8e80941Smrg [32] = { ALU_OP0_NOP, tgsi_unsupported}, 12045b8e80941Smrg [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock}, 12046b8e80941Smrg [34] = { ALU_OP0_NOP, tgsi_unsupported}, 12047b8e80941Smrg [35] = { ALU_OP0_NOP, tgsi_unsupported}, 12048b8e80941Smrg [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 12049b8e80941Smrg [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 12050b8e80941Smrg [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 12051b8e80941Smrg [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 12052b8e80941Smrg [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h}, 12053b8e80941Smrg [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 12054b8e80941Smrg [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 12055b8e80941Smrg [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 12056b8e80941Smrg [44] = { ALU_OP0_NOP, tgsi_unsupported}, 12057b8e80941Smrg [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 12058b8e80941Smrg [46] = { ALU_OP0_NOP, tgsi_unsupported}, 12059b8e80941Smrg [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 12060b8e80941Smrg [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 12061b8e80941Smrg [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 12062b8e80941Smrg [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 12063b8e80941Smrg [51] = { ALU_OP0_NOP, tgsi_unsupported}, 12064b8e80941Smrg [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 12065b8e80941Smrg [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 12066b8e80941Smrg [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 12067b8e80941Smrg [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h}, 12068b8e80941Smrg [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 12069b8e80941Smrg [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 12070b8e80941Smrg [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 12071b8e80941Smrg [59] = { ALU_OP0_NOP, tgsi_unsupported}, 12072b8e80941Smrg [60] = { ALU_OP0_NOP, tgsi_unsupported}, 12073b8e80941Smrg [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 12074b8e80941Smrg [62] = { ALU_OP0_NOP, tgsi_unsupported}, 12075b8e80941Smrg [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 12076b8e80941Smrg [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 12077b8e80941Smrg [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 12078b8e80941Smrg [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 12079b8e80941Smrg [67] = { ALU_OP0_NOP, tgsi_unsupported}, 12080b8e80941Smrg [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 12081b8e80941Smrg [69] = { ALU_OP0_NOP, tgsi_unsupported}, 12082b8e80941Smrg [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 12083b8e80941Smrg [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, 12084b8e80941Smrg [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 12085b8e80941Smrg [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 12086b8e80941Smrg [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 12087b8e80941Smrg [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 12088b8e80941Smrg [76] = { ALU_OP0_NOP, tgsi_unsupported}, 12089b8e80941Smrg [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 12090b8e80941Smrg [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 12091b8e80941Smrg [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 12092b8e80941Smrg [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 12093b8e80941Smrg [82] = { ALU_OP0_NOP, tgsi_unsupported}, 12094b8e80941Smrg [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 12095b8e80941Smrg [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 12096b8e80941Smrg [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 12097b8e80941Smrg [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 12098b8e80941Smrg [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 12099b8e80941Smrg [88] = { ALU_OP0_NOP, tgsi_unsupported}, 12100b8e80941Smrg [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 12101b8e80941Smrg [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 12102b8e80941Smrg [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 12103b8e80941Smrg [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 12104b8e80941Smrg [93] = { ALU_OP0_NOP, tgsi_unsupported}, 12105b8e80941Smrg [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 12106b8e80941Smrg [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 12107b8e80941Smrg [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 12108b8e80941Smrg [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 12109b8e80941Smrg [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 12110b8e80941Smrg [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 12111b8e80941Smrg [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 12112b8e80941Smrg [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 12113b8e80941Smrg [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 12114b8e80941Smrg [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 12115b8e80941Smrg [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 12116b8e80941Smrg [TGSI_OPCODE_RESQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_resq}, 12117b8e80941Smrg [106] = { ALU_OP0_NOP, tgsi_unsupported}, 12118b8e80941Smrg [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 12119b8e80941Smrg [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 12120b8e80941Smrg [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 12121b8e80941Smrg [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 12122b8e80941Smrg [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 12123b8e80941Smrg [TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 12124b8e80941Smrg [113] = { ALU_OP0_NOP, tgsi_unsupported}, 12125b8e80941Smrg [114] = { ALU_OP0_NOP, tgsi_unsupported}, 12126b8e80941Smrg [115] = { ALU_OP0_NOP, tgsi_unsupported}, 12127b8e80941Smrg [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 12128b8e80941Smrg [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 12129b8e80941Smrg /* Refer below for TGSI_OPCODE_DFMA */ 12130b8e80941Smrg [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 12131b8e80941Smrg [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 12132b8e80941Smrg [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 12133b8e80941Smrg [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 12134b8e80941Smrg [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 12135b8e80941Smrg [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 12136b8e80941Smrg [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 12137b8e80941Smrg [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 12138b8e80941Smrg [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 12139b8e80941Smrg [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 12140b8e80941Smrg [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 12141b8e80941Smrg [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 12142b8e80941Smrg [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 12143b8e80941Smrg [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 12144b8e80941Smrg [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 12145b8e80941Smrg [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 12146b8e80941Smrg [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 12147b8e80941Smrg [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 12148b8e80941Smrg [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 12149b8e80941Smrg [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 12150b8e80941Smrg [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 12151b8e80941Smrg [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 12152b8e80941Smrg [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 12153b8e80941Smrg [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 12154b8e80941Smrg [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 12155b8e80941Smrg [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 12156b8e80941Smrg [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 12157b8e80941Smrg [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 12158b8e80941Smrg [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 12159b8e80941Smrg [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 12160b8e80941Smrg [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 12161b8e80941Smrg [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 12162b8e80941Smrg [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 12163b8e80941Smrg [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 12164b8e80941Smrg [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 12165b8e80941Smrg [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 12166b8e80941Smrg [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 12167b8e80941Smrg [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 12168b8e80941Smrg [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 12169b8e80941Smrg [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 12170b8e80941Smrg [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 12171b8e80941Smrg [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 12172b8e80941Smrg [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load}, 12173b8e80941Smrg [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store}, 12174b8e80941Smrg [163] = { ALU_OP0_NOP, tgsi_unsupported}, 12175b8e80941Smrg [164] = { ALU_OP0_NOP, tgsi_unsupported}, 12176b8e80941Smrg [165] = { ALU_OP0_NOP, tgsi_unsupported}, 12177b8e80941Smrg [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 12178b8e80941Smrg [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op}, 12179b8e80941Smrg [TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op}, 12180b8e80941Smrg [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op}, 12181b8e80941Smrg [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op}, 12182b8e80941Smrg [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op}, 12183b8e80941Smrg [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op}, 12184b8e80941Smrg [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op}, 12185b8e80941Smrg [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op}, 12186b8e80941Smrg [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op}, 12187b8e80941Smrg [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op}, 12188b8e80941Smrg [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 12189b8e80941Smrg [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 12190b8e80941Smrg [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 12191b8e80941Smrg [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 12192b8e80941Smrg [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 12193b8e80941Smrg [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 12194b8e80941Smrg [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 12195b8e80941Smrg [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_bfe}, 12196b8e80941Smrg [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_bfe}, 12197b8e80941Smrg [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 12198b8e80941Smrg [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 12199b8e80941Smrg [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 12200b8e80941Smrg [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 12201b8e80941Smrg [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 12202b8e80941Smrg [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 12203b8e80941Smrg [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 12204b8e80941Smrg [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 12205b8e80941Smrg [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 12206b8e80941Smrg [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 12207b8e80941Smrg [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 12208b8e80941Smrg [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 12209b8e80941Smrg [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 12210b8e80941Smrg [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 12211b8e80941Smrg [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 12212b8e80941Smrg [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr }, 12213b8e80941Smrg [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 12214b8e80941Smrg [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 12215b8e80941Smrg [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 12216b8e80941Smrg [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 12217b8e80941Smrg [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 12218b8e80941Smrg [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 12219b8e80941Smrg [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 12220b8e80941Smrg [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 12221b8e80941Smrg [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 12222b8e80941Smrg [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64}, 12223b8e80941Smrg [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 12224b8e80941Smrg [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 12225b8e80941Smrg [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 12226b8e80941Smrg [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 12227b8e80941Smrg [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 12228b8e80941Smrg [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 12229b8e80941Smrg [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 12230b8e80941Smrg [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 12231b8e80941Smrg [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, 12232b8e80941Smrg [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, 12233b8e80941Smrg [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, 12234b8e80941Smrg [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, 12235b8e80941Smrg [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 12236848b8605Smrg}; 12237