r600_shader.c revision 3464ebd5
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60 61int r600_find_vs_semantic_index(struct r600_shader *vs, 62 struct r600_shader *ps, int id) 63{ 64 struct r600_shader_io *input = &ps->input[id]; 65 66 for (int i = 0; i < vs->noutput; i++) { 67 if (input->name == vs->output[i].name && 68 input->sid == vs->output[i].sid) { 69 return i - 1; 70 } 71 } 72 return 0; 73} 74 75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 76{ 77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 78 struct r600_shader *rshader = &shader->shader; 79 uint32_t *ptr; 80 int i; 81 82 /* copy new shader */ 83 if (shader->bo == NULL) { 84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ 85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); 86 if (shader->bo == NULL) { 87 return -ENOMEM; 88 } 89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 90 if (R600_BIG_ENDIAN) { 91 for (i = 0; i < rshader->bc.ndw; ++i) { 92 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 93 } 94 } else { 95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 96 } 97 r600_bo_unmap(rctx->radeon, shader->bo); 98 } 99 /* build state */ 100 switch (rshader->processor_type) { 101 case TGSI_PROCESSOR_VERTEX: 102 if (rshader->family >= CHIP_CEDAR) { 103 evergreen_pipe_shader_vs(ctx, shader); 104 } else { 105 r600_pipe_shader_vs(ctx, shader); 106 } 107 break; 108 case TGSI_PROCESSOR_FRAGMENT: 109 if (rshader->family 
>= CHIP_CEDAR) { 110 evergreen_pipe_shader_ps(ctx, shader); 111 } else { 112 r600_pipe_shader_ps(ctx, shader); 113 } 114 break; 115 default: 116 return -EINVAL; 117 } 118 return 0; 119} 120 121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 122 123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) 124{ 125 static int dump_shaders = -1; 126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 127 int r; 128 129 /* Would like some magic "get_bool_option_once" routine. 130 */ 131 if (dump_shaders == -1) 132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 133 134 if (dump_shaders) { 135 fprintf(stderr, "--------------------------------------------------------------\n"); 136 tgsi_dump(shader->tokens, 0); 137 } 138 shader->shader.family = r600_get_family(rctx->radeon); 139 r = r600_shader_from_tgsi(rctx, shader); 140 if (r) { 141 R600_ERR("translation from TGSI failed !\n"); 142 return r; 143 } 144 r = r600_bc_build(&shader->shader.bc); 145 if (r) { 146 R600_ERR("building bytecode failed !\n"); 147 return r; 148 } 149 if (dump_shaders) { 150 r600_bc_dump(&shader->shader.bc); 151 fprintf(stderr, "______________________________________________________________\n"); 152 } 153 return r600_pipe_shader(ctx, shader); 154} 155 156void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 157{ 158 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 159 160 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 161 r600_bc_clear(&shader->shader.bc); 162 163 memset(&shader->shader,0,sizeof(struct r600_shader)); 164} 165 166/* 167 * tgsi -> r600 shader 168 */ 169struct r600_shader_tgsi_instruction; 170 171struct r600_shader_src { 172 unsigned sel; 173 unsigned swizzle[4]; 174 unsigned neg; 175 unsigned abs; 176 unsigned rel; 177 uint32_t value[4]; 178}; 179 180struct r600_shader_ctx { 181 struct tgsi_shader_info info; 182 
struct tgsi_parse_context parse; 183 const struct tgsi_token *tokens; 184 unsigned type; 185 unsigned file_offset[TGSI_FILE_COUNT]; 186 unsigned temp_reg; 187 unsigned ar_reg; 188 struct r600_shader_tgsi_instruction *inst_info; 189 struct r600_bc *bc; 190 struct r600_shader *shader; 191 struct r600_shader_src src[4]; 192 u32 *literals; 193 u32 nliterals; 194 u32 max_driver_temp_used; 195 /* needed for evergreen interpolation */ 196 boolean input_centroid; 197 boolean input_linear; 198 boolean input_perspective; 199 int num_interp_gpr; 200}; 201 202struct r600_shader_tgsi_instruction { 203 unsigned tgsi_opcode; 204 unsigned is_op3; 205 unsigned r600_opcode; 206 int (*process)(struct r600_shader_ctx *ctx); 207}; 208 209static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 210static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 211 212static int tgsi_is_supported(struct r600_shader_ctx *ctx) 213{ 214 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 215 int j; 216 217 if (i->Instruction.NumDstRegs > 1) { 218 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 219 return -EINVAL; 220 } 221 if (i->Instruction.Predicate) { 222 R600_ERR("predicate unsupported\n"); 223 return -EINVAL; 224 } 225#if 0 226 if (i->Instruction.Label) { 227 R600_ERR("label unsupported\n"); 228 return -EINVAL; 229 } 230#endif 231 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 232 if (i->Src[j].Register.Dimension) { 233 R600_ERR("unsupported src %d (dimension %d)\n", j, 234 i->Src[j].Register.Dimension); 235 return -EINVAL; 236 } 237 } 238 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 239 if (i->Dst[j].Register.Dimension) { 240 R600_ERR("unsupported dst (dimension)\n"); 241 return -EINVAL; 242 } 243 } 244 return 0; 245} 246 247static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 248{ 249 int i, r; 250 struct r600_bc_alu alu; 251 int gpr = 0, 
base_chan = 0; 252 int ij_index = 0; 253 254 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 255 ij_index = 0; 256 if (ctx->shader->input[input].centroid) 257 ij_index++; 258 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 259 ij_index = 0; 260 /* if we have perspective add one */ 261 if (ctx->input_perspective) { 262 ij_index++; 263 /* if we have perspective centroid */ 264 if (ctx->input_centroid) 265 ij_index++; 266 } 267 if (ctx->shader->input[input].centroid) 268 ij_index++; 269 } 270 271 /* work out gpr and base_chan from index */ 272 gpr = ij_index / 2; 273 base_chan = (2 * (ij_index % 2)) + 1; 274 275 for (i = 0; i < 8; i++) { 276 memset(&alu, 0, sizeof(struct r600_bc_alu)); 277 278 if (i < 4) 279 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 280 else 281 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 282 283 if ((i > 1) && (i < 6)) { 284 alu.dst.sel = ctx->shader->input[input].gpr; 285 alu.dst.write = 1; 286 } 287 288 alu.dst.chan = i % 4; 289 290 alu.src[0].sel = gpr; 291 alu.src[0].chan = (base_chan - (i % 2)); 292 293 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 294 295 alu.bank_swizzle_force = SQ_ALU_VEC_210; 296 if ((i % 4) == 3) 297 alu.last = 1; 298 r = r600_bc_add_alu(ctx->bc, &alu); 299 if (r) 300 return r; 301 } 302 return 0; 303} 304 305 306static int tgsi_declaration(struct r600_shader_ctx *ctx) 307{ 308 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 309 unsigned i; 310 int r; 311 312 switch (d->Declaration.File) { 313 case TGSI_FILE_INPUT: 314 i = ctx->shader->ninput++; 315 ctx->shader->input[i].name = d->Semantic.Name; 316 ctx->shader->input[i].sid = d->Semantic.Index; 317 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 318 ctx->shader->input[i].centroid = d->Declaration.Centroid; 319 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 320 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && 
ctx->bc->chiprev >= CHIPREV_EVERGREEN) { 321 /* turn input into interpolate on EG */ 322 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 323 if (ctx->shader->input[i].interpolate > 0) { 324 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 325 evergreen_interp_alu(ctx, i); 326 } 327 } 328 } 329 break; 330 case TGSI_FILE_OUTPUT: 331 i = ctx->shader->noutput++; 332 ctx->shader->output[i].name = d->Semantic.Name; 333 ctx->shader->output[i].sid = d->Semantic.Index; 334 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 335 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 336 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 337 /* these don't count as vertex param exports */ 338 if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || 339 (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) 340 ctx->shader->npos++; 341 } 342 break; 343 case TGSI_FILE_CONSTANT: 344 case TGSI_FILE_TEMPORARY: 345 case TGSI_FILE_SAMPLER: 346 case TGSI_FILE_ADDRESS: 347 break; 348 349 case TGSI_FILE_SYSTEM_VALUE: 350 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 351 struct r600_bc_alu alu; 352 memset(&alu, 0, sizeof(struct r600_bc_alu)); 353 354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 355 alu.src[0].sel = 0; 356 alu.src[0].chan = 3; 357 358 alu.dst.sel = 0; 359 alu.dst.chan = 3; 360 alu.dst.write = 1; 361 alu.last = 1; 362 363 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 364 return r; 365 break; 366 } 367 368 default: 369 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 370 return -EINVAL; 371 } 372 return 0; 373} 374 375static int r600_get_temp(struct r600_shader_ctx *ctx) 376{ 377 return ctx->temp_reg + ctx->max_driver_temp_used++; 378} 379 380/* 381 * for evergreen we need to scan the shader to find the number of GPRs we need to 382 * reserve for interpolation. 
383 * 384 * we need to know if we are going to emit 385 * any centroid inputs 386 * if perspective and linear are required 387*/ 388static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 389{ 390 int i; 391 int num_baryc; 392 393 ctx->input_linear = FALSE; 394 ctx->input_perspective = FALSE; 395 ctx->input_centroid = FALSE; 396 ctx->num_interp_gpr = 1; 397 398 /* any centroid inputs */ 399 for (i = 0; i < ctx->info.num_inputs; i++) { 400 /* skip position/face */ 401 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 402 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 403 continue; 404 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 405 ctx->input_linear = TRUE; 406 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 407 ctx->input_perspective = TRUE; 408 if (ctx->info.input_centroid[i]) 409 ctx->input_centroid = TRUE; 410 } 411 412 num_baryc = 0; 413 /* ignoring sample for now */ 414 if (ctx->input_perspective) 415 num_baryc++; 416 if (ctx->input_linear) 417 num_baryc++; 418 if (ctx->input_centroid) 419 num_baryc *= 2; 420 421 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 422 423 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 424 return ctx->num_interp_gpr; 425} 426 427static void tgsi_src(struct r600_shader_ctx *ctx, 428 const struct tgsi_full_src_register *tgsi_src, 429 struct r600_shader_src *r600_src) 430{ 431 memset(r600_src, 0, sizeof(*r600_src)); 432 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 433 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 434 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 435 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 436 r600_src->neg = tgsi_src->Register.Negate; 437 r600_src->abs = tgsi_src->Register.Absolute; 438 439 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 440 int index; 441 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 442 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 443 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 444 445 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 446 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 447 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 448 return; 449 } 450 index = tgsi_src->Register.Index; 451 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 452 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 453 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 454 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 455 r600_src->swizzle[0] = 3; 456 r600_src->swizzle[1] = 3; 457 r600_src->swizzle[2] = 3; 458 r600_src->swizzle[3] = 3; 459 r600_src->sel = 0; 460 } else { 461 if (tgsi_src->Register.Indirect) 462 r600_src->rel = V_SQ_REL_RELATIVE; 463 r600_src->sel = tgsi_src->Register.Index; 464 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 465 } 466} 467 468static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 469{ 470 struct r600_bc_vtx vtx; 471 unsigned int ar_reg; 472 int r; 473 474 if (offset) { 475 struct r600_bc_alu alu; 476 477 memset(&alu, 0, sizeof(alu)); 478 479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 480 alu.src[0].sel = ctx->ar_reg; 481 482 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 483 alu.src[1].value = offset; 484 485 alu.dst.sel = dst_reg; 486 alu.dst.write = 1; 487 alu.last = 1; 488 489 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 490 return r; 491 492 ar_reg = dst_reg; 493 } else { 494 ar_reg = ctx->ar_reg; 495 } 496 497 memset(&vtx, 0, sizeof(vtx)); 498 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 499 vtx.src_gpr = ar_reg; 500 vtx.mega_fetch_count = 16; 501 vtx.dst_gpr = dst_reg; 502 vtx.dst_sel_x = 0; /* SEL_X */ 503 vtx.dst_sel_y = 1; /* SEL_Y */ 504 vtx.dst_sel_z = 2; /* SEL_Z */ 505 vtx.dst_sel_w = 3; /* SEL_W */ 506 vtx.data_format = FMT_32_32_32_32_FLOAT; 507 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 
508 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 509 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 510 vtx.endian = r600_endian_swap(32); 511 512 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 513 return r; 514 515 return 0; 516} 517 518static int tgsi_split_constant(struct r600_shader_ctx *ctx) 519{ 520 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 521 struct r600_bc_alu alu; 522 int i, j, k, nconst, r; 523 524 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 525 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 526 nconst++; 527 } 528 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 529 } 530 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 531 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 532 continue; 533 } 534 535 if (ctx->src[i].rel) { 536 int treg = r600_get_temp(ctx); 537 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 538 return r; 539 540 ctx->src[i].sel = treg; 541 ctx->src[i].rel = 0; 542 j--; 543 } else if (j > 0) { 544 int treg = r600_get_temp(ctx); 545 for (k = 0; k < 4; k++) { 546 memset(&alu, 0, sizeof(struct r600_bc_alu)); 547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 548 alu.src[0].sel = ctx->src[i].sel; 549 alu.src[0].chan = k; 550 alu.src[0].rel = ctx->src[i].rel; 551 alu.dst.sel = treg; 552 alu.dst.chan = k; 553 alu.dst.write = 1; 554 if (k == 3) 555 alu.last = 1; 556 r = r600_bc_add_alu(ctx->bc, &alu); 557 if (r) 558 return r; 559 } 560 ctx->src[i].sel = treg; 561 ctx->src[i].rel =0; 562 j--; 563 } 564 } 565 return 0; 566} 567 568/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 569static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 570{ 571 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 572 struct r600_bc_alu alu; 573 int i, j, k, nliteral, r; 574 575 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 576 if (ctx->src[i].sel == 
V_SQ_ALU_SRC_LITERAL) { 577 nliteral++; 578 } 579 } 580 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 581 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 582 int treg = r600_get_temp(ctx); 583 for (k = 0; k < 4; k++) { 584 memset(&alu, 0, sizeof(struct r600_bc_alu)); 585 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 586 alu.src[0].sel = ctx->src[i].sel; 587 alu.src[0].chan = k; 588 alu.src[0].value = ctx->src[i].value[k]; 589 alu.dst.sel = treg; 590 alu.dst.chan = k; 591 alu.dst.write = 1; 592 if (k == 3) 593 alu.last = 1; 594 r = r600_bc_add_alu(ctx->bc, &alu); 595 if (r) 596 return r; 597 } 598 ctx->src[i].sel = treg; 599 j--; 600 } 601 } 602 return 0; 603} 604 605static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 606{ 607 struct r600_shader *shader = &pipeshader->shader; 608 struct tgsi_token *tokens = pipeshader->tokens; 609 struct tgsi_full_immediate *immediate; 610 struct tgsi_full_property *property; 611 struct r600_shader_ctx ctx; 612 struct r600_bc_output output[32]; 613 unsigned output_done, noutput; 614 unsigned opcode; 615 int i, j, r = 0, pos0; 616 617 ctx.bc = &shader->bc; 618 ctx.shader = shader; 619 r = r600_bc_init(ctx.bc, shader->family); 620 if (r) 621 return r; 622 ctx.tokens = tokens; 623 tgsi_scan_shader(tokens, &ctx.info); 624 tgsi_parse_init(&ctx.parse, tokens); 625 ctx.type = ctx.parse.FullHeader.Processor.Processor; 626 shader->processor_type = ctx.type; 627 ctx.bc->type = shader->processor_type; 628 629 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 630 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 631 632 shader->nr_cbufs = rctx->nr_cbufs; 633 634 /* register allocations */ 635 /* Values [0,127] correspond to GPR[0..127]. 
636 * Values [128,159] correspond to constant buffer bank 0 637 * Values [160,191] correspond to constant buffer bank 1 638 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 639 * Values [256,287] correspond to constant buffer bank 2 (EG) 640 * Values [288,319] correspond to constant buffer bank 3 (EG) 641 * Other special values are shown in the list below. 642 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 643 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 644 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 645 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 646 * 248 SQ_ALU_SRC_0: special constant 0.0. 647 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 648 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 649 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 650 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 651 * 253 SQ_ALU_SRC_LITERAL: literal constant. 652 * 254 SQ_ALU_SRC_PV: previous vector result. 653 * 255 SQ_ALU_SRC_PS: previous scalar result. 654 */ 655 for (i = 0; i < TGSI_FILE_COUNT; i++) { 656 ctx.file_offset[i] = 0; 657 } 658 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 659 ctx.file_offset[TGSI_FILE_INPUT] = 1; 660 if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { 661 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 662 } else { 663 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 664 } 665 } 666 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { 667 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 668 } 669 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 670 ctx.info.file_count[TGSI_FILE_INPUT]; 671 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 672 ctx.info.file_count[TGSI_FILE_OUTPUT]; 673 674 /* Outside the GPR range. This will be translated to one of the 675 * kcache banks later. 
*/ 676 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 677 678 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 679 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 680 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 681 ctx.temp_reg = ctx.ar_reg + 1; 682 683 ctx.nliterals = 0; 684 ctx.literals = NULL; 685 shader->fs_write_all = FALSE; 686 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 687 tgsi_parse_token(&ctx.parse); 688 switch (ctx.parse.FullToken.Token.Type) { 689 case TGSI_TOKEN_TYPE_IMMEDIATE: 690 immediate = &ctx.parse.FullToken.FullImmediate; 691 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 692 if(ctx.literals == NULL) { 693 r = -ENOMEM; 694 goto out_err; 695 } 696 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 697 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 698 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 699 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 700 ctx.nliterals++; 701 break; 702 case TGSI_TOKEN_TYPE_DECLARATION: 703 r = tgsi_declaration(&ctx); 704 if (r) 705 goto out_err; 706 break; 707 case TGSI_TOKEN_TYPE_INSTRUCTION: 708 r = tgsi_is_supported(&ctx); 709 if (r) 710 goto out_err; 711 ctx.max_driver_temp_used = 0; 712 /* reserve first tmp for everyone */ 713 r600_get_temp(&ctx); 714 715 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 716 if ((r = tgsi_split_constant(&ctx))) 717 goto out_err; 718 if ((r = tgsi_split_literal_constant(&ctx))) 719 goto out_err; 720 if (ctx.bc->chiprev == CHIPREV_CAYMAN) 721 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 722 else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) 723 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 724 else 725 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 726 r = ctx.inst_info->process(&ctx); 727 if (r) 728 goto out_err; 729 break; 730 case TGSI_TOKEN_TYPE_PROPERTY: 731 property = &ctx.parse.FullToken.FullProperty; 732 if (property->Property.PropertyName == 
TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 733 if (property->u[0].Data == 1) 734 shader->fs_write_all = TRUE; 735 } 736 break; 737 default: 738 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 739 r = -EINVAL; 740 goto out_err; 741 } 742 } 743 744 noutput = shader->noutput; 745 746 /* clamp color outputs */ 747 if (shader->clamp_color) { 748 for (i = 0; i < noutput; i++) { 749 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 750 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 751 752 int j; 753 for (j = 0; j < 4; j++) { 754 struct r600_bc_alu alu; 755 memset(&alu, 0, sizeof(struct r600_bc_alu)); 756 757 /* MOV_SAT R, R */ 758 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 759 alu.dst.sel = shader->output[i].gpr; 760 alu.dst.chan = j; 761 alu.dst.write = 1; 762 alu.dst.clamp = 1; 763 alu.src[0].sel = alu.dst.sel; 764 alu.src[0].chan = j; 765 766 if (j == 3) { 767 alu.last = 1; 768 } 769 r = r600_bc_add_alu(ctx.bc, &alu); 770 if (r) 771 return r; 772 } 773 } 774 } 775 } 776 777 /* export output */ 778 j = 0; 779 for (i = 0, pos0 = 0; i < noutput; i++) { 780 memset(&output[i], 0, sizeof(struct r600_bc_output)); 781 output[i + j].gpr = shader->output[i].gpr; 782 output[i + j].elem_size = 3; 783 output[i + j].swizzle_x = 0; 784 output[i + j].swizzle_y = 1; 785 output[i + j].swizzle_z = 2; 786 output[i + j].swizzle_w = 3; 787 output[i + j].burst_count = 1; 788 output[i + j].barrier = 1; 789 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 790 output[i + j].array_base = i - pos0; 791 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 792 switch (ctx.type) { 793 case TGSI_PROCESSOR_VERTEX: 794 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 795 output[i + j].array_base = 60; 796 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 797 /* position doesn't count in array_base */ 798 pos0++; 799 } 800 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 801 
output[i + j].array_base = 61; 802 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 803 /* position doesn't count in array_base */ 804 pos0++; 805 } 806 break; 807 case TGSI_PROCESSOR_FRAGMENT: 808 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 809 output[i + j].array_base = shader->output[i].sid; 810 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 811 if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { 812 for (j = 1; j < shader->nr_cbufs; j++) { 813 memset(&output[i + j], 0, sizeof(struct r600_bc_output)); 814 output[i + j].gpr = shader->output[i].gpr; 815 output[i + j].elem_size = 3; 816 output[i + j].swizzle_x = 0; 817 output[i + j].swizzle_y = 1; 818 output[i + j].swizzle_z = 2; 819 output[i + j].swizzle_w = 3; 820 output[i + j].burst_count = 1; 821 output[i + j].barrier = 1; 822 output[i + j].array_base = shader->output[i].sid + j; 823 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 824 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 825 } 826 j--; 827 } 828 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 829 output[i + j].array_base = 61; 830 output[i + j].swizzle_x = 2; 831 output[i + j].swizzle_y = 7; 832 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 833 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 834 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 835 output[i + j].array_base = 61; 836 output[i + j].swizzle_x = 7; 837 output[i + j].swizzle_y = 1; 838 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 839 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 840 } else { 841 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 842 r = -EINVAL; 843 goto out_err; 844 } 845 break; 846 default: 847 R600_ERR("unsupported processor type %d\n", ctx.type); 848 r = -EINVAL; 849 goto out_err; 850 } 851 } 852 noutput += j; 853 /* add fake param output for vertex shader if no 
param is exported */ 854 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 855 for (i = 0, pos0 = 0; i < noutput; i++) { 856 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 857 pos0 = 1; 858 break; 859 } 860 } 861 if (!pos0) { 862 memset(&output[i], 0, sizeof(struct r600_bc_output)); 863 output[i].gpr = 0; 864 output[i].elem_size = 3; 865 output[i].swizzle_x = 0; 866 output[i].swizzle_y = 1; 867 output[i].swizzle_z = 2; 868 output[i].swizzle_w = 3; 869 output[i].burst_count = 1; 870 output[i].barrier = 1; 871 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 872 output[i].array_base = 0; 873 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 874 noutput++; 875 } 876 } 877 /* add fake pixel export */ 878 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 879 memset(&output[0], 0, sizeof(struct r600_bc_output)); 880 output[0].gpr = 0; 881 output[0].elem_size = 3; 882 output[0].swizzle_x = 7; 883 output[0].swizzle_y = 7; 884 output[0].swizzle_z = 7; 885 output[0].swizzle_w = 7; 886 output[0].burst_count = 1; 887 output[0].barrier = 1; 888 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 889 output[0].array_base = 0; 890 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 891 noutput++; 892 } 893 /* set export done on last export of each type */ 894 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 895 if (ctx.bc->chiprev < CHIPREV_CAYMAN) { 896 if (i == (noutput - 1)) { 897 output[i].end_of_program = 1; 898 } 899 } 900 if (!(output_done & (1 << output[i].type))) { 901 output_done |= (1 << output[i].type); 902 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 903 } 904 } 905 /* add output to bytecode */ 906 for (i = 0; i < noutput; i++) { 907 r = r600_bc_add_output(ctx.bc, &output[i]); 908 if (r) 909 goto out_err; 910 } 911 /* add program end */ 912 if (ctx.bc->chiprev == CHIPREV_CAYMAN) 913 cm_bc_add_cf_end(ctx.bc); 914 915 
free(ctx.literals); 916 tgsi_parse_free(&ctx.parse); 917 return 0; 918out_err: 919 free(ctx.literals); 920 tgsi_parse_free(&ctx.parse); 921 return r; 922} 923 924static int tgsi_unsupported(struct r600_shader_ctx *ctx) 925{ 926 R600_ERR("%s tgsi opcode unsupported\n", 927 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 928 return -EINVAL; 929} 930 931static int tgsi_end(struct r600_shader_ctx *ctx) 932{ 933 return 0; 934} 935 936static void r600_bc_src(struct r600_bc_alu_src *bc_src, 937 const struct r600_shader_src *shader_src, 938 unsigned chan) 939{ 940 bc_src->sel = shader_src->sel; 941 bc_src->chan = shader_src->swizzle[chan]; 942 bc_src->neg = shader_src->neg; 943 bc_src->abs = shader_src->abs; 944 bc_src->rel = shader_src->rel; 945 bc_src->value = shader_src->value[bc_src->chan]; 946} 947 948static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) 949{ 950 bc_src->abs = 1; 951 bc_src->neg = 0; 952} 953 954static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) 955{ 956 bc_src->neg = !bc_src->neg; 957} 958 959static void tgsi_dst(struct r600_shader_ctx *ctx, 960 const struct tgsi_full_dst_register *tgsi_dst, 961 unsigned swizzle, 962 struct r600_bc_alu_dst *r600_dst) 963{ 964 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 965 966 r600_dst->sel = tgsi_dst->Register.Index; 967 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 968 r600_dst->chan = swizzle; 969 r600_dst->write = 1; 970 if (tgsi_dst->Register.Indirect) 971 r600_dst->rel = V_SQ_REL_RELATIVE; 972 if (inst->Instruction.Saturate) { 973 r600_dst->clamp = 1; 974 } 975} 976 977static int tgsi_last_instruction(unsigned writemask) 978{ 979 int i, lasti = 0; 980 981 for (i = 0; i < 4; i++) { 982 if (writemask & (1 << i)) { 983 lasti = i; 984 } 985 } 986 return lasti; 987} 988 989static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 990{ 991 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 992 struct r600_bc_alu 
alu; 993 int i, j, r; 994 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 995 996 for (i = 0; i < lasti + 1; i++) { 997 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 998 continue; 999 1000 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1001 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1002 1003 alu.inst = ctx->inst_info->r600_opcode; 1004 if (!swap) { 1005 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1006 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1007 } 1008 } else { 1009 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1010 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1011 } 1012 /* handle some special cases */ 1013 switch (ctx->inst_info->tgsi_opcode) { 1014 case TGSI_OPCODE_SUB: 1015 r600_bc_src_toggle_neg(&alu.src[1]); 1016 break; 1017 case TGSI_OPCODE_ABS: 1018 r600_bc_src_set_abs(&alu.src[0]); 1019 break; 1020 default: 1021 break; 1022 } 1023 if (i == lasti) { 1024 alu.last = 1; 1025 } 1026 r = r600_bc_add_alu(ctx->bc, &alu); 1027 if (r) 1028 return r; 1029 } 1030 return 0; 1031} 1032 1033static int tgsi_op2(struct r600_shader_ctx *ctx) 1034{ 1035 return tgsi_op2_s(ctx, 0); 1036} 1037 1038static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1039{ 1040 return tgsi_op2_s(ctx, 1); 1041} 1042 1043static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1044{ 1045 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1046 int i, j, r; 1047 struct r600_bc_alu alu; 1048 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 1049 1050 for (i = 0 ; i < last_slot; i++) { 1051 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1052 alu.inst = ctx->inst_info->r600_opcode; 1053 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1054 r600_bc_src(&alu.src[j], &ctx->src[j], 0); 1055 } 1056 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1057 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1058 1059 if (i == last_slot - 1) 1060 alu.last = 1; 1061 r = r600_bc_add_alu(ctx->bc, &alu); 1062 if (r) 1063 return r; 1064 } 1065 return 0; 1066} 1067 1068/* 1069 * r600 - trunc to -PI..PI range 1070 * r700 - normalize by dividing by 2PI 1071 * see fdo bug 27901 1072 */ 1073static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1074{ 1075 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1076 static float double_pi = 3.1415926535 * 2; 1077 static float neg_pi = -3.1415926535; 1078 1079 int r; 1080 struct r600_bc_alu alu; 1081 1082 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1083 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1084 alu.is_op3 = 1; 1085 1086 alu.dst.chan = 0; 1087 alu.dst.sel = ctx->temp_reg; 1088 alu.dst.write = 1; 1089 1090 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1091 1092 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1093 alu.src[1].chan = 0; 1094 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1095 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1096 alu.src[2].chan = 0; 1097 alu.last = 1; 1098 r = r600_bc_add_alu(ctx->bc, &alu); 1099 if (r) 1100 return r; 1101 1102 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1103 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1104 1105 alu.dst.chan = 0; 1106 alu.dst.sel = ctx->temp_reg; 1107 alu.dst.write = 1; 1108 1109 alu.src[0].sel = ctx->temp_reg; 1110 alu.src[0].chan = 0; 1111 alu.last = 1; 1112 r = r600_bc_add_alu(ctx->bc, &alu); 1113 if (r) 1114 return r; 1115 1116 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1117 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1118 alu.is_op3 = 1; 1119 1120 alu.dst.chan = 0; 1121 
alu.dst.sel = ctx->temp_reg; 1122 alu.dst.write = 1; 1123 1124 alu.src[0].sel = ctx->temp_reg; 1125 alu.src[0].chan = 0; 1126 1127 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1128 alu.src[1].chan = 0; 1129 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1130 alu.src[2].chan = 0; 1131 1132 if (ctx->bc->chiprev == CHIPREV_R600) { 1133 alu.src[1].value = *(uint32_t *)&double_pi; 1134 alu.src[2].value = *(uint32_t *)&neg_pi; 1135 } else { 1136 alu.src[1].sel = V_SQ_ALU_SRC_1; 1137 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1138 alu.src[2].neg = 1; 1139 } 1140 1141 alu.last = 1; 1142 r = r600_bc_add_alu(ctx->bc, &alu); 1143 if (r) 1144 return r; 1145 return 0; 1146} 1147 1148static int cayman_trig(struct r600_shader_ctx *ctx) 1149{ 1150 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1151 struct r600_bc_alu alu; 1152 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1153 int i, r; 1154 1155 r = tgsi_setup_trig(ctx); 1156 if (r) 1157 return r; 1158 1159 1160 for (i = 0; i < last_slot; i++) { 1161 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1162 alu.inst = ctx->inst_info->r600_opcode; 1163 alu.dst.chan = i; 1164 1165 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1166 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1167 1168 alu.src[0].sel = ctx->temp_reg; 1169 alu.src[0].chan = 0; 1170 if (i == last_slot - 1) 1171 alu.last = 1; 1172 r = r600_bc_add_alu(ctx->bc, &alu); 1173 if (r) 1174 return r; 1175 } 1176 return 0; 1177} 1178 1179static int tgsi_trig(struct r600_shader_ctx *ctx) 1180{ 1181 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1182 struct r600_bc_alu alu; 1183 int i, r; 1184 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1185 1186 r = tgsi_setup_trig(ctx); 1187 if (r) 1188 return r; 1189 1190 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1191 alu.inst = ctx->inst_info->r600_opcode; 1192 alu.dst.chan = 0; 1193 alu.dst.sel = ctx->temp_reg; 1194 alu.dst.write = 1; 1195 1196 
alu.src[0].sel = ctx->temp_reg; 1197 alu.src[0].chan = 0; 1198 alu.last = 1; 1199 r = r600_bc_add_alu(ctx->bc, &alu); 1200 if (r) 1201 return r; 1202 1203 /* replicate result */ 1204 for (i = 0; i < lasti + 1; i++) { 1205 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1206 continue; 1207 1208 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1210 1211 alu.src[0].sel = ctx->temp_reg; 1212 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1213 if (i == lasti) 1214 alu.last = 1; 1215 r = r600_bc_add_alu(ctx->bc, &alu); 1216 if (r) 1217 return r; 1218 } 1219 return 0; 1220} 1221 1222static int tgsi_scs(struct r600_shader_ctx *ctx) 1223{ 1224 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1225 struct r600_bc_alu alu; 1226 int i, r; 1227 1228 /* We'll only need the trig stuff if we are going to write to the 1229 * X or Y components of the destination vector. 1230 */ 1231 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1232 r = tgsi_setup_trig(ctx); 1233 if (r) 1234 return r; 1235 } 1236 1237 /* dst.x = COS */ 1238 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1239 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1240 for (i = 0 ; i < 3; i++) { 1241 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1243 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1244 1245 if (i == 0) 1246 alu.dst.write = 1; 1247 else 1248 alu.dst.write = 0; 1249 alu.src[0].sel = ctx->temp_reg; 1250 alu.src[0].chan = 0; 1251 if (i == 2) 1252 alu.last = 1; 1253 r = r600_bc_add_alu(ctx->bc, &alu); 1254 if (r) 1255 return r; 1256 } 1257 } else { 1258 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1260 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1261 1262 alu.src[0].sel = ctx->temp_reg; 1263 alu.src[0].chan = 0; 1264 alu.last = 1; 1265 r = r600_bc_add_alu(ctx->bc, &alu); 1266 if (r) 1267 
return r; 1268 } 1269 } 1270 1271 /* dst.y = SIN */ 1272 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1273 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1274 for (i = 0 ; i < 3; i++) { 1275 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1277 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1278 if (i == 1) 1279 alu.dst.write = 1; 1280 else 1281 alu.dst.write = 0; 1282 alu.src[0].sel = ctx->temp_reg; 1283 alu.src[0].chan = 0; 1284 if (i == 2) 1285 alu.last = 1; 1286 r = r600_bc_add_alu(ctx->bc, &alu); 1287 if (r) 1288 return r; 1289 } 1290 } else { 1291 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1292 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1293 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1294 1295 alu.src[0].sel = ctx->temp_reg; 1296 alu.src[0].chan = 0; 1297 alu.last = 1; 1298 r = r600_bc_add_alu(ctx->bc, &alu); 1299 if (r) 1300 return r; 1301 } 1302 } 1303 1304 /* dst.z = 0.0; */ 1305 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1306 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1307 1308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1309 1310 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1311 1312 alu.src[0].sel = V_SQ_ALU_SRC_0; 1313 alu.src[0].chan = 0; 1314 1315 alu.last = 1; 1316 1317 r = r600_bc_add_alu(ctx->bc, &alu); 1318 if (r) 1319 return r; 1320 } 1321 1322 /* dst.w = 1.0; */ 1323 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1324 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1325 1326 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1327 1328 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1329 1330 alu.src[0].sel = V_SQ_ALU_SRC_1; 1331 alu.src[0].chan = 0; 1332 1333 alu.last = 1; 1334 1335 r = r600_bc_add_alu(ctx->bc, &alu); 1336 if (r) 1337 return r; 1338 } 1339 1340 return 0; 1341} 1342 1343static int tgsi_kill(struct r600_shader_ctx *ctx) 1344{ 1345 struct r600_bc_alu alu; 1346 int i, r; 1347 1348 for (i = 0; i < 4; i++) { 1349 
memset(&alu, 0, sizeof(struct r600_bc_alu)); 1350 alu.inst = ctx->inst_info->r600_opcode; 1351 1352 alu.dst.chan = i; 1353 1354 alu.src[0].sel = V_SQ_ALU_SRC_0; 1355 1356 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1357 alu.src[1].sel = V_SQ_ALU_SRC_1; 1358 alu.src[1].neg = 1; 1359 } else { 1360 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1361 } 1362 if (i == 3) { 1363 alu.last = 1; 1364 } 1365 r = r600_bc_add_alu(ctx->bc, &alu); 1366 if (r) 1367 return r; 1368 } 1369 1370 /* kill must be last in ALU */ 1371 ctx->bc->force_add_cf = 1; 1372 ctx->shader->uses_kill = TRUE; 1373 return 0; 1374} 1375 1376static int tgsi_lit(struct r600_shader_ctx *ctx) 1377{ 1378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1379 struct r600_bc_alu alu; 1380 int r; 1381 1382 /* tmp.x = max(src.y, 0.0) */ 1383 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1384 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1385 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1386 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1387 alu.src[1].chan = 1; 1388 1389 alu.dst.sel = ctx->temp_reg; 1390 alu.dst.chan = 0; 1391 alu.dst.write = 1; 1392 1393 alu.last = 1; 1394 r = r600_bc_add_alu(ctx->bc, &alu); 1395 if (r) 1396 return r; 1397 1398 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1399 { 1400 int chan; 1401 int sel; 1402 int i; 1403 1404 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1405 for (i = 0; i < 3; i++) { 1406 /* tmp.z = log(tmp.x) */ 1407 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1409 alu.src[0].sel = ctx->temp_reg; 1410 alu.src[0].chan = 0; 1411 alu.dst.sel = ctx->temp_reg; 1412 alu.dst.chan = i; 1413 if (i == 2) { 1414 alu.dst.write = 1; 1415 alu.last = 1; 1416 } else 1417 alu.dst.write = 0; 1418 1419 r = r600_bc_add_alu(ctx->bc, &alu); 1420 if (r) 1421 return r; 1422 } 1423 } else { 1424 /* tmp.z = log(tmp.x) */ 1425 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1426 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1427 alu.src[0].sel = ctx->temp_reg; 1428 alu.src[0].chan = 0; 1429 alu.dst.sel = ctx->temp_reg; 1430 alu.dst.chan = 2; 1431 alu.dst.write = 1; 1432 alu.last = 1; 1433 r = r600_bc_add_alu(ctx->bc, &alu); 1434 if (r) 1435 return r; 1436 } 1437 1438 chan = alu.dst.chan; 1439 sel = alu.dst.sel; 1440 1441 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1442 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1444 alu.src[0].sel = sel; 1445 alu.src[0].chan = chan; 1446 r600_bc_src(&alu.src[1], &ctx->src[0], 3); 1447 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1448 alu.dst.sel = ctx->temp_reg; 1449 alu.dst.chan = 0; 1450 alu.dst.write = 1; 1451 alu.is_op3 = 1; 1452 alu.last = 1; 1453 r = r600_bc_add_alu(ctx->bc, &alu); 1454 if (r) 1455 return r; 1456 1457 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1458 for (i = 0; i < 3; i++) { 1459 /* dst.z = exp(tmp.x) */ 1460 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1461 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1462 alu.src[0].sel = ctx->temp_reg; 1463 alu.src[0].chan = 0; 1464 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1465 if (i == 2) { 1466 alu.dst.write = 1; 1467 alu.last = 1; 1468 } else 1469 alu.dst.write = 0; 1470 r = r600_bc_add_alu(ctx->bc, &alu); 1471 if (r) 1472 return r; 1473 } 1474 } else { 1475 /* dst.z = exp(tmp.x) */ 1476 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1477 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1478 alu.src[0].sel = ctx->temp_reg; 1479 alu.src[0].chan = 0; 1480 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1481 alu.last = 1; 1482 r = r600_bc_add_alu(ctx->bc, &alu); 1483 if (r) 1484 return r; 1485 } 1486 } 1487 1488 /* dst.x, <- 1.0 */ 1489 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1491 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1492 alu.src[0].chan = 0; 1493 tgsi_dst(ctx, &inst->Dst[0], 0, 
&alu.dst); 1494 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1495 r = r600_bc_add_alu(ctx->bc, &alu); 1496 if (r) 1497 return r; 1498 1499 /* dst.y = max(src.x, 0.0) */ 1500 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1502 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1503 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1504 alu.src[1].chan = 0; 1505 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1506 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1507 r = r600_bc_add_alu(ctx->bc, &alu); 1508 if (r) 1509 return r; 1510 1511 /* dst.w, <- 1.0 */ 1512 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1514 alu.src[0].sel = V_SQ_ALU_SRC_1; 1515 alu.src[0].chan = 0; 1516 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1517 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1518 alu.last = 1; 1519 r = r600_bc_add_alu(ctx->bc, &alu); 1520 if (r) 1521 return r; 1522 1523 return 0; 1524} 1525 1526static int tgsi_rsq(struct r600_shader_ctx *ctx) 1527{ 1528 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1529 struct r600_bc_alu alu; 1530 int i, r; 1531 1532 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1533 1534 /* FIXME: 1535 * For state trackers other than OpenGL, we'll want to use 1536 * _RECIPSQRT_IEEE instead. 
1537 */ 1538 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1539 1540 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1541 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1542 r600_bc_src_set_abs(&alu.src[i]); 1543 } 1544 alu.dst.sel = ctx->temp_reg; 1545 alu.dst.write = 1; 1546 alu.last = 1; 1547 r = r600_bc_add_alu(ctx->bc, &alu); 1548 if (r) 1549 return r; 1550 /* replicate result */ 1551 return tgsi_helper_tempx_replicate(ctx); 1552} 1553 1554static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1555{ 1556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1557 struct r600_bc_alu alu; 1558 int i, r; 1559 1560 for (i = 0; i < 4; i++) { 1561 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1562 alu.src[0].sel = ctx->temp_reg; 1563 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1564 alu.dst.chan = i; 1565 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1566 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1567 if (i == 3) 1568 alu.last = 1; 1569 r = r600_bc_add_alu(ctx->bc, &alu); 1570 if (r) 1571 return r; 1572 } 1573 return 0; 1574} 1575 1576static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1577{ 1578 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1579 struct r600_bc_alu alu; 1580 int i, r; 1581 1582 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1583 alu.inst = ctx->inst_info->r600_opcode; 1584 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1585 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1586 } 1587 alu.dst.sel = ctx->temp_reg; 1588 alu.dst.write = 1; 1589 alu.last = 1; 1590 r = r600_bc_add_alu(ctx->bc, &alu); 1591 if (r) 1592 return r; 1593 /* replicate result */ 1594 return tgsi_helper_tempx_replicate(ctx); 1595} 1596 1597static int cayman_pow(struct r600_shader_ctx *ctx) 1598{ 1599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1600 int i, r; 1601 struct r600_bc_alu alu; 1602 int last_slot = 
(inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1603 1604 for (i = 0; i < 3; i++) { 1605 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1606 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1607 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1608 alu.dst.sel = ctx->temp_reg; 1609 alu.dst.chan = i; 1610 alu.dst.write = 1; 1611 if (i == 2) 1612 alu.last = 1; 1613 r = r600_bc_add_alu(ctx->bc, &alu); 1614 if (r) 1615 return r; 1616 } 1617 1618 /* b * LOG2(a) */ 1619 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1621 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1622 alu.src[1].sel = ctx->temp_reg; 1623 alu.dst.sel = ctx->temp_reg; 1624 alu.dst.write = 1; 1625 alu.last = 1; 1626 r = r600_bc_add_alu(ctx->bc, &alu); 1627 if (r) 1628 return r; 1629 1630 for (i = 0; i < last_slot; i++) { 1631 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1632 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1633 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1634 alu.src[0].sel = ctx->temp_reg; 1635 1636 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1637 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1638 if (i == last_slot - 1) 1639 alu.last = 1; 1640 r = r600_bc_add_alu(ctx->bc, &alu); 1641 if (r) 1642 return r; 1643 } 1644 return 0; 1645} 1646 1647static int tgsi_pow(struct r600_shader_ctx *ctx) 1648{ 1649 struct r600_bc_alu alu; 1650 int r; 1651 1652 /* LOG2(a) */ 1653 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1654 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1655 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1656 alu.dst.sel = ctx->temp_reg; 1657 alu.dst.write = 1; 1658 alu.last = 1; 1659 r = r600_bc_add_alu(ctx->bc, &alu); 1660 if (r) 1661 return r; 1662 /* b * LOG2(a) */ 1663 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1664 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1665 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1666 alu.src[1].sel = ctx->temp_reg; 1667 alu.dst.sel = 
ctx->temp_reg; 1668 alu.dst.write = 1; 1669 alu.last = 1; 1670 r = r600_bc_add_alu(ctx->bc, &alu); 1671 if (r) 1672 return r; 1673 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1674 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1675 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1676 alu.src[0].sel = ctx->temp_reg; 1677 alu.dst.sel = ctx->temp_reg; 1678 alu.dst.write = 1; 1679 alu.last = 1; 1680 r = r600_bc_add_alu(ctx->bc, &alu); 1681 if (r) 1682 return r; 1683 return tgsi_helper_tempx_replicate(ctx); 1684} 1685 1686static int tgsi_ssg(struct r600_shader_ctx *ctx) 1687{ 1688 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1689 struct r600_bc_alu alu; 1690 int i, r; 1691 1692 /* tmp = (src > 0 ? 1 : src) */ 1693 for (i = 0; i < 4; i++) { 1694 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1696 alu.is_op3 = 1; 1697 1698 alu.dst.sel = ctx->temp_reg; 1699 alu.dst.chan = i; 1700 1701 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1702 alu.src[1].sel = V_SQ_ALU_SRC_1; 1703 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1704 1705 if (i == 3) 1706 alu.last = 1; 1707 r = r600_bc_add_alu(ctx->bc, &alu); 1708 if (r) 1709 return r; 1710 } 1711 1712 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 1713 for (i = 0; i < 4; i++) { 1714 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1715 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1716 alu.is_op3 = 1; 1717 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1718 1719 alu.src[0].sel = ctx->temp_reg; 1720 alu.src[0].chan = i; 1721 alu.src[0].neg = 1; 1722 1723 alu.src[1].sel = V_SQ_ALU_SRC_1; 1724 alu.src[1].neg = 1; 1725 1726 alu.src[2].sel = ctx->temp_reg; 1727 alu.src[2].chan = i; 1728 1729 if (i == 3) 1730 alu.last = 1; 1731 r = r600_bc_add_alu(ctx->bc, &alu); 1732 if (r) 1733 return r; 1734 } 1735 return 0; 1736} 1737 1738static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1739{ 1740 struct r600_bc_alu alu; 1741 int i, r; 1742 1743 for (i = 0; i < 4; i++) { 1744 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1745 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1746 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1747 alu.dst.chan = i; 1748 } else { 1749 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1750 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1751 alu.src[0].sel = ctx->temp_reg; 1752 alu.src[0].chan = i; 1753 } 1754 if (i == 3) { 1755 alu.last = 1; 1756 } 1757 r = r600_bc_add_alu(ctx->bc, &alu); 1758 if (r) 1759 return r; 1760 } 1761 return 0; 1762} 1763 1764static int tgsi_op3(struct r600_shader_ctx *ctx) 1765{ 1766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1767 struct r600_bc_alu alu; 1768 int i, j, r; 1769 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1770 1771 for (i = 0; i < lasti + 1; i++) { 1772 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1773 continue; 1774 1775 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1776 alu.inst = ctx->inst_info->r600_opcode; 1777 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1778 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1779 } 1780 1781 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1782 alu.dst.chan = i; 1783 alu.dst.write = 
1; 1784 alu.is_op3 = 1; 1785 if (i == lasti) { 1786 alu.last = 1; 1787 } 1788 r = r600_bc_add_alu(ctx->bc, &alu); 1789 if (r) 1790 return r; 1791 } 1792 return 0; 1793} 1794 1795static int tgsi_dp(struct r600_shader_ctx *ctx) 1796{ 1797 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1798 struct r600_bc_alu alu; 1799 int i, j, r; 1800 1801 for (i = 0; i < 4; i++) { 1802 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1803 alu.inst = ctx->inst_info->r600_opcode; 1804 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1805 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1806 } 1807 1808 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1809 alu.dst.chan = i; 1810 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1811 /* handle some special cases */ 1812 switch (ctx->inst_info->tgsi_opcode) { 1813 case TGSI_OPCODE_DP2: 1814 if (i > 1) { 1815 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1816 alu.src[0].chan = alu.src[1].chan = 0; 1817 } 1818 break; 1819 case TGSI_OPCODE_DP3: 1820 if (i > 2) { 1821 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1822 alu.src[0].chan = alu.src[1].chan = 0; 1823 } 1824 break; 1825 case TGSI_OPCODE_DPH: 1826 if (i == 3) { 1827 alu.src[0].sel = V_SQ_ALU_SRC_1; 1828 alu.src[0].chan = 0; 1829 alu.src[0].neg = 0; 1830 } 1831 break; 1832 default: 1833 break; 1834 } 1835 if (i == 3) { 1836 alu.last = 1; 1837 } 1838 r = r600_bc_add_alu(ctx->bc, &alu); 1839 if (r) 1840 return r; 1841 } 1842 return 0; 1843} 1844 1845static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 1846 unsigned index) 1847{ 1848 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1849 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 1850 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 1851 ctx->src[index].neg || ctx->src[index].abs; 1852} 1853 1854static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 1855 unsigned index) 1856{ 1857 struct 
tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1858 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 1859} 1860 1861static int tgsi_tex(struct r600_shader_ctx *ctx) 1862{ 1863 static float one_point_five = 1.5f; 1864 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1865 struct r600_bc_tex tex; 1866 struct r600_bc_alu alu; 1867 unsigned src_gpr; 1868 int r, i, j; 1869 int opcode; 1870 /* Texture fetch instructions can only use gprs as source. 1871 * Also they cannot negate the source or take the absolute value */ 1872 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 1873 boolean src_loaded = FALSE; 1874 unsigned sampler_src_reg = 1; 1875 1876 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 1877 1878 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 1879 /* TGSI moves the sampler to src reg 3 for TXD */ 1880 sampler_src_reg = 3; 1881 1882 for (i = 1; i < 3; i++) { 1883 /* set gradients h/v */ 1884 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1885 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 1886 SQ_TEX_INST_SET_GRADIENTS_V; 1887 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 1888 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1889 1890 if (tgsi_tex_src_requires_loading(ctx, i)) { 1891 tex.src_gpr = r600_get_temp(ctx); 1892 tex.src_sel_x = 0; 1893 tex.src_sel_y = 1; 1894 tex.src_sel_z = 2; 1895 tex.src_sel_w = 3; 1896 1897 for (j = 0; j < 4; j++) { 1898 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1899 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1900 r600_bc_src(&alu.src[0], &ctx->src[i], j); 1901 alu.dst.sel = tex.src_gpr; 1902 alu.dst.chan = j; 1903 if (j == 3) 1904 alu.last = 1; 1905 alu.dst.write = 1; 1906 r = r600_bc_add_alu(ctx->bc, &alu); 1907 if (r) 1908 return r; 1909 } 1910 1911 } else { 1912 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 1913 tex.src_sel_x = ctx->src[i].swizzle[0]; 1914 tex.src_sel_y = ctx->src[i].swizzle[1]; 1915 tex.src_sel_z = ctx->src[i].swizzle[2]; 1916 tex.src_sel_w = ctx->src[i].swizzle[3]; 1917 tex.src_rel = ctx->src[i].rel; 1918 } 1919 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 1920 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 1921 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1922 tex.coord_type_x = 1; 1923 tex.coord_type_y = 1; 1924 tex.coord_type_z = 1; 1925 tex.coord_type_w = 1; 1926 } 1927 r = r600_bc_add_tex(ctx->bc, &tex); 1928 if (r) 1929 return r; 1930 } 1931 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1932 int out_chan; 1933 /* Add perspective divide */ 1934 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1935 out_chan = 2; 1936 for (i = 0; i < 3; i++) { 1937 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1939 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1940 1941 alu.dst.sel = ctx->temp_reg; 1942 alu.dst.chan = i; 1943 if (i == 2) 1944 alu.last = 1; 1945 if (out_chan == i) 1946 alu.dst.write = 1; 1947 r = 
r600_bc_add_alu(ctx->bc, &alu); 1948 if (r) 1949 return r; 1950 } 1951 1952 } else { 1953 out_chan = 3; 1954 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1955 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1956 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1957 1958 alu.dst.sel = ctx->temp_reg; 1959 alu.dst.chan = out_chan; 1960 alu.last = 1; 1961 alu.dst.write = 1; 1962 r = r600_bc_add_alu(ctx->bc, &alu); 1963 if (r) 1964 return r; 1965 } 1966 1967 for (i = 0; i < 3; i++) { 1968 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1969 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1970 alu.src[0].sel = ctx->temp_reg; 1971 alu.src[0].chan = out_chan; 1972 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1973 alu.dst.sel = ctx->temp_reg; 1974 alu.dst.chan = i; 1975 alu.dst.write = 1; 1976 r = r600_bc_add_alu(ctx->bc, &alu); 1977 if (r) 1978 return r; 1979 } 1980 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1981 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1982 alu.src[0].sel = V_SQ_ALU_SRC_1; 1983 alu.src[0].chan = 0; 1984 alu.dst.sel = ctx->temp_reg; 1985 alu.dst.chan = 3; 1986 alu.last = 1; 1987 alu.dst.write = 1; 1988 r = r600_bc_add_alu(ctx->bc, &alu); 1989 if (r) 1990 return r; 1991 src_loaded = TRUE; 1992 src_gpr = ctx->temp_reg; 1993 } 1994 1995 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1996 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1997 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1998 1999 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 2000 for (i = 0; i < 4; i++) { 2001 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2002 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 2003 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2004 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 2005 alu.dst.sel = ctx->temp_reg; 2006 alu.dst.chan = i; 2007 if (i == 3) 2008 alu.last = 1; 2009 alu.dst.write = 1; 2010 r = r600_bc_add_alu(ctx->bc, &alu); 2011 if (r) 2012 return r; 2013 } 2014 2015 /* tmp1.z = 
RCP_e(|tmp1.z|) */ 2016 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2017 for (i = 0; i < 3; i++) { 2018 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2019 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2020 alu.src[0].sel = ctx->temp_reg; 2021 alu.src[0].chan = 2; 2022 alu.src[0].abs = 1; 2023 alu.dst.sel = ctx->temp_reg; 2024 alu.dst.chan = i; 2025 if (i == 2) 2026 alu.dst.write = 1; 2027 if (i == 2) 2028 alu.last = 1; 2029 r = r600_bc_add_alu(ctx->bc, &alu); 2030 if (r) 2031 return r; 2032 } 2033 } else { 2034 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2035 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2036 alu.src[0].sel = ctx->temp_reg; 2037 alu.src[0].chan = 2; 2038 alu.src[0].abs = 1; 2039 alu.dst.sel = ctx->temp_reg; 2040 alu.dst.chan = 2; 2041 alu.dst.write = 1; 2042 alu.last = 1; 2043 r = r600_bc_add_alu(ctx->bc, &alu); 2044 if (r) 2045 return r; 2046 } 2047 2048 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 2049 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 2050 * muladd has no writemask, have to use another temp 2051 */ 2052 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2053 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2054 alu.is_op3 = 1; 2055 2056 alu.src[0].sel = ctx->temp_reg; 2057 alu.src[0].chan = 0; 2058 alu.src[1].sel = ctx->temp_reg; 2059 alu.src[1].chan = 2; 2060 2061 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2062 alu.src[2].chan = 0; 2063 alu.src[2].value = *(uint32_t *)&one_point_five; 2064 2065 alu.dst.sel = ctx->temp_reg; 2066 alu.dst.chan = 0; 2067 alu.dst.write = 1; 2068 2069 r = r600_bc_add_alu(ctx->bc, &alu); 2070 if (r) 2071 return r; 2072 2073 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2074 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2075 alu.is_op3 = 1; 2076 2077 alu.src[0].sel = ctx->temp_reg; 2078 alu.src[0].chan = 1; 2079 alu.src[1].sel = ctx->temp_reg; 2080 alu.src[1].chan = 2; 2081 2082 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2083 alu.src[2].chan = 0; 2084 
alu.src[2].value = *(uint32_t *)&one_point_five; 2085 2086 alu.dst.sel = ctx->temp_reg; 2087 alu.dst.chan = 1; 2088 alu.dst.write = 1; 2089 2090 alu.last = 1; 2091 r = r600_bc_add_alu(ctx->bc, &alu); 2092 if (r) 2093 return r; 2094 2095 src_loaded = TRUE; 2096 src_gpr = ctx->temp_reg; 2097 } 2098 2099 if (src_requires_loading && !src_loaded) { 2100 for (i = 0; i < 4; i++) { 2101 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2103 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2104 alu.dst.sel = ctx->temp_reg; 2105 alu.dst.chan = i; 2106 if (i == 3) 2107 alu.last = 1; 2108 alu.dst.write = 1; 2109 r = r600_bc_add_alu(ctx->bc, &alu); 2110 if (r) 2111 return r; 2112 } 2113 src_loaded = TRUE; 2114 src_gpr = ctx->temp_reg; 2115 } 2116 2117 opcode = ctx->inst_info->r600_opcode; 2118 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) { 2119 switch (opcode) { 2120 case SQ_TEX_INST_SAMPLE: 2121 opcode = SQ_TEX_INST_SAMPLE_C; 2122 break; 2123 case SQ_TEX_INST_SAMPLE_L: 2124 opcode = SQ_TEX_INST_SAMPLE_C_L; 2125 break; 2126 case SQ_TEX_INST_SAMPLE_G: 2127 opcode = SQ_TEX_INST_SAMPLE_C_G; 2128 break; 2129 } 2130 } 2131 2132 memset(&tex, 0, sizeof(struct r600_bc_tex)); 2133 tex.inst = opcode; 2134 2135 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2136 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2137 tex.src_gpr = src_gpr; 2138 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2139 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2140 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2141 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2142 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 2143 if (src_loaded) { 2144 tex.src_sel_x = 0; 2145 tex.src_sel_y = 1; 2146 tex.src_sel_z = 2; 2147 tex.src_sel_w = 3; 2148 } else { 2149 tex.src_sel_x = ctx->src[0].swizzle[0]; 2150 tex.src_sel_y = ctx->src[0].swizzle[1]; 2151 tex.src_sel_z = ctx->src[0].swizzle[2]; 2152 tex.src_sel_w = ctx->src[0].swizzle[3]; 2153 tex.src_rel = ctx->src[0].rel; 2154 } 2155 2156 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2157 tex.src_sel_x = 1; 2158 tex.src_sel_y = 0; 2159 tex.src_sel_z = 3; 2160 tex.src_sel_w = 1; 2161 } 2162 2163 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2164 tex.coord_type_x = 1; 2165 tex.coord_type_y = 1; 2166 tex.coord_type_z = 1; 2167 tex.coord_type_w = 1; 2168 } 2169 2170 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 2171 tex.coord_type_z = 0; 2172 tex.src_sel_z = tex.src_sel_y; 2173 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 2174 tex.coord_type_z = 0; 2175 2176 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2177 tex.src_sel_w = tex.src_sel_z; 2178 2179 r = r600_bc_add_tex(ctx->bc, &tex); 2180 if (r) 2181 return r; 2182 2183 /* add shadow ambient support - gallium doesn't do it yet */ 2184 return 0; 2185} 2186 2187static int tgsi_lrp(struct r600_shader_ctx *ctx) 2188{ 2189 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2190 struct r600_bc_alu alu; 2191 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2192 unsigned i; 2193 int r; 2194 2195 /* optimize if it's just an equal balance */ 2196 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2197 for (i = 0; i < lasti + 1; i++) { 2198 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2199 continue; 2200 2201 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2202 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2203 r600_bc_src(&alu.src[0], &ctx->src[1], i); 2204 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2205 alu.omod = 3; 2206 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 2207 alu.dst.chan = i; 2208 if (i == lasti) { 2209 alu.last = 1; 2210 } 2211 r = r600_bc_add_alu(ctx->bc, &alu); 2212 if (r) 2213 return r; 2214 } 2215 return 0; 2216 } 2217 2218 /* 1 - src0 */ 2219 for (i = 0; i < lasti + 1; i++) { 2220 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2221 continue; 2222 2223 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2224 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2225 alu.src[0].sel = V_SQ_ALU_SRC_1; 2226 alu.src[0].chan = 0; 2227 r600_bc_src(&alu.src[1], &ctx->src[0], i); 2228 r600_bc_src_toggle_neg(&alu.src[1]); 2229 alu.dst.sel = ctx->temp_reg; 2230 alu.dst.chan = i; 2231 if (i == lasti) { 2232 alu.last = 1; 2233 } 2234 alu.dst.write = 1; 2235 r = r600_bc_add_alu(ctx->bc, &alu); 2236 if (r) 2237 return r; 2238 } 2239 2240 /* (1 - src0) * src2 */ 2241 for (i = 0; i < lasti + 1; i++) { 2242 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2243 continue; 2244 2245 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2246 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2247 alu.src[0].sel = ctx->temp_reg; 2248 alu.src[0].chan = i; 2249 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2250 alu.dst.sel = ctx->temp_reg; 2251 alu.dst.chan = i; 2252 if (i == lasti) { 2253 alu.last = 1; 2254 } 2255 alu.dst.write = 1; 2256 r = r600_bc_add_alu(ctx->bc, &alu); 2257 if (r) 2258 return r; 2259 } 2260 2261 /* src0 * src1 + (1 - src0) * src2 */ 2262 for (i = 0; i < lasti + 1; i++) { 2263 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2264 continue; 2265 2266 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2267 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2268 alu.is_op3 = 1; 2269 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2270 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2271 alu.src[2].sel = ctx->temp_reg; 2272 alu.src[2].chan = i; 2273 2274 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2275 alu.dst.chan = i; 2276 if (i == lasti) { 2277 alu.last = 1; 2278 } 2279 r = r600_bc_add_alu(ctx->bc, &alu); 2280 if 
(r) 2281 return r; 2282 } 2283 return 0; 2284} 2285 2286static int tgsi_cmp(struct r600_shader_ctx *ctx) 2287{ 2288 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2289 struct r600_bc_alu alu; 2290 int i, r; 2291 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2292 2293 for (i = 0; i < lasti + 1; i++) { 2294 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2295 continue; 2296 2297 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2299 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2300 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2301 r600_bc_src(&alu.src[2], &ctx->src[1], i); 2302 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2303 alu.dst.chan = i; 2304 alu.dst.write = 1; 2305 alu.is_op3 = 1; 2306 if (i == lasti) 2307 alu.last = 1; 2308 r = r600_bc_add_alu(ctx->bc, &alu); 2309 if (r) 2310 return r; 2311 } 2312 return 0; 2313} 2314 2315static int tgsi_xpd(struct r600_shader_ctx *ctx) 2316{ 2317 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2318 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2319 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2320 struct r600_bc_alu alu; 2321 uint32_t use_temp = 0; 2322 int i, r; 2323 2324 if (inst->Dst[0].Register.WriteMask != 0xf) 2325 use_temp = 1; 2326 2327 for (i = 0; i < 4; i++) { 2328 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2330 if (i < 3) { 2331 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2332 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2333 } else { 2334 alu.src[0].sel = V_SQ_ALU_SRC_0; 2335 alu.src[0].chan = i; 2336 alu.src[1].sel = V_SQ_ALU_SRC_0; 2337 alu.src[1].chan = i; 2338 } 2339 2340 alu.dst.sel = ctx->temp_reg; 2341 alu.dst.chan = i; 2342 alu.dst.write = 1; 2343 2344 if (i == 3) 2345 alu.last = 1; 2346 r = r600_bc_add_alu(ctx->bc, &alu); 2347 if (r) 2348 return r; 2349 } 2350 2351 
for (i = 0; i < 4; i++) { 2352 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2354 2355 if (i < 3) { 2356 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2357 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2358 } else { 2359 alu.src[0].sel = V_SQ_ALU_SRC_0; 2360 alu.src[0].chan = i; 2361 alu.src[1].sel = V_SQ_ALU_SRC_0; 2362 alu.src[1].chan = i; 2363 } 2364 2365 alu.src[2].sel = ctx->temp_reg; 2366 alu.src[2].neg = 1; 2367 alu.src[2].chan = i; 2368 2369 if (use_temp) 2370 alu.dst.sel = ctx->temp_reg; 2371 else 2372 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2373 alu.dst.chan = i; 2374 alu.dst.write = 1; 2375 alu.is_op3 = 1; 2376 if (i == 3) 2377 alu.last = 1; 2378 r = r600_bc_add_alu(ctx->bc, &alu); 2379 if (r) 2380 return r; 2381 } 2382 if (use_temp) 2383 return tgsi_helper_copy(ctx, inst); 2384 return 0; 2385} 2386 2387static int tgsi_exp(struct r600_shader_ctx *ctx) 2388{ 2389 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2390 struct r600_bc_alu alu; 2391 int r; 2392 int i; 2393 2394 /* result.x = 2^floor(src); */ 2395 if (inst->Dst[0].Register.WriteMask & 1) { 2396 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2397 2398 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2399 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2400 2401 alu.dst.sel = ctx->temp_reg; 2402 alu.dst.chan = 0; 2403 alu.dst.write = 1; 2404 alu.last = 1; 2405 r = r600_bc_add_alu(ctx->bc, &alu); 2406 if (r) 2407 return r; 2408 2409 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2410 for (i = 0; i < 3; i++) { 2411 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2412 alu.src[0].sel = ctx->temp_reg; 2413 alu.src[0].chan = 0; 2414 2415 alu.dst.sel = ctx->temp_reg; 2416 alu.dst.chan = i; 2417 if (i == 0) 2418 alu.dst.write = 1; 2419 if (i == 2) 2420 alu.last = 1; 2421 r = r600_bc_add_alu(ctx->bc, &alu); 2422 if (r) 2423 return r; 2424 } 2425 } else { 2426 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2427 alu.src[0].sel = ctx->temp_reg; 2428 alu.src[0].chan = 0; 2429 2430 alu.dst.sel = ctx->temp_reg; 2431 alu.dst.chan = 0; 2432 alu.dst.write = 1; 2433 alu.last = 1; 2434 r = r600_bc_add_alu(ctx->bc, &alu); 2435 if (r) 2436 return r; 2437 } 2438 } 2439 2440 /* result.y = tmp - floor(tmp); */ 2441 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2442 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2443 2444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2445 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2446 2447 alu.dst.sel = ctx->temp_reg; 2448#if 0 2449 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2450 if (r) 2451 return r; 2452#endif 2453 alu.dst.write = 1; 2454 alu.dst.chan = 1; 2455 2456 alu.last = 1; 2457 2458 r = r600_bc_add_alu(ctx->bc, &alu); 2459 if (r) 2460 return r; 2461 } 2462 2463 /* result.z = RoughApprox2ToX(tmp);*/ 2464 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2465 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2466 for (i = 0; i < 3; i++) { 2467 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2468 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2469 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2470 2471 alu.dst.sel = ctx->temp_reg; 2472 alu.dst.chan = i; 2473 if (i == 2) { 2474 alu.dst.write = 1; 2475 alu.last = 1; 2476 } 2477 2478 r = r600_bc_add_alu(ctx->bc, &alu); 2479 if (r) 2480 return r; 2481 } 2482 } else { 2483 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2484 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2485 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2486 2487 alu.dst.sel = ctx->temp_reg; 2488 alu.dst.write = 1; 2489 alu.dst.chan = 2; 2490 2491 alu.last = 1; 2492 2493 r = r600_bc_add_alu(ctx->bc, &alu); 2494 if (r) 2495 return r; 2496 } 2497 } 2498 2499 /* result.w = 1.0;*/ 2500 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2501 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2502 2503 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 
2504 alu.src[0].sel = V_SQ_ALU_SRC_1; 2505 alu.src[0].chan = 0; 2506 2507 alu.dst.sel = ctx->temp_reg; 2508 alu.dst.chan = 3; 2509 alu.dst.write = 1; 2510 alu.last = 1; 2511 r = r600_bc_add_alu(ctx->bc, &alu); 2512 if (r) 2513 return r; 2514 } 2515 return tgsi_helper_copy(ctx, inst); 2516} 2517 2518static int tgsi_log(struct r600_shader_ctx *ctx) 2519{ 2520 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2521 struct r600_bc_alu alu; 2522 int r; 2523 int i; 2524 2525 /* result.x = floor(log2(|src|)); */ 2526 if (inst->Dst[0].Register.WriteMask & 1) { 2527 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2528 for (i = 0; i < 3; i++) { 2529 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2530 2531 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2532 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2533 r600_bc_src_set_abs(&alu.src[0]); 2534 2535 alu.dst.sel = ctx->temp_reg; 2536 alu.dst.chan = i; 2537 if (i == 0) 2538 alu.dst.write = 1; 2539 if (i == 2) 2540 alu.last = 1; 2541 r = r600_bc_add_alu(ctx->bc, &alu); 2542 if (r) 2543 return r; 2544 } 2545 2546 } else { 2547 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2548 2549 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2550 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2551 r600_bc_src_set_abs(&alu.src[0]); 2552 2553 alu.dst.sel = ctx->temp_reg; 2554 alu.dst.chan = 0; 2555 alu.dst.write = 1; 2556 alu.last = 1; 2557 r = r600_bc_add_alu(ctx->bc, &alu); 2558 if (r) 2559 return r; 2560 } 2561 2562 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2563 alu.src[0].sel = ctx->temp_reg; 2564 alu.src[0].chan = 0; 2565 2566 alu.dst.sel = ctx->temp_reg; 2567 alu.dst.chan = 0; 2568 alu.dst.write = 1; 2569 alu.last = 1; 2570 2571 r = r600_bc_add_alu(ctx->bc, &alu); 2572 if (r) 2573 return r; 2574 } 2575 2576 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 2577 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2578 2579 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2580 for (i = 0; 
i < 3; i++) { 2581 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2582 2583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2584 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2585 r600_bc_src_set_abs(&alu.src[0]); 2586 2587 alu.dst.sel = ctx->temp_reg; 2588 alu.dst.chan = i; 2589 if (i == 1) 2590 alu.dst.write = 1; 2591 if (i == 2) 2592 alu.last = 1; 2593 2594 r = r600_bc_add_alu(ctx->bc, &alu); 2595 if (r) 2596 return r; 2597 } 2598 } else { 2599 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2600 2601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2602 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2603 r600_bc_src_set_abs(&alu.src[0]); 2604 2605 alu.dst.sel = ctx->temp_reg; 2606 alu.dst.chan = 1; 2607 alu.dst.write = 1; 2608 alu.last = 1; 2609 2610 r = r600_bc_add_alu(ctx->bc, &alu); 2611 if (r) 2612 return r; 2613 } 2614 2615 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2616 2617 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2618 alu.src[0].sel = ctx->temp_reg; 2619 alu.src[0].chan = 1; 2620 2621 alu.dst.sel = ctx->temp_reg; 2622 alu.dst.chan = 1; 2623 alu.dst.write = 1; 2624 alu.last = 1; 2625 2626 r = r600_bc_add_alu(ctx->bc, &alu); 2627 if (r) 2628 return r; 2629 2630 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2631 for (i = 0; i < 3; i++) { 2632 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2633 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2634 alu.src[0].sel = ctx->temp_reg; 2635 alu.src[0].chan = 1; 2636 2637 alu.dst.sel = ctx->temp_reg; 2638 alu.dst.chan = i; 2639 if (i == 1) 2640 alu.dst.write = 1; 2641 if (i == 2) 2642 alu.last = 1; 2643 2644 r = r600_bc_add_alu(ctx->bc, &alu); 2645 if (r) 2646 return r; 2647 } 2648 } else { 2649 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2650 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2651 alu.src[0].sel = ctx->temp_reg; 2652 alu.src[0].chan = 1; 2653 2654 alu.dst.sel = ctx->temp_reg; 2655 alu.dst.chan = 1; 2656 alu.dst.write = 1; 2657 alu.last = 1; 
2658 2659 r = r600_bc_add_alu(ctx->bc, &alu); 2660 if (r) 2661 return r; 2662 } 2663 2664 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2665 for (i = 0; i < 3; i++) { 2666 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2667 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2668 alu.src[0].sel = ctx->temp_reg; 2669 alu.src[0].chan = 1; 2670 2671 alu.dst.sel = ctx->temp_reg; 2672 alu.dst.chan = i; 2673 if (i == 1) 2674 alu.dst.write = 1; 2675 if (i == 2) 2676 alu.last = 1; 2677 2678 r = r600_bc_add_alu(ctx->bc, &alu); 2679 if (r) 2680 return r; 2681 } 2682 } else { 2683 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2684 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2685 alu.src[0].sel = ctx->temp_reg; 2686 alu.src[0].chan = 1; 2687 2688 alu.dst.sel = ctx->temp_reg; 2689 alu.dst.chan = 1; 2690 alu.dst.write = 1; 2691 alu.last = 1; 2692 2693 r = r600_bc_add_alu(ctx->bc, &alu); 2694 if (r) 2695 return r; 2696 } 2697 2698 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2699 2700 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2701 2702 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2703 r600_bc_src_set_abs(&alu.src[0]); 2704 2705 alu.src[1].sel = ctx->temp_reg; 2706 alu.src[1].chan = 1; 2707 2708 alu.dst.sel = ctx->temp_reg; 2709 alu.dst.chan = 1; 2710 alu.dst.write = 1; 2711 alu.last = 1; 2712 2713 r = r600_bc_add_alu(ctx->bc, &alu); 2714 if (r) 2715 return r; 2716 } 2717 2718 /* result.z = log2(|src|);*/ 2719 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2720 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2721 for (i = 0; i < 3; i++) { 2722 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2723 2724 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2725 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2726 r600_bc_src_set_abs(&alu.src[0]); 2727 2728 alu.dst.sel = ctx->temp_reg; 2729 if (i == 2) 2730 alu.dst.write = 1; 2731 alu.dst.chan = i; 2732 if (i == 2) 2733 alu.last = 1; 2734 2735 r = r600_bc_add_alu(ctx->bc, &alu); 2736 if (r) 2737 
return r; 2738 } 2739 } else { 2740 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2741 2742 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2743 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2744 r600_bc_src_set_abs(&alu.src[0]); 2745 2746 alu.dst.sel = ctx->temp_reg; 2747 alu.dst.write = 1; 2748 alu.dst.chan = 2; 2749 alu.last = 1; 2750 2751 r = r600_bc_add_alu(ctx->bc, &alu); 2752 if (r) 2753 return r; 2754 } 2755 } 2756 2757 /* result.w = 1.0; */ 2758 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2759 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2760 2761 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2762 alu.src[0].sel = V_SQ_ALU_SRC_1; 2763 alu.src[0].chan = 0; 2764 2765 alu.dst.sel = ctx->temp_reg; 2766 alu.dst.chan = 3; 2767 alu.dst.write = 1; 2768 alu.last = 1; 2769 2770 r = r600_bc_add_alu(ctx->bc, &alu); 2771 if (r) 2772 return r; 2773 } 2774 2775 return tgsi_helper_copy(ctx, inst); 2776} 2777 2778static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2779{ 2780 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2781 struct r600_bc_alu alu; 2782 int r; 2783 2784 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2785 2786 switch (inst->Instruction.Opcode) { 2787 case TGSI_OPCODE_ARL: 2788 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2789 break; 2790 case TGSI_OPCODE_ARR: 2791 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2792 break; 2793 default: 2794 assert(0); 2795 return -1; 2796 } 2797 2798 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2799 alu.last = 1; 2800 alu.dst.sel = ctx->ar_reg; 2801 alu.dst.write = 1; 2802 r = r600_bc_add_alu(ctx->bc, &alu); 2803 if (r) 2804 return r; 2805 2806 /* TODO: Note that the MOVA can be avoided if we never use AR for 2807 * indexing non-CB registers in the current ALU clause. Similarly, we 2808 * need to load AR from ar_reg again if we started a new clause 2809 * between ARL and AR usage. 
The easy way to do that is to remove 2810 * the MOVA here, and load it for the first AR access after ar_reg 2811 * has been modified in each clause. */ 2812 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2813 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2814 alu.src[0].sel = ctx->ar_reg; 2815 alu.src[0].chan = 0; 2816 alu.last = 1; 2817 r = r600_bc_add_alu(ctx->bc, &alu); 2818 if (r) 2819 return r; 2820 return 0; 2821} 2822static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2823{ 2824 /* TODO from r600c, ar values don't persist between clauses */ 2825 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2826 struct r600_bc_alu alu; 2827 int r; 2828 2829 switch (inst->Instruction.Opcode) { 2830 case TGSI_OPCODE_ARL: 2831 memset(&alu, 0, sizeof(alu)); 2832 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2833 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2834 alu.dst.sel = ctx->ar_reg; 2835 alu.dst.write = 1; 2836 alu.last = 1; 2837 2838 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2839 return r; 2840 2841 memset(&alu, 0, sizeof(alu)); 2842 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2843 alu.src[0].sel = ctx->ar_reg; 2844 alu.dst.sel = ctx->ar_reg; 2845 alu.dst.write = 1; 2846 alu.last = 1; 2847 2848 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2849 return r; 2850 break; 2851 case TGSI_OPCODE_ARR: 2852 memset(&alu, 0, sizeof(alu)); 2853 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2854 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2855 alu.dst.sel = ctx->ar_reg; 2856 alu.dst.write = 1; 2857 alu.last = 1; 2858 2859 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2860 return r; 2861 break; 2862 default: 2863 assert(0); 2864 return -1; 2865 } 2866 2867 memset(&alu, 0, sizeof(alu)); 2868 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2869 alu.src[0].sel = ctx->ar_reg; 2870 alu.last = 1; 2871 2872 r = r600_bc_add_alu(ctx->bc, &alu); 2873 if (r) 2874 return r; 2875 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2876 return 0; 2877} 2878 
2879static int tgsi_opdst(struct r600_shader_ctx *ctx) 2880{ 2881 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2882 struct r600_bc_alu alu; 2883 int i, r = 0; 2884 2885 for (i = 0; i < 4; i++) { 2886 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2887 2888 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2889 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2890 2891 if (i == 0 || i == 3) { 2892 alu.src[0].sel = V_SQ_ALU_SRC_1; 2893 } else { 2894 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2895 } 2896 2897 if (i == 0 || i == 2) { 2898 alu.src[1].sel = V_SQ_ALU_SRC_1; 2899 } else { 2900 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2901 } 2902 if (i == 3) 2903 alu.last = 1; 2904 r = r600_bc_add_alu(ctx->bc, &alu); 2905 if (r) 2906 return r; 2907 } 2908 return 0; 2909} 2910 2911static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2912{ 2913 struct r600_bc_alu alu; 2914 int r; 2915 2916 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2917 alu.inst = opcode; 2918 alu.predicate = 1; 2919 2920 alu.dst.sel = ctx->temp_reg; 2921 alu.dst.write = 1; 2922 alu.dst.chan = 0; 2923 2924 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2925 alu.src[1].sel = V_SQ_ALU_SRC_0; 2926 alu.src[1].chan = 0; 2927 2928 alu.last = 1; 2929 2930 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2931 if (r) 2932 return r; 2933 return 0; 2934} 2935 2936static int pops(struct r600_shader_ctx *ctx, int pops) 2937{ 2938 unsigned force_pop = ctx->bc->force_add_cf; 2939 2940 if (!force_pop) { 2941 int alu_pop = 3; 2942 if (ctx->bc->cf_last) { 2943 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2944 alu_pop = 0; 2945 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2946 alu_pop = 1; 2947 } 2948 alu_pop += pops; 2949 if (alu_pop == 1) { 2950 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2951 
ctx->bc->force_add_cf = 1; 2952 } else if (alu_pop == 2) { 2953 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2954 ctx->bc->force_add_cf = 1; 2955 } else { 2956 force_pop = 1; 2957 } 2958 } 2959 2960 if (force_pop) { 2961 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2962 ctx->bc->cf_last->pop_count = pops; 2963 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2964 } 2965 2966 return 0; 2967} 2968 2969static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2970{ 2971 switch(reason) { 2972 case FC_PUSH_VPM: 2973 ctx->bc->callstack[ctx->bc->call_sp].current--; 2974 break; 2975 case FC_PUSH_WQM: 2976 case FC_LOOP: 2977 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2978 break; 2979 case FC_REP: 2980 /* TOODO : for 16 vp asic should -= 2; */ 2981 ctx->bc->callstack[ctx->bc->call_sp].current --; 2982 break; 2983 } 2984} 2985 2986static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2987{ 2988 if (check_max_only) { 2989 int diff; 2990 switch (reason) { 2991 case FC_PUSH_VPM: 2992 diff = 1; 2993 break; 2994 case FC_PUSH_WQM: 2995 diff = 4; 2996 break; 2997 default: 2998 assert(0); 2999 diff = 0; 3000 } 3001 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 3002 ctx->bc->callstack[ctx->bc->call_sp].max) { 3003 ctx->bc->callstack[ctx->bc->call_sp].max = 3004 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 3005 } 3006 return; 3007 } 3008 switch (reason) { 3009 case FC_PUSH_VPM: 3010 ctx->bc->callstack[ctx->bc->call_sp].current++; 3011 break; 3012 case FC_PUSH_WQM: 3013 case FC_LOOP: 3014 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 3015 break; 3016 case FC_REP: 3017 ctx->bc->callstack[ctx->bc->call_sp].current++; 3018 break; 3019 } 3020 3021 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 3022 ctx->bc->callstack[ctx->bc->call_sp].max) { 3023 ctx->bc->callstack[ctx->bc->call_sp].max = 
3024 ctx->bc->callstack[ctx->bc->call_sp].current; 3025 } 3026} 3027 3028static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 3029{ 3030 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 3031 3032 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 3033 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 3034 sp->mid[sp->num_mid] = ctx->bc->cf_last; 3035 sp->num_mid++; 3036} 3037 3038static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 3039{ 3040 ctx->bc->fc_sp++; 3041 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 3042 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 3043} 3044 3045static void fc_poplevel(struct r600_shader_ctx *ctx) 3046{ 3047 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 3048 if (sp->mid) { 3049 free(sp->mid); 3050 sp->mid = NULL; 3051 } 3052 sp->num_mid = 0; 3053 sp->start = NULL; 3054 sp->type = 0; 3055 ctx->bc->fc_sp--; 3056} 3057 3058#if 0 3059static int emit_return(struct r600_shader_ctx *ctx) 3060{ 3061 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 3062 return 0; 3063} 3064 3065static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 3066{ 3067 3068 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 3069 ctx->bc->cf_last->pop_count = pops; 3070 /* TODO work out offset */ 3071 return 0; 3072} 3073 3074static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 3075{ 3076 return 0; 3077} 3078 3079static void emit_testflag(struct r600_shader_ctx *ctx) 3080{ 3081 3082} 3083 3084static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 3085{ 3086 emit_testflag(ctx); 3087 emit_jump_to_offset(ctx, 1, 4); 3088 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 3089 pops(ctx, ifidx + 1); 3090 emit_return(ctx); 3091} 3092 3093static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 3094{ 3095 emit_testflag(ctx); 3096 3097 r600_bc_add_cfinst(ctx->bc, 
ctx->inst_info->r600_opcode); 3098 ctx->bc->cf_last->pop_count = 1; 3099 3100 fc_set_mid(ctx, fc_sp); 3101 3102 pops(ctx, 1); 3103} 3104#endif 3105 3106static int tgsi_if(struct r600_shader_ctx *ctx) 3107{ 3108 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 3109 3110 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3111 3112 fc_pushlevel(ctx, FC_IF); 3113 3114 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3115 return 0; 3116} 3117 3118static int tgsi_else(struct r600_shader_ctx *ctx) 3119{ 3120 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3121 ctx->bc->cf_last->pop_count = 1; 3122 3123 fc_set_mid(ctx, ctx->bc->fc_sp); 3124 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3125 return 0; 3126} 3127 3128static int tgsi_endif(struct r600_shader_ctx *ctx) 3129{ 3130 pops(ctx, 1); 3131 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3132 R600_ERR("if/endif unbalanced in shader\n"); 3133 return -1; 3134 } 3135 3136 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3137 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3138 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3139 } else { 3140 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3141 } 3142 fc_poplevel(ctx); 3143 3144 callstack_decrease_current(ctx, FC_PUSH_VPM); 3145 return 0; 3146} 3147 3148static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3149{ 3150 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3151 3152 fc_pushlevel(ctx, FC_LOOP); 3153 3154 /* check stack depth */ 3155 callstack_check_depth(ctx, FC_LOOP, 0); 3156 return 0; 3157} 3158 3159static int tgsi_endloop(struct r600_shader_ctx *ctx) 3160{ 3161 int i; 3162 3163 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3164 3165 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3166 R600_ERR("loop/endloop in shader code are not 
paired.\n"); 3167 return -EINVAL; 3168 } 3169 3170 /* fixup loop pointers - from r600isa 3171 LOOP END points to CF after LOOP START, 3172 LOOP START point to CF after LOOP END 3173 BRK/CONT point to LOOP END CF 3174 */ 3175 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3176 3177 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3178 3179 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3180 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3181 } 3182 /* TODO add LOOPRET support */ 3183 fc_poplevel(ctx); 3184 callstack_decrease_current(ctx, FC_LOOP); 3185 return 0; 3186} 3187 3188static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3189{ 3190 unsigned int fscp; 3191 3192 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3193 { 3194 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3195 break; 3196 } 3197 3198 if (fscp == 0) { 3199 R600_ERR("Break not inside loop/endloop pair\n"); 3200 return -EINVAL; 3201 } 3202 3203 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3204 ctx->bc->cf_last->pop_count = 1; 3205 3206 fc_set_mid(ctx, fscp); 3207 3208 pops(ctx, 1); 3209 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3210 return 0; 3211} 3212 3213static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3214 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3215 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3216 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3217 3218 /* FIXME: 3219 * For state trackers other than OpenGL, we'll want to use 3220 * _RECIP_IEEE instead. 
3221 */ 3222 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3223 3224 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3225 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3226 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3227 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3228 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3229 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3230 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3231 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3232 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3233 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3234 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3235 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3236 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3237 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3238 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3239 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 /* gap */ 3241 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3242 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 /* gap */ 3244 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3247 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3249 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3250 {TGSI_OPCODE_EX2, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3251 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3252 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3253 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3254 /* gap */ 3255 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3257 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3259 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3260 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3261 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3262 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3263 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3266 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3267 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3268 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3269 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3271 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3272 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3273 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3274 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3275 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3276 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3277 
{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3278 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3285 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3289 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3290 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3291 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3292 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3293 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3294 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3295 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3296 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3297 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3298 /* gap */ 3299 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3300 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3302 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3303 /* gap */ 3304 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3305 {80, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3306 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3307 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3312 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 /* gap */ 3314 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3317 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3318 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3321 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3322 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3323 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3324 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3325 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3326 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3327 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3328 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3329 /* gap */ 3330 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3331 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3332 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3333 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3334 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3335 /* gap */ 3336 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3337 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3338 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3339 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3340 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3341 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3342 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3343 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3344 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3345 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3346 /* gap */ 3347 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3348 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3349 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3350 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3351 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3352 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3353 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3354 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3355 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3356 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3357 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3358 {TGSI_OPCODE_UADD, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3359 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3360 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3361 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3362 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3363 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3364 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3365 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3366 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3367 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3368 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3369 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3370 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3371 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3372 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3373 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3374 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3375}; 3376 /* TGSI translation table for the eg code path (presumably Evergreen - confirm). Each initializer lists four values in order: the TGSI opcode, or a bare number where a gap comment marks an unassigned opcode slot; a flag that is 1 only for MAD, the single entry whose instruction constant is an OP3 one; the instruction constant (EG_-prefixed ALU or CF constants, or SQ_TEX_INST_* for texture opcodes); and the handler function. Entries whose handler is tgsi_unsupported have no translation in this table. NOTE(review): the numeric placeholders keep every opcode slot filled in ascending order, so the table is presumably indexed directly by opcode - confirm against the lookup code, which is outside this chunk. */ 3377static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3378 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3379 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3380 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3381 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3382 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 3383 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3384 {TGSI_OPCODE_LOG, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3385 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3386 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3387 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3388 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3389 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3390 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3391 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3392 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3393 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3394 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3395 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3396 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3397 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3398 /* gap */ 3399 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3400 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3401 /* gap */ 3402 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3403 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3404 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3405 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3406 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3407 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3408 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3409 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3410 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3411 
{TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3412 /* gap */ 3413 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3414 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3415 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3416 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3417 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3418 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3419 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3420 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3421 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3422 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3423 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3424 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3425 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3426 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3427 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3428 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3429 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3430 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3431 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3432 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3433 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3434 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3435 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3436 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3437 {TGSI_OPCODE_UP2US, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3438 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3439 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3440 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3441 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3442 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3443 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3444 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3445 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3446 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3447 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3448 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3449 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3450 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3451 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3452 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3453 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3454 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3455 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3456 /* gap */ 3457 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3458 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3459 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3460 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3461 /* gap */ 3462 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3463 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3464 {TGSI_OPCODE_PUSHA, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3465 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3466 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3467 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3468 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3469 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3470 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3471 /* gap */ 3472 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3473 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3474 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3475 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3476 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3477 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3478 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3479 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3480 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3481 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3482 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3483 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3484 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3485 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3486 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3487 /* gap */ 3488 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3489 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 
3490 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3491 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3492 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3493 /* gap */ 3494 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3495 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3496 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3497 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3498 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3499 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3500 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3501 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3502 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3503 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3504 /* gap */ 3505 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3506 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3507 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3508 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3509 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3510 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3511 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3512 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3513 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3514 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3515 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3516 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3517 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3518 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3519 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3520 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3521 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3522 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3523 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3524 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3525 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3526 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3527 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3528 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3529 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3530 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3531 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3532 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3533}; 3534 /* TGSI translation table for the cm code path (presumably Cayman, going by the CAYMAN notes at the top of the file - confirm). Entries have the same four-value layout as eg_shader_tgsi_instruction (TGSI opcode or gap placeholder, flag set to 1 only for MAD, instruction constant, handler function) and reuse the EG_-prefixed and SQ_TEX_INST_* instruction constants. The handlers that differ from the eg table are RCP, RSQ, EX2 and LG2 (cayman_emit_float_instr), POW (cayman_pow) and COS/SIN (cayman_trig). Entries whose handler is tgsi_unsupported have no translation in this table. NOTE(review): the table is presumably indexed directly by opcode, so entry order must be preserved - confirm against the lookup code, which is outside this chunk. */ 3535static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 3536 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3537 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3538 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3539 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 3540 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 
3541 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3542 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3543 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3544 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3545 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3546 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3547 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3548 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3549 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3550 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3551 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3552 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3553 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3554 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3555 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3556 /* gap */ 3557 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3558 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3559 /* gap */ 3560 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3561 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3562 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3563 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3564 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3565 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3566 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 3567 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 
cayman_emit_float_instr}, 3568 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 3569 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3570 /* gap */ 3571 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3572 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3573 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3574 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3575 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 3576 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3577 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3578 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3579 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3580 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3581 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3582 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3583 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3584 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3585 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3586 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3587 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 3588 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3589 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3590 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3591 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3592 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3593 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3594 
{TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3595 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3596 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3597 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3598 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3599 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3600 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3601 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3602 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3603 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3604 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3605 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3606 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3607 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3608 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3609 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3610 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3611 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3612 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3613 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3614 /* gap */ 3615 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3616 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3617 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3618 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3619 /* gap */ 3620 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3621 {80, 
0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3622 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3623 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3624 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3625 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3626 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3627 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3628 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3629 /* gap */ 3630 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3631 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3632 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3633 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3634 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3635 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3636 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3637 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3638 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3639 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3640 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3641 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3642 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3643 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3644 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3645 /* gap */ 3646 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3647 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3648 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3649 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3650 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3651 /* gap */ 3652 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3653 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3654 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3655 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3656 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3657 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3658 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3659 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3660 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3661 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3662 /* gap */ 3663 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3664 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3665 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3666 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3667 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3668 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3669 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3670 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3671 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3672 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3673 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3674 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3675 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3676 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3677 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3678 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3679 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3680 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3681 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3682 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3683 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3684 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3685 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3686 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3687 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3688 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3689 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3690 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3691}; 3692