1/* 2 * Copyright (c) 2012-2015 Etnaviv Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Wladimir J. van der Laan <laanwj@gmail.com> 25 */ 26 27/* TGSI->Vivante shader ISA conversion */ 28 29/* What does the compiler return (see etna_shader_object)? 30 * 1) instruction data 31 * 2) input-to-temporary mapping (fixed for ps) 32 * *) in case of ps, semantic -> varying id mapping 33 * *) for each varying: number of components used (r, rg, rgb, rgba) 34 * 3) temporary-to-output mapping (in case of vs, fixed for ps) 35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...) 
 * 5) immediates base offset, immediates data
 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
 *    configure the hw, but useful for error checking
 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
 *    (output reg id is enough)
 *
 * Empty shaders are not allowed, should always at least generate a NOP. Also
 * if there is a label at the end of the shader, an extra NOP should be
 * generated as jump target.
 *
 * TODO
 * * Use an instruction scheduler
 * * Indirect access to uniforms / temporaries using amode
 */

#include "etnaviv_compiler.h"

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_disasm.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_iterate.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Maximum number of scratch temporaries used inside the translation of a
 * single TGSI instruction (see etna_compile_get_inner_temp). */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant tables used when lowering SIN/COS (two vec4 immediates). */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};

/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;  /* 0 means "not yet assigned" */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group (INST_RGROUP_*) */
   unsigned id : 9;     /* register index within the group */
};

/* Register description: per-TGSI-register bookkeeping built up by the
 * usage-analysis pass, later consumed by register allocation. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx;                  /* index into file */
   bool active;              /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use;  /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4;       /* usage, per channel */
   bool has_semantic;             /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp;     /* Interpolation type */
};

/* Label information structure */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};

/* nesting scope frame (LOOP, IF, ...) during compilation */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;     /* IF frames: label of ELSE branch target */
   int lbl_endif_idx;    /* IF frames: label of ENDIF target (-1 if no ELSE seen) */
   int lbl_loop_bgn_idx; /* LOOP frames: label at loop start */
   int lbl_loop_end_idx; /* LOOP frames: label after loop end */
};

struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};

/* Append val to a growable array; arr must have sibling variables
 * arr_count and arr_sz. Grows geometrically via realloc. */
#define array_insert(arr, val)                            \
   do {                                                   \
      if (arr##_count == arr##_sz) {                      \
         arr##_sz = MAX2(2 * arr##_sz, 16);               \
         arr = realloc(arr, arr##_sz * sizeof(arr[0]));   \
      }                                                   \
      arr[arr##_count++] = val;                           \
   } while (0)


/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens;

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations; file[].reg points into this */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data (uniform pool appended after user uniforms) */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   /* per instruction: index of label referenced by that instruction's branch */
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};

/* Look up the register description for a TGSI destination operand. */
static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
{
   return &c->file[dst.File].reg[dst.Index];
}

/* Look up the register description for a TGSI source operand. */
static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
{
   return &c->file[src.File].reg[src.Index];
}

/* Build a valid native-register handle for temporary register `reg`. */
static struct etna_native_reg
etna_native_temp(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup = INST_RGROUP_TEMP,
      .id = reg
   };
}

/** Register allocation **/
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};

/*
 Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register being sorted */
   int key;                   /* sort key derived from first/last use */
};

/* qsort comparator: ascending order by key. */
static int
sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
{
   if (a->key < b->key)
      return -1;

   if (a->key > b->key)
      return 1;

   return 0;
}

/* create an index on a register set based on certain criteria. */
static int
sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
               enum reg_sort_order so)
{
   struct etna_reg_desc *regs = file->reg;
   int ptr = 0;

   /* pre-populate keys from active registers */
   for (int idx = 0; idx < file->reg_size; ++idx) {
      /* only interested in active registers now; will only assign inactive ones
       * if no space in active ones */
      if (regs[idx].active) {
         sorted[ptr].ptr = &regs[idx];

         switch (so) {
         case FIRST_USE_ASC:
            sorted[ptr].key = regs[idx].first_use;
            break;
         case LAST_USE_ASC:
            sorted[ptr].key = regs[idx].last_use;
            break;
         case FIRST_USE_DESC:
            /* negate key so that ascending qsort yields descending order */
            sorted[ptr].key = -regs[idx].first_use;
            break;
         case LAST_USE_DESC:
            sorted[ptr].key = -regs[idx].last_use;
            break;
         }
         ptr++;
      }
   }

   /* sort index by key */
   qsort(sorted, ptr, sizeof(struct sort_rec),
         (int (*)(const void *, const void *))sort_rec_compar);

   /* returns the number of active registers placed in `sorted` */
   return ptr;
}

/* Allocate a new, unused, native temp register */
static struct etna_native_reg
alloc_new_native_reg(struct etna_compile *c)
{
   assert(c->next_free_native < ETNA_MAX_TEMPS);
   return etna_native_temp(c->next_free_native++);
}

/* assign TEMPs to native registers */
static void
assign_temporaries_to_native(struct etna_compile *c,
                             struct etna_compile_file *file)
{
   struct etna_reg_desc *temps = file->reg;

   /* every TGSI temporary gets its own fresh native register */
   for (int idx = 0; idx < file->reg_size; ++idx)
      temps[idx].native = alloc_new_native_reg(c);
}

/* assign inputs and outputs to temporaries
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   /* inputs: match inputs that die early with temps that start late;
    * outputs: match outputs that start late with temps that die early */
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   /* two-pointer merge over the two sorted live-range lists */
   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}

/* Allocate an immediate with a certain value and return the index. If
 * there is already an immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
          uint32_t value)
{
   int idx;

   /* Could use a hash table to speed this up */
   for (idx = 0; idx < c->imm_size; ++idx) {
      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
         break;
   }

   /* look if there is an unused slot */
   if (idx == c->imm_size) {
      for (idx = 0; idx < c->imm_size; ++idx) {
         if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
            break;
      }
   }

   /* allocate new immediate */
   if (idx == c->imm_size) {
      assert(c->imm_size < ETNA_MAX_IMM);
      idx = c->imm_size++;
      c->imm_data[idx] = value;
      c->imm_contents[idx] = contents;
   }

   /* swizzle so that component with value is returned in all components */
   idx += c->imm_base;
   struct etna_inst_src imm_src = {
      .use = 1,
      .rgroup = INST_RGROUP_UNIFORM_0,
      .reg = idx / 4, /* uniform register holding the component */
      .swiz = INST_SWIZ_BROADCAST(idx & 3)
   };

   return imm_src;
}

/* Allocate (or find) a scalar u32 immediate; see alloc_imm. */
static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
}

/* Allocate (or find) a full vec4 of u32 immediates, aligned to a uniform
 * register boundary, and return a source operand with identity swizzle. */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* scan existing pool for a matching, aligned vec4 */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* not found: append a new aligned vec4 */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}

/* Read back the immediate value selected by component swiz_idx of an
 * immediate source operand previously returned by alloc_imm*. */
static uint32_t
get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
            unsigned swiz_idx)
{
   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);

   return c->imm_data[idx];
}

/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}

/* Allocate (or find) a vec4 of float immediates; see alloc_imm_vec4u. */
static struct etna_inst_src
etna_imm_vec4f(struct etna_compile *c, const float *vec4)
{
   uint32_t val[4];

   for (int i = 0; i < 4; i++)
      val[i] = fui(vec4[i]);

   return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
}

/* Pass -- check register file declarations and immediates */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* Allocate register declarations for the registers in all register files */
static void
etna_allocate_decls(struct etna_compile *c)
{
   uint idx = 0;

   /* carve up the flat decl[] array into per-file slices */
   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
      c->file[x].reg = &c->decl[idx];
      c->file[x].reg_size = c->info.file_max[x] + 1;

      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
         c->decl[idx].file = x;
         c->decl[idx].idx = sub;
         idx++;
      }
   }

   c->total_decls = idx;
}

/* Pass -- check and record usage of temporaries, inputs, outputs */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* reset all liveness info before the scan */
   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* find out max register #s used
       * For every register mark first and last instruction index where it's
       * used; this allows finding ranges where the temporary can be borrowed
       * as input and/or output register
       *
       * XXX in the case of loops this needs special care, or even be
       * completely disabled, as the last usage of a register inside a loop
       * means it can still be used on the next loop iteration (execution is
       * no longer chronological). The register can only be declared "free"
       * after the loop finishes.
       *
       * Same for inputs: the first usage of a register inside a loop doesn't
       * mean that the register won't have been overwritten in a previous
       * iteration. The register can only be declared free before the loop
       * starts.
       * The proper way would be to do full dominator / post-dominator
       * analysis (especially with more complicated control flow such as
       * direct branch instructions) but not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register, this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* assign inputs that need to be assigned to specific registers */
static void
assign_special_inputs(struct etna_compile *c)
{
   if (c->info.processor ==
PIPE_SHADER_FRAGMENT) {
      /* never assign t0 as it is the position output, start assigning at t1 */
      c->next_free_native = 1;

      /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
      for (int idx = 0; idx < c->total_decls; ++idx) {
         struct etna_reg_desc *reg = &c->decl[idx];

         if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
            reg->native = etna_native_temp(0);
      }
   }
}

/* Check that a move instruction does not swizzle any of the components
 * that it writes.
 */
static bool
etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
                          const struct tgsi_src_register src)
{
   return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
}

/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end if their shaders
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough)
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction,
             * allocate a new register, and associate both input and output to
             * it
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* Get a temporary to be used within one TGSI instruction.
 * The first time that this function is called the temporary will be allocated.
 * Each call to this function will return the same temporary.
 */
static struct etna_native_reg
etna_compile_get_inner_temp(struct etna_compile *c)
{
   int inner_temp = c->inner_temps;

   if (inner_temp < ETNA_MAX_INNER_TEMPS) {
      /* lazily allocate the native register the first time it is needed */
      if (!c->inner_temp[inner_temp].valid)
         c->inner_temp[inner_temp] = alloc_new_native_reg(c);

      /* alloc_new_native_reg() handles lack of registers */
      c->inner_temps += 1;
   } else {
      BUG("Too many inner temporaries (%i) requested in one instruction",
          inner_temp + 1);
   }

   return c->inner_temp[inner_temp];
}

/* Convert a native temp register into a destination operand writing comps. */
static struct etna_inst_dst
etna_native_to_dst(struct etna_native_reg native, unsigned comps)
{
   /* Can only assign to temporaries */
   assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);

   struct etna_inst_dst rv = {
      .comps = comps,
      .use = 1,
      .reg = native.id,
   };

   return rv;
}

/* Convert a native register into a source operand with the given swizzle. */
static struct etna_inst_src
etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
{
   assert(native.valid && !native.is_tex);

   struct etna_inst_src rv = {
      .use = 1,
      .swiz = swizzle,
      .rgroup = native.rgroup,
      .reg = native.id,
      .amode = INST_AMODE_DIRECT,
   };

   return rv;
}

/* Toggle the negate modifier on a source operand. */
static inline struct etna_inst_src
negate(struct etna_inst_src src)
{
   src.neg = !src.neg;

   return src;
}

static inline struct
etna_inst_src 799absolute(struct etna_inst_src src) 800{ 801 src.abs = 1; 802 803 return src; 804} 805 806static inline struct etna_inst_src 807swizzle(struct etna_inst_src src, unsigned swizzle) 808{ 809 src.swiz = inst_swiz_compose(src.swiz, swizzle); 810 811 return src; 812} 813 814/* Emit instruction and append it to program */ 815static void 816emit_inst(struct etna_compile *c, struct etna_inst *inst) 817{ 818 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS); 819 820 /* Check for uniform conflicts (each instruction can only access one 821 * uniform), 822 * if detected, use an intermediate temporary */ 823 unsigned uni_rgroup = -1; 824 unsigned uni_reg = -1; 825 826 for (int src = 0; src < ETNA_NUM_SRC; ++src) { 827 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) { 828 if (uni_reg == -1) { /* first unique uniform used */ 829 uni_rgroup = inst->src[src].rgroup; 830 uni_reg = inst->src[src].reg; 831 } else { /* second or later; check that it is a re-use */ 832 if (uni_rgroup != inst->src[src].rgroup || 833 uni_reg != inst->src[src].reg) { 834 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that " 835 "accesses different uniforms, " 836 "need to generate extra MOV"); 837 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 838 839 /* Generate move instruction to temporary */ 840 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) { 841 .opcode = INST_OPCODE_MOV, 842 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 843 INST_COMPS_Z | INST_COMPS_W), 844 .src[2] = inst->src[src] 845 }); 846 847 c->inst_ptr++; 848 849 /* Modify instruction to use temp register instead of uniform */ 850 inst->src[src].use = 1; 851 inst->src[src].rgroup = INST_RGROUP_TEMP; 852 inst->src[src].reg = inner_temp.id; 853 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 854 inst->src[src].neg = 0; /* negation happens on MOV */ 855 inst->src[src].abs = 0; /* abs happens on MOV */ 856 inst->src[src].amode = 0; /* amode 
effects happen on MOV */ 857 } 858 } 859 } 860 } 861 862 /* Finally assemble the actual instruction */ 863 etna_assemble(&c->code[c->inst_ptr * 4], inst); 864 c->inst_ptr++; 865} 866 867static unsigned int 868etna_amode(struct tgsi_ind_register indirect) 869{ 870 assert(indirect.File == TGSI_FILE_ADDRESS); 871 assert(indirect.Index == 0); 872 873 switch (indirect.Swizzle) { 874 case TGSI_SWIZZLE_X: 875 return INST_AMODE_ADD_A_X; 876 case TGSI_SWIZZLE_Y: 877 return INST_AMODE_ADD_A_Y; 878 case TGSI_SWIZZLE_Z: 879 return INST_AMODE_ADD_A_Z; 880 case TGSI_SWIZZLE_W: 881 return INST_AMODE_ADD_A_W; 882 default: 883 assert(!"Invalid swizzle"); 884 } 885 886 unreachable("bad swizzle"); 887} 888 889/* convert destination operand */ 890static struct etna_inst_dst 891convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in) 892{ 893 struct etna_inst_dst rv = { 894 /// XXX .amode 895 .comps = in->Register.WriteMask, 896 }; 897 898 if (in->Register.File == TGSI_FILE_ADDRESS) { 899 assert(in->Register.Index == 0); 900 rv.reg = in->Register.Index; 901 rv.use = 0; 902 } else { 903 rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native, 904 in->Register.WriteMask); 905 } 906 907 if (in->Register.Indirect) 908 rv.amode = etna_amode(in->Indirect); 909 910 return rv; 911} 912 913/* convert texture operand */ 914static struct etna_inst_tex 915convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in, 916 const struct tgsi_instruction_texture *tex) 917{ 918 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native; 919 struct etna_inst_tex rv = { 920 // XXX .amode (to allow for an array of samplers?) 
      .swiz = INST_SWIZ_IDENTITY
   };

   /* texture operands must have been mapped to a texture unit earlier */
   assert(native_reg.is_tex && native_reg.valid);
   rv.id = native_reg.id;

   return rv;
}

/* convert source operand */
static struct etna_inst_src
etna_create_src(const struct tgsi_full_src_register *tgsi,
                const struct etna_native_reg *native)
{
   const struct tgsi_src_register *reg = &tgsi->Register;
   struct etna_inst_src rv = {
      .use = 1,
      .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
      .neg = reg->Negate,
      .abs = reg->Absolute,
      .rgroup = native->rgroup,
      .reg = native->id,
      .amode = INST_AMODE_DIRECT,
   };

   assert(native->valid && !native->is_tex);

   if (reg->Indirect)
      rv.amode = etna_amode(tgsi->Indirect);

   return rv;
}

/* Emit a MOV of src into the given temporary and return a source operand
 * that reads the temporary (modifiers are consumed by the MOV). */
static struct etna_inst_src
etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
                     struct etna_native_reg temp)
{
   struct etna_inst mov = { };

   mov.opcode = INST_OPCODE_MOV;
   mov.sat = 0;
   mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                      INST_COMPS_Z | INST_COMPS_W);
   mov.src[2] = src;
   emit_inst(c, &mov);

   /* swizzle/negate/abs were applied by the MOV; read the temp plainly */
   src.swiz = INST_SWIZ_IDENTITY;
   src.neg = src.abs = 0;
   src.rgroup = temp.rgroup;
   src.reg = temp.id;

   return src;
}

/* Copy src into a fresh inner temporary and return an operand reading it. */
static struct etna_inst_src
etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);

   return etna_mov_src_to_temp(c, src, temp);
}

/* True if a and b both read uniforms but from different uniform registers
 * (the hardware allows only one uniform access per instruction). */
static bool
etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
{
   return etna_rgroup_is_uniform(a.rgroup) &&
          etna_rgroup_is_uniform(b.rgroup) &&
          (a.rgroup != b.rgroup || a.reg != b.reg);
}

/* create a new label */
static unsigned int
alloc_new_label(struct etna_compile *c)
{
   struct etna_compile_label label = {
      .inst_idx = -1, /* start by point to no specific instruction */
   };

   array_insert(c->labels, label);

   /* return index of the label just appended */
   return c->labels_count - 1;
}

/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}

/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's src2.imm
 * slot as soon
 * as the value becomes known.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}

/* walk the frame stack and return first frame with matching type */
static struct etna_compile_frame *
find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
{
   for (int sp = c->frame_sp; sp >= 0; sp--)
      if (c->frame_stack[sp].type == type)
         return &c->frame_stack[sp];

   assert(0);
   return NULL;
}

/* Table entry describing how one TGSI opcode maps to the hardware ISA. */
struct instr_translater {
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* TGSI opcode handled by this entry */
   uint8_t opc;       /* corresponding native opcode */

   /* tgsi src -> etna src swizzle */
   int src[3];

   unsigned cond; /* native condition code, where applicable */
};

/* Generic translation: map a TGSI instruction 1:1 to a native instruction
 * using the opcode/condition/source mapping from the translater entry. */
static void
trans_instr(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
   struct etna_inst instr = { };

   instr.opcode = t->opc;
   instr.cond = t->cond;
   instr.sat = inst->Instruction.Saturate;

   assert(info->num_dst <= 1);
   if (info->num_dst)
      instr.dst = convert_dst(c, &inst->Dst[0]);

   assert(info->num_src <= ETNA_NUM_SRC);

   for (unsigned i = 0; i < info->num_src; i++) {
      int swizzle =
                    t->src[i];

      assert(swizzle != -1);
      instr.src[swizzle] = src[i];
   }

   emit_inst(c, &instr);
}

/* MIN/MAX via conditional SELECT: dst = (src0 <cond> src1) ? src0 : src1,
 * with cond GT for MIN and LT for MAX (see translaters[]) */
static void
trans_min_max(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_SELECT,
      .cond = t->cond,
      .sat = inst->Instruction.Saturate,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = src[0],
      .src[1] = src[1],
      .src[2] = src[0],
   });
}

/* IF: push an IF frame and emit a conditional branch to the (not yet placed)
 * "else" label, taken when src0 == 0 */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else_idx = alloc_new_label(c);
   f->lbl_endif_idx = -1;

   /* We need to avoid the emit_inst() below becoming two instructions */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else_idx);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}

/* ELSE: branch over the else-block to the new "endif" label, then place the
 * "else" label here */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and
    * branch to endif label */
   f->lbl_endif_idx = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif_idx);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, &c->labels[f->lbl_else_idx]);
}

/* ENDIF: pop the IF frame and place the pending label at the current
 * position ("endif" if an ELSE was seen, otherwise the unused "else") */
static void
trans_endif(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* assign "endif" or "else" (if no ELSE) label to current position in
    * instruction stream, pop IF */
   if (f->lbl_endif_idx != -1)
      label_place(c, &c->labels[f->lbl_endif_idx]);
   else
      label_place(c, &c->labels[f->lbl_else_idx]);
}

/* BGNLOOP: push a LOOP frame, allocate begin/end labels and place the begin
 * label at the current position */
static void
trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];

   /* push LOOP to stack */
   f->type = ETNA_COMPILE_FRAME_LOOP;
   f->lbl_loop_bgn_idx = alloc_new_label(c);
   f->lbl_loop_end_idx = alloc_new_label(c);

   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);

   c->num_loops++;
}

/* ENDLOOP: pop the LOOP frame, branch back to the loop begin label and place
 * the loop end label here */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /*
    * create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });

   label_place(c, &c->labels[f->lbl_loop_end_idx]);
}

/* BRK: unconditional branch to the innermost loop's end label */
static void
trans_brk(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_end_idx);

   /* create branch to loop_end label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* CONT: unconditional branch back to the innermost loop's begin label */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label (not loop_end: CONT re-enters the loop) */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* DDX/DDY: native derivative instruction; the source is replicated into both
 * src0 and src2 */
static void
trans_deriv(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = t->opc,
      .sat = inst->Instruction.Saturate,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = src[0],
      .src[2] = src[0],
   });
}

/* ARL: load the address register with floor(src); uses the native FLOOR
 * instruction when available, otherwise emulates floor(x) as x + (-frc(x)) */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                                  INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* no FLOOR: temp = frc(src); temp = src + (-temp) */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1;
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}

static void
trans_lrp(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* dst = src0 * src1 + (1 - src0) * src2
    *    => src0 * src1 - (src0 - 1) * src2
    *    => src0 * src1 - (src0 * src2 - src2)
    * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
    * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
    */
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   if
      (etna_src_uniforms_conflict(src[0], src[1]) ||
       etna_src_uniforms_conflict(src[0], src[2])) {
      /* src0 is read together with src1 and src2; break the uniform pair */
      src[0] = etna_mov_src(c, src[0]);
   }

   struct etna_inst mad[2] = { };
   mad[0].opcode = INST_OPCODE_MAD;
   mad[0].sat = 0;
   mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W);
   mad[0].src[0] = src[0];
   mad[0].src[1] = src[2];
   mad[0].src[2] = negate(src[2]);
   mad[1].opcode = INST_OPCODE_MAD;
   mad[1].sat = inst->Instruction.Saturate;
   /* NOTE(review): comma operator below chains two assignments on one line */
   mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
   mad[1].src[1] = src[1];
   mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));

   emit_inst(c, &mad[0]);
   emit_inst(c, &mad[1]);
}

static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* clamp src.y to >= 0; folded at compile time when src is a uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* clamp src.w to [-128, 128]; folded at compile time when src is a
    * uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse ins for the lower clamp: flip sign of the 128 immediates */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c,
             &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}

/* SSG: sign of src; native SIGN instruction when available, otherwise
 * emulated with SET.NZ into a temp followed by SELECT.LZ */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                            INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}

static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only
         using .xyz */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded. Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which lanes feed the MUL and DP3 steps */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      /* NOTE(review): trailing comma operator chains into the next line */
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit all instructions built so far, up to and including *p */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}

/* LG2: base-2 logarithm; on newer chips LOG produces x and y components that
 * must be multiplied together to get the result */
static void
trans_lg2(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) {
      /* On newer chips alternative LOG instruction is implemented,
       * which outputs an x and y component, which need to be multiplied to
       * get the result.
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src[0],
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   }
}

/* TEX/TXB/TXL/TXP: translate texture sampling opcodes */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]).
    */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* scale x and y by per-unit 1/width and 1/height immediates */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}

/* no-op translation for opcodes that need no generated code (NOP, END) */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}

/* table mapping each TGSI opcode to its translation function and parameters */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};

/* Pass -- compile instructions */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct
                      tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;

            /* texture operands are handled via the .tex field, not as src */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}

/* Look up register by semantic */
static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
{
   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[file].reg[idx];

      if (reg->semantic.Name == name && reg->semantic.Index == index)
         return reg;
   }

   return NULL; /* not found */
}

/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
 * - this is a vertex shader
 * - and this is an older GPU
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /*
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] =
etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)), 1902 }); 1903 emit_inst(c, &(struct etna_inst) { 1904 .opcode = INST_OPCODE_MUL, 1905 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1906 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1907 .src[1] = alloc_imm_f32(c, 0.5f), 1908 }); 1909 } 1910 } 1911} 1912 1913static void 1914etna_compile_frag_rb_swap(struct etna_compile *c) 1915{ 1916 if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) { 1917 /* find color out */ 1918 struct etna_reg_desc *color_reg = 1919 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0); 1920 1921 emit_inst(c, &(struct etna_inst) { 1922 .opcode = INST_OPCODE_MOV, 1923 .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W), 1924 .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)), 1925 }); 1926 } 1927} 1928 1929/** add a NOP to the shader if 1930 * a) the shader is empty 1931 * or 1932 * b) there is a label at the end of the shader 1933 */ 1934static void 1935etna_compile_add_nop_if_needed(struct etna_compile *c) 1936{ 1937 bool label_at_last_inst = false; 1938 1939 for (int idx = 0; idx < c->labels_count; ++idx) { 1940 if (c->labels[idx].inst_idx == c->inst_ptr) 1941 label_at_last_inst = true; 1942 1943 } 1944 1945 if (c->inst_ptr == 0 || label_at_last_inst) 1946 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP}); 1947} 1948 1949static void 1950assign_uniforms(struct etna_compile_file *file, unsigned base) 1951{ 1952 for (int idx = 0; idx < file->reg_size; ++idx) { 1953 file->reg[idx].native.valid = 1; 1954 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0; 1955 file->reg[idx].native.id = base + idx; 1956 } 1957} 1958 1959/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x). 
1960 * CONST must be consecutive as const buffers are supposed to be consecutive, 1961 * and before IMM, as this is 1962 * more convenient because is possible for the compilation process itself to 1963 * generate extra 1964 * immediates for constants such as pi, one, zero. 1965 */ 1966static void 1967assign_constants_and_immediates(struct etna_compile *c) 1968{ 1969 assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0); 1970 /* immediates start after the constants */ 1971 c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4; 1972 assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4); 1973 DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base, 1974 c->imm_size); 1975} 1976 1977/* Assign declared samplers to native texture units */ 1978static void 1979assign_texture_units(struct etna_compile *c) 1980{ 1981 uint tex_base = 0; 1982 1983 if (c->info.processor == PIPE_SHADER_VERTEX) 1984 tex_base = c->specs->vertex_sampler_offset; 1985 1986 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) { 1987 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1; 1988 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup 1989 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx; 1990 } 1991} 1992 1993/* Additional pass to fill in branch targets. This pass should be last 1994 * as no instruction reordering or removing/addition can be done anymore 1995 * once the branch targets are computed. 
 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   /* lbl_usage[idx] holds the index of the label referenced by instruction
    * idx (or -1 for none); patch that instruction's immediate field with the
    * label's resolved instruction address */
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx] != -1)
         etna_assemble_set_imm(&c->code[idx * 4],
                               c->labels[c->lbl_usage[idx]].inst_idx);
   }
}

/* compare two etna_native_reg structures, return true if equal */
static bool
cmp_etna_native_reg(const struct etna_native_reg to,
                    const struct etna_native_reg from)
{
   return to.valid == from.valid && to.is_tex == from.is_tex &&
          to.rgroup == from.rgroup && to.id == from.id;
}

/* go through all declarations and swap native registers *to* and *from* */
static void
swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
                      const struct etna_native_reg from)
{
   if (cmp_etna_native_reg(from, to))
      return; /* Nothing to do */

   /* exchange in both directions so the register assignment stays a
    * one-to-one mapping */
   for (int idx = 0; idx < c->total_decls; ++idx) {
      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
         c->decl[idx].native = to;
      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
         c->decl[idx].native = from;
      }
   }
}

/* For PS we need to permute so that inputs are always in temporary 0..N-1.
 * Semantic POS is always t0. If that semantic is not used, avoid t0.
 */
static void
permute_ps_inputs(struct etna_compile *c)
{
   /* Special inputs:
    * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
    * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
    */
   uint native_idx = 1;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      uint input_id;
      assert(reg->has_semantic);

      /* POSITION stays in t0; inactive inputs get no slot */
      if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
         continue;

      input_id = native_idx++;
      swap_native_registers(c, etna_native_temp(input_id),
                            c->file[TGSI_FILE_INPUT].reg[idx].native);
   }

   c->num_varyings = native_idx - 1;

   if (native_idx > c->next_free_native)
      c->next_free_native = native_idx;
}

/* fill in ps inputs into shader object */
static void
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];

      /* native id 0 is reserved for the POSITION input (see
       * permute_ps_inputs), which is not an ordinary varying -- skip it */
      if (reg->native.id > 0) {
         assert(sf->num_reg < ETNA_NUM_INPUTS);
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         /* convert usage mask to number of components (*=wildcard)
          * .r    (0..1)  -> 1 component
          * .*g   (2..3)  -> 2 component
          * .**b  (4..7)  -> 3 components
          * .***a (8..15) -> 4 components
          */
         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
         sf->num_reg++;
      }
   }

   assert(sf->num_reg == c->num_varyings);
   sobj->input_count_unk8 = 31; /* XXX what is this */
}

/* fill in output mapping for ps into shader object */
static void
fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   sobj->outfile.num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
         sobj->ps_color_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
         break;
      default:
         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
      }
   }
}

/* fill in inputs for vs into shader object */
static void
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      /* inputs that never received a native register are unused */
      if (!reg->native.valid)
         continue;

      /* XXX exclude inputs with special semantics such as gl_frontFacing */
      sf->reg[sf->num_reg].reg = reg->native.id;
      sf->reg[sf->num_reg].semantic = reg->semantic;
      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
      sf->num_reg++;
   }

   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
}

/* build two-level output index [Semantic][Index] for fast linking */
static void
build_output_index(struct etna_shader_variant *sobj)
{
   int total = 0;
   int offset = 0;

   /* total number of outputs across all semantics, as accumulated by
    * fill_in_vs_outputs into output_count_per_semantic */
   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
      total += sobj->output_count_per_semantic[name];

   /* NOTE(review): CALLOC result is not checked for NULL before the slices
    * below are taken from it */
   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));

   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
      sobj->output_per_semantic[name] =
         &sobj->output_per_semantic_list[offset];
      offset += sobj->output_count_per_semantic[name];
   }

   /* point each [Semantic][Index] slot at its matching outfile register */
   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
                               [sobj->outfile.reg[idx].semantic.Index] =
         &sobj->outfile.reg[idx];
   }
}

/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         /* ordinary varying: record it and bump the per-semantic count that
          * build_output_index consumes below */
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped.
Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}

/* Verify the compiled shader against the hardware limits in c->specs.
 * Returns false (compilation must fail) when a hard limit is exceeded.
 */
static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   /* with an instruction cache the on-chip instruction count limit does not
    * apply (see needs_icache handling in etna_compile_shader) */
   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   /* NOTE(review): unlike every check above, this one only logs and does not
    * return false -- confirm whether exceeding num_constants is deliberately
    * survivable or a "return false" is missing here */
   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}

/* Copy the accumulated constant/immediate data out of the transient compile
 * context into the shader variant, which owns its own heap copies (freed in
 * etna_destroy_shader).
 */
static void
copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
{
   uint32_t count = c->imm_size;
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->const_count = c->imm_base;
   uinfo->imm_count = count;
   uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
   uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));

   etna_set_shader_uniforms_dirty_flags(sobj);
}

/* Compile the TGSI tokens of v->shader into Vivante instructions and fill in
 * the variant v. Returns true on success, false on allocation failure or
 * when the result exceeds the hardware limits.
 */
bool
etna_compile_shader(struct etna_shader_variant *v)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

   /* lower TGSI opcodes this hardware generation lacks to supported
    * instruction sequences */
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   /* -1 == "this instruction references no label" (see
    * etna_compile_fill_in_labels) */
   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
 */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    * this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /*   potential optimization case:
    *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
    * after which IN[x]
    *     is not read, temp[y] can be used as input register as-is
    */
   /* The strategy below is what assign_inouts_to_temporaries implements:
    *   sort temporaries by first use
    *   sort inputs by last usage
    *   iterate over inputs, temporaries
    *     if last usage of input <= first usage of temp:
    *       assign input to temp
    *       advance input, temporary pointer
    *     else
    *       advance temporary pointer
    *
    *  potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input;
    *    however, as the temp is not used before this, how would this make
    * sense?
 uninitialized temporaries have an undefined
    * value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /*   potential optimization case:
    *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in
    * the shader) after which temp y is no longer used temp[y] can be
    * used as output register as-is
    *
    *   potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output;
    *    however, as the temp is not used after this, how would this make
    * sense? could just discard the output value
    */
   /*   sort temporaries by last use
    *   sort outputs by first usage
    *   iterate over outputs, temporaries
    *     if first usage of output >= last usage of temp:
    *       assign output to temp
    *       advance output, temporary pointer
    *     else
    *       advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   /* map CONST/IMM onto the uniform bank and samplers onto texture units */
   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!).
The output color,
    * however, can be routed
    * from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations (again, after the PS input permutation above) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   /* branch targets are patched last; no instruction may be added, removed
    * or reordered after this point */
   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->processor = c->info.processor;
   v->code_size = c->inst_ptr * 4;
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   /* release the transient compile context; tokens are only owned by us when
    * tgsi_transform_lowering allocated a lowered copy */
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}

extern const char *tgsi_swizzle_names[];
/* Print a human-readable dump of a compiled shader variant to stdout:
 * disassembly, immediates, input/output mappings and special registers.
 */
void
etna_dump_shader(const struct etna_shader_variant *shader)
{
   if (shader->processor == PIPE_SHADER_VERTEX)
      printf("VERT\n");
   else
      printf("FRAG\n");


   etna_disasm(shader->code, shader->code_size, PRINT_RAW);

   printf("num loops: %i\n", shader->num_loops);
   printf("num temps: %i\n", shader->num_temps);
   printf("num const: %i\n", shader->uniforms.const_count);
   printf("immediates:\n");
   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
      /* NOTE(review): the float reinterpretation below type-puns imm_data
       * through a pointer cast; a memcpy-based pun would avoid the strict
       * aliasing concern */
      printf(" [%i].%s = %f (0x%08x)\n",
             (idx + shader->uniforms.const_count) / 4,
             tgsi_swizzle_names[idx % 4],
             *((float *)&shader->uniforms.imm_data[idx]),
             shader->uniforms.imm_data[idx]);
   }
   printf("inputs:\n");
   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
             tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
             shader->infile.reg[idx].semantic.Index,
             shader->infile.reg[idx].num_components);
   }
   printf("outputs:\n");
   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
             tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
             shader->outfile.reg[idx].semantic.Index,
             shader->outfile.reg[idx].num_components);
   }
   printf("special:\n");
   if (shader->processor == PIPE_SHADER_VERTEX) {
      printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
      printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
      printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
   } else {
      printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
      printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
   }
   printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
}

void
etna_destroy_shader(struct etna_shader_variant *shader)
{
   assert(shader);

   /* free everything the variant owns (see etna_compile_shader /
    * copy_uniform_state_to_shader / build_output_index), then the variant
    * itself */
   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader->output_per_semantic_list);
   FREE(shader);
}

/* Look up the VS output matching a FS input by semantic name and index via
 * the two-level index built by build_output_index; NULL if there is none.
 */
static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
      return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];

   return NULL;
}

/* Link a VS/FS pair: fill in *info* with the varying table. Returns true on
 * link error, false on success.
 */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
2555 */ 2556 assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 2557 info->pcoord_varying_comp_ofs = -1; 2558 2559 for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 2560 const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 2561 const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 2562 struct etna_varying *varying; 2563 bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR; 2564 2565 assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 2566 2567 if (fsio->reg > info->num_varyings) 2568 info->num_varyings = fsio->reg; 2569 2570 varying = &info->varyings[fsio->reg - 1]; 2571 varying->num_components = fsio->num_components; 2572 2573 if (!interpolate_always) /* colors affected by flat shading */ 2574 varying->pa_attributes = 0x200; 2575 else /* texture coord or other bypasses flat shading */ 2576 varying->pa_attributes = 0x2f1; 2577 2578 varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED; 2579 varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED; 2580 varying->use[2] = VARYING_COMPONENT_USE_USED; 2581 varying->use[3] = VARYING_COMPONENT_USE_USED; 2582 2583 2584 /* point coord is an input to the PS without matching VS output, 2585 * so it gets a varying slot without being assigned a VS register. 2586 */ 2587 if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) { 2588 info->pcoord_varying_comp_ofs = comp_ofs; 2589 } else { 2590 if (vsio == NULL) { /* not found -- link error */ 2591 BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index); 2592 return true; 2593 } 2594 2595 varying->reg = vsio->reg; 2596 } 2597 2598 comp_ofs += varying->num_components; 2599 } 2600 2601 assert(info->num_varyings == fs->infile.num_reg); 2602 2603 return false; 2604} 2605