1/* 2 * Copyright (c) 2012-2015 Etnaviv Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Wladimir J. van der Laan <laanwj@gmail.com> 25 */ 26 27/* TGSI->Vivante shader ISA conversion */ 28 29/* What does the compiler return (see etna_shader_object)? 30 * 1) instruction data 31 * 2) input-to-temporary mapping (fixed for ps) 32 * *) in case of ps, semantic -> varying id mapping 33 * *) for each varying: number of components used (r, rg, rgb, rgba) 34 * 3) temporary-to-output mapping (in case of vs, fixed for ps) 35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...) 
 * 5) immediates base offset, immediates data
 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
 *    configure the hw, but useful for error checking
 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
 *    (output reg id is enough)
 *
 * Empty shaders are not allowed, should always at least generate a NOP. Also
 * if there is a label at the end of the shader, an extra NOP should be
 * generated as jump target.
 *
 * TODO
 * * Use an instruction scheduler
 * * Indirect access to uniforms / temporaries using amode
 */

#include "etnaviv_compiler.h"

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include "nir/tgsi_to_nir.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_iterate.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Maximum number of scratch temporaries that may be requested while
 * translating a single TGSI instruction (see etna_compile_get_inner_temp). */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant vec4 pairs uploaded as immediates, used by the SIN/COS lowering. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};

/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3;
   unsigned id : 9;
};

/* Register description: tracks liveness and native assignment for one
 * register of one TGSI file. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx;                  /* index into file */
   bool active;              /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use;  /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4;       /* usage, per channel */
   bool has_semantic;             /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp;     /* Interpolation type */
};

/* Label information structure */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};

/* nesting scope frame (LOOP, IF, ...) during compilation
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;
   int lbl_endif_idx;
   int lbl_loop_bgn_idx;
   int lbl_loop_end_idx;
};

struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};

/* Append val to a growable array arr; the paired arr_count/arr_sz
 * variables track usage and capacity (capacity doubles, min 16). */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)


/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens;

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_uniform_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};

/* Look up the register description for a TGSI destination operand. */
static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
{
   return &c->file[dst.File].reg[dst.Index];
}

/* Look up the register description for a TGSI source operand. */
static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
{
   return &c->file[src.File].reg[src.Index];
}

/* Build a valid native-register descriptor for temporary register reg. */
static struct etna_native_reg
etna_native_temp(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup = INST_RGROUP_TEMP,
      .id = reg
   };
}

/* Build a valid native-register descriptor for internal register reg
 * (e.g. the front-facing register in fragment shaders). */
static struct etna_native_reg
etna_native_internal(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup =
INST_RGROUP_INTERNAL, 231 .id = reg 232 }; 233} 234 235/** Register allocation **/ 236enum reg_sort_order { 237 FIRST_USE_ASC, 238 FIRST_USE_DESC, 239 LAST_USE_ASC, 240 LAST_USE_DESC 241}; 242 243/* Augmented register description for sorting */ 244struct sort_rec { 245 struct etna_reg_desc *ptr; 246 int key; 247}; 248 249static int 250sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b) 251{ 252 if (a->key < b->key) 253 return -1; 254 255 if (a->key > b->key) 256 return 1; 257 258 return 0; 259} 260 261/* create an index on a register set based on certain criteria. */ 262static int 263sort_registers(struct sort_rec *sorted, struct etna_compile_file *file, 264 enum reg_sort_order so) 265{ 266 struct etna_reg_desc *regs = file->reg; 267 int ptr = 0; 268 269 /* pre-populate keys from active registers */ 270 for (int idx = 0; idx < file->reg_size; ++idx) { 271 /* only interested in active registers now; will only assign inactive ones 272 * if no space in active ones */ 273 if (regs[idx].active) { 274 sorted[ptr].ptr = ®s[idx]; 275 276 switch (so) { 277 case FIRST_USE_ASC: 278 sorted[ptr].key = regs[idx].first_use; 279 break; 280 case LAST_USE_ASC: 281 sorted[ptr].key = regs[idx].last_use; 282 break; 283 case FIRST_USE_DESC: 284 sorted[ptr].key = -regs[idx].first_use; 285 break; 286 case LAST_USE_DESC: 287 sorted[ptr].key = -regs[idx].last_use; 288 break; 289 } 290 ptr++; 291 } 292 } 293 294 /* sort index by key */ 295 qsort(sorted, ptr, sizeof(struct sort_rec), 296 (int (*)(const void *, const void *))sort_rec_compar); 297 298 return ptr; 299} 300 301/* Allocate a new, unused, native temp register */ 302static struct etna_native_reg 303alloc_new_native_reg(struct etna_compile *c) 304{ 305 assert(c->next_free_native < ETNA_MAX_TEMPS); 306 return etna_native_temp(c->next_free_native++); 307} 308 309/* assign TEMPs to native registers */ 310static void 311assign_temporaries_to_native(struct etna_compile *c, 312 struct etna_compile_file *file) 313{ 314 
struct etna_reg_desc *temps = file->reg; 315 316 for (int idx = 0; idx < file->reg_size; ++idx) 317 temps[idx].native = alloc_new_native_reg(c); 318} 319 320/* assign inputs and outputs to temporaries 321 * Gallium assumes that the hardware has separate registers for taking input and 322 * output, however Vivante GPUs use temporaries both for passing in inputs and 323 * passing back outputs. 324 * Try to re-use temporary registers where possible. */ 325static void 326assign_inouts_to_temporaries(struct etna_compile *c, uint file) 327{ 328 bool mode_inputs = (file == TGSI_FILE_INPUT); 329 int inout_ptr = 0, num_inouts; 330 int temp_ptr = 0, num_temps; 331 struct sort_rec inout_order[ETNA_MAX_TEMPS]; 332 struct sort_rec temps_order[ETNA_MAX_TEMPS]; 333 num_inouts = sort_registers(inout_order, &c->file[file], 334 mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC); 335 num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY], 336 mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC); 337 338 while (inout_ptr < num_inouts && temp_ptr < num_temps) { 339 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 340 struct etna_reg_desc *temp = temps_order[temp_ptr].ptr; 341 342 if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */ 343 inout_ptr++; 344 continue; 345 } 346 347 /* last usage of this input is before or in same instruction of first use 348 * of temporary? */ 349 if (mode_inputs ? 
(inout->last_use <= temp->first_use) 350 : (inout->first_use >= temp->last_use)) { 351 /* assign it and advance to next input */ 352 inout->native = temp->native; 353 inout_ptr++; 354 } 355 356 temp_ptr++; 357 } 358 359 /* if we couldn't reuse current ones, allocate new temporaries */ 360 for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) { 361 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 362 363 if (inout->active && !inout->native.valid) 364 inout->native = alloc_new_native_reg(c); 365 } 366} 367 368/* Allocate an immediate with a certain value and return the index. If 369 * there is already an immediate with that value, return that. 370 */ 371static struct etna_inst_src 372alloc_imm(struct etna_compile *c, enum etna_uniform_contents contents, 373 uint32_t value) 374{ 375 int idx; 376 377 /* Could use a hash table to speed this up */ 378 for (idx = 0; idx < c->imm_size; ++idx) { 379 if (c->imm_contents[idx] == contents && c->imm_data[idx] == value) 380 break; 381 } 382 383 /* look if there is an unused slot */ 384 if (idx == c->imm_size) { 385 for (idx = 0; idx < c->imm_size; ++idx) { 386 if (c->imm_contents[idx] == ETNA_UNIFORM_UNUSED) 387 break; 388 } 389 } 390 391 /* allocate new immediate */ 392 if (idx == c->imm_size) { 393 assert(c->imm_size < ETNA_MAX_IMM); 394 idx = c->imm_size++; 395 c->imm_data[idx] = value; 396 c->imm_contents[idx] = contents; 397 } 398 399 /* swizzle so that component with value is returned in all components */ 400 idx += c->imm_base; 401 struct etna_inst_src imm_src = { 402 .use = 1, 403 .rgroup = INST_RGROUP_UNIFORM_0, 404 .reg = idx / 4, 405 .swiz = INST_SWIZ_BROADCAST(idx & 3) 406 }; 407 408 return imm_src; 409} 410 411static struct etna_inst_src 412alloc_imm_u32(struct etna_compile *c, uint32_t value) 413{ 414 return alloc_imm(c, ETNA_UNIFORM_CONSTANT, value); 415} 416 417static struct etna_inst_src 418alloc_imm_vec4u(struct etna_compile *c, enum etna_uniform_contents contents, 419 const uint32_t *values) 420{ 
struct etna_inst_src imm_src = { };
   int idx, i;

   /* Search for an existing aligned vec4 with identical contents/values. */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* No match: append a new, 4-aligned vec4 at the end. */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}

/* Read back the 32-bit immediate value selected by component swiz_idx of
 * an immediate source operand returned by one of the alloc_imm* helpers. */
static uint32_t
get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
            unsigned swiz_idx)
{
   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);

   return c->imm_data[idx];
}

/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}

/* Allocate a CONSTANT vec4 immediate from four floats. */
static struct etna_inst_src
etna_imm_vec4f(struct etna_compile *c, const float *vec4)
{
   uint32_t val[4];

   for (int i = 0; i < 4; i++)
      val[i] = fui(vec4[i]);

   return alloc_imm_vec4u(c, ETNA_UNIFORM_CONSTANT, val);
}

/* Pass -- check register file declarations and immediates */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_UNIFORM_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* Allocate register declarations for the registers in all register files.
 * Carves contiguous per-file slices out of c->decl and records each
 * register's file/index. */
static void
etna_allocate_decls(struct etna_compile *c)
{
   uint idx = 0;

   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
      c->file[x].reg = &c->decl[idx];
      c->file[x].reg_size = c->info.file_max[x] + 1;

      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
         c->decl[idx].file = x;
         c->decl[idx].idx = sub;
         idx++;
      }
   }

   c->total_decls = idx;
}

/* Pass -- check and record usage of temporaries, inputs, outputs */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct
tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* Reset liveness info before the scan. */
   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* find out max register #s used
       * For every register mark first and last instruction index where it's
       * used; this allows finding ranges where the temporary can be borrowed
       * as input and/or output register
       *
       * XXX in the case of loops this needs special care, or even be completely
       * disabled, as the last usage of a register inside a loop means it can
       * still be used on next loop iteration (execution is no longer
       * chronological). The register can only be declared "free" after the
       * loop finishes.
       *
       * Same for inputs: the first usage of a register inside a loop doesn't
       * mean that the register won't have been overwritten in previous
       * iteration. The register can only be declared free before the loop
       * starts.
       * The proper way would be to do full dominator / post-dominator analysis
       * (especially with more complicated control flow such as direct branch
       * instructions) but not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register, this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* assign inputs that need to be assigned to specific registers */
static void
assign_special_inputs(struct etna_compile *c)
{
   if (c->info.processor ==
PIPE_SHADER_FRAGMENT) {
      /* never assign t0 as it is the position output, start assigning at t1 */
      c->next_free_native = 1;

      for (int idx = 0; idx < c->total_decls; ++idx) {
         struct etna_reg_desc *reg = &c->decl[idx];

         if (!reg->active)
            continue;

         /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
         if (reg->semantic.Name == TGSI_SEMANTIC_POSITION)
            reg->native = etna_native_temp(0);

         /* hardwire TGSI_SEMANTIC_FACE to i0 */
         if (reg->semantic.Name == TGSI_SEMANTIC_FACE)
            reg->native = etna_native_internal(0);
      }
   }
}

/* Check that a move instruction does not swizzle any of the components
 * that it writes.
 */
static bool
etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
                          const struct tgsi_src_register src)
{
   return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
          (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
}

/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 * MOV OUT[1], TEMP[2]
 * MOV OUT[0], TEMP[0]
 * MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough)
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction,
             * allocate a new register, and associate both input and output to
             * it
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* Get a temporary to be used within one TGSI instruction.
 * The first time that this function is called the temporary will be allocated.
 * Each call to this function will return the same temporary.
 */
static struct etna_native_reg
etna_compile_get_inner_temp(struct etna_compile *c)
{
   int inner_temp = c->inner_temps;

   if (inner_temp < ETNA_MAX_INNER_TEMPS) {
      if (!c->inner_temp[inner_temp].valid)
         c->inner_temp[inner_temp] = alloc_new_native_reg(c);

      /* alloc_new_native_reg() handles lack of registers */
      c->inner_temps += 1;
   } else {
      BUG("Too many inner temporaries (%i) requested in one instruction",
          inner_temp + 1);
      /* NOTE(review): if BUG() does not abort, the return below reads
       * inner_temp[ETNA_MAX_INNER_TEMPS], one past the end of the array --
       * confirm BUG()'s behavior. */
   }

   return c->inner_temp[inner_temp];
}

/* Convert a native register to an instruction destination operand writing
 * the component mask comps. */
static struct etna_inst_dst
etna_native_to_dst(struct etna_native_reg native, unsigned comps)
{
   /* Can only assign to temporaries */
   assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);

   struct etna_inst_dst rv = {
      .write_mask = comps,
      .use = 1,
      .reg = native.id,
   };

   return rv;
}

/* Convert a native register to an instruction source operand with the
 * given swizzle. */
static struct etna_inst_src
etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
{
   assert(native.valid && !native.is_tex);

   struct etna_inst_src rv = {
      .use = 1,
      .swiz = swizzle,
      .rgroup = native.rgroup,
      .reg = native.id,
      .amode = INST_AMODE_DIRECT,
   };

   return rv;
}

/* Source-operand modifier helpers */
static inline struct
etna_inst_src 808negate(struct etna_inst_src src) 809{ 810 src.neg = !src.neg; 811 812 return src; 813} 814 815static inline struct etna_inst_src 816absolute(struct etna_inst_src src) 817{ 818 src.abs = 1; 819 820 return src; 821} 822 823static inline struct etna_inst_src 824swizzle(struct etna_inst_src src, unsigned swizzle) 825{ 826 src.swiz = inst_swiz_compose(src.swiz, swizzle); 827 828 return src; 829} 830 831/* Emit instruction and append it to program */ 832static void 833emit_inst(struct etna_compile *c, struct etna_inst *inst) 834{ 835 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS); 836 837 /* Check for uniform conflicts (each instruction can only access one 838 * uniform), 839 * if detected, use an intermediate temporary */ 840 unsigned uni_rgroup = -1; 841 unsigned uni_reg = -1; 842 843 for (int src = 0; src < ETNA_NUM_SRC; ++src) { 844 if (inst->src[src].rgroup == INST_RGROUP_INTERNAL && 845 c->info.processor == PIPE_SHADER_FRAGMENT && 846 c->key->front_ccw) { 847 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 848 849 /* 850 * Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing 851 * configuration (CW or CCW). 
852 */ 853 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) { 854 .opcode = INST_OPCODE_SET, 855 .cond = INST_CONDITION_NE, 856 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 857 INST_COMPS_Z | INST_COMPS_W), 858 .src[0] = inst->src[src], 859 .src[1] = alloc_imm_f32(c, 1.0f) 860 }); 861 c->inst_ptr++; 862 863 /* Modify instruction to use temp register instead of uniform */ 864 inst->src[src].use = 1; 865 inst->src[src].rgroup = INST_RGROUP_TEMP; 866 inst->src[src].reg = inner_temp.id; 867 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 868 inst->src[src].neg = 0; /* negation happens on MOV */ 869 inst->src[src].abs = 0; /* abs happens on MOV */ 870 inst->src[src].amode = 0; /* amode effects happen on MOV */ 871 } else if (etna_rgroup_is_uniform(inst->src[src].rgroup)) { 872 if (uni_reg == -1) { /* first unique uniform used */ 873 uni_rgroup = inst->src[src].rgroup; 874 uni_reg = inst->src[src].reg; 875 } else { /* second or later; check that it is a re-use */ 876 if (uni_rgroup != inst->src[src].rgroup || 877 uni_reg != inst->src[src].reg) { 878 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that " 879 "accesses different uniforms, " 880 "need to generate extra MOV"); 881 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 882 883 /* Generate move instruction to temporary */ 884 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) { 885 .opcode = INST_OPCODE_MOV, 886 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 887 INST_COMPS_Z | INST_COMPS_W), 888 .src[2] = inst->src[src] 889 }); 890 891 c->inst_ptr++; 892 893 /* Modify instruction to use temp register instead of uniform */ 894 inst->src[src].use = 1; 895 inst->src[src].rgroup = INST_RGROUP_TEMP; 896 inst->src[src].reg = inner_temp.id; 897 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 898 inst->src[src].neg = 0; /* negation happens on MOV */ 899 inst->src[src].abs = 0; /* 
abs happens on MOV */ 900 inst->src[src].amode = 0; /* amode effects happen on MOV */ 901 } 902 } 903 } 904 } 905 906 /* Finally assemble the actual instruction */ 907 etna_assemble(&c->code[c->inst_ptr * 4], inst); 908 c->inst_ptr++; 909} 910 911static unsigned int 912etna_amode(struct tgsi_ind_register indirect) 913{ 914 assert(indirect.File == TGSI_FILE_ADDRESS); 915 assert(indirect.Index == 0); 916 917 switch (indirect.Swizzle) { 918 case TGSI_SWIZZLE_X: 919 return INST_AMODE_ADD_A_X; 920 case TGSI_SWIZZLE_Y: 921 return INST_AMODE_ADD_A_Y; 922 case TGSI_SWIZZLE_Z: 923 return INST_AMODE_ADD_A_Z; 924 case TGSI_SWIZZLE_W: 925 return INST_AMODE_ADD_A_W; 926 default: 927 assert(!"Invalid swizzle"); 928 } 929 930 unreachable("bad swizzle"); 931} 932 933/* convert destination operand */ 934static struct etna_inst_dst 935convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in) 936{ 937 struct etna_inst_dst rv = { 938 /// XXX .amode 939 .write_mask = in->Register.WriteMask, 940 }; 941 942 if (in->Register.File == TGSI_FILE_ADDRESS) { 943 assert(in->Register.Index == 0); 944 rv.reg = in->Register.Index; 945 rv.use = 0; 946 } else { 947 rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native, 948 in->Register.WriteMask); 949 } 950 951 if (in->Register.Indirect) 952 rv.amode = etna_amode(in->Indirect); 953 954 return rv; 955} 956 957/* convert texture operand */ 958static struct etna_inst_tex 959convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in, 960 const struct tgsi_instruction_texture *tex) 961{ 962 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native; 963 struct etna_inst_tex rv = { 964 // XXX .amode (to allow for an array of samplers?) 
965 .swiz = INST_SWIZ_IDENTITY 966 }; 967 968 assert(native_reg.is_tex && native_reg.valid); 969 rv.id = native_reg.id; 970 971 return rv; 972} 973 974/* convert source operand */ 975static struct etna_inst_src 976etna_create_src(const struct tgsi_full_src_register *tgsi, 977 const struct etna_native_reg *native) 978{ 979 const struct tgsi_src_register *reg = &tgsi->Register; 980 struct etna_inst_src rv = { 981 .use = 1, 982 .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW), 983 .neg = reg->Negate, 984 .abs = reg->Absolute, 985 .rgroup = native->rgroup, 986 .reg = native->id, 987 .amode = INST_AMODE_DIRECT, 988 }; 989 990 assert(native->valid && !native->is_tex); 991 992 if (reg->Indirect) 993 rv.amode = etna_amode(tgsi->Indirect); 994 995 return rv; 996} 997 998static struct etna_inst_src 999etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src, 1000 struct etna_native_reg temp) 1001{ 1002 struct etna_inst mov = { }; 1003 1004 mov.opcode = INST_OPCODE_MOV; 1005 mov.sat = 0; 1006 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1007 INST_COMPS_Z | INST_COMPS_W); 1008 mov.src[2] = src; 1009 emit_inst(c, &mov); 1010 1011 src.swiz = INST_SWIZ_IDENTITY; 1012 src.neg = src.abs = 0; 1013 src.rgroup = temp.rgroup; 1014 src.reg = temp.id; 1015 1016 return src; 1017} 1018 1019static struct etna_inst_src 1020etna_mov_src(struct etna_compile *c, struct etna_inst_src src) 1021{ 1022 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1023 1024 return etna_mov_src_to_temp(c, src, temp); 1025} 1026 1027static bool 1028etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b) 1029{ 1030 return etna_rgroup_is_uniform(a.rgroup) && 1031 etna_rgroup_is_uniform(b.rgroup) && 1032 (a.rgroup != b.rgroup || a.reg != b.reg); 1033} 1034 1035/* create a new label */ 1036static unsigned int 1037alloc_new_label(struct etna_compile *c) 1038{ 1039 struct etna_compile_label label = { 1040 .inst_idx = -1, /* 
start by pointing to no specific instruction */
   };

   array_insert(c->labels, label);

   /* a label's index is simply its position in the labels array */
   return c->labels_count - 1;
}

/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}

/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's src2.imm
 * slot as soon
 * as the value becomes known.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}

/* walk the frame stack and return first frame with matching type */
static struct etna_compile_frame *
find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
{
   for (int sp = c->frame_sp; sp >= 0; sp--)
      if (c->frame_stack[sp].type == type)
         return &c->frame_stack[sp];

   /* callers only look up frame types they know were pushed */
   assert(0);
   return NULL;
}

/* Per-TGSI-opcode translation entry: the callback plus static parameters it
 * reads (hardware opcode, condition code, TGSI->etna source slot mapping). */
struct instr_translater {
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc;
   uint8_t opc;

   /* tgsi src -> etna src swizzle */
   int src[3];

   unsigned cond;
};

/* Generic translation for TGSI opcodes that map onto a single hardware
 * instruction: copy opcode/cond from the translater entry and route each TGSI
 * source into the hardware source slot given by t->src[]. */
static void
trans_instr(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
   struct etna_inst instr = { };

   instr.opcode = t->opc;
   instr.cond = t->cond;
   instr.sat = inst->Instruction.Saturate;

   assert(info->num_dst <= 1);
   if (info->num_dst)
      instr.dst = convert_dst(c, &inst->Dst[0]);

   assert(info->num_src <= ETNA_NUM_SRC);

   for (unsigned i = 0; i <
info->num_src; i++) {
      int swizzle = t->src[i];

      /* -1 in the mapping marks a source slot this opcode does not use */
      assert(swizzle != -1);
      instr.src[swizzle] = src[i];
   }

   emit_inst(c, &instr);
}

/* MIN/MAX via conditional SELECT, using the translater's condition code
 * (GT for MIN, LT for MAX per the translaters table). */
static void
trans_min_max(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_SELECT,
      .cond = t->cond,
      .sat = inst->Instruction.Saturate,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = src[0],
      .src[1] = src[1],
      .src[2] = src[0],
   });
}

/* IF: push an IF frame and emit a branch to the (yet unplaced) "else" label,
 * taken when src0 == 0. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else_idx = alloc_new_label(c);
   f->lbl_endif_idx = -1;

   /* We need to avoid the emit_inst() below becoming two instructions */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else_idx);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}

/* ELSE: branch unconditionally to a freshly created "endif" label, then place
 * the pending "else" label at the current position. */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and branch to endif label */
   f->lbl_endif_idx = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif_idx);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, &c->labels[f->lbl_else_idx]);
}

/* ENDIF: pop the IF frame and place its pending label here. */
static void
trans_endif(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* assign "endif" or "else" (if no ELSE) label to current position in
    * instruction stream, pop IF */
   if (f->lbl_endif_idx != -1)
      label_place(c, &c->labels[f->lbl_endif_idx]);
   else
      label_place(c, &c->labels[f->lbl_else_idx]);
}

/* BGNLOOP: push a LOOP frame, allocate begin/end labels, and place the begin
 * label at the current position (branch target for ENDLOOP/CONT). */
static void
trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];

   /* push LOOP to stack */
   f->type = ETNA_COMPILE_FRAME_LOOP;
   f->lbl_loop_bgn_idx = alloc_new_label(c);
   f->lbl_loop_end_idx = alloc_new_label(c);

   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);

   c->num_loops++;
}

/* ENDLOOP: pop the LOOP frame, branch back to the begin label, and place the
 * end label here (branch target for BRK). */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });

   label_place(c, &c->labels[f->lbl_loop_end_idx]);
}

/* BRK: unconditional branch to the end label of the innermost LOOP frame
 * (found via find_frame, so it works from inside nested IFs). */
static void
trans_brk(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_end_idx);

   /* create branch to loop_end label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* CONT: unconditional branch back to the begin label of the innermost LOOP
 * frame. */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* DDX/DDY: single hardware instruction; the source is fed to both the src0
 * and src2 slots. */
static void
trans_deriv(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = t->opc,
      .sat = inst->Instruction.Saturate,
      .dst = convert_dst(c, &inst->Dst[0]),
.src[0] = src[0],
      .src[2] = src[0],
   });
}

/* ARL: load floor(src) into the address register a0.
 * Uses native FLOOR when the chip has it, otherwise emulates it as
 * src - FRC(src), then transfers the result with MOVAR. */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                                  INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* no native FLOOR: compute floor(x) = x - frc(x) */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1;
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}

/* LRP: linear interpolation, lowered to two MADs (derivation below). */
static void
trans_lrp(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* dst = src0 * src1 + (1 - src0) * src2
    * => src0 * src1 - (src0 - 1) * src2
    * => src0 * src1 - (src0 * src2 - src2)
    * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
    * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
    */
   struct etna_native_reg temp =
etna_compile_get_inner_temp(c);
   /* src0 is read alongside src1/src2 in a single MAD below; copy it to a
    * temp first if the uniform registers would conflict */
   if (etna_src_uniforms_conflict(src[0], src[1]) ||
       etna_src_uniforms_conflict(src[0], src[2])) {
      src[0] = etna_mov_src(c, src[0]);
   }

   struct etna_inst mad[2] = { };
   mad[0].opcode = INST_OPCODE_MAD;
   mad[0].sat = 0;
   mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W);
   mad[0].src[0] = src[0];
   mad[0].src[1] = src[2];
   mad[0].src[2] = negate(src[2]);
   mad[1].opcode = INST_OPCODE_MAD;
   mad[1].sat = inst->Instruction.Saturate;
   mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
   mad[1].src[1] = src[1];
   mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));

   emit_inst(c, &mad[0]);
   emit_inst(c, &mad[1]);
}

/* LIT: lighting coefficients, lowered to the sequence sketched below. */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* clamp src.y to >= 0 at runtime, unless it is an immediate we can clamp
    * at compile time */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* clamp src.w (the specular exponent) to [-128, 128], again folding the
    * clamp when the value is a compile-time immediate */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse the same instruction template for the lower clamp, with the
       * immediate negated to -128 */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* the alternative LOG writes x and y components that must be multiplied
       * together to form the result */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = log2(clamped y) * clamped w, then LITP combines everything */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}

/* SSG: sign(src). Uses native SIGN when available, otherwise emulates with
 * SET.NZ (|sign| mask) followed by SELECT.LZ to pick the sign. */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                            INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}

/* SIN/COS: three hardware strategies depending on chip capabilities. */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp =
etna_compile_get_inner_temp(c); /* only using .xyz */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* combine the x/y outputs of the alternative SIN/COS into the result */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       * MAD t.x_zw, src.xxxx, A, B
       * FRC t.x_z_, void, void, t.xwzw
       * MAD t.x_z_, t.xwzw, 2, -1
       * MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       * DP3 t.x_z_, t.zyww, C, void         (for sin)
       * DP3 t.__z_, t.zyww, C, void         (for scs)
       * MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       * DP3 t.x_z_, t.xyww, C, void         (for cos)
       * DP3 t.x___, t.xyww, C, void         (for scs)
       * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       * MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded. Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which components feed the MUL/DP3 pair */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit everything from ins[0] up to and including the last filled-in
       * instruction at p */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}

/* LG2: log2(src), with the two-component alternative LOG on newer chips. */
static void
trans_lg2(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) {
      /* On newer chips alternative LOG instruction is implemented,
       * which outputs an x and y component, which need to be multiplied to
       * get the result.
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src[0],
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   }
}

/* TEX/TXB/TXL/TXP: texture sampling. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]).
 */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* scale x and y by the per-unit 1/width and 1/height uniforms */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}

/* Translation callback for opcodes that generate no code (NOP, END). */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}

/* Dispatch table, indexed by TGSI opcode. Entries without a .fxn are
 * unhandled and trigger a BUG() in the code generation pass. */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};

/* Pass -- compile instructions */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct
tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_reg_desc *srcreg = etna_get_src_reg(c, reg->Register);
            const struct etna_native_reg *n = &srcreg->native;

            /* texture operands are converted separately by convert_tex() */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);

            /*
             * Replace W=1.0 for point sprite coordinates, since hardware
             * can only replace X,Y and leaves Z,W=0,0 instead of Z,W=0,1
             */
            if (srcreg && srcreg->has_semantic &&
                srcreg->semantic.Name == TGSI_SEMANTIC_TEXCOORD &&
                (c->key->sprite_coord_enable & BITFIELD_BIT(srcreg->semantic.Index))) {
               emit_inst(c, &(struct etna_inst) {
                  .opcode = INST_OPCODE_SET,
                  .cond = INST_CONDITION_TRUE,
                  .dst = etna_native_to_dst(srcreg->native, INST_COMPS_W),
               });
            }
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}

/* Look up register by semantic */
static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
{
   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[file].reg[idx];

      if (reg->semantic.Name == name && reg->semantic.Index == index)
         return reg;
   }

   return NULL; /* not found */
}

/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
 * - this is a vertex shader
 * - and this is an older GPU
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if
(c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) { 1947 /* find position out */ 1948 struct etna_reg_desc *pos_reg = 1949 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0); 1950 1951 if (pos_reg != NULL) { 1952 /* 1953 * ADD tX.__z_, tX.zzzz, void, tX.wwww 1954 * MUL tX.__z_, tX.zzzz, 0.5, void 1955 */ 1956 emit_inst(c, &(struct etna_inst) { 1957 .opcode = INST_OPCODE_ADD, 1958 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1959 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1960 .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)), 1961 }); 1962 emit_inst(c, &(struct etna_inst) { 1963 .opcode = INST_OPCODE_MUL, 1964 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1965 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1966 .src[1] = alloc_imm_f32(c, 0.5f), 1967 }); 1968 } 1969 } 1970} 1971 1972static void 1973etna_compile_frag_rb_swap(struct etna_compile *c) 1974{ 1975 if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) { 1976 /* find color out */ 1977 struct etna_reg_desc *color_reg = 1978 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0); 1979 1980 emit_inst(c, &(struct etna_inst) { 1981 .opcode = INST_OPCODE_MOV, 1982 .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W), 1983 .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)), 1984 }); 1985 } 1986} 1987 1988/** add a NOP to the shader if 1989 * a) the shader is empty 1990 * or 1991 * b) there is a label at the end of the shader 1992 */ 1993static void 1994etna_compile_add_nop_if_needed(struct etna_compile *c) 1995{ 1996 bool label_at_last_inst = false; 1997 1998 for (int idx = 0; idx < c->labels_count; ++idx) { 1999 if (c->labels[idx].inst_idx == c->inst_ptr) 2000 label_at_last_inst = true; 2001 2002 } 2003 2004 if (c->inst_ptr == 0 || label_at_last_inst) 2005 emit_inst(c, &(struct 
etna_inst){.opcode = INST_OPCODE_NOP}); 2006} 2007 2008static void 2009assign_uniforms(struct etna_compile_file *file, unsigned base) 2010{ 2011 for (int idx = 0; idx < file->reg_size; ++idx) { 2012 file->reg[idx].native.valid = 1; 2013 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0; 2014 file->reg[idx].native.id = base + idx; 2015 } 2016} 2017 2018/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x). 2019 * CONST must be consecutive as const buffers are supposed to be consecutive, 2020 * and before IMM, as this is 2021 * more convenient because is possible for the compilation process itself to 2022 * generate extra 2023 * immediates for constants such as pi, one, zero. 2024 */ 2025static void 2026assign_constants_and_immediates(struct etna_compile *c) 2027{ 2028 assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0); 2029 /* immediates start after the constants */ 2030 c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4; 2031 assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4); 2032 DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base, 2033 c->imm_size); 2034} 2035 2036/* Assign declared samplers to native texture units */ 2037static void 2038assign_texture_units(struct etna_compile *c) 2039{ 2040 uint tex_base = 0; 2041 2042 if (c->info.processor == PIPE_SHADER_VERTEX) 2043 tex_base = c->specs->vertex_sampler_offset; 2044 2045 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) { 2046 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1; 2047 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup 2048 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx; 2049 } 2050} 2051 2052/* Additional pass to fill in branch targets. This pass should be last 2053 * as no instruction reordering or removing/addition can be done anymore 2054 * once the branch targets are computed. 
2055 */ 2056static void 2057etna_compile_fill_in_labels(struct etna_compile *c) 2058{ 2059 for (int idx = 0; idx < c->inst_ptr; ++idx) { 2060 if (c->lbl_usage[idx] != -1) 2061 etna_assemble_set_imm(&c->code[idx * 4], 2062 c->labels[c->lbl_usage[idx]].inst_idx); 2063 } 2064} 2065 2066/* compare two etna_native_reg structures, return true if equal */ 2067static bool 2068cmp_etna_native_reg(const struct etna_native_reg to, 2069 const struct etna_native_reg from) 2070{ 2071 return to.valid == from.valid && to.is_tex == from.is_tex && 2072 to.rgroup == from.rgroup && to.id == from.id; 2073} 2074 2075/* go through all declarations and swap native registers *to* and *from* */ 2076static void 2077swap_native_registers(struct etna_compile *c, const struct etna_native_reg to, 2078 const struct etna_native_reg from) 2079{ 2080 if (cmp_etna_native_reg(from, to)) 2081 return; /* Nothing to do */ 2082 2083 for (int idx = 0; idx < c->total_decls; ++idx) { 2084 if (cmp_etna_native_reg(c->decl[idx].native, from)) { 2085 c->decl[idx].native = to; 2086 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) { 2087 c->decl[idx].native = from; 2088 } 2089 } 2090} 2091 2092/* For PS we need to permute so that inputs are always in temporary 0..N-1. 2093 * Semantic POS is always t0. If that semantic is not used, avoid t0. 
2094 */ 2095static void 2096permute_ps_inputs(struct etna_compile *c) 2097{ 2098 /* Special inputs: 2099 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION 2100 * gl_FrontFacing VARYING_SLOT_FACE TGSI_SEMANTIC_FACE 2101 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD 2102 * gl_TexCoord VARYING_SLOT_TEX TGSI_SEMANTIC_TEXCOORD 2103 */ 2104 uint native_idx = 1; 2105 2106 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2107 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2108 uint input_id; 2109 assert(reg->has_semantic); 2110 2111 if (!reg->active || 2112 reg->semantic.Name == TGSI_SEMANTIC_POSITION || 2113 reg->semantic.Name == TGSI_SEMANTIC_FACE) 2114 continue; 2115 2116 input_id = native_idx++; 2117 swap_native_registers(c, etna_native_temp(input_id), 2118 c->file[TGSI_FILE_INPUT].reg[idx].native); 2119 } 2120 2121 c->num_varyings = native_idx - 1; 2122 2123 if (native_idx > c->next_free_native) 2124 c->next_free_native = native_idx; 2125} 2126 2127static inline int sem2slot(const struct tgsi_declaration_semantic *semantic) 2128{ 2129 return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index); 2130} 2131 2132/* fill in ps inputs into shader object */ 2133static void 2134fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) 2135{ 2136 struct etna_shader_io_file *sf = &sobj->infile; 2137 2138 sf->num_reg = 0; 2139 2140 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2141 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2142 2143 if (reg->native.id > 0) { 2144 assert(sf->num_reg < ETNA_NUM_INPUTS); 2145 sf->reg[sf->num_reg].reg = reg->native.id; 2146 sf->reg[sf->num_reg].slot = sem2slot(®->semantic); 2147 /* convert usage mask to number of components (*=wildcard) 2148 * .r (0..1) -> 1 component 2149 * .*g (2..3) -> 2 component 2150 * .**b (4..7) -> 3 components 2151 * .***a (8..15) -> 4 components 2152 */ 2153 sf->reg[sf->num_reg].num_components = 
util_last_bit(reg->usage_mask); 2154 sf->num_reg++; 2155 } 2156 } 2157 2158 assert(sf->num_reg == c->num_varyings); 2159 sobj->input_count_unk8 = 31; /* XXX what is this */ 2160} 2161 2162/* fill in output mapping for ps into shader object */ 2163static void 2164fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c) 2165{ 2166 sobj->outfile.num_reg = 0; 2167 2168 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) { 2169 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx]; 2170 2171 switch (reg->semantic.Name) { 2172 case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */ 2173 sobj->ps_color_out_reg = reg->native.id; 2174 break; 2175 case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */ 2176 sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */ 2177 break; 2178 default: 2179 assert(0); /* only outputs supported are COLOR and POSITION at the moment */ 2180 } 2181 } 2182} 2183 2184/* fill in inputs for vs into shader object */ 2185static void 2186fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) 2187{ 2188 struct etna_shader_io_file *sf = &sobj->infile; 2189 2190 sf->num_reg = 0; 2191 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2192 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2193 assert(sf->num_reg < ETNA_NUM_INPUTS); 2194 2195 if (!reg->native.valid) 2196 continue; 2197 2198 /* XXX exclude inputs with special semantics such as gl_frontFacing */ 2199 sf->reg[sf->num_reg].reg = reg->native.id; 2200 sf->reg[sf->num_reg].slot = sem2slot(®->semantic); 2201 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask); 2202 sf->num_reg++; 2203 } 2204 2205 sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */ 2206} 2207 2208/* fill in outputs for vs into shader object */ 2209static void 2210fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c) 2211{ 2212 struct 
etna_shader_io_file *sf = &sobj->outfile; 2213 2214 sf->num_reg = 0; 2215 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) { 2216 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx]; 2217 assert(sf->num_reg < ETNA_NUM_INPUTS); 2218 2219 switch (reg->semantic.Name) { 2220 case TGSI_SEMANTIC_POSITION: 2221 sobj->vs_pos_out_reg = reg->native.id; 2222 break; 2223 case TGSI_SEMANTIC_PSIZE: 2224 sobj->vs_pointsize_out_reg = reg->native.id; 2225 break; 2226 default: 2227 sf->reg[sf->num_reg].reg = reg->native.id; 2228 sf->reg[sf->num_reg].slot = sem2slot(®->semantic); 2229 sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components; 2230 sf->num_reg++; 2231 } 2232 } 2233 2234 /* fill in "mystery meat" load balancing value. This value determines how 2235 * work is scheduled between VS and PS 2236 * in the unified shader architecture. More precisely, it is determined from 2237 * the number of VS outputs, as well as chip-specific 2238 * vertex output buffer size, vertex cache size, and the number of shader 2239 * cores. 2240 * 2241 * XXX this is a conservative estimate, the "optimal" value is only known for 2242 * sure at link time because some 2243 * outputs may be unused and thus unmapped. Then again, in the general use 2244 * case with GLSL the vertex and fragment 2245 * shaders are linked already before submitting to Gallium, thus all outputs 2246 * are used. 
2247 */ 2248 int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2; 2249 assert(half_out); 2250 2251 uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size - 2252 2 * half_out * c->specs->vertex_cache_size)) + 2253 9) / 2254 10; 2255 uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2; 2256 sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | 2257 VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | 2258 VIVS_VS_LOAD_BALANCING_C(0x3f) | 2259 VIVS_VS_LOAD_BALANCING_D(0x0f); 2260} 2261 2262static bool 2263etna_compile_check_limits(struct etna_compile *c) 2264{ 2265 int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX) 2266 ? c->specs->max_vs_uniforms 2267 : c->specs->max_ps_uniforms; 2268 /* round up number of uniforms, including immediates, in units of four */ 2269 int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4; 2270 2271 if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) { 2272 DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr, 2273 c->specs->max_instructions); 2274 return false; 2275 } 2276 2277 if (c->next_free_native > c->specs->max_registers) { 2278 DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native, 2279 c->specs->max_registers); 2280 return false; 2281 } 2282 2283 if (num_uniforms > max_uniforms) { 2284 DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms, 2285 max_uniforms); 2286 return false; 2287 } 2288 2289 if (c->num_varyings > c->specs->max_varyings) { 2290 DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings, 2291 c->specs->max_varyings); 2292 return false; 2293 } 2294 2295 if (c->imm_base > c->specs->num_constants) { 2296 DBG("Number of constants (%d) exceeds maximum %d", c->imm_base, 2297 c->specs->num_constants); 2298 } 2299 2300 return true; 2301} 2302 2303static void 2304copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj) 2305{ 2306 uint32_t count = c->imm_base + c->imm_size; 2307 
struct etna_shader_uniform_info *uinfo = &sobj->uniforms; 2308 2309 uinfo->count = count; 2310 2311 uinfo->data = malloc(count * sizeof(*c->imm_data)); 2312 for (unsigned i = 0; i < c->imm_base; i++) 2313 uinfo->data[i] = i; 2314 memcpy(&uinfo->data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data)); 2315 2316 uinfo->contents = malloc(count * sizeof(*c->imm_contents)); 2317 for (unsigned i = 0; i < c->imm_base; i++) 2318 uinfo->contents[i] = ETNA_UNIFORM_UNIFORM; 2319 memcpy(&uinfo->contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents)); 2320 2321 etna_set_shader_uniforms_dirty_flags(sobj); 2322} 2323 2324bool 2325etna_compile_shader(struct etna_shader_variant *v) 2326{ 2327 if (DBG_ENABLED(ETNA_DBG_NIR)) 2328 return etna_compile_shader_nir(v); 2329 2330 /* Create scratch space that may be too large to fit on stack 2331 */ 2332 bool ret; 2333 struct etna_compile *c; 2334 2335 if (unlikely(!v)) 2336 return false; 2337 2338 const struct etna_specs *specs = v->shader->specs; 2339 2340 struct tgsi_lowering_config lconfig = { 2341 .lower_FLR = !specs->has_sign_floor_ceil, 2342 .lower_CEIL = !specs->has_sign_floor_ceil, 2343 .lower_POW = true, 2344 .lower_EXP = true, 2345 .lower_LOG = true, 2346 .lower_DP2 = !specs->has_halti2_instructions, 2347 .lower_TRUNC = true, 2348 }; 2349 2350 c = CALLOC_STRUCT(etna_compile); 2351 if (!c) 2352 return false; 2353 2354 memset(&c->lbl_usage, -1, sizeof(c->lbl_usage)); 2355 2356 const struct tgsi_token *tokens = v->shader->tokens; 2357 2358 c->specs = specs; 2359 c->key = &v->key; 2360 c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info); 2361 c->free_tokens = !!c->tokens; 2362 if (!c->tokens) { 2363 /* no lowering */ 2364 c->tokens = tokens; 2365 } 2366 2367 /* Build a map from gallium register to native registers for files 2368 * CONST, SAMP, IMM, OUT, IN, TEMP. 2369 * SAMP will map as-is for fragment shaders, there will be a +8 offset for 2370 * vertex shaders. 
2371 */ 2372 /* Pass one -- check register file declarations and immediates */ 2373 etna_compile_parse_declarations(c); 2374 2375 etna_allocate_decls(c); 2376 2377 /* Pass two -- check usage of temporaries, inputs, outputs */ 2378 etna_compile_pass_check_usage(c); 2379 2380 assign_special_inputs(c); 2381 2382 /* Assign native temp register to TEMPs */ 2383 assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]); 2384 2385 /* optimize outputs */ 2386 etna_compile_pass_optimize_outputs(c); 2387 2388 /* assign inputs: last usage of input should be <= first usage of temp */ 2389 /* potential optimization case: 2390 * if single MOV TEMP[y], IN[x] before which temp y is not used, and 2391 * after which IN[x] 2392 * is not read, temp[y] can be used as input register as-is 2393 */ 2394 /* sort temporaries by first use 2395 * sort inputs by last usage 2396 * iterate over inputs, temporaries 2397 * if last usage of input <= first usage of temp: 2398 * assign input to temp 2399 * advance input, temporary pointer 2400 * else 2401 * advance temporary pointer 2402 * 2403 * potential problem: instruction with multiple inputs of which one is the 2404 * temp and the other is the input; 2405 * however, as the temp is not used before this, how would this make 2406 * sense? uninitialized temporaries have an undefined 2407 * value, so this would be ok 2408 */ 2409 assign_inouts_to_temporaries(c, TGSI_FILE_INPUT); 2410 2411 /* assign outputs: first usage of output should be >= last usage of temp */ 2412 /* potential optimization case: 2413 * if single MOV OUT[x], TEMP[y] (with full write mask, or at least 2414 * writing all components that are used in 2415 * the shader) after which temp y is no longer used temp[y] can be 2416 * used as output register as-is 2417 * 2418 * potential problem: instruction with multiple outputs of which one is the 2419 * temp and the other is the output; 2420 * however, as the temp is not used after this, how would this make 2421 * sense? 
could just discard the output value 2422 */ 2423 /* sort temporaries by last use 2424 * sort outputs by first usage 2425 * iterate over outputs, temporaries 2426 * if first usage of output >= last usage of temp: 2427 * assign output to temp 2428 * advance output, temporary pointer 2429 * else 2430 * advance temporary pointer 2431 */ 2432 assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT); 2433 2434 assign_constants_and_immediates(c); 2435 assign_texture_units(c); 2436 2437 /* list declarations */ 2438 for (int x = 0; x < c->total_decls; ++x) { 2439 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2440 "last_use=%i native=%i usage_mask=%x " 2441 "has_semantic=%i", 2442 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2443 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 2444 c->decl[x].native.valid ? c->decl[x].native.id : -1, 2445 c->decl[x].usage_mask, c->decl[x].has_semantic); 2446 if (c->decl[x].has_semantic) 2447 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2448 tgsi_semantic_names[c->decl[x].semantic.Name], 2449 c->decl[x].semantic.Index); 2450 } 2451 /* XXX for PS we need to permute so that inputs are always in temporary 2452 * 0..N-1. 2453 * There is no "switchboard" for varyings (AFAIK!). The output color, 2454 * however, can be routed 2455 * from an arbitrary temporary. 2456 */ 2457 if (c->info.processor == PIPE_SHADER_FRAGMENT) 2458 permute_ps_inputs(c); 2459 2460 2461 /* list declarations */ 2462 for (int x = 0; x < c->total_decls; ++x) { 2463 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2464 "last_use=%i native=%i usage_mask=%x " 2465 "has_semantic=%i", 2466 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2467 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 2468 c->decl[x].native.valid ? 
c->decl[x].native.id : -1, 2469 c->decl[x].usage_mask, c->decl[x].has_semantic); 2470 if (c->decl[x].has_semantic) 2471 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2472 tgsi_semantic_names[c->decl[x].semantic.Name], 2473 c->decl[x].semantic.Index); 2474 } 2475 2476 /* pass 3: generate instructions */ 2477 etna_compile_pass_generate_code(c); 2478 etna_compile_add_z_div_if_needed(c); 2479 etna_compile_frag_rb_swap(c); 2480 etna_compile_add_nop_if_needed(c); 2481 2482 ret = etna_compile_check_limits(c); 2483 if (!ret) 2484 goto out; 2485 2486 etna_compile_fill_in_labels(c); 2487 2488 /* fill in output structure */ 2489 v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX; 2490 v->uses_discard = c->info.uses_kill; 2491 v->code_size = c->inst_ptr * 4; 2492 v->code = mem_dup(c->code, c->inst_ptr * 16); 2493 v->num_loops = c->num_loops; 2494 v->num_temps = c->next_free_native; 2495 v->vs_id_in_reg = -1; 2496 v->vs_pos_out_reg = -1; 2497 v->vs_pointsize_out_reg = -1; 2498 v->ps_color_out_reg = -1; 2499 v->ps_depth_out_reg = -1; 2500 v->needs_icache = c->inst_ptr > c->specs->max_instructions; 2501 copy_uniform_state_to_shader(c, v); 2502 2503 if (c->info.processor == PIPE_SHADER_VERTEX) { 2504 fill_in_vs_inputs(v, c); 2505 fill_in_vs_outputs(v, c); 2506 } else if (c->info.processor == PIPE_SHADER_FRAGMENT) { 2507 fill_in_ps_inputs(v, c); 2508 fill_in_ps_outputs(v, c); 2509 } 2510 2511out: 2512 if (c->free_tokens) 2513 FREE((void *)c->tokens); 2514 2515 FREE(c->labels); 2516 FREE(c); 2517 2518 return ret; 2519} 2520 2521static const struct etna_shader_inout * 2522etna_shader_vs_lookup(const struct etna_shader_variant *sobj, 2523 const struct etna_shader_inout *in) 2524{ 2525 for (int i = 0; i < sobj->outfile.num_reg; i++) 2526 if (sobj->outfile.reg[i].slot == in->slot) 2527 return &sobj->outfile.reg[i]; 2528 2529 return NULL; 2530} 2531 2532bool 2533etna_link_shader(struct etna_shader_link_info *info, 2534 
const struct etna_shader_variant *vs, const struct etna_shader_variant *fs) 2535{ 2536 int comp_ofs = 0; 2537 /* For each fragment input we need to find the associated vertex shader 2538 * output, which can be found by matching on semantic name and index. A 2539 * binary search could be used because the vs outputs are sorted by their 2540 * semantic index and grouped by semantic type by fill_in_vs_outputs. 2541 */ 2542 assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 2543 info->pcoord_varying_comp_ofs = -1; 2544 2545 for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 2546 const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 2547 const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 2548 struct etna_varying *varying; 2549 bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) && 2550 (fsio->slot != VARYING_SLOT_COL1)); 2551 2552 assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 2553 2554 if (fsio->reg > info->num_varyings) 2555 info->num_varyings = fsio->reg; 2556 2557 varying = &info->varyings[fsio->reg - 1]; 2558 varying->num_components = fsio->num_components; 2559 2560 if (!interpolate_always) /* colors affected by flat shading */ 2561 varying->pa_attributes = 0x200; 2562 else /* texture coord or other bypasses flat shading */ 2563 varying->pa_attributes = 0x2f1; 2564 2565 varying->use[0] = VARYING_COMPONENT_USE_UNUSED; 2566 varying->use[1] = VARYING_COMPONENT_USE_UNUSED; 2567 varying->use[2] = VARYING_COMPONENT_USE_UNUSED; 2568 varying->use[3] = VARYING_COMPONENT_USE_UNUSED; 2569 2570 /* point/tex coord is an input to the PS without matching VS output, 2571 * so it gets a varying slot without being assigned a VS register. 
2572 */ 2573 if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) { 2574 varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X; 2575 varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; 2576 2577 info->pcoord_varying_comp_ofs = comp_ofs; 2578 } else { 2579 if (vsio == NULL) { /* not found -- link error */ 2580 BUG("Semantic value not found in vertex shader outputs\n"); 2581 return true; 2582 } 2583 2584 varying->reg = vsio->reg; 2585 } 2586 2587 comp_ofs += varying->num_components; 2588 } 2589 2590 assert(info->num_varyings == fs->infile.num_reg); 2591 2592 return false; 2593} 2594