1b8e80941Smrg/* 2b8e80941Smrg * Copyright (c) 2012-2015 Etnaviv Project 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the 12b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions 13b8e80941Smrg * of the Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Wladimir J. van der Laan <laanwj@gmail.com> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg/* TGSI->Vivante shader ISA conversion */ 28b8e80941Smrg 29b8e80941Smrg/* What does the compiler return (see etna_shader_object)? 
30b8e80941Smrg * 1) instruction data 31b8e80941Smrg * 2) input-to-temporary mapping (fixed for ps) 32b8e80941Smrg * *) in case of ps, semantic -> varying id mapping 33b8e80941Smrg * *) for each varying: number of components used (r, rg, rgb, rgba) 34b8e80941Smrg * 3) temporary-to-output mapping (in case of vs, fixed for ps) 35b8e80941Smrg * 4) for each input/output: possible semantic (position, color, glpointcoord, ...) 36b8e80941Smrg * 5) immediates base offset, immediates data 37b8e80941Smrg * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to 38b8e80941Smrg * configure the hw, but useful for error checking 39b8e80941Smrg * 7) enough information to add the z=(z+w)/2.0 necessary for older chips 40b8e80941Smrg * (output reg id is enough) 41b8e80941Smrg * 42b8e80941Smrg * Empty shaders are not allowed, should always at least generate a NOP. Also 43b8e80941Smrg * if there is a label at the end of the shader, an extra NOP should be 44b8e80941Smrg * generated as jump target. 
45b8e80941Smrg * 46b8e80941Smrg * TODO 47b8e80941Smrg * * Use an instruction scheduler 48b8e80941Smrg * * Indirect access to uniforms / temporaries using amode 49b8e80941Smrg */ 50b8e80941Smrg 51b8e80941Smrg#include "etnaviv_compiler.h" 52b8e80941Smrg 53b8e80941Smrg#include "etnaviv_asm.h" 54b8e80941Smrg#include "etnaviv_context.h" 55b8e80941Smrg#include "etnaviv_debug.h" 56b8e80941Smrg#include "etnaviv_disasm.h" 57b8e80941Smrg#include "etnaviv_uniforms.h" 58b8e80941Smrg#include "etnaviv_util.h" 59b8e80941Smrg 60b8e80941Smrg#include "pipe/p_shader_tokens.h" 61b8e80941Smrg#include "tgsi/tgsi_info.h" 62b8e80941Smrg#include "tgsi/tgsi_iterate.h" 63b8e80941Smrg#include "tgsi/tgsi_lowering.h" 64b8e80941Smrg#include "tgsi/tgsi_strings.h" 65b8e80941Smrg#include "tgsi/tgsi_util.h" 66b8e80941Smrg#include "util/u_math.h" 67b8e80941Smrg#include "util/u_memory.h" 68b8e80941Smrg 69b8e80941Smrg#include <fcntl.h> 70b8e80941Smrg#include <stdio.h> 71b8e80941Smrg#include <sys/stat.h> 72b8e80941Smrg#include <sys/types.h> 73b8e80941Smrg 74b8e80941Smrg#define ETNA_MAX_INNER_TEMPS 2 75b8e80941Smrg 76b8e80941Smrgstatic const float sincos_const[2][4] = { 77b8e80941Smrg { 78b8e80941Smrg 2., -1., 4., -4., 79b8e80941Smrg }, 80b8e80941Smrg { 81b8e80941Smrg 1. / (2. * M_PI), 0.75, 0.5, 0.0, 82b8e80941Smrg }, 83b8e80941Smrg}; 84b8e80941Smrg 85b8e80941Smrg/* Native register description structure */ 86b8e80941Smrgstruct etna_native_reg { 87b8e80941Smrg unsigned valid : 1; 88b8e80941Smrg unsigned is_tex : 1; /* is texture unit, overrides rgroup */ 89b8e80941Smrg unsigned rgroup : 3; 90b8e80941Smrg unsigned id : 9; 91b8e80941Smrg}; 92b8e80941Smrg 93b8e80941Smrg/* Register description */ 94b8e80941Smrgstruct etna_reg_desc { 95b8e80941Smrg enum tgsi_file_type file; /* IN, OUT, TEMP, ... 
*/ 96b8e80941Smrg int idx; /* index into file */ 97b8e80941Smrg bool active; /* used in program */ 98b8e80941Smrg int first_use; /* instruction id of first use (scope begin) */ 99b8e80941Smrg int last_use; /* instruction id of last use (scope end, inclusive) */ 100b8e80941Smrg 101b8e80941Smrg struct etna_native_reg native; /* native register to map to */ 102b8e80941Smrg unsigned usage_mask : 4; /* usage, per channel */ 103b8e80941Smrg bool has_semantic; /* register has associated TGSI semantic */ 104b8e80941Smrg struct tgsi_declaration_semantic semantic; /* TGSI semantic */ 105b8e80941Smrg struct tgsi_declaration_interp interp; /* Interpolation type */ 106b8e80941Smrg}; 107b8e80941Smrg 108b8e80941Smrg/* Label information structure */ 109b8e80941Smrgstruct etna_compile_label { 110b8e80941Smrg int inst_idx; /* Instruction id that label points to */ 111b8e80941Smrg}; 112b8e80941Smrg 113b8e80941Smrgenum etna_compile_frame_type { 114b8e80941Smrg ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */ 115b8e80941Smrg ETNA_COMPILE_FRAME_LOOP, 116b8e80941Smrg}; 117b8e80941Smrg 118b8e80941Smrg/* nesting scope frame (LOOP, IF, ...) 
during compilation 119b8e80941Smrg */ 120b8e80941Smrgstruct etna_compile_frame { 121b8e80941Smrg enum etna_compile_frame_type type; 122b8e80941Smrg int lbl_else_idx; 123b8e80941Smrg int lbl_endif_idx; 124b8e80941Smrg int lbl_loop_bgn_idx; 125b8e80941Smrg int lbl_loop_end_idx; 126b8e80941Smrg}; 127b8e80941Smrg 128b8e80941Smrgstruct etna_compile_file { 129b8e80941Smrg /* Number of registers in each TGSI file (max register+1) */ 130b8e80941Smrg size_t reg_size; 131b8e80941Smrg /* Register descriptions, per register index */ 132b8e80941Smrg struct etna_reg_desc *reg; 133b8e80941Smrg}; 134b8e80941Smrg 135b8e80941Smrg#define array_insert(arr, val) \ 136b8e80941Smrg do { \ 137b8e80941Smrg if (arr##_count == arr##_sz) { \ 138b8e80941Smrg arr##_sz = MAX2(2 * arr##_sz, 16); \ 139b8e80941Smrg arr = realloc(arr, arr##_sz * sizeof(arr[0])); \ 140b8e80941Smrg } \ 141b8e80941Smrg arr[arr##_count++] = val; \ 142b8e80941Smrg } while (0) 143b8e80941Smrg 144b8e80941Smrg 145b8e80941Smrg/* scratch area for compiling shader, freed after compilation finishes */ 146b8e80941Smrgstruct etna_compile { 147b8e80941Smrg const struct tgsi_token *tokens; 148b8e80941Smrg bool free_tokens; 149b8e80941Smrg 150b8e80941Smrg struct tgsi_shader_info info; 151b8e80941Smrg 152b8e80941Smrg /* Register descriptions, per TGSI file, per register index */ 153b8e80941Smrg struct etna_compile_file file[TGSI_FILE_COUNT]; 154b8e80941Smrg 155b8e80941Smrg /* Keep track of TGSI register declarations */ 156b8e80941Smrg struct etna_reg_desc decl[ETNA_MAX_DECL]; 157b8e80941Smrg uint total_decls; 158b8e80941Smrg 159b8e80941Smrg /* Bitmap of dead instructions which are removed in a separate pass */ 160b8e80941Smrg bool dead_inst[ETNA_MAX_TOKENS]; 161b8e80941Smrg 162b8e80941Smrg /* Immediate data */ 163b8e80941Smrg enum etna_immediate_contents imm_contents[ETNA_MAX_IMM]; 164b8e80941Smrg uint32_t imm_data[ETNA_MAX_IMM]; 165b8e80941Smrg uint32_t imm_base; /* base of immediates (in 32 bit units) */ 166b8e80941Smrg uint32_t 
imm_size; /* size of immediates (in 32 bit units) */ 167b8e80941Smrg 168b8e80941Smrg /* Next free native register, for register allocation */ 169b8e80941Smrg uint32_t next_free_native; 170b8e80941Smrg 171b8e80941Smrg /* Temporary register for use within translated TGSI instruction, 172b8e80941Smrg * only allocated when needed. 173b8e80941Smrg */ 174b8e80941Smrg int inner_temps; /* number of inner temps used; only up to one available at 175b8e80941Smrg this point */ 176b8e80941Smrg struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS]; 177b8e80941Smrg 178b8e80941Smrg /* Fields for handling nested conditionals */ 179b8e80941Smrg struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH]; 180b8e80941Smrg int frame_sp; 181b8e80941Smrg int lbl_usage[ETNA_MAX_INSTRUCTIONS]; 182b8e80941Smrg 183b8e80941Smrg unsigned labels_count, labels_sz; 184b8e80941Smrg struct etna_compile_label *labels; 185b8e80941Smrg 186b8e80941Smrg unsigned num_loops; 187b8e80941Smrg 188b8e80941Smrg /* Code generation */ 189b8e80941Smrg int inst_ptr; /* current instruction pointer */ 190b8e80941Smrg uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE]; 191b8e80941Smrg 192b8e80941Smrg /* I/O */ 193b8e80941Smrg 194b8e80941Smrg /* Number of varyings (PS only) */ 195b8e80941Smrg int num_varyings; 196b8e80941Smrg 197b8e80941Smrg /* GPU hardware specs */ 198b8e80941Smrg const struct etna_specs *specs; 199b8e80941Smrg 200b8e80941Smrg const struct etna_shader_key *key; 201b8e80941Smrg}; 202b8e80941Smrg 203b8e80941Smrgstatic struct etna_reg_desc * 204b8e80941Smrgetna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst) 205b8e80941Smrg{ 206b8e80941Smrg return &c->file[dst.File].reg[dst.Index]; 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrgstatic struct etna_reg_desc * 210b8e80941Smrgetna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src) 211b8e80941Smrg{ 212b8e80941Smrg return &c->file[src.File].reg[src.Index]; 213b8e80941Smrg} 214b8e80941Smrg 215b8e80941Smrgstatic struct 
etna_native_reg 216b8e80941Smrgetna_native_temp(unsigned reg) 217b8e80941Smrg{ 218b8e80941Smrg return (struct etna_native_reg) { 219b8e80941Smrg .valid = 1, 220b8e80941Smrg .rgroup = INST_RGROUP_TEMP, 221b8e80941Smrg .id = reg 222b8e80941Smrg }; 223b8e80941Smrg} 224b8e80941Smrg 225b8e80941Smrg/** Register allocation **/ 226b8e80941Smrgenum reg_sort_order { 227b8e80941Smrg FIRST_USE_ASC, 228b8e80941Smrg FIRST_USE_DESC, 229b8e80941Smrg LAST_USE_ASC, 230b8e80941Smrg LAST_USE_DESC 231b8e80941Smrg}; 232b8e80941Smrg 233b8e80941Smrg/* Augmented register description for sorting */ 234b8e80941Smrgstruct sort_rec { 235b8e80941Smrg struct etna_reg_desc *ptr; 236b8e80941Smrg int key; 237b8e80941Smrg}; 238b8e80941Smrg 239b8e80941Smrgstatic int 240b8e80941Smrgsort_rec_compar(const struct sort_rec *a, const struct sort_rec *b) 241b8e80941Smrg{ 242b8e80941Smrg if (a->key < b->key) 243b8e80941Smrg return -1; 244b8e80941Smrg 245b8e80941Smrg if (a->key > b->key) 246b8e80941Smrg return 1; 247b8e80941Smrg 248b8e80941Smrg return 0; 249b8e80941Smrg} 250b8e80941Smrg 251b8e80941Smrg/* create an index on a register set based on certain criteria. 
*/ 252b8e80941Smrgstatic int 253b8e80941Smrgsort_registers(struct sort_rec *sorted, struct etna_compile_file *file, 254b8e80941Smrg enum reg_sort_order so) 255b8e80941Smrg{ 256b8e80941Smrg struct etna_reg_desc *regs = file->reg; 257b8e80941Smrg int ptr = 0; 258b8e80941Smrg 259b8e80941Smrg /* pre-populate keys from active registers */ 260b8e80941Smrg for (int idx = 0; idx < file->reg_size; ++idx) { 261b8e80941Smrg /* only interested in active registers now; will only assign inactive ones 262b8e80941Smrg * if no space in active ones */ 263b8e80941Smrg if (regs[idx].active) { 264b8e80941Smrg sorted[ptr].ptr = ®s[idx]; 265b8e80941Smrg 266b8e80941Smrg switch (so) { 267b8e80941Smrg case FIRST_USE_ASC: 268b8e80941Smrg sorted[ptr].key = regs[idx].first_use; 269b8e80941Smrg break; 270b8e80941Smrg case LAST_USE_ASC: 271b8e80941Smrg sorted[ptr].key = regs[idx].last_use; 272b8e80941Smrg break; 273b8e80941Smrg case FIRST_USE_DESC: 274b8e80941Smrg sorted[ptr].key = -regs[idx].first_use; 275b8e80941Smrg break; 276b8e80941Smrg case LAST_USE_DESC: 277b8e80941Smrg sorted[ptr].key = -regs[idx].last_use; 278b8e80941Smrg break; 279b8e80941Smrg } 280b8e80941Smrg ptr++; 281b8e80941Smrg } 282b8e80941Smrg } 283b8e80941Smrg 284b8e80941Smrg /* sort index by key */ 285b8e80941Smrg qsort(sorted, ptr, sizeof(struct sort_rec), 286b8e80941Smrg (int (*)(const void *, const void *))sort_rec_compar); 287b8e80941Smrg 288b8e80941Smrg return ptr; 289b8e80941Smrg} 290b8e80941Smrg 291b8e80941Smrg/* Allocate a new, unused, native temp register */ 292b8e80941Smrgstatic struct etna_native_reg 293b8e80941Smrgalloc_new_native_reg(struct etna_compile *c) 294b8e80941Smrg{ 295b8e80941Smrg assert(c->next_free_native < ETNA_MAX_TEMPS); 296b8e80941Smrg return etna_native_temp(c->next_free_native++); 297b8e80941Smrg} 298b8e80941Smrg 299b8e80941Smrg/* assign TEMPs to native registers */ 300b8e80941Smrgstatic void 301b8e80941Smrgassign_temporaries_to_native(struct etna_compile *c, 302b8e80941Smrg struct 
etna_compile_file *file) 303b8e80941Smrg{ 304b8e80941Smrg struct etna_reg_desc *temps = file->reg; 305b8e80941Smrg 306b8e80941Smrg for (int idx = 0; idx < file->reg_size; ++idx) 307b8e80941Smrg temps[idx].native = alloc_new_native_reg(c); 308b8e80941Smrg} 309b8e80941Smrg 310b8e80941Smrg/* assign inputs and outputs to temporaries 311b8e80941Smrg * Gallium assumes that the hardware has separate registers for taking input and 312b8e80941Smrg * output, however Vivante GPUs use temporaries both for passing in inputs and 313b8e80941Smrg * passing back outputs. 314b8e80941Smrg * Try to re-use temporary registers where possible. */ 315b8e80941Smrgstatic void 316b8e80941Smrgassign_inouts_to_temporaries(struct etna_compile *c, uint file) 317b8e80941Smrg{ 318b8e80941Smrg bool mode_inputs = (file == TGSI_FILE_INPUT); 319b8e80941Smrg int inout_ptr = 0, num_inouts; 320b8e80941Smrg int temp_ptr = 0, num_temps; 321b8e80941Smrg struct sort_rec inout_order[ETNA_MAX_TEMPS]; 322b8e80941Smrg struct sort_rec temps_order[ETNA_MAX_TEMPS]; 323b8e80941Smrg num_inouts = sort_registers(inout_order, &c->file[file], 324b8e80941Smrg mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC); 325b8e80941Smrg num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY], 326b8e80941Smrg mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC); 327b8e80941Smrg 328b8e80941Smrg while (inout_ptr < num_inouts && temp_ptr < num_temps) { 329b8e80941Smrg struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 330b8e80941Smrg struct etna_reg_desc *temp = temps_order[temp_ptr].ptr; 331b8e80941Smrg 332b8e80941Smrg if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */ 333b8e80941Smrg inout_ptr++; 334b8e80941Smrg continue; 335b8e80941Smrg } 336b8e80941Smrg 337b8e80941Smrg /* last usage of this input is before or in same instruction of first use 338b8e80941Smrg * of temporary? */ 339b8e80941Smrg if (mode_inputs ? 
(inout->last_use <= temp->first_use) 340b8e80941Smrg : (inout->first_use >= temp->last_use)) { 341b8e80941Smrg /* assign it and advance to next input */ 342b8e80941Smrg inout->native = temp->native; 343b8e80941Smrg inout_ptr++; 344b8e80941Smrg } 345b8e80941Smrg 346b8e80941Smrg temp_ptr++; 347b8e80941Smrg } 348b8e80941Smrg 349b8e80941Smrg /* if we couldn't reuse current ones, allocate new temporaries */ 350b8e80941Smrg for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) { 351b8e80941Smrg struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 352b8e80941Smrg 353b8e80941Smrg if (inout->active && !inout->native.valid) 354b8e80941Smrg inout->native = alloc_new_native_reg(c); 355b8e80941Smrg } 356b8e80941Smrg} 357b8e80941Smrg 358b8e80941Smrg/* Allocate an immediate with a certain value and return the index. If 359b8e80941Smrg * there is already an immediate with that value, return that. 360b8e80941Smrg */ 361b8e80941Smrgstatic struct etna_inst_src 362b8e80941Smrgalloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, 363b8e80941Smrg uint32_t value) 364b8e80941Smrg{ 365b8e80941Smrg int idx; 366b8e80941Smrg 367b8e80941Smrg /* Could use a hash table to speed this up */ 368b8e80941Smrg for (idx = 0; idx < c->imm_size; ++idx) { 369b8e80941Smrg if (c->imm_contents[idx] == contents && c->imm_data[idx] == value) 370b8e80941Smrg break; 371b8e80941Smrg } 372b8e80941Smrg 373b8e80941Smrg /* look if there is an unused slot */ 374b8e80941Smrg if (idx == c->imm_size) { 375b8e80941Smrg for (idx = 0; idx < c->imm_size; ++idx) { 376b8e80941Smrg if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED) 377b8e80941Smrg break; 378b8e80941Smrg } 379b8e80941Smrg } 380b8e80941Smrg 381b8e80941Smrg /* allocate new immediate */ 382b8e80941Smrg if (idx == c->imm_size) { 383b8e80941Smrg assert(c->imm_size < ETNA_MAX_IMM); 384b8e80941Smrg idx = c->imm_size++; 385b8e80941Smrg c->imm_data[idx] = value; 386b8e80941Smrg c->imm_contents[idx] = contents; 387b8e80941Smrg } 388b8e80941Smrg 
389b8e80941Smrg /* swizzle so that component with value is returned in all components */ 390b8e80941Smrg idx += c->imm_base; 391b8e80941Smrg struct etna_inst_src imm_src = { 392b8e80941Smrg .use = 1, 393b8e80941Smrg .rgroup = INST_RGROUP_UNIFORM_0, 394b8e80941Smrg .reg = idx / 4, 395b8e80941Smrg .swiz = INST_SWIZ_BROADCAST(idx & 3) 396b8e80941Smrg }; 397b8e80941Smrg 398b8e80941Smrg return imm_src; 399b8e80941Smrg} 400b8e80941Smrg 401b8e80941Smrgstatic struct etna_inst_src 402b8e80941Smrgalloc_imm_u32(struct etna_compile *c, uint32_t value) 403b8e80941Smrg{ 404b8e80941Smrg return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value); 405b8e80941Smrg} 406b8e80941Smrg 407b8e80941Smrgstatic struct etna_inst_src 408b8e80941Smrgalloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents, 409b8e80941Smrg const uint32_t *values) 410b8e80941Smrg{ 411b8e80941Smrg struct etna_inst_src imm_src = { }; 412b8e80941Smrg int idx, i; 413b8e80941Smrg 414b8e80941Smrg for (idx = 0; idx + 3 < c->imm_size; idx += 4) { 415b8e80941Smrg /* What if we can use a uniform with a different swizzle? 
*/ 416b8e80941Smrg for (i = 0; i < 4; i++) 417b8e80941Smrg if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i]) 418b8e80941Smrg break; 419b8e80941Smrg if (i == 4) 420b8e80941Smrg break; 421b8e80941Smrg } 422b8e80941Smrg 423b8e80941Smrg if (idx + 3 >= c->imm_size) { 424b8e80941Smrg idx = align(c->imm_size, 4); 425b8e80941Smrg assert(idx + 4 <= ETNA_MAX_IMM); 426b8e80941Smrg 427b8e80941Smrg for (i = 0; i < 4; i++) { 428b8e80941Smrg c->imm_data[idx + i] = values[i]; 429b8e80941Smrg c->imm_contents[idx + i] = contents; 430b8e80941Smrg } 431b8e80941Smrg 432b8e80941Smrg c->imm_size = idx + 4; 433b8e80941Smrg } 434b8e80941Smrg 435b8e80941Smrg assert((c->imm_base & 3) == 0); 436b8e80941Smrg idx += c->imm_base; 437b8e80941Smrg imm_src.use = 1; 438b8e80941Smrg imm_src.rgroup = INST_RGROUP_UNIFORM_0; 439b8e80941Smrg imm_src.reg = idx / 4; 440b8e80941Smrg imm_src.swiz = INST_SWIZ_IDENTITY; 441b8e80941Smrg 442b8e80941Smrg return imm_src; 443b8e80941Smrg} 444b8e80941Smrg 445b8e80941Smrgstatic uint32_t 446b8e80941Smrgget_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm, 447b8e80941Smrg unsigned swiz_idx) 448b8e80941Smrg{ 449b8e80941Smrg assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0); 450b8e80941Smrg unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3); 451b8e80941Smrg 452b8e80941Smrg return c->imm_data[idx]; 453b8e80941Smrg} 454b8e80941Smrg 455b8e80941Smrg/* Allocate immediate with a certain float value. If there is already an 456b8e80941Smrg * immediate with that value, return that. 
457b8e80941Smrg */ 458b8e80941Smrgstatic struct etna_inst_src 459b8e80941Smrgalloc_imm_f32(struct etna_compile *c, float value) 460b8e80941Smrg{ 461b8e80941Smrg return alloc_imm_u32(c, fui(value)); 462b8e80941Smrg} 463b8e80941Smrg 464b8e80941Smrgstatic struct etna_inst_src 465b8e80941Smrgetna_imm_vec4f(struct etna_compile *c, const float *vec4) 466b8e80941Smrg{ 467b8e80941Smrg uint32_t val[4]; 468b8e80941Smrg 469b8e80941Smrg for (int i = 0; i < 4; i++) 470b8e80941Smrg val[i] = fui(vec4[i]); 471b8e80941Smrg 472b8e80941Smrg return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val); 473b8e80941Smrg} 474b8e80941Smrg 475b8e80941Smrg/* Pass -- check register file declarations and immediates */ 476b8e80941Smrgstatic void 477b8e80941Smrgetna_compile_parse_declarations(struct etna_compile *c) 478b8e80941Smrg{ 479b8e80941Smrg struct tgsi_parse_context ctx = { }; 480b8e80941Smrg MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); 481b8e80941Smrg assert(status == TGSI_PARSE_OK); 482b8e80941Smrg 483b8e80941Smrg while (!tgsi_parse_end_of_tokens(&ctx)) { 484b8e80941Smrg tgsi_parse_token(&ctx); 485b8e80941Smrg 486b8e80941Smrg switch (ctx.FullToken.Token.Type) { 487b8e80941Smrg case TGSI_TOKEN_TYPE_IMMEDIATE: { 488b8e80941Smrg /* immediates are handled differently from other files; they are 489b8e80941Smrg * not declared explicitly, and always add four components */ 490b8e80941Smrg const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate; 491b8e80941Smrg assert(c->imm_size <= (ETNA_MAX_IMM - 4)); 492b8e80941Smrg 493b8e80941Smrg for (int i = 0; i < 4; ++i) { 494b8e80941Smrg unsigned idx = c->imm_size++; 495b8e80941Smrg 496b8e80941Smrg c->imm_data[idx] = imm->u[i].Uint; 497b8e80941Smrg c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT; 498b8e80941Smrg } 499b8e80941Smrg } 500b8e80941Smrg break; 501b8e80941Smrg } 502b8e80941Smrg } 503b8e80941Smrg 504b8e80941Smrg tgsi_parse_free(&ctx); 505b8e80941Smrg} 506b8e80941Smrg 507b8e80941Smrg/* Allocate register declarations 
for the registers in all register files */ 508b8e80941Smrgstatic void 509b8e80941Smrgetna_allocate_decls(struct etna_compile *c) 510b8e80941Smrg{ 511b8e80941Smrg uint idx = 0; 512b8e80941Smrg 513b8e80941Smrg for (int x = 0; x < TGSI_FILE_COUNT; ++x) { 514b8e80941Smrg c->file[x].reg = &c->decl[idx]; 515b8e80941Smrg c->file[x].reg_size = c->info.file_max[x] + 1; 516b8e80941Smrg 517b8e80941Smrg for (int sub = 0; sub < c->file[x].reg_size; ++sub) { 518b8e80941Smrg c->decl[idx].file = x; 519b8e80941Smrg c->decl[idx].idx = sub; 520b8e80941Smrg idx++; 521b8e80941Smrg } 522b8e80941Smrg } 523b8e80941Smrg 524b8e80941Smrg c->total_decls = idx; 525b8e80941Smrg} 526b8e80941Smrg 527b8e80941Smrg/* Pass -- check and record usage of temporaries, inputs, outputs */ 528b8e80941Smrgstatic void 529b8e80941Smrgetna_compile_pass_check_usage(struct etna_compile *c) 530b8e80941Smrg{ 531b8e80941Smrg struct tgsi_parse_context ctx = { }; 532b8e80941Smrg MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); 533b8e80941Smrg assert(status == TGSI_PARSE_OK); 534b8e80941Smrg 535b8e80941Smrg for (int idx = 0; idx < c->total_decls; ++idx) { 536b8e80941Smrg c->decl[idx].active = false; 537b8e80941Smrg c->decl[idx].first_use = c->decl[idx].last_use = -1; 538b8e80941Smrg } 539b8e80941Smrg 540b8e80941Smrg int inst_idx = 0; 541b8e80941Smrg while (!tgsi_parse_end_of_tokens(&ctx)) { 542b8e80941Smrg tgsi_parse_token(&ctx); 543b8e80941Smrg /* find out max register #s used 544b8e80941Smrg * For every register mark first and last instruction index where it's 545b8e80941Smrg * used this allows finding ranges where the temporary can be borrowed 546b8e80941Smrg * as input and/or output register 547b8e80941Smrg * 548b8e80941Smrg * XXX in the case of loops this needs special care, or even be completely 549b8e80941Smrg * disabled, as 550b8e80941Smrg * the last usage of a register inside a loop means it can still be used 551b8e80941Smrg * on next loop 552b8e80941Smrg * iteration (execution is no longer * 
chronological). The register can 553b8e80941Smrg * only be 554b8e80941Smrg * declared "free" after the loop finishes. 555b8e80941Smrg * 556b8e80941Smrg * Same for inputs: the first usage of a register inside a loop doesn't 557b8e80941Smrg * mean that the register 558b8e80941Smrg * won't have been overwritten in previous iteration. The register can 559b8e80941Smrg * only be declared free before the loop 560b8e80941Smrg * starts. 561b8e80941Smrg * The proper way would be to do full dominator / post-dominator analysis 562b8e80941Smrg * (especially with more complicated 563b8e80941Smrg * control flow such as direct branch instructions) but not for now... 564b8e80941Smrg */ 565b8e80941Smrg switch (ctx.FullToken.Token.Type) { 566b8e80941Smrg case TGSI_TOKEN_TYPE_DECLARATION: { 567b8e80941Smrg /* Declaration: fill in file details */ 568b8e80941Smrg const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; 569b8e80941Smrg struct etna_compile_file *file = &c->file[decl->Declaration.File]; 570b8e80941Smrg 571b8e80941Smrg for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) { 572b8e80941Smrg file->reg[idx].usage_mask = 0; // we'll compute this ourselves 573b8e80941Smrg file->reg[idx].has_semantic = decl->Declaration.Semantic; 574b8e80941Smrg file->reg[idx].semantic = decl->Semantic; 575b8e80941Smrg file->reg[idx].interp = decl->Interp; 576b8e80941Smrg } 577b8e80941Smrg } break; 578b8e80941Smrg case TGSI_TOKEN_TYPE_INSTRUCTION: { 579b8e80941Smrg /* Instruction: iterate over operands of instruction */ 580b8e80941Smrg const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 581b8e80941Smrg 582b8e80941Smrg /* iterate over destination registers */ 583b8e80941Smrg for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) { 584b8e80941Smrg struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index]; 585b8e80941Smrg 586b8e80941Smrg if (reg_desc->first_use == -1) 587b8e80941Smrg 
reg_desc->first_use = inst_idx; 588b8e80941Smrg 589b8e80941Smrg reg_desc->last_use = inst_idx; 590b8e80941Smrg reg_desc->active = true; 591b8e80941Smrg } 592b8e80941Smrg 593b8e80941Smrg /* iterate over source registers */ 594b8e80941Smrg for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) { 595b8e80941Smrg struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index]; 596b8e80941Smrg 597b8e80941Smrg if (reg_desc->first_use == -1) 598b8e80941Smrg reg_desc->first_use = inst_idx; 599b8e80941Smrg 600b8e80941Smrg reg_desc->last_use = inst_idx; 601b8e80941Smrg reg_desc->active = true; 602b8e80941Smrg /* accumulate usage mask for register, this is used to determine how 603b8e80941Smrg * many slots for varyings 604b8e80941Smrg * should be allocated */ 605b8e80941Smrg reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx); 606b8e80941Smrg } 607b8e80941Smrg inst_idx += 1; 608b8e80941Smrg } break; 609b8e80941Smrg default: 610b8e80941Smrg break; 611b8e80941Smrg } 612b8e80941Smrg } 613b8e80941Smrg 614b8e80941Smrg tgsi_parse_free(&ctx); 615b8e80941Smrg} 616b8e80941Smrg 617b8e80941Smrg/* assign inputs that need to be assigned to specific registers */ 618b8e80941Smrgstatic void 619b8e80941Smrgassign_special_inputs(struct etna_compile *c) 620b8e80941Smrg{ 621b8e80941Smrg if (c->info.processor == PIPE_SHADER_FRAGMENT) { 622b8e80941Smrg /* never assign t0 as it is the position output, start assigning at t1 */ 623b8e80941Smrg c->next_free_native = 1; 624b8e80941Smrg 625b8e80941Smrg /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */ 626b8e80941Smrg for (int idx = 0; idx < c->total_decls; ++idx) { 627b8e80941Smrg struct etna_reg_desc *reg = &c->decl[idx]; 628b8e80941Smrg 629b8e80941Smrg if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION) 630b8e80941Smrg reg->native = etna_native_temp(0); 631b8e80941Smrg } 632b8e80941Smrg } 633b8e80941Smrg} 634b8e80941Smrg 635b8e80941Smrg/* Check that a move 
instruction does not swizzle any of the components 636b8e80941Smrg * that it writes. 637b8e80941Smrg */ 638b8e80941Smrgstatic bool 639b8e80941Smrgetna_mov_check_no_swizzle(const struct tgsi_dst_register dst, 640b8e80941Smrg const struct tgsi_src_register src) 641b8e80941Smrg{ 642b8e80941Smrg return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) && 643b8e80941Smrg (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) && 644b8e80941Smrg (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) && 645b8e80941Smrg (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W); 646b8e80941Smrg} 647b8e80941Smrg 648b8e80941Smrg/* Pass -- optimize outputs 649b8e80941Smrg * Mesa tends to generate code like this at the end if their shaders 650b8e80941Smrg * MOV OUT[1], TEMP[2] 651b8e80941Smrg * MOV OUT[0], TEMP[0] 652b8e80941Smrg * MOV OUT[2], TEMP[1] 653b8e80941Smrg * Recognize if 654b8e80941Smrg * a) there is only a single assignment to an output register and 655b8e80941Smrg * b) the temporary is not used after that 656b8e80941Smrg * Also recognize direct assignment of IN to OUT (passthrough) 657b8e80941Smrg **/ 658b8e80941Smrgstatic void 659b8e80941Smrgetna_compile_pass_optimize_outputs(struct etna_compile *c) 660b8e80941Smrg{ 661b8e80941Smrg struct tgsi_parse_context ctx = { }; 662b8e80941Smrg int inst_idx = 0; 663b8e80941Smrg MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); 664b8e80941Smrg assert(status == TGSI_PARSE_OK); 665b8e80941Smrg 666b8e80941Smrg while (!tgsi_parse_end_of_tokens(&ctx)) { 667b8e80941Smrg tgsi_parse_token(&ctx); 668b8e80941Smrg 669b8e80941Smrg switch (ctx.FullToken.Token.Type) { 670b8e80941Smrg case TGSI_TOKEN_TYPE_INSTRUCTION: { 671b8e80941Smrg const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 672b8e80941Smrg 673b8e80941Smrg /* iterate over operands */ 674b8e80941Smrg switch (inst->Instruction.Opcode) { 675b8e80941Smrg case TGSI_OPCODE_MOV: { 
676b8e80941Smrg /* We are only interested in eliminating MOVs which write to 677b8e80941Smrg * the shader outputs. Test for this early. */ 678b8e80941Smrg if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 679b8e80941Smrg break; 680b8e80941Smrg /* Elimination of a MOV must have no visible effect on the 681b8e80941Smrg * resulting shader: this means the MOV must not swizzle or 682b8e80941Smrg * saturate, and its source must not have the negate or 683b8e80941Smrg * absolute modifiers. */ 684b8e80941Smrg if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) || 685b8e80941Smrg inst->Instruction.Saturate || inst->Src[0].Register.Negate || 686b8e80941Smrg inst->Src[0].Register.Absolute) 687b8e80941Smrg break; 688b8e80941Smrg 689b8e80941Smrg uint out_idx = inst->Dst[0].Register.Index; 690b8e80941Smrg uint in_idx = inst->Src[0].Register.Index; 691b8e80941Smrg /* assignment of temporary to output -- 692b8e80941Smrg * and the output doesn't yet have a native register assigned 693b8e80941Smrg * and the last use of the temporary is this instruction 694b8e80941Smrg * and the MOV does not do a swizzle 695b8e80941Smrg */ 696b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY && 697b8e80941Smrg !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 698b8e80941Smrg c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) { 699b8e80941Smrg c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 700b8e80941Smrg c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native; 701b8e80941Smrg /* prevent temp from being re-used for the rest of the shader */ 702b8e80941Smrg c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS; 703b8e80941Smrg /* mark this MOV instruction as a no-op */ 704b8e80941Smrg c->dead_inst[inst_idx] = true; 705b8e80941Smrg } 706b8e80941Smrg /* direct assignment of input to output -- 707b8e80941Smrg * and the input or output doesn't yet have a native register 708b8e80941Smrg * assigned 709b8e80941Smrg * and the output is only 
used in this instruction, 710b8e80941Smrg * allocate a new register, and associate both input and output to 711b8e80941Smrg * it 712b8e80941Smrg * and the MOV does not do a swizzle 713b8e80941Smrg */ 714b8e80941Smrg if (inst->Src[0].Register.File == TGSI_FILE_INPUT && 715b8e80941Smrg !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid && 716b8e80941Smrg !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 717b8e80941Smrg c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx && 718b8e80941Smrg c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) { 719b8e80941Smrg c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 720b8e80941Smrg c->file[TGSI_FILE_INPUT].reg[in_idx].native = 721b8e80941Smrg alloc_new_native_reg(c); 722b8e80941Smrg /* mark this MOV instruction as a no-op */ 723b8e80941Smrg c->dead_inst[inst_idx] = true; 724b8e80941Smrg } 725b8e80941Smrg } break; 726b8e80941Smrg default:; 727b8e80941Smrg } 728b8e80941Smrg inst_idx += 1; 729b8e80941Smrg } break; 730b8e80941Smrg } 731b8e80941Smrg } 732b8e80941Smrg 733b8e80941Smrg tgsi_parse_free(&ctx); 734b8e80941Smrg} 735b8e80941Smrg 736b8e80941Smrg/* Get a temporary to be used within one TGSI instruction. 737b8e80941Smrg * The first time that this function is called the temporary will be allocated. 738b8e80941Smrg * Each call to this function will return the same temporary. 
739b8e80941Smrg */ 740b8e80941Smrgstatic struct etna_native_reg 741b8e80941Smrgetna_compile_get_inner_temp(struct etna_compile *c) 742b8e80941Smrg{ 743b8e80941Smrg int inner_temp = c->inner_temps; 744b8e80941Smrg 745b8e80941Smrg if (inner_temp < ETNA_MAX_INNER_TEMPS) { 746b8e80941Smrg if (!c->inner_temp[inner_temp].valid) 747b8e80941Smrg c->inner_temp[inner_temp] = alloc_new_native_reg(c); 748b8e80941Smrg 749b8e80941Smrg /* alloc_new_native_reg() handles lack of registers */ 750b8e80941Smrg c->inner_temps += 1; 751b8e80941Smrg } else { 752b8e80941Smrg BUG("Too many inner temporaries (%i) requested in one instruction", 753b8e80941Smrg inner_temp + 1); 754b8e80941Smrg } 755b8e80941Smrg 756b8e80941Smrg return c->inner_temp[inner_temp]; 757b8e80941Smrg} 758b8e80941Smrg 759b8e80941Smrgstatic struct etna_inst_dst 760b8e80941Smrgetna_native_to_dst(struct etna_native_reg native, unsigned comps) 761b8e80941Smrg{ 762b8e80941Smrg /* Can only assign to temporaries */ 763b8e80941Smrg assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP); 764b8e80941Smrg 765b8e80941Smrg struct etna_inst_dst rv = { 766b8e80941Smrg .comps = comps, 767b8e80941Smrg .use = 1, 768b8e80941Smrg .reg = native.id, 769b8e80941Smrg }; 770b8e80941Smrg 771b8e80941Smrg return rv; 772b8e80941Smrg} 773b8e80941Smrg 774b8e80941Smrgstatic struct etna_inst_src 775b8e80941Smrgetna_native_to_src(struct etna_native_reg native, uint32_t swizzle) 776b8e80941Smrg{ 777b8e80941Smrg assert(native.valid && !native.is_tex); 778b8e80941Smrg 779b8e80941Smrg struct etna_inst_src rv = { 780b8e80941Smrg .use = 1, 781b8e80941Smrg .swiz = swizzle, 782b8e80941Smrg .rgroup = native.rgroup, 783b8e80941Smrg .reg = native.id, 784b8e80941Smrg .amode = INST_AMODE_DIRECT, 785b8e80941Smrg }; 786b8e80941Smrg 787b8e80941Smrg return rv; 788b8e80941Smrg} 789b8e80941Smrg 790b8e80941Smrgstatic inline struct etna_inst_src 791b8e80941Smrgnegate(struct etna_inst_src src) 792b8e80941Smrg{ 793b8e80941Smrg src.neg = !src.neg; 
794b8e80941Smrg 795b8e80941Smrg return src; 796b8e80941Smrg} 797b8e80941Smrg 798b8e80941Smrgstatic inline struct etna_inst_src 799b8e80941Smrgabsolute(struct etna_inst_src src) 800b8e80941Smrg{ 801b8e80941Smrg src.abs = 1; 802b8e80941Smrg 803b8e80941Smrg return src; 804b8e80941Smrg} 805b8e80941Smrg 806b8e80941Smrgstatic inline struct etna_inst_src 807b8e80941Smrgswizzle(struct etna_inst_src src, unsigned swizzle) 808b8e80941Smrg{ 809b8e80941Smrg src.swiz = inst_swiz_compose(src.swiz, swizzle); 810b8e80941Smrg 811b8e80941Smrg return src; 812b8e80941Smrg} 813b8e80941Smrg 814b8e80941Smrg/* Emit instruction and append it to program */ 815b8e80941Smrgstatic void 816b8e80941Smrgemit_inst(struct etna_compile *c, struct etna_inst *inst) 817b8e80941Smrg{ 818b8e80941Smrg assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS); 819b8e80941Smrg 820b8e80941Smrg /* Check for uniform conflicts (each instruction can only access one 821b8e80941Smrg * uniform), 822b8e80941Smrg * if detected, use an intermediate temporary */ 823b8e80941Smrg unsigned uni_rgroup = -1; 824b8e80941Smrg unsigned uni_reg = -1; 825b8e80941Smrg 826b8e80941Smrg for (int src = 0; src < ETNA_NUM_SRC; ++src) { 827b8e80941Smrg if (etna_rgroup_is_uniform(inst->src[src].rgroup)) { 828b8e80941Smrg if (uni_reg == -1) { /* first unique uniform used */ 829b8e80941Smrg uni_rgroup = inst->src[src].rgroup; 830b8e80941Smrg uni_reg = inst->src[src].reg; 831b8e80941Smrg } else { /* second or later; check that it is a re-use */ 832b8e80941Smrg if (uni_rgroup != inst->src[src].rgroup || 833b8e80941Smrg uni_reg != inst->src[src].reg) { 834b8e80941Smrg DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that " 835b8e80941Smrg "accesses different uniforms, " 836b8e80941Smrg "need to generate extra MOV"); 837b8e80941Smrg struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 838b8e80941Smrg 839b8e80941Smrg /* Generate move instruction to temporary */ 840b8e80941Smrg etna_assemble(&c->code[c->inst_ptr * 4], &(struct 
etna_inst) { 841b8e80941Smrg .opcode = INST_OPCODE_MOV, 842b8e80941Smrg .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 843b8e80941Smrg INST_COMPS_Z | INST_COMPS_W), 844b8e80941Smrg .src[2] = inst->src[src] 845b8e80941Smrg }); 846b8e80941Smrg 847b8e80941Smrg c->inst_ptr++; 848b8e80941Smrg 849b8e80941Smrg /* Modify instruction to use temp register instead of uniform */ 850b8e80941Smrg inst->src[src].use = 1; 851b8e80941Smrg inst->src[src].rgroup = INST_RGROUP_TEMP; 852b8e80941Smrg inst->src[src].reg = inner_temp.id; 853b8e80941Smrg inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 854b8e80941Smrg inst->src[src].neg = 0; /* negation happens on MOV */ 855b8e80941Smrg inst->src[src].abs = 0; /* abs happens on MOV */ 856b8e80941Smrg inst->src[src].amode = 0; /* amode effects happen on MOV */ 857b8e80941Smrg } 858b8e80941Smrg } 859b8e80941Smrg } 860b8e80941Smrg } 861b8e80941Smrg 862b8e80941Smrg /* Finally assemble the actual instruction */ 863b8e80941Smrg etna_assemble(&c->code[c->inst_ptr * 4], inst); 864b8e80941Smrg c->inst_ptr++; 865b8e80941Smrg} 866b8e80941Smrg 867b8e80941Smrgstatic unsigned int 868b8e80941Smrgetna_amode(struct tgsi_ind_register indirect) 869b8e80941Smrg{ 870b8e80941Smrg assert(indirect.File == TGSI_FILE_ADDRESS); 871b8e80941Smrg assert(indirect.Index == 0); 872b8e80941Smrg 873b8e80941Smrg switch (indirect.Swizzle) { 874b8e80941Smrg case TGSI_SWIZZLE_X: 875b8e80941Smrg return INST_AMODE_ADD_A_X; 876b8e80941Smrg case TGSI_SWIZZLE_Y: 877b8e80941Smrg return INST_AMODE_ADD_A_Y; 878b8e80941Smrg case TGSI_SWIZZLE_Z: 879b8e80941Smrg return INST_AMODE_ADD_A_Z; 880b8e80941Smrg case TGSI_SWIZZLE_W: 881b8e80941Smrg return INST_AMODE_ADD_A_W; 882b8e80941Smrg default: 883b8e80941Smrg assert(!"Invalid swizzle"); 884b8e80941Smrg } 885b8e80941Smrg 886b8e80941Smrg unreachable("bad swizzle"); 887b8e80941Smrg} 888b8e80941Smrg 889b8e80941Smrg/* convert destination operand */ 890b8e80941Smrgstatic struct etna_inst_dst 
891b8e80941Smrgconvert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in) 892b8e80941Smrg{ 893b8e80941Smrg struct etna_inst_dst rv = { 894b8e80941Smrg /// XXX .amode 895b8e80941Smrg .comps = in->Register.WriteMask, 896b8e80941Smrg }; 897b8e80941Smrg 898b8e80941Smrg if (in->Register.File == TGSI_FILE_ADDRESS) { 899b8e80941Smrg assert(in->Register.Index == 0); 900b8e80941Smrg rv.reg = in->Register.Index; 901b8e80941Smrg rv.use = 0; 902b8e80941Smrg } else { 903b8e80941Smrg rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native, 904b8e80941Smrg in->Register.WriteMask); 905b8e80941Smrg } 906b8e80941Smrg 907b8e80941Smrg if (in->Register.Indirect) 908b8e80941Smrg rv.amode = etna_amode(in->Indirect); 909b8e80941Smrg 910b8e80941Smrg return rv; 911b8e80941Smrg} 912b8e80941Smrg 913b8e80941Smrg/* convert texture operand */ 914b8e80941Smrgstatic struct etna_inst_tex 915b8e80941Smrgconvert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in, 916b8e80941Smrg const struct tgsi_instruction_texture *tex) 917b8e80941Smrg{ 918b8e80941Smrg struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native; 919b8e80941Smrg struct etna_inst_tex rv = { 920b8e80941Smrg // XXX .amode (to allow for an array of samplers?) 
921b8e80941Smrg .swiz = INST_SWIZ_IDENTITY 922b8e80941Smrg }; 923b8e80941Smrg 924b8e80941Smrg assert(native_reg.is_tex && native_reg.valid); 925b8e80941Smrg rv.id = native_reg.id; 926b8e80941Smrg 927b8e80941Smrg return rv; 928b8e80941Smrg} 929b8e80941Smrg 930b8e80941Smrg/* convert source operand */ 931b8e80941Smrgstatic struct etna_inst_src 932b8e80941Smrgetna_create_src(const struct tgsi_full_src_register *tgsi, 933b8e80941Smrg const struct etna_native_reg *native) 934b8e80941Smrg{ 935b8e80941Smrg const struct tgsi_src_register *reg = &tgsi->Register; 936b8e80941Smrg struct etna_inst_src rv = { 937b8e80941Smrg .use = 1, 938b8e80941Smrg .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW), 939b8e80941Smrg .neg = reg->Negate, 940b8e80941Smrg .abs = reg->Absolute, 941b8e80941Smrg .rgroup = native->rgroup, 942b8e80941Smrg .reg = native->id, 943b8e80941Smrg .amode = INST_AMODE_DIRECT, 944b8e80941Smrg }; 945b8e80941Smrg 946b8e80941Smrg assert(native->valid && !native->is_tex); 947b8e80941Smrg 948b8e80941Smrg if (reg->Indirect) 949b8e80941Smrg rv.amode = etna_amode(tgsi->Indirect); 950b8e80941Smrg 951b8e80941Smrg return rv; 952b8e80941Smrg} 953b8e80941Smrg 954b8e80941Smrgstatic struct etna_inst_src 955b8e80941Smrgetna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src, 956b8e80941Smrg struct etna_native_reg temp) 957b8e80941Smrg{ 958b8e80941Smrg struct etna_inst mov = { }; 959b8e80941Smrg 960b8e80941Smrg mov.opcode = INST_OPCODE_MOV; 961b8e80941Smrg mov.sat = 0; 962b8e80941Smrg mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 963b8e80941Smrg INST_COMPS_Z | INST_COMPS_W); 964b8e80941Smrg mov.src[2] = src; 965b8e80941Smrg emit_inst(c, &mov); 966b8e80941Smrg 967b8e80941Smrg src.swiz = INST_SWIZ_IDENTITY; 968b8e80941Smrg src.neg = src.abs = 0; 969b8e80941Smrg src.rgroup = temp.rgroup; 970b8e80941Smrg src.reg = temp.id; 971b8e80941Smrg 972b8e80941Smrg return src; 973b8e80941Smrg} 974b8e80941Smrg 975b8e80941Smrgstatic 
struct etna_inst_src 976b8e80941Smrgetna_mov_src(struct etna_compile *c, struct etna_inst_src src) 977b8e80941Smrg{ 978b8e80941Smrg struct etna_native_reg temp = etna_compile_get_inner_temp(c); 979b8e80941Smrg 980b8e80941Smrg return etna_mov_src_to_temp(c, src, temp); 981b8e80941Smrg} 982b8e80941Smrg 983b8e80941Smrgstatic bool 984b8e80941Smrgetna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b) 985b8e80941Smrg{ 986b8e80941Smrg return etna_rgroup_is_uniform(a.rgroup) && 987b8e80941Smrg etna_rgroup_is_uniform(b.rgroup) && 988b8e80941Smrg (a.rgroup != b.rgroup || a.reg != b.reg); 989b8e80941Smrg} 990b8e80941Smrg 991b8e80941Smrg/* create a new label */ 992b8e80941Smrgstatic unsigned int 993b8e80941Smrgalloc_new_label(struct etna_compile *c) 994b8e80941Smrg{ 995b8e80941Smrg struct etna_compile_label label = { 996b8e80941Smrg .inst_idx = -1, /* start by point to no specific instruction */ 997b8e80941Smrg }; 998b8e80941Smrg 999b8e80941Smrg array_insert(c->labels, label); 1000b8e80941Smrg 1001b8e80941Smrg return c->labels_count - 1; 1002b8e80941Smrg} 1003b8e80941Smrg 1004b8e80941Smrg/* place label at current instruction pointer */ 1005b8e80941Smrgstatic void 1006b8e80941Smrglabel_place(struct etna_compile *c, struct etna_compile_label *label) 1007b8e80941Smrg{ 1008b8e80941Smrg label->inst_idx = c->inst_ptr; 1009b8e80941Smrg} 1010b8e80941Smrg 1011b8e80941Smrg/* mark label use at current instruction. 1012b8e80941Smrg * target of the label will be filled in in the marked instruction's src2.imm 1013b8e80941Smrg * slot as soon 1014b8e80941Smrg * as the value becomes known. 
1015b8e80941Smrg */ 1016b8e80941Smrgstatic void 1017b8e80941Smrglabel_mark_use(struct etna_compile *c, int lbl_idx) 1018b8e80941Smrg{ 1019b8e80941Smrg assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS); 1020b8e80941Smrg c->lbl_usage[c->inst_ptr] = lbl_idx; 1021b8e80941Smrg} 1022b8e80941Smrg 1023b8e80941Smrg/* walk the frame stack and return first frame with matching type */ 1024b8e80941Smrgstatic struct etna_compile_frame * 1025b8e80941Smrgfind_frame(struct etna_compile *c, enum etna_compile_frame_type type) 1026b8e80941Smrg{ 1027b8e80941Smrg for (int sp = c->frame_sp; sp >= 0; sp--) 1028b8e80941Smrg if (c->frame_stack[sp].type == type) 1029b8e80941Smrg return &c->frame_stack[sp]; 1030b8e80941Smrg 1031b8e80941Smrg assert(0); 1032b8e80941Smrg return NULL; 1033b8e80941Smrg} 1034b8e80941Smrg 1035b8e80941Smrgstruct instr_translater { 1036b8e80941Smrg void (*fxn)(const struct instr_translater *t, struct etna_compile *c, 1037b8e80941Smrg const struct tgsi_full_instruction *inst, 1038b8e80941Smrg struct etna_inst_src *src); 1039b8e80941Smrg unsigned tgsi_opc; 1040b8e80941Smrg uint8_t opc; 1041b8e80941Smrg 1042b8e80941Smrg /* tgsi src -> etna src swizzle */ 1043b8e80941Smrg int src[3]; 1044b8e80941Smrg 1045b8e80941Smrg unsigned cond; 1046b8e80941Smrg}; 1047b8e80941Smrg 1048b8e80941Smrgstatic void 1049b8e80941Smrgtrans_instr(const struct instr_translater *t, struct etna_compile *c, 1050b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1051b8e80941Smrg{ 1052b8e80941Smrg const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode); 1053b8e80941Smrg struct etna_inst instr = { }; 1054b8e80941Smrg 1055b8e80941Smrg instr.opcode = t->opc; 1056b8e80941Smrg instr.cond = t->cond; 1057b8e80941Smrg instr.sat = inst->Instruction.Saturate; 1058b8e80941Smrg 1059b8e80941Smrg assert(info->num_dst <= 1); 1060b8e80941Smrg if (info->num_dst) 1061b8e80941Smrg instr.dst = convert_dst(c, &inst->Dst[0]); 1062b8e80941Smrg 1063b8e80941Smrg 
assert(info->num_src <= ETNA_NUM_SRC); 1064b8e80941Smrg 1065b8e80941Smrg for (unsigned i = 0; i < info->num_src; i++) { 1066b8e80941Smrg int swizzle = t->src[i]; 1067b8e80941Smrg 1068b8e80941Smrg assert(swizzle != -1); 1069b8e80941Smrg instr.src[swizzle] = src[i]; 1070b8e80941Smrg } 1071b8e80941Smrg 1072b8e80941Smrg emit_inst(c, &instr); 1073b8e80941Smrg} 1074b8e80941Smrg 1075b8e80941Smrgstatic void 1076b8e80941Smrgtrans_min_max(const struct instr_translater *t, struct etna_compile *c, 1077b8e80941Smrg const struct tgsi_full_instruction *inst, 1078b8e80941Smrg struct etna_inst_src *src) 1079b8e80941Smrg{ 1080b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1081b8e80941Smrg .opcode = INST_OPCODE_SELECT, 1082b8e80941Smrg .cond = t->cond, 1083b8e80941Smrg .sat = inst->Instruction.Saturate, 1084b8e80941Smrg .dst = convert_dst(c, &inst->Dst[0]), 1085b8e80941Smrg .src[0] = src[0], 1086b8e80941Smrg .src[1] = src[1], 1087b8e80941Smrg .src[2] = src[0], 1088b8e80941Smrg }); 1089b8e80941Smrg} 1090b8e80941Smrg 1091b8e80941Smrgstatic void 1092b8e80941Smrgtrans_if(const struct instr_translater *t, struct etna_compile *c, 1093b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1094b8e80941Smrg{ 1095b8e80941Smrg struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++]; 1096b8e80941Smrg struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f); 1097b8e80941Smrg 1098b8e80941Smrg /* push IF to stack */ 1099b8e80941Smrg f->type = ETNA_COMPILE_FRAME_IF; 1100b8e80941Smrg /* create "else" label */ 1101b8e80941Smrg f->lbl_else_idx = alloc_new_label(c); 1102b8e80941Smrg f->lbl_endif_idx = -1; 1103b8e80941Smrg 1104b8e80941Smrg /* We need to avoid the emit_inst() below becoming two instructions */ 1105b8e80941Smrg if (etna_src_uniforms_conflict(src[0], imm_0)) 1106b8e80941Smrg src[0] = etna_mov_src(c, src[0]); 1107b8e80941Smrg 1108b8e80941Smrg /* mark position in instruction stream of label reference so that it can be 1109b8e80941Smrg * filled in in next pass */ 
1110b8e80941Smrg label_mark_use(c, f->lbl_else_idx); 1111b8e80941Smrg 1112b8e80941Smrg /* create conditional branch to label if src0 EQ 0 */ 1113b8e80941Smrg emit_inst(c, &(struct etna_inst){ 1114b8e80941Smrg .opcode = INST_OPCODE_BRANCH, 1115b8e80941Smrg .cond = INST_CONDITION_EQ, 1116b8e80941Smrg .src[0] = src[0], 1117b8e80941Smrg .src[1] = imm_0, 1118b8e80941Smrg /* imm is filled in later */ 1119b8e80941Smrg }); 1120b8e80941Smrg} 1121b8e80941Smrg 1122b8e80941Smrgstatic void 1123b8e80941Smrgtrans_else(const struct instr_translater *t, struct etna_compile *c, 1124b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1125b8e80941Smrg{ 1126b8e80941Smrg assert(c->frame_sp > 0); 1127b8e80941Smrg struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1]; 1128b8e80941Smrg assert(f->type == ETNA_COMPILE_FRAME_IF); 1129b8e80941Smrg 1130b8e80941Smrg /* create "endif" label, and branch to endif label */ 1131b8e80941Smrg f->lbl_endif_idx = alloc_new_label(c); 1132b8e80941Smrg label_mark_use(c, f->lbl_endif_idx); 1133b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1134b8e80941Smrg .opcode = INST_OPCODE_BRANCH, 1135b8e80941Smrg .cond = INST_CONDITION_TRUE, 1136b8e80941Smrg /* imm is filled in later */ 1137b8e80941Smrg }); 1138b8e80941Smrg 1139b8e80941Smrg /* mark "else" label at this position in instruction stream */ 1140b8e80941Smrg label_place(c, &c->labels[f->lbl_else_idx]); 1141b8e80941Smrg} 1142b8e80941Smrg 1143b8e80941Smrgstatic void 1144b8e80941Smrgtrans_endif(const struct instr_translater *t, struct etna_compile *c, 1145b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1146b8e80941Smrg{ 1147b8e80941Smrg assert(c->frame_sp > 0); 1148b8e80941Smrg struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp]; 1149b8e80941Smrg assert(f->type == ETNA_COMPILE_FRAME_IF); 1150b8e80941Smrg 1151b8e80941Smrg /* assign "endif" or "else" (if no ELSE) label to current position in 1152b8e80941Smrg * instruction stream, 
pop IF */ 1153b8e80941Smrg if (f->lbl_endif_idx != -1) 1154b8e80941Smrg label_place(c, &c->labels[f->lbl_endif_idx]); 1155b8e80941Smrg else 1156b8e80941Smrg label_place(c, &c->labels[f->lbl_else_idx]); 1157b8e80941Smrg} 1158b8e80941Smrg 1159b8e80941Smrgstatic void 1160b8e80941Smrgtrans_loop_bgn(const struct instr_translater *t, struct etna_compile *c, 1161b8e80941Smrg const struct tgsi_full_instruction *inst, 1162b8e80941Smrg struct etna_inst_src *src) 1163b8e80941Smrg{ 1164b8e80941Smrg struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++]; 1165b8e80941Smrg 1166b8e80941Smrg /* push LOOP to stack */ 1167b8e80941Smrg f->type = ETNA_COMPILE_FRAME_LOOP; 1168b8e80941Smrg f->lbl_loop_bgn_idx = alloc_new_label(c); 1169b8e80941Smrg f->lbl_loop_end_idx = alloc_new_label(c); 1170b8e80941Smrg 1171b8e80941Smrg label_place(c, &c->labels[f->lbl_loop_bgn_idx]); 1172b8e80941Smrg 1173b8e80941Smrg c->num_loops++; 1174b8e80941Smrg} 1175b8e80941Smrg 1176b8e80941Smrgstatic void 1177b8e80941Smrgtrans_loop_end(const struct instr_translater *t, struct etna_compile *c, 1178b8e80941Smrg const struct tgsi_full_instruction *inst, 1179b8e80941Smrg struct etna_inst_src *src) 1180b8e80941Smrg{ 1181b8e80941Smrg assert(c->frame_sp > 0); 1182b8e80941Smrg struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp]; 1183b8e80941Smrg assert(f->type == ETNA_COMPILE_FRAME_LOOP); 1184b8e80941Smrg 1185b8e80941Smrg /* mark position in instruction stream of label reference so that it can be 1186b8e80941Smrg * filled in in next pass */ 1187b8e80941Smrg label_mark_use(c, f->lbl_loop_bgn_idx); 1188b8e80941Smrg 1189b8e80941Smrg /* create branch to loop_bgn label */ 1190b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1191b8e80941Smrg .opcode = INST_OPCODE_BRANCH, 1192b8e80941Smrg .cond = INST_CONDITION_TRUE, 1193b8e80941Smrg .src[0] = src[0], 1194b8e80941Smrg /* imm is filled in later */ 1195b8e80941Smrg }); 1196b8e80941Smrg 1197b8e80941Smrg label_place(c, &c->labels[f->lbl_loop_end_idx]); 
1198b8e80941Smrg} 1199b8e80941Smrg 1200b8e80941Smrgstatic void 1201b8e80941Smrgtrans_brk(const struct instr_translater *t, struct etna_compile *c, 1202b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1203b8e80941Smrg{ 1204b8e80941Smrg assert(c->frame_sp > 0); 1205b8e80941Smrg struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP); 1206b8e80941Smrg 1207b8e80941Smrg /* mark position in instruction stream of label reference so that it can be 1208b8e80941Smrg * filled in in next pass */ 1209b8e80941Smrg label_mark_use(c, f->lbl_loop_end_idx); 1210b8e80941Smrg 1211b8e80941Smrg /* create branch to loop_end label */ 1212b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1213b8e80941Smrg .opcode = INST_OPCODE_BRANCH, 1214b8e80941Smrg .cond = INST_CONDITION_TRUE, 1215b8e80941Smrg .src[0] = src[0], 1216b8e80941Smrg /* imm is filled in later */ 1217b8e80941Smrg }); 1218b8e80941Smrg} 1219b8e80941Smrg 1220b8e80941Smrgstatic void 1221b8e80941Smrgtrans_cont(const struct instr_translater *t, struct etna_compile *c, 1222b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1223b8e80941Smrg{ 1224b8e80941Smrg assert(c->frame_sp > 0); 1225b8e80941Smrg struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP); 1226b8e80941Smrg 1227b8e80941Smrg /* mark position in instruction stream of label reference so that it can be 1228b8e80941Smrg * filled in in next pass */ 1229b8e80941Smrg label_mark_use(c, f->lbl_loop_bgn_idx); 1230b8e80941Smrg 1231b8e80941Smrg /* create branch to loop_end label */ 1232b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1233b8e80941Smrg .opcode = INST_OPCODE_BRANCH, 1234b8e80941Smrg .cond = INST_CONDITION_TRUE, 1235b8e80941Smrg .src[0] = src[0], 1236b8e80941Smrg /* imm is filled in later */ 1237b8e80941Smrg }); 1238b8e80941Smrg} 1239b8e80941Smrg 1240b8e80941Smrgstatic void 1241b8e80941Smrgtrans_deriv(const struct instr_translater *t, struct etna_compile *c, 1242b8e80941Smrg const 
struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1243b8e80941Smrg{ 1244b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1245b8e80941Smrg .opcode = t->opc, 1246b8e80941Smrg .sat = inst->Instruction.Saturate, 1247b8e80941Smrg .dst = convert_dst(c, &inst->Dst[0]), 1248b8e80941Smrg .src[0] = src[0], 1249b8e80941Smrg .src[2] = src[0], 1250b8e80941Smrg }); 1251b8e80941Smrg} 1252b8e80941Smrg 1253b8e80941Smrgstatic void 1254b8e80941Smrgtrans_arl(const struct instr_translater *t, struct etna_compile *c, 1255b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1256b8e80941Smrg{ 1257b8e80941Smrg struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1258b8e80941Smrg struct etna_inst arl = { }; 1259b8e80941Smrg struct etna_inst_dst dst; 1260b8e80941Smrg 1261b8e80941Smrg dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | 1262b8e80941Smrg INST_COMPS_W); 1263b8e80941Smrg 1264b8e80941Smrg if (c->specs->has_sign_floor_ceil) { 1265b8e80941Smrg struct etna_inst floor = { }; 1266b8e80941Smrg 1267b8e80941Smrg floor.opcode = INST_OPCODE_FLOOR; 1268b8e80941Smrg floor.src[2] = src[0]; 1269b8e80941Smrg floor.dst = dst; 1270b8e80941Smrg 1271b8e80941Smrg emit_inst(c, &floor); 1272b8e80941Smrg } else { 1273b8e80941Smrg struct etna_inst floor[2] = { }; 1274b8e80941Smrg 1275b8e80941Smrg floor[0].opcode = INST_OPCODE_FRC; 1276b8e80941Smrg floor[0].sat = inst->Instruction.Saturate; 1277b8e80941Smrg floor[0].dst = dst; 1278b8e80941Smrg floor[0].src[2] = src[0]; 1279b8e80941Smrg 1280b8e80941Smrg floor[1].opcode = INST_OPCODE_ADD; 1281b8e80941Smrg floor[1].sat = inst->Instruction.Saturate; 1282b8e80941Smrg floor[1].dst = dst; 1283b8e80941Smrg floor[1].src[0] = src[0]; 1284b8e80941Smrg floor[1].src[2].use = 1; 1285b8e80941Smrg floor[1].src[2].swiz = INST_SWIZ_IDENTITY; 1286b8e80941Smrg floor[1].src[2].neg = 1; 1287b8e80941Smrg floor[1].src[2].rgroup = temp.rgroup; 1288b8e80941Smrg floor[1].src[2].reg = temp.id; 1289b8e80941Smrg 
1290b8e80941Smrg emit_inst(c, &floor[0]); 1291b8e80941Smrg emit_inst(c, &floor[1]); 1292b8e80941Smrg } 1293b8e80941Smrg 1294b8e80941Smrg arl.opcode = INST_OPCODE_MOVAR; 1295b8e80941Smrg arl.sat = inst->Instruction.Saturate; 1296b8e80941Smrg arl.dst = convert_dst(c, &inst->Dst[0]); 1297b8e80941Smrg arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1298b8e80941Smrg 1299b8e80941Smrg emit_inst(c, &arl); 1300b8e80941Smrg} 1301b8e80941Smrg 1302b8e80941Smrgstatic void 1303b8e80941Smrgtrans_lrp(const struct instr_translater *t, struct etna_compile *c, 1304b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1305b8e80941Smrg{ 1306b8e80941Smrg /* dst = src0 * src1 + (1 - src0) * src2 1307b8e80941Smrg * => src0 * src1 - (src0 - 1) * src2 1308b8e80941Smrg * => src0 * src1 - (src0 * src2 - src2) 1309b8e80941Smrg * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw 1310b8e80941Smrg * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw 1311b8e80941Smrg */ 1312b8e80941Smrg struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1313b8e80941Smrg if (etna_src_uniforms_conflict(src[0], src[1]) || 1314b8e80941Smrg etna_src_uniforms_conflict(src[0], src[2])) { 1315b8e80941Smrg src[0] = etna_mov_src(c, src[0]); 1316b8e80941Smrg } 1317b8e80941Smrg 1318b8e80941Smrg struct etna_inst mad[2] = { }; 1319b8e80941Smrg mad[0].opcode = INST_OPCODE_MAD; 1320b8e80941Smrg mad[0].sat = 0; 1321b8e80941Smrg mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1322b8e80941Smrg INST_COMPS_Z | INST_COMPS_W); 1323b8e80941Smrg mad[0].src[0] = src[0]; 1324b8e80941Smrg mad[0].src[1] = src[2]; 1325b8e80941Smrg mad[0].src[2] = negate(src[2]); 1326b8e80941Smrg mad[1].opcode = INST_OPCODE_MAD; 1327b8e80941Smrg mad[1].sat = inst->Instruction.Saturate; 1328b8e80941Smrg mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0]; 1329b8e80941Smrg mad[1].src[1] = src[1]; 1330b8e80941Smrg mad[1].src[2] = negate(etna_native_to_src(temp, 
INST_SWIZ_IDENTITY)); 1331b8e80941Smrg 1332b8e80941Smrg emit_inst(c, &mad[0]); 1333b8e80941Smrg emit_inst(c, &mad[1]); 1334b8e80941Smrg} 1335b8e80941Smrg 1336b8e80941Smrgstatic void 1337b8e80941Smrgtrans_lit(const struct instr_translater *t, struct etna_compile *c, 1338b8e80941Smrg const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1339b8e80941Smrg{ 1340b8e80941Smrg /* SELECT.LT tmp._y__, 0, src.yyyy, 0 1341b8e80941Smrg * - can be eliminated if src.y is a uniform and >= 0 1342b8e80941Smrg * SELECT.GT tmp.___w, 128, src.wwww, 128 1343b8e80941Smrg * SELECT.LT tmp.___w, -128, tmp.wwww, -128 1344b8e80941Smrg * - can be eliminated if src.w is a uniform and fits clamp 1345b8e80941Smrg * LOG tmp.x, void, void, tmp.yyyy 1346b8e80941Smrg * MUL tmp.x, tmp.xxxx, tmp.wwww, void 1347b8e80941Smrg * LITP dst, undef, src.xxxx, tmp.xxxx 1348b8e80941Smrg */ 1349b8e80941Smrg struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 1350b8e80941Smrg struct etna_inst_src src_y = { }; 1351b8e80941Smrg 1352b8e80941Smrg if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1353b8e80941Smrg src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)); 1354b8e80941Smrg 1355b8e80941Smrg struct etna_inst ins = { }; 1356b8e80941Smrg ins.opcode = INST_OPCODE_SELECT; 1357b8e80941Smrg ins.cond = INST_CONDITION_LT; 1358b8e80941Smrg ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y); 1359b8e80941Smrg ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0); 1360b8e80941Smrg ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1361b8e80941Smrg emit_inst(c, &ins); 1362b8e80941Smrg } else if (uif(get_imm_u32(c, &src[0], 1)) < 0) 1363b8e80941Smrg src_y = alloc_imm_f32(c, 0.0); 1364b8e80941Smrg else 1365b8e80941Smrg src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1366b8e80941Smrg 1367b8e80941Smrg struct etna_inst_src src_w = { }; 1368b8e80941Smrg 1369b8e80941Smrg if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1370b8e80941Smrg src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W)); 
1371b8e80941Smrg 1372b8e80941Smrg struct etna_inst ins = { }; 1373b8e80941Smrg ins.opcode = INST_OPCODE_SELECT; 1374b8e80941Smrg ins.cond = INST_CONDITION_GT; 1375b8e80941Smrg ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W); 1376b8e80941Smrg ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.); 1377b8e80941Smrg ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W)); 1378b8e80941Smrg emit_inst(c, &ins); 1379b8e80941Smrg ins.cond = INST_CONDITION_LT; 1380b8e80941Smrg ins.src[0].neg = !ins.src[0].neg; 1381b8e80941Smrg ins.src[2].neg = !ins.src[2].neg; 1382b8e80941Smrg ins.src[1] = src_w; 1383b8e80941Smrg emit_inst(c, &ins); 1384b8e80941Smrg } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.) 1385b8e80941Smrg src_w = alloc_imm_f32(c, -128.); 1386b8e80941Smrg else if (uif(get_imm_u32(c, &src[0], 3)) > 128.) 1387b8e80941Smrg src_w = alloc_imm_f32(c, 128.); 1388b8e80941Smrg else 1389b8e80941Smrg src_w = swizzle(src[0], SWIZZLE(W, W, W, W)); 1390b8e80941Smrg 1391b8e80941Smrg if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */ 1392b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1393b8e80941Smrg .opcode = INST_OPCODE_LOG, 1394b8e80941Smrg .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y), 1395b8e80941Smrg .src[2] = src_y, 1396b8e80941Smrg .tex = { .amode=1 }, /* Unknown bit needs to be set */ 1397b8e80941Smrg }); 1398b8e80941Smrg emit_inst(c, &(struct etna_inst) { 1399b8e80941Smrg .opcode = INST_OPCODE_MUL, 1400b8e80941Smrg .dst = etna_native_to_dst(inner_temp, INST_COMPS_X), 1401b8e80941Smrg .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)), 1402b8e80941Smrg .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)), 1403b8e80941Smrg }); 1404b8e80941Smrg } else { 1405b8e80941Smrg struct etna_inst ins[3] = { }; 1406b8e80941Smrg ins[0].opcode = INST_OPCODE_LOG; 1407b8e80941Smrg ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X); 1408b8e80941Smrg ins[0].src[2] = src_y; 1409b8e80941Smrg 1410b8e80941Smrg emit_inst(c, 
               &ins[0]);
   }
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}

/* TGSI SSG (set sign): dst = sign(src).
 * Uses the native SIGN instruction when the hardware has it; otherwise
 * emulates it with two instructions:
 *   SET.NZ    t, src       (t = (src != 0) ? 1.0 : 0.0)
 *   SELECT.LZ dst, src, -t, t   (dst = (src < 0) ? -t : t)
 */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                            INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}

/* TGSI SIN/COS translation. Three strategies depending on hardware:
 * new-style SIN/COS, old-style SIN/COS with a 2/pi prescale, or a
 * polynomial approximation when there is no trig hardware at all.
 */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded.  Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         /* Copy the uniform into t0.x first so the sequence below only
          * ever references the temp register. */
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which components feed the MUL/DP3 pair. */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* Emit everything from ins[0] up to and including the last
       * instruction written through p. */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}

/* TGSI LG2 (log2) translation. */
static void
trans_lg2(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) {
      /* On newer chips alternative LOG instruction is implemented,
       * which outputs an x and y component, which need to be multiplied to
       * get the result.
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src[0],
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   }
}

/* Translate the TGSI texture sampling opcodes (TEX/TXB/TXL/TXP) to the
 * native TEXLD* instructions.
 */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}

/* Translater for opcodes that generate no code (NOP, END). */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}

/* Dispatch table: one translater entry per TGSI opcode.  .src maps the
 * native src slots to TGSI operand indices (-1 = unused slot). */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};

/* Pass -- compile instructions */
/* Walk the TGSI token stream and translate each live instruction through
 * the translaters[] dispatch table, building the native src operands first.
 */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;

            /* sources without a native register (or mapped to a texture
             * unit) are left zeroed; the translater handles them itself */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}

/* Look up register by semantic */
static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
{
   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[file].reg[idx];

      if (reg->semantic.Name == name && reg->semantic.Index == index)
         return reg;
   }

   return NULL; /* not found */
}

/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
 * - this is a vertex shader
 * - and this is an older GPU
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /*
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
         });
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_MUL,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[1] = alloc_imm_f32(c, 0.5f),
         });
      }
   }
}

/* Append a MOV that swaps R and B in the fragment color output when the
 * render target format requires it (key->frag_rb_swap).
 */
static void
etna_compile_frag_rb_swap(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
      /* find color out */
      struct etna_reg_desc *color_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);

      /* NOTE(review): find_decl_by_semantic() may return NULL, but
       * color_reg is dereferenced unconditionally below — presumably a
       * fragment shader with frag_rb_swap set always declares a COLOR
       * output; TODO confirm. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
         .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
      });
   }
}

/** add a NOP to the shader if
 * a) the shader is empty
 * or
 * b) there is a label at the end of the shader
 */
static void
etna_compile_add_nop_if_needed(struct etna_compile *c)
{
   bool label_at_last_inst = false;

   for (int idx = 0; idx < c->labels_count; ++idx) {
      if (c->labels[idx].inst_idx == c->inst_ptr)
         label_at_last_inst = true;

   }

   if (c->inst_ptr == 0 || label_at_last_inst)
      emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
}

/* Map every register of a file to consecutive uniform slots starting at
 * `base`. */
static void
assign_uniforms(struct etna_compile_file *file, unsigned base)
{
   for (int idx = 0; idx < file->reg_size; ++idx) {
      file->reg[idx].native.valid = 1;
      file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
      file->reg[idx].native.id = base + idx;
   }
}

/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
 * CONST must be consecutive as const buffers are supposed to be consecutive,
 * and before IMM, as this is
 * more convenient because is possible for the compilation process itself to
 * generate extra
 * immediates for constants such as pi, one, zero.
 */
static void
assign_constants_and_immediates(struct etna_compile *c)
{
   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
   /* immediates start after the constants */
   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
         c->imm_size);
}

/* Assign declared samplers to native texture units */
static void
assign_texture_units(struct etna_compile *c)
{
   uint tex_base = 0;

   /* vertex samplers live after the fragment samplers in the unit space */
   if (c->info.processor == PIPE_SHADER_VERTEX)
      tex_base = c->specs->vertex_sampler_offset;

   for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
   }
}

/* Additional pass to fill in branch targets. This pass should be last
 * as no instruction reordering or removing/addition can be done anymore
 * once the branch targets are computed.
 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx] != -1)
         etna_assemble_set_imm(&c->code[idx * 4],
                               c->labels[c->lbl_usage[idx]].inst_idx);
   }
}

/* compare two etna_native_reg structures, return true if equal */
static bool
cmp_etna_native_reg(const struct etna_native_reg to,
                    const struct etna_native_reg from)
{
   return to.valid == from.valid && to.is_tex == from.is_tex &&
          to.rgroup == from.rgroup && to.id == from.id;
}

/* go through all declarations and swap native registers *to* and *from* */
static void
swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
                      const struct etna_native_reg from)
{
   if (cmp_etna_native_reg(from, to))
      return; /* Nothing to do */

   for (int idx = 0; idx < c->total_decls; ++idx) {
      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
         c->decl[idx].native = to;
      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
         c->decl[idx].native = from;
      }
   }
}

/* For PS we need to permute so that inputs are always in temporary 0..N-1.
 * Semantic POS is always t0. If that semantic is not used, avoid t0.
 */
static void
permute_ps_inputs(struct etna_compile *c)
{
   /* Special inputs:
    * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
    * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
    */
   uint native_idx = 1;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      uint input_id;
      assert(reg->has_semantic);

      if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
         continue;

      input_id = native_idx++;
      swap_native_registers(c, etna_native_temp(input_id),
                            c->file[TGSI_FILE_INPUT].reg[idx].native);
   }

   c->num_varyings = native_idx - 1;

   if (native_idx > c->next_free_native)
      c->next_free_native = native_idx;
}

/* fill in ps inputs into shader object */
static void
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];

      /* id 0 is the POSITION input, which is not recorded as a varying */
      if (reg->native.id > 0) {
         assert(sf->num_reg < ETNA_NUM_INPUTS);
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         /* convert usage mask to number of components (*=wildcard)
          *   .r    (0..1)  -> 1 component
          *   .*g   (2..3)  -> 2 component
          *   .**b  (4..7)  -> 3 components
          *   .***a (8..15) -> 4 components
          */
         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
         sf->num_reg++;
      }
   }

   assert(sf->num_reg == c->num_varyings);
   sobj->input_count_unk8 = 31; /* XXX what is this */
}

/* fill in output mapping for ps into shader object */
static void
fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   sobj->outfile.num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
         sobj->ps_color_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
         break;
      default:
         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
      }
   }
}

/* fill in inputs for vs into shader object */
static void
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      if (!reg->native.valid)
         continue;

      /* XXX exclude inputs with special semantics such as gl_frontFacing */
      sf->reg[sf->num_reg].reg = reg->native.id;
      sf->reg[sf->num_reg].semantic = reg->semantic;
      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
      sf->num_reg++;
   }

   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
}

/* build two-level output index [Semantic][Index] for fast linking */
static void
build_output_index(struct etna_shader_variant *sobj)
{
   int total = 0;
   int offset = 0;

   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
      total += sobj->output_count_per_semantic[name];

   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));

   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
      sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
      offset += sobj->output_count_per_semantic[name];
   }

   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
                               [sobj->outfile.reg[idx].semantic.Index] =
         &sobj->outfile.reg[idx];
   }
}

/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      /* NOTE(review): this bound is only asserted, not enforced, before
       * sf->reg[sf->num_reg] is written in the default case below. */
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}

static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ?
c->specs->max_vs_uniforms 2229b8e80941Smrg : c->specs->max_ps_uniforms; 2230b8e80941Smrg /* round up number of uniforms, including immediates, in units of four */ 2231b8e80941Smrg int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4; 2232b8e80941Smrg 2233b8e80941Smrg if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) { 2234b8e80941Smrg DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr, 2235b8e80941Smrg c->specs->max_instructions); 2236b8e80941Smrg return false; 2237b8e80941Smrg } 2238b8e80941Smrg 2239b8e80941Smrg if (c->next_free_native > c->specs->max_registers) { 2240b8e80941Smrg DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native, 2241b8e80941Smrg c->specs->max_registers); 2242b8e80941Smrg return false; 2243b8e80941Smrg } 2244b8e80941Smrg 2245b8e80941Smrg if (num_uniforms > max_uniforms) { 2246b8e80941Smrg DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms, 2247b8e80941Smrg max_uniforms); 2248b8e80941Smrg return false; 2249b8e80941Smrg } 2250b8e80941Smrg 2251b8e80941Smrg if (c->num_varyings > c->specs->max_varyings) { 2252b8e80941Smrg DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings, 2253b8e80941Smrg c->specs->max_varyings); 2254b8e80941Smrg return false; 2255b8e80941Smrg } 2256b8e80941Smrg 2257b8e80941Smrg if (c->imm_base > c->specs->num_constants) { 2258b8e80941Smrg DBG("Number of constants (%d) exceeds maximum %d", c->imm_base, 2259b8e80941Smrg c->specs->num_constants); 2260b8e80941Smrg } 2261b8e80941Smrg 2262b8e80941Smrg return true; 2263b8e80941Smrg} 2264b8e80941Smrg 2265b8e80941Smrgstatic void 2266b8e80941Smrgcopy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj) 2267b8e80941Smrg{ 2268b8e80941Smrg uint32_t count = c->imm_size; 2269b8e80941Smrg struct etna_shader_uniform_info *uinfo = &sobj->uniforms; 2270b8e80941Smrg 2271b8e80941Smrg uinfo->const_count = c->imm_base; 2272b8e80941Smrg uinfo->imm_count = count; 2273b8e80941Smrg 
uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data)); 2274b8e80941Smrg uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents)); 2275b8e80941Smrg 2276b8e80941Smrg etna_set_shader_uniforms_dirty_flags(sobj); 2277b8e80941Smrg} 2278b8e80941Smrg 2279b8e80941Smrgbool 2280b8e80941Smrgetna_compile_shader(struct etna_shader_variant *v) 2281b8e80941Smrg{ 2282b8e80941Smrg /* Create scratch space that may be too large to fit on stack 2283b8e80941Smrg */ 2284b8e80941Smrg bool ret; 2285b8e80941Smrg struct etna_compile *c; 2286b8e80941Smrg 2287b8e80941Smrg if (unlikely(!v)) 2288b8e80941Smrg return false; 2289b8e80941Smrg 2290b8e80941Smrg const struct etna_specs *specs = v->shader->specs; 2291b8e80941Smrg 2292b8e80941Smrg struct tgsi_lowering_config lconfig = { 2293b8e80941Smrg .lower_FLR = !specs->has_sign_floor_ceil, 2294b8e80941Smrg .lower_CEIL = !specs->has_sign_floor_ceil, 2295b8e80941Smrg .lower_POW = true, 2296b8e80941Smrg .lower_EXP = true, 2297b8e80941Smrg .lower_LOG = true, 2298b8e80941Smrg .lower_DP2 = !specs->has_halti2_instructions, 2299b8e80941Smrg .lower_TRUNC = true, 2300b8e80941Smrg }; 2301b8e80941Smrg 2302b8e80941Smrg c = CALLOC_STRUCT(etna_compile); 2303b8e80941Smrg if (!c) 2304b8e80941Smrg return false; 2305b8e80941Smrg 2306b8e80941Smrg memset(&c->lbl_usage, -1, sizeof(c->lbl_usage)); 2307b8e80941Smrg 2308b8e80941Smrg const struct tgsi_token *tokens = v->shader->tokens; 2309b8e80941Smrg 2310b8e80941Smrg c->specs = specs; 2311b8e80941Smrg c->key = &v->key; 2312b8e80941Smrg c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info); 2313b8e80941Smrg c->free_tokens = !!c->tokens; 2314b8e80941Smrg if (!c->tokens) { 2315b8e80941Smrg /* no lowering */ 2316b8e80941Smrg c->tokens = tokens; 2317b8e80941Smrg } 2318b8e80941Smrg 2319b8e80941Smrg /* Build a map from gallium register to native registers for files 2320b8e80941Smrg * CONST, SAMP, IMM, OUT, IN, TEMP. 
2321b8e80941Smrg * SAMP will map as-is for fragment shaders, there will be a +8 offset for 2322b8e80941Smrg * vertex shaders. 2323b8e80941Smrg */ 2324b8e80941Smrg /* Pass one -- check register file declarations and immediates */ 2325b8e80941Smrg etna_compile_parse_declarations(c); 2326b8e80941Smrg 2327b8e80941Smrg etna_allocate_decls(c); 2328b8e80941Smrg 2329b8e80941Smrg /* Pass two -- check usage of temporaries, inputs, outputs */ 2330b8e80941Smrg etna_compile_pass_check_usage(c); 2331b8e80941Smrg 2332b8e80941Smrg assign_special_inputs(c); 2333b8e80941Smrg 2334b8e80941Smrg /* Assign native temp register to TEMPs */ 2335b8e80941Smrg assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]); 2336b8e80941Smrg 2337b8e80941Smrg /* optimize outputs */ 2338b8e80941Smrg etna_compile_pass_optimize_outputs(c); 2339b8e80941Smrg 2340b8e80941Smrg /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE) 2341b8e80941Smrg * this is part of RGROUP_INTERNAL 2342b8e80941Smrg */ 2343b8e80941Smrg 2344b8e80941Smrg /* assign inputs: last usage of input should be <= first usage of temp */ 2345b8e80941Smrg /* potential optimization case: 2346b8e80941Smrg * if single MOV TEMP[y], IN[x] before which temp y is not used, and 2347b8e80941Smrg * after which IN[x] 2348b8e80941Smrg * is not read, temp[y] can be used as input register as-is 2349b8e80941Smrg */ 2350b8e80941Smrg /* sort temporaries by first use 2351b8e80941Smrg * sort inputs by last usage 2352b8e80941Smrg * iterate over inputs, temporaries 2353b8e80941Smrg * if last usage of input <= first usage of temp: 2354b8e80941Smrg * assign input to temp 2355b8e80941Smrg * advance input, temporary pointer 2356b8e80941Smrg * else 2357b8e80941Smrg * advance temporary pointer 2358b8e80941Smrg * 2359b8e80941Smrg * potential problem: instruction with multiple inputs of which one is the 2360b8e80941Smrg * temp and the other is the input; 2361b8e80941Smrg * however, as the temp is not used before this, how would this make 
2362b8e80941Smrg * sense? uninitialized temporaries have an undefined 2363b8e80941Smrg * value, so this would be ok 2364b8e80941Smrg */ 2365b8e80941Smrg assign_inouts_to_temporaries(c, TGSI_FILE_INPUT); 2366b8e80941Smrg 2367b8e80941Smrg /* assign outputs: first usage of output should be >= last usage of temp */ 2368b8e80941Smrg /* potential optimization case: 2369b8e80941Smrg * if single MOV OUT[x], TEMP[y] (with full write mask, or at least 2370b8e80941Smrg * writing all components that are used in 2371b8e80941Smrg * the shader) after which temp y is no longer used temp[y] can be 2372b8e80941Smrg * used as output register as-is 2373b8e80941Smrg * 2374b8e80941Smrg * potential problem: instruction with multiple outputs of which one is the 2375b8e80941Smrg * temp and the other is the output; 2376b8e80941Smrg * however, as the temp is not used after this, how would this make 2377b8e80941Smrg * sense? could just discard the output value 2378b8e80941Smrg */ 2379b8e80941Smrg /* sort temporaries by last use 2380b8e80941Smrg * sort outputs by first usage 2381b8e80941Smrg * iterate over outputs, temporaries 2382b8e80941Smrg * if first usage of output >= last usage of temp: 2383b8e80941Smrg * assign output to temp 2384b8e80941Smrg * advance output, temporary pointer 2385b8e80941Smrg * else 2386b8e80941Smrg * advance temporary pointer 2387b8e80941Smrg */ 2388b8e80941Smrg assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT); 2389b8e80941Smrg 2390b8e80941Smrg assign_constants_and_immediates(c); 2391b8e80941Smrg assign_texture_units(c); 2392b8e80941Smrg 2393b8e80941Smrg /* list declarations */ 2394b8e80941Smrg for (int x = 0; x < c->total_decls; ++x) { 2395b8e80941Smrg DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2396b8e80941Smrg "last_use=%i native=%i usage_mask=%x " 2397b8e80941Smrg "has_semantic=%i", 2398b8e80941Smrg x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2399b8e80941Smrg c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 
2400b8e80941Smrg c->decl[x].native.valid ? c->decl[x].native.id : -1, 2401b8e80941Smrg c->decl[x].usage_mask, c->decl[x].has_semantic); 2402b8e80941Smrg if (c->decl[x].has_semantic) 2403b8e80941Smrg DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2404b8e80941Smrg tgsi_semantic_names[c->decl[x].semantic.Name], 2405b8e80941Smrg c->decl[x].semantic.Index); 2406b8e80941Smrg } 2407b8e80941Smrg /* XXX for PS we need to permute so that inputs are always in temporary 2408b8e80941Smrg * 0..N-1. 2409b8e80941Smrg * There is no "switchboard" for varyings (AFAIK!). The output color, 2410b8e80941Smrg * however, can be routed 2411b8e80941Smrg * from an arbitrary temporary. 2412b8e80941Smrg */ 2413b8e80941Smrg if (c->info.processor == PIPE_SHADER_FRAGMENT) 2414b8e80941Smrg permute_ps_inputs(c); 2415b8e80941Smrg 2416b8e80941Smrg 2417b8e80941Smrg /* list declarations */ 2418b8e80941Smrg for (int x = 0; x < c->total_decls; ++x) { 2419b8e80941Smrg DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2420b8e80941Smrg "last_use=%i native=%i usage_mask=%x " 2421b8e80941Smrg "has_semantic=%i", 2422b8e80941Smrg x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2423b8e80941Smrg c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 2424b8e80941Smrg c->decl[x].native.valid ? 
c->decl[x].native.id : -1, 2425b8e80941Smrg c->decl[x].usage_mask, c->decl[x].has_semantic); 2426b8e80941Smrg if (c->decl[x].has_semantic) 2427b8e80941Smrg DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2428b8e80941Smrg tgsi_semantic_names[c->decl[x].semantic.Name], 2429b8e80941Smrg c->decl[x].semantic.Index); 2430b8e80941Smrg } 2431b8e80941Smrg 2432b8e80941Smrg /* pass 3: generate instructions */ 2433b8e80941Smrg etna_compile_pass_generate_code(c); 2434b8e80941Smrg etna_compile_add_z_div_if_needed(c); 2435b8e80941Smrg etna_compile_frag_rb_swap(c); 2436b8e80941Smrg etna_compile_add_nop_if_needed(c); 2437b8e80941Smrg 2438b8e80941Smrg ret = etna_compile_check_limits(c); 2439b8e80941Smrg if (!ret) 2440b8e80941Smrg goto out; 2441b8e80941Smrg 2442b8e80941Smrg etna_compile_fill_in_labels(c); 2443b8e80941Smrg 2444b8e80941Smrg /* fill in output structure */ 2445b8e80941Smrg v->processor = c->info.processor; 2446b8e80941Smrg v->code_size = c->inst_ptr * 4; 2447b8e80941Smrg v->code = mem_dup(c->code, c->inst_ptr * 16); 2448b8e80941Smrg v->num_loops = c->num_loops; 2449b8e80941Smrg v->num_temps = c->next_free_native; 2450b8e80941Smrg v->vs_pos_out_reg = -1; 2451b8e80941Smrg v->vs_pointsize_out_reg = -1; 2452b8e80941Smrg v->ps_color_out_reg = -1; 2453b8e80941Smrg v->ps_depth_out_reg = -1; 2454b8e80941Smrg v->needs_icache = c->inst_ptr > c->specs->max_instructions; 2455b8e80941Smrg copy_uniform_state_to_shader(c, v); 2456b8e80941Smrg 2457b8e80941Smrg if (c->info.processor == PIPE_SHADER_VERTEX) { 2458b8e80941Smrg fill_in_vs_inputs(v, c); 2459b8e80941Smrg fill_in_vs_outputs(v, c); 2460b8e80941Smrg } else if (c->info.processor == PIPE_SHADER_FRAGMENT) { 2461b8e80941Smrg fill_in_ps_inputs(v, c); 2462b8e80941Smrg fill_in_ps_outputs(v, c); 2463b8e80941Smrg } 2464b8e80941Smrg 2465b8e80941Smrgout: 2466b8e80941Smrg if (c->free_tokens) 2467b8e80941Smrg FREE((void *)c->tokens); 2468b8e80941Smrg 2469b8e80941Smrg FREE(c->labels); 2470b8e80941Smrg FREE(c); 
2471b8e80941Smrg 2472b8e80941Smrg return ret; 2473b8e80941Smrg} 2474b8e80941Smrg 2475b8e80941Smrgextern const char *tgsi_swizzle_names[]; 2476b8e80941Smrgvoid 2477b8e80941Smrgetna_dump_shader(const struct etna_shader_variant *shader) 2478b8e80941Smrg{ 2479b8e80941Smrg if (shader->processor == PIPE_SHADER_VERTEX) 2480b8e80941Smrg printf("VERT\n"); 2481b8e80941Smrg else 2482b8e80941Smrg printf("FRAG\n"); 2483b8e80941Smrg 2484b8e80941Smrg 2485b8e80941Smrg etna_disasm(shader->code, shader->code_size, PRINT_RAW); 2486b8e80941Smrg 2487b8e80941Smrg printf("num loops: %i\n", shader->num_loops); 2488b8e80941Smrg printf("num temps: %i\n", shader->num_temps); 2489b8e80941Smrg printf("num const: %i\n", shader->uniforms.const_count); 2490b8e80941Smrg printf("immediates:\n"); 2491b8e80941Smrg for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) { 2492b8e80941Smrg printf(" [%i].%s = %f (0x%08x)\n", 2493b8e80941Smrg (idx + shader->uniforms.const_count) / 4, 2494b8e80941Smrg tgsi_swizzle_names[idx % 4], 2495b8e80941Smrg *((float *)&shader->uniforms.imm_data[idx]), 2496b8e80941Smrg shader->uniforms.imm_data[idx]); 2497b8e80941Smrg } 2498b8e80941Smrg printf("inputs:\n"); 2499b8e80941Smrg for (int idx = 0; idx < shader->infile.num_reg; ++idx) { 2500b8e80941Smrg printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg, 2501b8e80941Smrg tgsi_semantic_names[shader->infile.reg[idx].semantic.Name], 2502b8e80941Smrg shader->infile.reg[idx].semantic.Index, 2503b8e80941Smrg shader->infile.reg[idx].num_components); 2504b8e80941Smrg } 2505b8e80941Smrg printf("outputs:\n"); 2506b8e80941Smrg for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { 2507b8e80941Smrg printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg, 2508b8e80941Smrg tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name], 2509b8e80941Smrg shader->outfile.reg[idx].semantic.Index, 2510b8e80941Smrg shader->outfile.reg[idx].num_components); 2511b8e80941Smrg } 2512b8e80941Smrg 
printf("special:\n"); 2513b8e80941Smrg if (shader->processor == PIPE_SHADER_VERTEX) { 2514b8e80941Smrg printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); 2515b8e80941Smrg printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); 2516b8e80941Smrg printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); 2517b8e80941Smrg } else { 2518b8e80941Smrg printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); 2519b8e80941Smrg printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); 2520b8e80941Smrg } 2521b8e80941Smrg printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); 2522b8e80941Smrg} 2523b8e80941Smrg 2524b8e80941Smrgvoid 2525b8e80941Smrgetna_destroy_shader(struct etna_shader_variant *shader) 2526b8e80941Smrg{ 2527b8e80941Smrg assert(shader); 2528b8e80941Smrg 2529b8e80941Smrg FREE(shader->code); 2530b8e80941Smrg FREE(shader->uniforms.imm_data); 2531b8e80941Smrg FREE(shader->uniforms.imm_contents); 2532b8e80941Smrg FREE(shader->output_per_semantic_list); 2533b8e80941Smrg FREE(shader); 2534b8e80941Smrg} 2535b8e80941Smrg 2536b8e80941Smrgstatic const struct etna_shader_inout * 2537b8e80941Smrgetna_shader_vs_lookup(const struct etna_shader_variant *sobj, 2538b8e80941Smrg const struct etna_shader_inout *in) 2539b8e80941Smrg{ 2540b8e80941Smrg if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name]) 2541b8e80941Smrg return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index]; 2542b8e80941Smrg 2543b8e80941Smrg return NULL; 2544b8e80941Smrg} 2545b8e80941Smrg 2546b8e80941Smrgbool 2547b8e80941Smrgetna_link_shader(struct etna_shader_link_info *info, 2548b8e80941Smrg const struct etna_shader_variant *vs, const struct etna_shader_variant *fs) 2549b8e80941Smrg{ 2550b8e80941Smrg int comp_ofs = 0; 2551b8e80941Smrg /* For each fragment input we need to find the associated vertex shader 2552b8e80941Smrg * output, which can be found by matching on semantic name and index. 
A 2553b8e80941Smrg * binary search could be used because the vs outputs are sorted by their 2554b8e80941Smrg * semantic index and grouped by semantic type by fill_in_vs_outputs. 2555b8e80941Smrg */ 2556b8e80941Smrg assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 2557b8e80941Smrg info->pcoord_varying_comp_ofs = -1; 2558b8e80941Smrg 2559b8e80941Smrg for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 2560b8e80941Smrg const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 2561b8e80941Smrg const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 2562b8e80941Smrg struct etna_varying *varying; 2563b8e80941Smrg bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR; 2564b8e80941Smrg 2565b8e80941Smrg assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 2566b8e80941Smrg 2567b8e80941Smrg if (fsio->reg > info->num_varyings) 2568b8e80941Smrg info->num_varyings = fsio->reg; 2569b8e80941Smrg 2570b8e80941Smrg varying = &info->varyings[fsio->reg - 1]; 2571b8e80941Smrg varying->num_components = fsio->num_components; 2572b8e80941Smrg 2573b8e80941Smrg if (!interpolate_always) /* colors affected by flat shading */ 2574b8e80941Smrg varying->pa_attributes = 0x200; 2575b8e80941Smrg else /* texture coord or other bypasses flat shading */ 2576b8e80941Smrg varying->pa_attributes = 0x2f1; 2577b8e80941Smrg 2578b8e80941Smrg varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED; 2579b8e80941Smrg varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED; 2580b8e80941Smrg varying->use[2] = VARYING_COMPONENT_USE_USED; 2581b8e80941Smrg varying->use[3] = VARYING_COMPONENT_USE_USED; 2582b8e80941Smrg 2583b8e80941Smrg 2584b8e80941Smrg /* point coord is an input to the PS without matching VS output, 2585b8e80941Smrg * so it gets a varying slot without being assigned a VS register. 
2586b8e80941Smrg */ 2587b8e80941Smrg if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) { 2588b8e80941Smrg info->pcoord_varying_comp_ofs = comp_ofs; 2589b8e80941Smrg } else { 2590b8e80941Smrg if (vsio == NULL) { /* not found -- link error */ 2591b8e80941Smrg BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index); 2592b8e80941Smrg return true; 2593b8e80941Smrg } 2594b8e80941Smrg 2595b8e80941Smrg varying->reg = vsio->reg; 2596b8e80941Smrg } 2597b8e80941Smrg 2598b8e80941Smrg comp_ofs += varying->num_components; 2599b8e80941Smrg } 2600b8e80941Smrg 2601b8e80941Smrg assert(info->num_varyings == fs->infile.num_reg); 2602b8e80941Smrg 2603b8e80941Smrg return false; 2604b8e80941Smrg} 2605