1/* 2 * Copyright (c) 2012-2019 Etnaviv Project 3 * Copyright (c) 2019 Zodiac Inflight Innovations 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sub license, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the 13 * next paragraph) shall be included in all copies or substantial portions 14 * of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Jonathan Marek <jonathan@marek.ca> 26 * Wladimir J. 
van der Laan <laanwj@gmail.com> 27 */ 28 29#include "etnaviv_compiler.h" 30#include "etnaviv_compiler_nir.h" 31#include "etnaviv_asm.h" 32#include "etnaviv_context.h" 33#include "etnaviv_debug.h" 34#include "etnaviv_nir.h" 35#include "etnaviv_uniforms.h" 36#include "etnaviv_util.h" 37 38#include <math.h> 39#include "util/u_memory.h" 40#include "util/register_allocate.h" 41#include "compiler/nir/nir_builder.h" 42 43#include "tgsi/tgsi_strings.h" 44#include "util/compiler.h" 45#include "util/half_float.h" 46 47static bool 48etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) 49{ 50 const struct etna_specs *specs = data; 51 52 if (instr->type != nir_instr_type_alu) 53 return false; 54 55 nir_alu_instr *alu = nir_instr_as_alu(instr); 56 switch (alu->op) { 57 case nir_op_frsq: 58 case nir_op_frcp: 59 case nir_op_flog2: 60 case nir_op_fexp2: 61 case nir_op_fsqrt: 62 case nir_op_fcos: 63 case nir_op_fsin: 64 case nir_op_fdiv: 65 case nir_op_imul: 66 return true; 67 /* TODO: can do better than alu_to_scalar for vector compares */ 68 case nir_op_b32all_fequal2: 69 case nir_op_b32all_fequal3: 70 case nir_op_b32all_fequal4: 71 case nir_op_b32any_fnequal2: 72 case nir_op_b32any_fnequal3: 73 case nir_op_b32any_fnequal4: 74 case nir_op_b32all_iequal2: 75 case nir_op_b32all_iequal3: 76 case nir_op_b32all_iequal4: 77 case nir_op_b32any_inequal2: 78 case nir_op_b32any_inequal3: 79 case nir_op_b32any_inequal4: 80 return true; 81 case nir_op_fdot2: 82 if (!specs->has_halti2_instructions) 83 return true; 84 break; 85 default: 86 break; 87 } 88 89 return false; 90} 91 92static void 93etna_emit_block_start(struct etna_compile *c, unsigned block) 94{ 95 c->block_ptr[block] = c->inst_ptr; 96} 97 98static void 99etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src src) 100{ 101 struct etna_shader_io_file *sf = &c->variant->outfile; 102 103 if (is_fs(c)) { 104 switch (var->data.location) { 105 case FRAG_RESULT_COLOR: 106 case FRAG_RESULT_DATA0: 
/* DATA0 is used by gallium shaders for color */ 107 c->variant->ps_color_out_reg = src.reg; 108 break; 109 case FRAG_RESULT_DEPTH: 110 c->variant->ps_depth_out_reg = src.reg; 111 break; 112 default: 113 unreachable("Unsupported fs output"); 114 } 115 return; 116 } 117 118 switch (var->data.location) { 119 case VARYING_SLOT_POS: 120 c->variant->vs_pos_out_reg = src.reg; 121 break; 122 case VARYING_SLOT_PSIZ: 123 c->variant->vs_pointsize_out_reg = src.reg; 124 break; 125 default: 126 sf->reg[sf->num_reg].reg = src.reg; 127 sf->reg[sf->num_reg].slot = var->data.location; 128 sf->reg[sf->num_reg].num_components = glsl_get_components(var->type); 129 sf->num_reg++; 130 break; 131 } 132} 133 134#define OPT(nir, pass, ...) ({ \ 135 bool this_progress = false; \ 136 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 137 this_progress; \ 138}) 139 140static void 141etna_optimize_loop(nir_shader *s) 142{ 143 bool progress; 144 do { 145 progress = false; 146 147 NIR_PASS_V(s, nir_lower_vars_to_ssa); 148 progress |= OPT(s, nir_opt_copy_prop_vars); 149 progress |= OPT(s, nir_opt_shrink_vectors, true); 150 progress |= OPT(s, nir_copy_prop); 151 progress |= OPT(s, nir_opt_dce); 152 progress |= OPT(s, nir_opt_cse); 153 progress |= OPT(s, nir_opt_peephole_select, 16, true, true); 154 progress |= OPT(s, nir_opt_intrinsics); 155 progress |= OPT(s, nir_opt_algebraic); 156 progress |= OPT(s, nir_opt_constant_folding); 157 progress |= OPT(s, nir_opt_dead_cf); 158 if (OPT(s, nir_opt_trivial_continues)) { 159 progress = true; 160 /* If nir_opt_trivial_continues makes progress, then we need to clean 161 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll 162 * to make progress. 
163 */ 164 OPT(s, nir_copy_prop); 165 OPT(s, nir_opt_dce); 166 } 167 progress |= OPT(s, nir_opt_loop_unroll); 168 progress |= OPT(s, nir_opt_if, false); 169 progress |= OPT(s, nir_opt_remove_phis); 170 progress |= OPT(s, nir_opt_undef); 171 } 172 while (progress); 173} 174 175static int 176etna_glsl_type_size(const struct glsl_type *type, bool bindless) 177{ 178 return glsl_count_attribute_slots(type, false); 179} 180 181static void 182copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count) 183{ 184 struct etna_shader_uniform_info *uinfo = &sobj->uniforms; 185 186 uinfo->count = count * 4; 187 uinfo->data = MALLOC(uinfo->count * sizeof(*uinfo->data)); 188 uinfo->contents = MALLOC(uinfo->count * sizeof(*uinfo->contents)); 189 190 for (unsigned i = 0; i < uinfo->count; i++) { 191 uinfo->data[i] = consts[i]; 192 uinfo->contents[i] = consts[i] >> 32; 193 } 194 195 etna_set_shader_uniforms_dirty_flags(sobj); 196} 197 198#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3]) 199#define SRC_DISABLE ((hw_src){}) 200#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s}) 201#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s}) 202 203typedef struct etna_inst_dst hw_dst; 204typedef struct etna_inst_src hw_src; 205 206static inline hw_src 207src_swizzle(hw_src src, unsigned swizzle) 208{ 209 if (src.rgroup != INST_RGROUP_IMMEDIATE) 210 src.swiz = inst_swiz_compose(src.swiz, swizzle); 211 212 return src; 213} 214 215/* constants are represented as 64-bit ints 216 * 32-bit for the value and 32-bit for the type (imm, uniform, etc) 217 */ 218 219#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)} 220#define CONST(x) CONST_VAL(ETNA_UNIFORM_CONSTANT, x) 221#define UNIFORM(x) CONST_VAL(ETNA_UNIFORM_UNIFORM, x) 222#define TEXSCALE(x, i) CONST_VAL(ETNA_UNIFORM_TEXRECT_SCALE_X + (i), x) 223 
/* Try to place VALUE into the 4-slot constant vector C.  An identical
 * existing slot is reused; a zero slot counts as free (so the value 0
 * always lands in the first zero slot).  Returns the slot index (0-3)
 * used, or -1 if the vector is full.
 */
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}

/* Build a source operand for a constant value: either an inline immediate
 * (HALTI2+, scalar, plain constants only) or a uniform-register reference,
 * packing the components into the shader's constant vectors and recording
 * the swizzle needed to read them back in order.
 */
static hw_src
const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_UNIFORM_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   /* scan constant vectors until one has room for every component;
    * on a partial fit, restore the saved vector and try the next one
    */
   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &c->consts[i*4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         /* NOTE(review): this 'c' shadows the outer compile context */
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   c->const_count = MAX2(c->const_count, i);

   /* loop overshoots by one before the swiz test, hence i - 1 */
   return SRC_CONST(i - 1, swiz);
}

/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};

/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};

/* nir_src to allocated register */
static hw_src
ra_src(struct etna_compile *c, nir_src *src)
{
   unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
   return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
}

/* Translate a nir_src into a hardware source operand, following bypassed
 * movs, materializing constants, and mapping the special intrinsics
 * (front_face, frag_coord, texture_rect_scaling) to their fixed sources.
 */
static hw_src
get_src(struct etna_compile *c, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(c, src);

   nir_instr *instr = src->ssa->parent_instr;

   /* bypassed mov: recurse through its source, composing the swizzle */
   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(c, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         /* frag coord lives in temp register 0 */
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      case nir_intrinsic_load_texture_rect_scaling: {
         int sampler = nir_src_as_int(intr->src[0]);
         nir_const_value values[] = {
            TEXSCALE(sampler, 0),
            TEXSCALE(sampler, 1),
         };

         return src_swizzle(const_src(c, values, 2), SWIZZLE(X,Y,X,X));
      }
      default:
         compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(c, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}

/* Return true if the vecN instruction VEC routes SSA into a component
 * other than the one it was produced in (so a swizzle would be required),
 * or if SSA is also consumed by a mov/vecN user.
 */
static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         return true;
      default:
         break;
      }
   }
   return false;
}

/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(c, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}

/* Emit one hardware ALU instruction for a NIR ALU instruction: resolve the
 * destination register and swizzle, gather/compose the source operands,
 * and fold fneg/fabs/fsat into operand modifiers.
 */
static void
emit_alu(struct etna_compile *c, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         /* immediates cannot carry neg/abs modifiers */
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}

/* Emit a hardware texture instruction, splitting the NIR tex sources into
 * coordinate, lod/bias (at most one of the two) and comparator operands.
 */
static void
emit_tex(struct etna_compile *c, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         /* bias and lod share one operand slot; both at once is unexpected */
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
         compare = &tex->src[i].src;
         break;
      default:
         compile_error(c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
                 lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
                 compare ? get_src(c, compare) : SRC_DISABLE);
}

/* Emit code for the intrinsics that survive to this point: output stores,
 * discards, and indirect uniform/UBO loads (the rest were handled earlier
 * or need no code of their own).
 */
static void
emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      etna_emit_discard(c, get_src(c, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      etna_emit_discard(c, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(c, &intr->src[0]),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(c, &intr->dest, &dst_swiz),
         .src[0] = get_src(c, &intr->src[1]),
         .src[1] = const_src(c, &CONST_VAL(ETNA_UNIFORM_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_texture_rect_scaling:
      break;
   default:
      compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}

/* Dispatch one NIR instruction to its emitter; load_const/undef/deref are
 * consumed via get_src and emit no code themselves.
 */
static void
emit_instr(struct etna_compile *c, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
      break;
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}

/* Emit a basic block, appending an explicit jump when the block ends in a
 * NIR jump or falls through backwards (a loop back-edge).
 */
static void
emit_block(struct etna_compile *c, nir_block * block)
{
   etna_emit_block_start(c, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(c, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      etna_emit_jump(c, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list);

/* Emit an if: conditional jump over the then-branch, then-branch body,
 * unconditional jump over the else-branch (when needed), else-branch body.
 */
static void
emit_if(struct etna_compile *c, nir_if * nif)
{
   etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
   emit_cf_list(c, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(c, &nif->else_list);
}

/* Walk a control-flow list, emitting blocks, ifs and loop bodies in order. */
static void
emit_cf_list(struct etna_compile *c, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(c, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(c, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(c, "Unknown NIR node type\n");
         break;
      }
   }
}

/* based on nir_lower_vec_to_movs
 * Insert a mov feeding the vec component at START_IDX, folding in any later
 * components that read the same source with the same modifiers.  Returns
 * the write mask of vec components covered by the inserted mov.
 */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx]);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}

/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 *
for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct etna_compile *c, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, c->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      /* pre-GC7000L can only have 1 uniform src per instruction */
      if (c->specs->halti >= 5)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      /* pack all constant source components into one vector, remembering
       * the per-source swizzles needed to read them back
       */
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs: keep the first constant source in place, route
       * every further one through an inserted mov
       */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   /* vecN handling from here on */
   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, def);
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   /* insert movs for the remaining components that cannot be bypassed */
   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         FALLTHROUGH;
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, c->nir);
   }
}

/* Final lowering and code emission: fold uniform loads with constant
 * offsets into UNIFORM constants, lower vecN/const sources, insert the
 * movs required by output stores, leave SSA, run register allocation and
 * emit the whole entrypoint.  Returns the temp and constant counts.
 */
static bool
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;
   c->impl = nir_shader_get_entrypoint(shader);

   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, c->impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(c, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_UNIFORM_CONSTANT) {
               have_indirect_uniform = true;
               /* NOTE(review): plain assignment — with several indirect
                * loads the last one wins; looks like it should be a max.
                * TODO confirm against upstream history.
                */
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++)
               value[i] = UNIFORM(base * 4 + i);

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      c->const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const and for depth stores from intrinsics */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr) ||
                (shader->info.stage == MESA_SHADER_FRAGMENT &&
                 deref->var->data.location == FRAG_RESULT_DEPTH &&
                 src->is_ssa &&
                 src->ssa->parent_instr->type != nir_instr_type_alu)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   etna_ra_assign(c, shader);

   emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = etna_ra_finish(c);
   *num_consts = c->const_count;
   return true;
}

/* Check the finished variant against the hardware limits (instruction
 * count without an instruction cache, temp registers, uniforms).
 */
static bool
etna_compile_check_limits(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;
   int max_uniforms = (v->stage == MESA_SHADER_VERTEX)
                         ? specs->max_vs_uniforms
                         : specs->max_ps_uniforms;

   if (!specs->has_icache && v->needs_icache) {
      DBG("Number of instructions (%d) exceeds maximum %d", v->code_size / 4,
          specs->max_instructions);
      return false;
   }

   if (v->num_temps > specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", v->num_temps,
          specs->max_registers);
      return false;
   }

   if (v->uniforms.count / 4 > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d",
          v->uniforms.count / 4, max_uniforms);
      return false;
   }

   return true;
}

/* Derive the undocumented VS/PS load-balancing register values from the
 * variant's output count and the chip's buffer/cache/core parameters.
 */
static void
fill_vs_mystery(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;

   v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
1036 * 1037 * note: TGSI compiler counts all outputs (including position and pointsize), here 1038 * v->outfile.num_reg only counts varyings, +1 to compensate for the position output 1039 * TODO: might have a problem that we don't count pointsize when it is used 1040 */ 1041 1042 int half_out = v->outfile.num_reg / 2 + 1; 1043 assert(half_out); 1044 1045 uint32_t b = ((20480 / (specs->vertex_output_buffer_size - 1046 2 * half_out * specs->vertex_cache_size)) + 1047 9) / 1048 10; 1049 uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2; 1050 v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | 1051 VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | 1052 VIVS_VS_LOAD_BALANCING_C(0x3f) | 1053 VIVS_VS_LOAD_BALANCING_D(0x0f); 1054} 1055 1056bool 1057etna_compile_shader_nir(struct etna_shader_variant *v) 1058{ 1059 if (unlikely(!v)) 1060 return false; 1061 1062 struct etna_compile *c = CALLOC_STRUCT(etna_compile); 1063 if (!c) 1064 return false; 1065 1066 c->variant = v; 1067 c->specs = v->shader->specs; 1068 c->nir = nir_shader_clone(NULL, v->shader->nir); 1069 1070 nir_shader *s = c->nir; 1071 const struct etna_specs *specs = c->specs; 1072 1073 v->stage = s->info.stage; 1074 v->uses_discard = s->info.fs.uses_discard; 1075 v->num_loops = 0; /* TODO */ 1076 v->vs_id_in_reg = -1; 1077 v->vs_pos_out_reg = -1; 1078 v->vs_pointsize_out_reg = -1; 1079 v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */ 1080 v->ps_depth_out_reg = -1; 1081 1082 /* 1083 * Lower glTexCoord, fixes e.g. 
neverball point sprite (exit cylinder stars) 1084 * and gl4es pointsprite.trace apitrace 1085 */ 1086 if (s->info.stage == MESA_SHADER_FRAGMENT && v->key.sprite_coord_enable) { 1087 NIR_PASS_V(s, nir_lower_texcoord_replace, v->key.sprite_coord_enable, 1088 false, v->key.sprite_coord_yinvert); 1089 } 1090 1091 /* setup input linking */ 1092 struct etna_shader_io_file *sf = &v->infile; 1093 if (s->info.stage == MESA_SHADER_VERTEX) { 1094 nir_foreach_shader_in_variable(var, s) { 1095 unsigned idx = var->data.driver_location; 1096 sf->reg[idx].reg = idx; 1097 sf->reg[idx].slot = var->data.location; 1098 sf->reg[idx].num_components = glsl_get_components(var->type); 1099 sf->num_reg = MAX2(sf->num_reg, idx+1); 1100 } 1101 } else { 1102 unsigned count = 0; 1103 nir_foreach_shader_in_variable(var, s) { 1104 unsigned idx = var->data.driver_location; 1105 sf->reg[idx].reg = idx + 1; 1106 sf->reg[idx].slot = var->data.location; 1107 sf->reg[idx].num_components = glsl_get_components(var->type); 1108 sf->num_reg = MAX2(sf->num_reg, idx+1); 1109 count++; 1110 } 1111 assert(sf->num_reg == count); 1112 } 1113 1114 NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_uniform, etna_glsl_type_size, 1115 (nir_lower_io_options)0); 1116 1117 NIR_PASS_V(s, nir_lower_regs_to_ssa); 1118 NIR_PASS_V(s, nir_lower_vars_to_ssa); 1119 NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX); 1120 NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u }); 1121 NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); 1122 nir_lower_idiv_options idiv_options = { 1123 .imprecise_32bit_lowering = true, 1124 .allow_fp16 = true, 1125 }; 1126 NIR_PASS_V(s, nir_lower_idiv, &idiv_options); 1127 1128 etna_optimize_loop(s); 1129 1130 /* TODO: remove this extra run if nir_opt_peephole_select is able to handle ubo's. 
*/ 1131 if (OPT(s, etna_nir_lower_ubo_to_uniform)) 1132 etna_optimize_loop(s); 1133 1134 NIR_PASS_V(s, etna_lower_io, v); 1135 1136 if (v->shader->specs->vs_need_z_div) 1137 NIR_PASS_V(s, nir_lower_clip_halfz); 1138 1139 /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */ 1140 if (c->specs->halti < 2) { 1141 /* use opt_algebraic between int_to_float and boot_to_float because 1142 * int_to_float emits ftrunc, and ftrunc lowering generates bool ops 1143 */ 1144 NIR_PASS_V(s, nir_lower_int_to_float); 1145 NIR_PASS_V(s, nir_opt_algebraic); 1146 NIR_PASS_V(s, nir_lower_bool_to_float); 1147 } else { 1148 NIR_PASS_V(s, nir_lower_bool_to_int32); 1149 } 1150 1151 while( OPT(s, nir_opt_vectorize, NULL, NULL) ); 1152 NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); 1153 1154 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); 1155 NIR_PASS_V(s, nir_opt_algebraic_late); 1156 1157 NIR_PASS_V(s, nir_move_vec_src_uses_to_dest); 1158 NIR_PASS_V(s, nir_copy_prop); 1159 /* only HW supported integer source mod is ineg for iadd instruction (?) 
*/ 1160 NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods); 1161 /* need copy prop after uses_to_dest, and before src mods: see 1162 * dEQP-GLES2.functional.shaders.random.all_features.fragment.95 1163 */ 1164 1165 NIR_PASS_V(s, nir_opt_dce); 1166 1167 NIR_PASS_V(s, nir_lower_bool_to_bitsize); 1168 NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals); 1169 1170 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) 1171 nir_print_shader(s, stdout); 1172 1173 unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks]; 1174 c->block_ptr = block_ptr; 1175 1176 unsigned num_consts; 1177 ASSERTED bool ok = emit_shader(c, &v->num_temps, &num_consts); 1178 assert(ok); 1179 1180 /* empty shader, emit NOP */ 1181 if (!c->inst_ptr) 1182 emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP }); 1183 1184 /* assemble instructions, fixing up labels */ 1185 uint32_t *code = MALLOC(c->inst_ptr * 16); 1186 for (unsigned i = 0; i < c->inst_ptr; i++) { 1187 struct etna_inst *inst = &c->code[i]; 1188 if (inst->opcode == INST_OPCODE_BRANCH) 1189 inst->imm = block_ptr[inst->imm]; 1190 1191 inst->halti5 = specs->halti >= 5; 1192 etna_assemble(&code[i * 4], inst); 1193 } 1194 1195 v->code_size = c->inst_ptr * 4; 1196 v->code = code; 1197 v->needs_icache = c->inst_ptr > specs->max_instructions; 1198 1199 copy_uniform_state_to_shader(v, c->consts, num_consts); 1200 1201 if (s->info.stage == MESA_SHADER_FRAGMENT) { 1202 v->input_count_unk8 = 31; /* XXX what is this */ 1203 assert(v->ps_depth_out_reg <= 0); 1204 } else { 1205 fill_vs_mystery(v); 1206 } 1207 1208 bool result = etna_compile_check_limits(v); 1209 ralloc_free(c->nir); 1210 FREE(c); 1211 return result; 1212} 1213 1214static const struct etna_shader_inout * 1215etna_shader_vs_lookup(const struct etna_shader_variant *sobj, 1216 const struct etna_shader_inout *in) 1217{ 1218 for (int i = 0; i < sobj->outfile.num_reg; i++) 1219 if (sobj->outfile.reg[i].slot == in->slot) 1220 return &sobj->outfile.reg[i]; 1221 
1222 return NULL; 1223} 1224 1225bool 1226etna_link_shader_nir(struct etna_shader_link_info *info, 1227 const struct etna_shader_variant *vs, 1228 const struct etna_shader_variant *fs) 1229{ 1230 int comp_ofs = 0; 1231 /* For each fragment input we need to find the associated vertex shader 1232 * output, which can be found by matching on semantic name and index. A 1233 * binary search could be used because the vs outputs are sorted by their 1234 * semantic index and grouped by semantic type by fill_in_vs_outputs. 1235 */ 1236 assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 1237 info->pcoord_varying_comp_ofs = -1; 1238 1239 for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 1240 const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 1241 const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 1242 struct etna_varying *varying; 1243 bool interpolate_always = true; 1244 1245 assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 1246 1247 if (fsio->reg > info->num_varyings) 1248 info->num_varyings = fsio->reg; 1249 1250 varying = &info->varyings[fsio->reg - 1]; 1251 varying->num_components = fsio->num_components; 1252 1253 if (!interpolate_always) /* colors affected by flat shading */ 1254 varying->pa_attributes = 0x200; 1255 else /* texture coord or other bypasses flat shading */ 1256 varying->pa_attributes = 0x2f1; 1257 1258 varying->use[0] = VARYING_COMPONENT_USE_UNUSED; 1259 varying->use[1] = VARYING_COMPONENT_USE_UNUSED; 1260 varying->use[2] = VARYING_COMPONENT_USE_UNUSED; 1261 varying->use[3] = VARYING_COMPONENT_USE_UNUSED; 1262 1263 /* point/tex coord is an input to the PS without matching VS output, 1264 * so it gets a varying slot without being assigned a VS register. 
1265 */ 1266 if (fsio->slot == VARYING_SLOT_PNTC) { 1267 varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X; 1268 varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; 1269 1270 info->pcoord_varying_comp_ofs = comp_ofs; 1271 } else if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) { 1272 /* 1273 * Do nothing, TexCoord is lowered to PointCoord above 1274 * and the TexCoord here is just a remnant. This needs 1275 * to be removed with some nir_remove_dead_variables(), 1276 * but that one removes all FS inputs ... why? 1277 */ 1278 } else { 1279 if (vsio == NULL) { /* not found -- link error */ 1280 BUG("Semantic value not found in vertex shader outputs\n"); 1281 return true; 1282 } 1283 varying->reg = vsio->reg; 1284 } 1285 1286 comp_ofs += varying->num_components; 1287 } 1288 1289 assert(info->num_varyings == fs->infile.num_reg); 1290 1291 return false; 1292} 1293