1/* 2 * Copyright © 2016-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "broadcom/common/v3d_device_info.h" 25#include "v3d_compiler.h" 26 27int 28vir_get_nsrc(struct qinst *inst) 29{ 30 switch (inst->qpu.type) { 31 case V3D_QPU_INSTR_TYPE_BRANCH: 32 return 0; 33 case V3D_QPU_INSTR_TYPE_ALU: 34 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP) 35 return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op); 36 else 37 return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op); 38 } 39 40 return 0; 41} 42 43/** 44 * Returns whether the instruction has any side effects that must be 45 * preserved. 46 */ 47bool 48vir_has_side_effects(struct v3d_compile *c, struct qinst *inst) 49{ 50 switch (inst->qpu.type) { 51 case V3D_QPU_INSTR_TYPE_BRANCH: 52 return true; 53 case V3D_QPU_INSTR_TYPE_ALU: 54 switch (inst->qpu.alu.add.op) { 55 case V3D_QPU_A_SETREVF: 56 case V3D_QPU_A_SETMSF: 57 case V3D_QPU_A_VPMSETUP: 58 case V3D_QPU_A_STVPMV: 59 case V3D_QPU_A_STVPMD: 60 case V3D_QPU_A_STVPMP: 61 case V3D_QPU_A_VPMWT: 62 case V3D_QPU_A_TMUWT: 63 return true; 64 default: 65 break; 66 } 67 68 switch (inst->qpu.alu.mul.op) { 69 case V3D_QPU_M_MULTOP: 70 return true; 71 default: 72 break; 73 } 74 } 75 76 if (inst->qpu.sig.ldtmu || 77 inst->qpu.sig.ldvary || 78 inst->qpu.sig.wrtmuc || 79 inst->qpu.sig.thrsw) { 80 return true; 81 } 82 83 return false; 84} 85 86bool 87vir_is_raw_mov(struct qinst *inst) 88{ 89 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 90 (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV && 91 inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) { 92 return false; 93 } 94 95 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE || 96 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) { 97 return false; 98 } 99 100 if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE || 101 inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE || 102 inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || 103 inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) { 104 return false; 105 } 106 107 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 108 inst->qpu.flags.mc != V3D_QPU_COND_NONE) 109 return false; 110 111 return true; 112} 113 114bool 115vir_is_add(struct qinst *inst) 116{ 117 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 118 inst->qpu.alu.add.op != V3D_QPU_A_NOP); 119} 120 121bool 122vir_is_mul(struct qinst *inst) 123{ 124 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 125 inst->qpu.alu.mul.op != V3D_QPU_M_NOP); 126} 127 128bool 129vir_is_tex(struct qinst *inst) 130{ 131 if (inst->dst.file == QFILE_MAGIC) 132 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index); 133 134 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 135 inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) { 136 return true; 137 } 138 139 return false; 140} 141 142bool 143vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) 144{ 145 for (int i = 0; i < vir_get_nsrc(inst); i++) { 146 switch (inst->src[i].file) { 147 case QFILE_VPM: 148 return true; 149 default: 150 break; 151 } 152 } 153 154 if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || 155 inst->qpu.sig.ldtlb || 156 inst->qpu.sig.ldtlbu || 157 inst->qpu.sig.ldvpm)) { 158 return true; 159 } 160 161 return false; 162} 163 164bool 165vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) 166{ 167 switch (inst->dst.file) { 168 case QFILE_MAGIC: 169 switch (inst->dst.index) { 170 case V3D_QPU_WADDR_RECIP: 171 case V3D_QPU_WADDR_RSQRT: 172 case V3D_QPU_WADDR_EXP: 173 case V3D_QPU_WADDR_LOG: 174 case V3D_QPU_WADDR_SIN: 175 return true; 176 } 177 break; 178 default: 179 break; 180 } 181 182 if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) 183 return true; 184 185 return false; 186} 187 188void 189vir_set_unpack(struct qinst *inst, int src, 190 enum v3d_qpu_input_unpack unpack) 191{ 192 assert(src == 0 || src == 1); 193 194 if (vir_is_add(inst)) { 195 if (src == 0) 196 inst->qpu.alu.add.a_unpack = unpack; 197 else 198 inst->qpu.alu.add.b_unpack = unpack; 199 } else { 200 assert(vir_is_mul(inst)); 201 if (src == 0) 202 inst->qpu.alu.mul.a_unpack = unpack; 203 else 204 inst->qpu.alu.mul.b_unpack = unpack; 205 } 206} 207 208void 209vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond) 210{ 211 if (vir_is_add(inst)) { 212 inst->qpu.flags.ac = cond; 213 } else { 214 assert(vir_is_mul(inst)); 215 inst->qpu.flags.mc = cond; 216 } 217} 218 219void 220vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf) 221{ 222 if (vir_is_add(inst)) { 223 inst->qpu.flags.apf = pf; 224 } else { 225 assert(vir_is_mul(inst)); 226 inst->qpu.flags.mpf = pf; 227 } 228} 229 230void 231vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf) 232{ 233 if (vir_is_add(inst)) { 234 inst->qpu.flags.auf = uf; 235 } else { 236 assert(vir_is_mul(inst)); 237 inst->qpu.flags.muf = uf; 238 } 239} 240 241#if 0 242uint8_t 243vir_channels_written(struct qinst *inst) 244{ 245 if (vir_is_mul(inst)) { 246 switch (inst->dst.pack) { 247 case QPU_PACK_MUL_NOP: 248 case QPU_PACK_MUL_8888: 249 return 0xf; 250 case QPU_PACK_MUL_8A: 251 return 0x1; 252 case QPU_PACK_MUL_8B: 253 return 0x2; 254 case QPU_PACK_MUL_8C: 255 return 0x4; 256 case QPU_PACK_MUL_8D: 257 return 0x8; 258 } 259 } else { 260 switch (inst->dst.pack) { 261 case QPU_PACK_A_NOP: 262 case QPU_PACK_A_8888: 263 case QPU_PACK_A_8888_SAT: 264 case QPU_PACK_A_32_SAT: 265 return 0xf; 266 case QPU_PACK_A_8A: 267 case QPU_PACK_A_8A_SAT: 268 return 0x1; 269 case QPU_PACK_A_8B: 270 case QPU_PACK_A_8B_SAT: 271 return 0x2; 272 case QPU_PACK_A_8C: 273 case QPU_PACK_A_8C_SAT: 274 return 0x4; 275 case QPU_PACK_A_8D: 276 case QPU_PACK_A_8D_SAT: 277 return 0x8; 278 case QPU_PACK_A_16A: 279 case QPU_PACK_A_16A_SAT: 280 return 0x3; 281 case QPU_PACK_A_16B: 282 case QPU_PACK_A_16B_SAT: 283 return 0xc; 284 } 285 } 286 unreachable("Bad pack field"); 287} 288#endif 289 290struct qreg 291vir_get_temp(struct v3d_compile *c) 292{ 293 struct qreg reg; 294 295 reg.file = QFILE_TEMP; 296 reg.index = c->num_temps++; 297 298 if (c->num_temps > c->defs_array_size) { 299 uint32_t old_size = c->defs_array_size; 300 c->defs_array_size = MAX2(old_size * 2, 16); 301 302 c->defs = reralloc(c, c->defs, struct qinst *, 303 c->defs_array_size); 304 memset(&c->defs[old_size], 0, 305 sizeof(c->defs[0]) * (c->defs_array_size - old_size)); 306 307 c->spillable = reralloc(c, c->spillable, 308 BITSET_WORD, 309 BITSET_WORDS(c->defs_array_size)); 310 for (int i = old_size; i < c->defs_array_size; i++) 311 BITSET_SET(c->spillable, i); 312 } 313 314 return reg; 315} 316 317struct qinst * 318vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1) 319{ 320 struct qinst *inst = calloc(1, sizeof(*inst)); 321 322 inst->qpu = v3d_qpu_nop(); 323 inst->qpu.alu.add.op = op; 324 325 inst->dst = dst; 326 inst->src[0] = src0; 327 inst->src[1] = src1; 328 inst->uniform = ~0; 329 330 return inst; 331} 332 333struct qinst * 334vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1) 335{ 336 struct qinst *inst = calloc(1, sizeof(*inst)); 337 338 inst->qpu = v3d_qpu_nop(); 339 inst->qpu.alu.mul.op = op; 340 341 inst->dst = dst; 342 inst->src[0] = src0; 343 inst->src[1] = src1; 344 inst->uniform = ~0; 345 346 return inst; 347} 348 349struct qinst * 350vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) 351{ 352 struct qinst *inst = calloc(1, sizeof(*inst)); 353 354 inst->qpu = v3d_qpu_nop(); 355 inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH; 356 inst->qpu.branch.cond = cond; 357 inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE; 358 inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL; 359 inst->qpu.branch.ub = true; 360 inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL; 361 362 inst->dst = vir_nop_reg(); 363 inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0); 364 365 return inst; 366} 367 368static void 369vir_emit(struct v3d_compile *c, struct qinst *inst) 370{ 371 switch (c->cursor.mode) { 372 case vir_cursor_add: 373 list_add(&inst->link, c->cursor.link); 374 break; 375 case vir_cursor_addtail: 376 list_addtail(&inst->link, c->cursor.link); 377 break; 378 } 379 380 c->cursor = vir_after_inst(inst); 381 c->live_intervals_valid = false; 382} 383 384/* Updates inst to write to a new temporary, emits it, and notes the def. */ 385struct qreg 386vir_emit_def(struct v3d_compile *c, struct qinst *inst) 387{ 388 assert(inst->dst.file == QFILE_NULL); 389 390 /* If we're emitting an instruction that's a def, it had better be 391 * writing a register. 392 */ 393 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 394 assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP || 395 v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op)); 396 assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP || 397 v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op)); 398 } 399 400 inst->dst = vir_get_temp(c); 401 402 if (inst->dst.file == QFILE_TEMP) 403 c->defs[inst->dst.index] = inst; 404 405 vir_emit(c, inst); 406 407 return inst->dst; 408} 409 410struct qinst * 411vir_emit_nondef(struct v3d_compile *c, struct qinst *inst) 412{ 413 if (inst->dst.file == QFILE_TEMP) 414 c->defs[inst->dst.index] = NULL; 415 416 vir_emit(c, inst); 417 418 return inst; 419} 420 421struct qblock * 422vir_new_block(struct v3d_compile *c) 423{ 424 struct qblock *block = rzalloc(c, struct qblock); 425 426 list_inithead(&block->instructions); 427 428 block->predecessors = _mesa_set_create(block, 429 _mesa_hash_pointer, 430 _mesa_key_pointer_equal); 431 432 block->index = c->next_block_index++; 433 434 return block; 435} 436 437void 438vir_set_emit_block(struct v3d_compile *c, struct qblock *block) 439{ 440 c->cur_block = block; 441 c->cursor = vir_after_block(block); 442 list_addtail(&block->link, &c->blocks); 443} 444 445struct qblock * 446vir_entry_block(struct v3d_compile *c) 447{ 448 return list_first_entry(&c->blocks, struct qblock, link); 449} 450 451struct qblock * 452vir_exit_block(struct v3d_compile *c) 453{ 454 return list_last_entry(&c->blocks, struct qblock, link); 455} 456 457void 458vir_link_blocks(struct qblock *predecessor, struct qblock *successor) 459{ 460 _mesa_set_add(successor->predecessors, predecessor); 461 if (predecessor->successors[0]) { 462 assert(!predecessor->successors[1]); 463 predecessor->successors[1] = successor; 464 } else { 465 predecessor->successors[0] = successor; 466 } 467} 468 469const struct v3d_compiler * 470v3d_compiler_init(const struct v3d_device_info *devinfo) 471{ 472 struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler); 473 if (!compiler) 474 return NULL; 475 476 compiler->devinfo = devinfo; 477 478 if (!vir_init_reg_sets(compiler)) { 479 ralloc_free(compiler); 480 return NULL; 481 } 482 483 return compiler; 484} 485 486void 487v3d_compiler_free(const struct v3d_compiler *compiler) 488{ 489 ralloc_free((void *)compiler); 490} 491 492static struct v3d_compile * 493vir_compile_init(const struct v3d_compiler *compiler, 494 struct v3d_key *key, 495 nir_shader *s, 496 void (*debug_output)(const char *msg, 497 void *debug_output_data), 498 void *debug_output_data, 499 int program_id, int variant_id) 500{ 501 struct v3d_compile *c = rzalloc(NULL, struct v3d_compile); 502 503 c->compiler = compiler; 504 c->devinfo = compiler->devinfo; 505 c->key = key; 506 c->program_id = program_id; 507 c->variant_id = variant_id; 508 c->threads = 4; 509 c->debug_output = debug_output; 510 c->debug_output_data = debug_output_data; 511 512 s = nir_shader_clone(c, s); 513 c->s = s; 514 515 list_inithead(&c->blocks); 516 vir_set_emit_block(c, vir_new_block(c)); 517 518 c->output_position_index = -1; 519 c->output_sample_mask_index = -1; 520 521 c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer, 522 _mesa_key_pointer_equal); 523 524 return c; 525} 526 527static int 528type_size_vec4(const struct glsl_type *type, bool bindless) 529{ 530 return glsl_count_attribute_slots(type, false); 531} 532 533static void 534v3d_lower_nir(struct v3d_compile *c) 535{ 536 struct nir_lower_tex_options tex_options = { 537 .lower_txd = true, 538 .lower_tg4_broadcom_swizzle = true, 539 540 .lower_rect = false, /* XXX: Use this on V3D 3.x */ 541 .lower_txp = ~0, 542 /* Apply swizzles to all samplers. */ 543 .swizzle_result = ~0, 544 }; 545 546 /* Lower the format swizzle and (for 32-bit returns) 547 * ARB_texture_swizzle-style swizzle. 548 */ 549 for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) { 550 for (int j = 0; j < 4; j++) 551 tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j]; 552 553 if (c->key->tex[i].clamp_s) 554 tex_options.saturate_s |= 1 << i; 555 if (c->key->tex[i].clamp_t) 556 tex_options.saturate_t |= 1 << i; 557 if (c->key->tex[i].clamp_r) 558 tex_options.saturate_r |= 1 << i; 559 if (c->key->tex[i].return_size == 16) { 560 tex_options.lower_tex_packing[i] = 561 nir_lower_tex_packing_16; 562 } 563 } 564 565 /* CS textures may not have return_size reflecting the shadow state. */ 566 nir_foreach_variable(var, &c->s->uniforms) { 567 const struct glsl_type *type = glsl_without_array(var->type); 568 unsigned array_len = MAX2(glsl_get_length(var->type), 1); 569 570 if (!glsl_type_is_sampler(type) || 571 !glsl_sampler_type_is_shadow(type)) 572 continue; 573 574 for (int i = 0; i < array_len; i++) { 575 tex_options.lower_tex_packing[var->data.binding + i] = 576 nir_lower_tex_packing_16; 577 } 578 } 579 580 NIR_PASS_V(c->s, nir_lower_tex, &tex_options); 581 NIR_PASS_V(c->s, nir_lower_system_values); 582 583 NIR_PASS_V(c->s, nir_lower_vars_to_scratch, 584 nir_var_function_temp, 585 0, 586 glsl_get_natural_size_align_bytes); 587 NIR_PASS_V(c->s, v3d_nir_lower_scratch); 588} 589 590static void 591v3d_set_prog_data_uniforms(struct v3d_compile *c, 592 struct v3d_prog_data *prog_data) 593{ 594 int count = c->num_uniforms; 595 struct v3d_uniform_list *ulist = &prog_data->uniforms; 596 597 ulist->count = count; 598 ulist->data = ralloc_array(prog_data, uint32_t, count); 599 memcpy(ulist->data, c->uniform_data, 600 count * sizeof(*ulist->data)); 601 ulist->contents = ralloc_array(prog_data, enum quniform_contents, count); 602 memcpy(ulist->contents, c->uniform_contents, 603 count * sizeof(*ulist->contents)); 604} 605 606static void 607v3d_vs_set_prog_data(struct v3d_compile *c, 608 struct v3d_vs_prog_data *prog_data) 609{ 610 /* The vertex data gets format converted by the VPM so that 611 * each attribute channel takes up a VPM column. Precompute 612 * the sizes for the shader record. 613 */ 614 for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) { 615 prog_data->vattr_sizes[i] = c->vattr_sizes[i]; 616 prog_data->vpm_input_size += c->vattr_sizes[i]; 617 } 618 619 prog_data->uses_vid = (c->s->info.system_values_read & 620 (1ull << SYSTEM_VALUE_VERTEX_ID)); 621 prog_data->uses_iid = (c->s->info.system_values_read & 622 (1ull << SYSTEM_VALUE_INSTANCE_ID)); 623 624 if (prog_data->uses_vid) 625 prog_data->vpm_input_size++; 626 if (prog_data->uses_iid) 627 prog_data->vpm_input_size++; 628 629 /* Input/output segment size are in sectors (8 rows of 32 bits per 630 * channel). 631 */ 632 prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8; 633 prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8; 634 635 /* Set us up for shared input/output segments. This is apparently 636 * necessary for our VCM setup to avoid varying corruption. 637 */ 638 prog_data->separate_segments = false; 639 prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size, 640 prog_data->vpm_input_size); 641 prog_data->vpm_input_size = 0; 642 643 /* Compute VCM cache size. We set up our program to take up less than 644 * half of the VPM, so that any set of bin and render programs won't 645 * run out of space. We need space for at least one input segment, 646 * and then allocate the rest to output segments (one for the current 647 * program, the rest to VCM). The valid range of the VCM cache size 648 * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4 649 * batches. 650 */ 651 assert(c->devinfo->vpm_size); 652 int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8; 653 int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size; 654 int half_vpm = vpm_size_in_sectors / 2; 655 int vpm_output_sectors = half_vpm - prog_data->vpm_input_size; 656 int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size; 657 assert(vpm_output_batches >= 2); 658 prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4); 659} 660 661static void 662v3d_set_fs_prog_data_inputs(struct v3d_compile *c, 663 struct v3d_fs_prog_data *prog_data) 664{ 665 prog_data->num_inputs = c->num_inputs; 666 memcpy(prog_data->input_slots, c->input_slots, 667 c->num_inputs * sizeof(*c->input_slots)); 668 669 STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) > 670 (V3D_MAX_FS_INPUTS - 1) / 24); 671 for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) { 672 if (BITSET_TEST(c->flat_shade_flags, i)) 673 prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24); 674 675 if (BITSET_TEST(c->noperspective_flags, i)) 676 prog_data->noperspective_flags[i / 24] |= 1 << (i % 24); 677 678 if (BITSET_TEST(c->centroid_flags, i)) 679 prog_data->centroid_flags[i / 24] |= 1 << (i % 24); 680 } 681} 682 683static void 684v3d_fs_set_prog_data(struct v3d_compile *c, 685 struct v3d_fs_prog_data *prog_data) 686{ 687 v3d_set_fs_prog_data_inputs(c, prog_data); 688 prog_data->writes_z = c->writes_z; 689 prog_data->disable_ez = !c->s->info.fs.early_fragment_tests; 690 prog_data->uses_center_w = c->uses_center_w; 691} 692 693static void 694v3d_cs_set_prog_data(struct v3d_compile *c, 695 struct v3d_compute_prog_data *prog_data) 696{ 697 prog_data->shared_size = c->s->info.cs.shared_size; 698} 699 700static void 701v3d_set_prog_data(struct v3d_compile *c, 702 struct v3d_prog_data *prog_data) 703{ 704 prog_data->threads = c->threads; 705 prog_data->single_seg = !c->last_thrsw; 706 prog_data->spill_size = c->spill_size; 707 708 v3d_set_prog_data_uniforms(c, prog_data); 709 710 if (c->s->info.stage == MESA_SHADER_COMPUTE) { 711 v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data); 712 } else if (c->s->info.stage == MESA_SHADER_VERTEX) { 713 v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data); 714 } else { 715 assert(c->s->info.stage == MESA_SHADER_FRAGMENT); 716 v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data); 717 } 718} 719 720static uint64_t * 721v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size) 722{ 723 *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t); 724 725 uint64_t *qpu_insts = malloc(*final_assembly_size); 726 if (!qpu_insts) 727 return NULL; 728 729 memcpy(qpu_insts, c->qpu_insts, *final_assembly_size); 730 731 vir_compile_destroy(c); 732 733 return qpu_insts; 734} 735 736static void 737v3d_nir_lower_vs_early(struct v3d_compile *c) 738{ 739 /* Split our I/O vars and dead code eliminate the unused 740 * components. 741 */ 742 NIR_PASS_V(c->s, nir_lower_io_to_scalar_early, 743 nir_var_shader_in | nir_var_shader_out); 744 uint64_t used_outputs[4] = {0}; 745 for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { 746 int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]); 747 int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]); 748 used_outputs[comp] |= 1ull << slot; 749 } 750 NIR_PASS_V(c->s, nir_remove_unused_io_vars, 751 &c->s->outputs, used_outputs, NULL); /* demotes to globals */ 752 NIR_PASS_V(c->s, nir_lower_global_vars_to_local); 753 v3d_optimize_nir(c->s); 754 NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in); 755 NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 756 type_size_vec4, 757 (nir_lower_io_options)0); 758} 759 760static void 761v3d_fixup_fs_output_types(struct v3d_compile *c) 762{ 763 nir_foreach_variable(var, &c->s->outputs) { 764 uint32_t mask = 0; 765 766 switch (var->data.location) { 767 case FRAG_RESULT_COLOR: 768 mask = ~0; 769 break; 770 case FRAG_RESULT_DATA0: 771 case FRAG_RESULT_DATA1: 772 case FRAG_RESULT_DATA2: 773 case FRAG_RESULT_DATA3: 774 mask = 1 << (var->data.location - FRAG_RESULT_DATA0); 775 break; 776 } 777 778 if (c->fs_key->int_color_rb & mask) { 779 var->type = 780 glsl_vector_type(GLSL_TYPE_INT, 781 glsl_get_components(var->type)); 782 } else if (c->fs_key->uint_color_rb & mask) { 783 var->type = 784 glsl_vector_type(GLSL_TYPE_UINT, 785 glsl_get_components(var->type)); 786 } 787 } 788} 789 790static void 791v3d_nir_lower_fs_early(struct v3d_compile *c) 792{ 793 if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb) 794 v3d_fixup_fs_output_types(c); 795 796 /* If the shader has no non-TLB side effects, we can promote it to 797 * enabling early_fragment_tests even if the user didn't. 798 */ 799 if (!(c->s->info.num_images || 800 c->s->info.num_ssbos || 801 c->s->info.num_abos)) { 802 c->s->info.fs.early_fragment_tests = true; 803 } 804} 805 806static void 807v3d_nir_lower_vs_late(struct v3d_compile *c) 808{ 809 if (c->vs_key->clamp_color) 810 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); 811 812 if (c->key->ucp_enables) { 813 NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables, 814 false); 815 NIR_PASS_V(c->s, nir_lower_io_to_scalar, 816 nir_var_shader_out); 817 } 818 819 /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */ 820 NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); 821} 822 823static void 824v3d_nir_lower_fs_late(struct v3d_compile *c) 825{ 826 if (c->fs_key->light_twoside) 827 NIR_PASS_V(c->s, nir_lower_two_sided_color); 828 829 if (c->fs_key->clamp_color) 830 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); 831 832 if (c->fs_key->alpha_test) { 833 NIR_PASS_V(c->s, nir_lower_alpha_test, 834 c->fs_key->alpha_test_func, 835 false); 836 } 837 838 if (c->key->ucp_enables) 839 NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); 840 841 /* Note: FS input scalarizing must happen after 842 * nir_lower_two_sided_color, which only handles a vec4 at a time. 843 */ 844 NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in); 845} 846 847static uint32_t 848vir_get_max_temps(struct v3d_compile *c) 849{ 850 int max_ip = 0; 851 vir_for_each_inst_inorder(inst, c) 852 max_ip++; 853 854 uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip); 855 856 for (int t = 0; t < c->num_temps; t++) { 857 for (int i = c->temp_start[t]; (i < c->temp_end[t] && 858 i < max_ip); i++) { 859 if (i > max_ip) 860 break; 861 pressure[i]++; 862 } 863 } 864 865 uint32_t max_temps = 0; 866 for (int i = 0; i < max_ip; i++) 867 max_temps = MAX2(max_temps, pressure[i]); 868 869 ralloc_free(pressure); 870 871 return max_temps; 872} 873 874uint64_t *v3d_compile(const struct v3d_compiler *compiler, 875 struct v3d_key *key, 876 struct v3d_prog_data **out_prog_data, 877 nir_shader *s, 878 void (*debug_output)(const char *msg, 879 void *debug_output_data), 880 void *debug_output_data, 881 int program_id, int variant_id, 882 uint32_t *final_assembly_size) 883{ 884 struct v3d_prog_data *prog_data; 885 struct v3d_compile *c = vir_compile_init(compiler, key, s, 886 debug_output, debug_output_data, 887 program_id, variant_id); 888 889 switch (c->s->info.stage) { 890 case MESA_SHADER_VERTEX: 891 c->vs_key = (struct v3d_vs_key *)key; 892 prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data)); 893 break; 894 case MESA_SHADER_FRAGMENT: 895 c->fs_key = (struct v3d_fs_key *)key; 896 prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data)); 897 break; 898 case MESA_SHADER_COMPUTE: 899 prog_data = rzalloc_size(NULL, 900 sizeof(struct v3d_compute_prog_data)); 901 break; 902 default: 903 unreachable("unsupported shader stage"); 904 } 905 906 if (c->s->info.stage == MESA_SHADER_VERTEX) { 907 v3d_nir_lower_vs_early(c); 908 } else if (c->s->info.stage != MESA_SHADER_COMPUTE) { 909 assert(c->s->info.stage == MESA_SHADER_FRAGMENT); 910 v3d_nir_lower_fs_early(c); 911 } 912 913 v3d_lower_nir(c); 914 915 if (c->s->info.stage == MESA_SHADER_VERTEX) { 916 v3d_nir_lower_vs_late(c); 917 } else if (c->s->info.stage != MESA_SHADER_COMPUTE) { 918 assert(c->s->info.stage == MESA_SHADER_FRAGMENT); 919 v3d_nir_lower_fs_late(c); 920 } 921 922 NIR_PASS_V(c->s, v3d_nir_lower_io, c); 923 NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c); 924 NIR_PASS_V(c->s, v3d_nir_lower_image_load_store); 925 NIR_PASS_V(c->s, nir_lower_idiv); 926 927 v3d_optimize_nir(c->s); 928 NIR_PASS_V(c->s, nir_lower_bool_to_int32); 929 NIR_PASS_V(c->s, nir_convert_from_ssa, true); 930 931 v3d_nir_to_vir(c); 932 933 v3d_set_prog_data(c, prog_data); 934 935 *out_prog_data = prog_data; 936 937 char *shaderdb; 938 int ret = asprintf(&shaderdb, 939 "%s shader: %d inst, %d threads, %d loops, " 940 "%d uniforms, %d max-temps, %d:%d spills:fills", 941 vir_get_stage_name(c), 942 c->qpu_inst_count, 943 c->threads, 944 c->loops, 945 c->num_uniforms, 946 vir_get_max_temps(c), 947 c->spills, 948 c->fills); 949 if (ret >= 0) { 950 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) 951 fprintf(stderr, "SHADER-DB: %s\n", shaderdb); 952 953 c->debug_output(shaderdb, c->debug_output_data); 954 free(shaderdb); 955 } 956 957 return v3d_return_qpu_insts(c, final_assembly_size); 958} 959 960void 961vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst) 962{ 963 if (qinst->dst.file == QFILE_TEMP) 964 c->defs[qinst->dst.index] = NULL; 965 966 assert(&qinst->link != c->cursor.link); 967 968 list_del(&qinst->link); 969 free(qinst); 970 971 c->live_intervals_valid = false; 972} 973 974struct qreg 975vir_follow_movs(struct v3d_compile *c, struct qreg reg) 976{ 977 /* XXX 978 int pack = reg.pack; 979 980 while (reg.file == QFILE_TEMP && 981 c->defs[reg.index] && 982 (c->defs[reg.index]->op == QOP_MOV || 983 c->defs[reg.index]->op == QOP_FMOV) && 984 !c->defs[reg.index]->dst.pack && 985 !c->defs[reg.index]->src[0].pack) { 986 reg = c->defs[reg.index]->src[0]; 987 } 988 989 reg.pack = pack; 990 */ 991 return reg; 992} 993 994void 995vir_compile_destroy(struct v3d_compile *c) 996{ 997 /* Defuse the assert that we aren't removing the cursor's instruction. 998 */ 999 c->cursor.link = NULL; 1000 1001 vir_for_each_block(block, c) { 1002 while (!list_empty(&block->instructions)) { 1003 struct qinst *qinst = 1004 list_first_entry(&block->instructions, 1005 struct qinst, link); 1006 vir_remove_instruction(c, qinst); 1007 } 1008 } 1009 1010 ralloc_free(c); 1011} 1012 1013uint32_t 1014vir_get_uniform_index(struct v3d_compile *c, 1015 enum quniform_contents contents, 1016 uint32_t data) 1017{ 1018 for (int i = 0; i < c->num_uniforms; i++) { 1019 if (c->uniform_contents[i] == contents && 1020 c->uniform_data[i] == data) { 1021 return i; 1022 } 1023 } 1024 1025 uint32_t uniform = c->num_uniforms++; 1026 1027 if (uniform >= c->uniform_array_size) { 1028 c->uniform_array_size = MAX2(MAX2(16, uniform + 1), 1029 c->uniform_array_size * 2); 1030 1031 c->uniform_data = reralloc(c, c->uniform_data, 1032 uint32_t, 1033 c->uniform_array_size); 1034 c->uniform_contents = reralloc(c, c->uniform_contents, 1035 enum quniform_contents, 1036 c->uniform_array_size); 1037 } 1038 1039 c->uniform_contents[uniform] = contents; 1040 c->uniform_data[uniform] = data; 1041 1042 return uniform; 1043} 1044 1045struct qreg 1046vir_uniform(struct v3d_compile *c, 1047 enum quniform_contents contents, 1048 uint32_t data) 1049{ 1050 struct qinst *inst = vir_NOP(c); 1051 inst->qpu.sig.ldunif = true; 1052 inst->uniform = vir_get_uniform_index(c, contents, data); 1053 inst->dst = vir_get_temp(c); 1054 c->defs[inst->dst.index] = inst; 1055 return inst->dst; 1056} 1057 1058#define OPTPASS(func) \ 1059 do { \ 1060 bool stage_progress = func(c); \ 1061 if (stage_progress) { \ 1062 progress = true; \ 1063 if (print_opt_debug) { \ 1064 fprintf(stderr, \ 1065 "VIR opt pass %2d: %s progress\n", \ 1066 pass, #func); \ 1067 } \ 1068 /*XXX vir_validate(c);*/ \ 1069 } \ 1070 } while (0) 1071 1072void 1073vir_optimize(struct v3d_compile *c) 1074{ 1075 bool print_opt_debug = false; 1076 int pass = 1; 1077 1078 while (true) { 1079 bool progress = false; 1080 1081 OPTPASS(vir_opt_copy_propagate); 1082 OPTPASS(vir_opt_redundant_flags); 1083 OPTPASS(vir_opt_dead_code); 1084 OPTPASS(vir_opt_small_immediates); 1085 1086 if (!progress) 1087 break; 1088 1089 pass++; 1090 } 1091} 1092 1093const char * 1094vir_get_stage_name(struct v3d_compile *c) 1095{ 1096 if (c->vs_key && c->vs_key->is_coord) 1097 return "MESA_SHADER_COORD"; 1098 else 1099 return gl_shader_stage_name(c->s->info.stage); 1100} 1101