1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016-2017 Broadcom 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "broadcom/common/v3d_device_info.h" 25b8e80941Smrg#include "v3d_compiler.h" 26b8e80941Smrg 27b8e80941Smrgint 28b8e80941Smrgvir_get_nsrc(struct qinst *inst) 29b8e80941Smrg{ 30b8e80941Smrg switch (inst->qpu.type) { 31b8e80941Smrg case V3D_QPU_INSTR_TYPE_BRANCH: 32b8e80941Smrg return 0; 33b8e80941Smrg case V3D_QPU_INSTR_TYPE_ALU: 34b8e80941Smrg if (inst->qpu.alu.add.op != V3D_QPU_A_NOP) 35b8e80941Smrg return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op); 36b8e80941Smrg else 37b8e80941Smrg return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op); 38b8e80941Smrg } 39b8e80941Smrg 40b8e80941Smrg return 0; 41b8e80941Smrg} 42b8e80941Smrg 43b8e80941Smrg/** 44b8e80941Smrg * Returns whether the instruction has any side effects that must be 45b8e80941Smrg * preserved. 46b8e80941Smrg */ 47b8e80941Smrgbool 48b8e80941Smrgvir_has_side_effects(struct v3d_compile *c, struct qinst *inst) 49b8e80941Smrg{ 50b8e80941Smrg switch (inst->qpu.type) { 51b8e80941Smrg case V3D_QPU_INSTR_TYPE_BRANCH: 52b8e80941Smrg return true; 53b8e80941Smrg case V3D_QPU_INSTR_TYPE_ALU: 54b8e80941Smrg switch (inst->qpu.alu.add.op) { 55b8e80941Smrg case V3D_QPU_A_SETREVF: 56b8e80941Smrg case V3D_QPU_A_SETMSF: 57b8e80941Smrg case V3D_QPU_A_VPMSETUP: 58b8e80941Smrg case V3D_QPU_A_STVPMV: 59b8e80941Smrg case V3D_QPU_A_STVPMD: 60b8e80941Smrg case V3D_QPU_A_STVPMP: 61b8e80941Smrg case V3D_QPU_A_VPMWT: 62b8e80941Smrg case V3D_QPU_A_TMUWT: 63b8e80941Smrg return true; 64b8e80941Smrg default: 65b8e80941Smrg break; 66b8e80941Smrg } 67b8e80941Smrg 68b8e80941Smrg switch (inst->qpu.alu.mul.op) { 69b8e80941Smrg case V3D_QPU_M_MULTOP: 70b8e80941Smrg return true; 71b8e80941Smrg default: 72b8e80941Smrg break; 73b8e80941Smrg } 74b8e80941Smrg } 75b8e80941Smrg 76b8e80941Smrg if (inst->qpu.sig.ldtmu || 77b8e80941Smrg inst->qpu.sig.ldvary || 78b8e80941Smrg inst->qpu.sig.wrtmuc || 79b8e80941Smrg inst->qpu.sig.thrsw) { 80b8e80941Smrg return true; 81b8e80941Smrg } 82b8e80941Smrg 
83b8e80941Smrg return false; 84b8e80941Smrg} 85b8e80941Smrg 86b8e80941Smrgbool 87b8e80941Smrgvir_is_raw_mov(struct qinst *inst) 88b8e80941Smrg{ 89b8e80941Smrg if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 90b8e80941Smrg (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV && 91b8e80941Smrg inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) { 92b8e80941Smrg return false; 93b8e80941Smrg } 94b8e80941Smrg 95b8e80941Smrg if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE || 96b8e80941Smrg inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) { 97b8e80941Smrg return false; 98b8e80941Smrg } 99b8e80941Smrg 100b8e80941Smrg if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE || 101b8e80941Smrg inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE || 102b8e80941Smrg inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE || 103b8e80941Smrg inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) { 104b8e80941Smrg return false; 105b8e80941Smrg } 106b8e80941Smrg 107b8e80941Smrg if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 108b8e80941Smrg inst->qpu.flags.mc != V3D_QPU_COND_NONE) 109b8e80941Smrg return false; 110b8e80941Smrg 111b8e80941Smrg return true; 112b8e80941Smrg} 113b8e80941Smrg 114b8e80941Smrgbool 115b8e80941Smrgvir_is_add(struct qinst *inst) 116b8e80941Smrg{ 117b8e80941Smrg return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 118b8e80941Smrg inst->qpu.alu.add.op != V3D_QPU_A_NOP); 119b8e80941Smrg} 120b8e80941Smrg 121b8e80941Smrgbool 122b8e80941Smrgvir_is_mul(struct qinst *inst) 123b8e80941Smrg{ 124b8e80941Smrg return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 125b8e80941Smrg inst->qpu.alu.mul.op != V3D_QPU_M_NOP); 126b8e80941Smrg} 127b8e80941Smrg 128b8e80941Smrgbool 129b8e80941Smrgvir_is_tex(struct qinst *inst) 130b8e80941Smrg{ 131b8e80941Smrg if (inst->dst.file == QFILE_MAGIC) 132b8e80941Smrg return v3d_qpu_magic_waddr_is_tmu(inst->dst.index); 133b8e80941Smrg 134b8e80941Smrg if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && 135b8e80941Smrg inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) { 136b8e80941Smrg 
return true; 137b8e80941Smrg } 138b8e80941Smrg 139b8e80941Smrg return false; 140b8e80941Smrg} 141b8e80941Smrg 142b8e80941Smrgbool 143b8e80941Smrgvir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) 144b8e80941Smrg{ 145b8e80941Smrg for (int i = 0; i < vir_get_nsrc(inst); i++) { 146b8e80941Smrg switch (inst->src[i].file) { 147b8e80941Smrg case QFILE_VPM: 148b8e80941Smrg return true; 149b8e80941Smrg default: 150b8e80941Smrg break; 151b8e80941Smrg } 152b8e80941Smrg } 153b8e80941Smrg 154b8e80941Smrg if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || 155b8e80941Smrg inst->qpu.sig.ldtlb || 156b8e80941Smrg inst->qpu.sig.ldtlbu || 157b8e80941Smrg inst->qpu.sig.ldvpm)) { 158b8e80941Smrg return true; 159b8e80941Smrg } 160b8e80941Smrg 161b8e80941Smrg return false; 162b8e80941Smrg} 163b8e80941Smrg 164b8e80941Smrgbool 165b8e80941Smrgvir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) 166b8e80941Smrg{ 167b8e80941Smrg switch (inst->dst.file) { 168b8e80941Smrg case QFILE_MAGIC: 169b8e80941Smrg switch (inst->dst.index) { 170b8e80941Smrg case V3D_QPU_WADDR_RECIP: 171b8e80941Smrg case V3D_QPU_WADDR_RSQRT: 172b8e80941Smrg case V3D_QPU_WADDR_EXP: 173b8e80941Smrg case V3D_QPU_WADDR_LOG: 174b8e80941Smrg case V3D_QPU_WADDR_SIN: 175b8e80941Smrg return true; 176b8e80941Smrg } 177b8e80941Smrg break; 178b8e80941Smrg default: 179b8e80941Smrg break; 180b8e80941Smrg } 181b8e80941Smrg 182b8e80941Smrg if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) 183b8e80941Smrg return true; 184b8e80941Smrg 185b8e80941Smrg return false; 186b8e80941Smrg} 187b8e80941Smrg 188b8e80941Smrgvoid 189b8e80941Smrgvir_set_unpack(struct qinst *inst, int src, 190b8e80941Smrg enum v3d_qpu_input_unpack unpack) 191b8e80941Smrg{ 192b8e80941Smrg assert(src == 0 || src == 1); 193b8e80941Smrg 194b8e80941Smrg if (vir_is_add(inst)) { 195b8e80941Smrg if (src == 0) 196b8e80941Smrg inst->qpu.alu.add.a_unpack = unpack; 197b8e80941Smrg else 198b8e80941Smrg inst->qpu.alu.add.b_unpack = unpack; 
199b8e80941Smrg } else { 200b8e80941Smrg assert(vir_is_mul(inst)); 201b8e80941Smrg if (src == 0) 202b8e80941Smrg inst->qpu.alu.mul.a_unpack = unpack; 203b8e80941Smrg else 204b8e80941Smrg inst->qpu.alu.mul.b_unpack = unpack; 205b8e80941Smrg } 206b8e80941Smrg} 207b8e80941Smrg 208b8e80941Smrgvoid 209b8e80941Smrgvir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond) 210b8e80941Smrg{ 211b8e80941Smrg if (vir_is_add(inst)) { 212b8e80941Smrg inst->qpu.flags.ac = cond; 213b8e80941Smrg } else { 214b8e80941Smrg assert(vir_is_mul(inst)); 215b8e80941Smrg inst->qpu.flags.mc = cond; 216b8e80941Smrg } 217b8e80941Smrg} 218b8e80941Smrg 219b8e80941Smrgvoid 220b8e80941Smrgvir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf) 221b8e80941Smrg{ 222b8e80941Smrg if (vir_is_add(inst)) { 223b8e80941Smrg inst->qpu.flags.apf = pf; 224b8e80941Smrg } else { 225b8e80941Smrg assert(vir_is_mul(inst)); 226b8e80941Smrg inst->qpu.flags.mpf = pf; 227b8e80941Smrg } 228b8e80941Smrg} 229b8e80941Smrg 230b8e80941Smrgvoid 231b8e80941Smrgvir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf) 232b8e80941Smrg{ 233b8e80941Smrg if (vir_is_add(inst)) { 234b8e80941Smrg inst->qpu.flags.auf = uf; 235b8e80941Smrg } else { 236b8e80941Smrg assert(vir_is_mul(inst)); 237b8e80941Smrg inst->qpu.flags.muf = uf; 238b8e80941Smrg } 239b8e80941Smrg} 240b8e80941Smrg 241b8e80941Smrg#if 0 242b8e80941Smrguint8_t 243b8e80941Smrgvir_channels_written(struct qinst *inst) 244b8e80941Smrg{ 245b8e80941Smrg if (vir_is_mul(inst)) { 246b8e80941Smrg switch (inst->dst.pack) { 247b8e80941Smrg case QPU_PACK_MUL_NOP: 248b8e80941Smrg case QPU_PACK_MUL_8888: 249b8e80941Smrg return 0xf; 250b8e80941Smrg case QPU_PACK_MUL_8A: 251b8e80941Smrg return 0x1; 252b8e80941Smrg case QPU_PACK_MUL_8B: 253b8e80941Smrg return 0x2; 254b8e80941Smrg case QPU_PACK_MUL_8C: 255b8e80941Smrg return 0x4; 256b8e80941Smrg case QPU_PACK_MUL_8D: 257b8e80941Smrg return 0x8; 258b8e80941Smrg } 259b8e80941Smrg } else { 260b8e80941Smrg switch (inst->dst.pack) { 261b8e80941Smrg case 
QPU_PACK_A_NOP: 262b8e80941Smrg case QPU_PACK_A_8888: 263b8e80941Smrg case QPU_PACK_A_8888_SAT: 264b8e80941Smrg case QPU_PACK_A_32_SAT: 265b8e80941Smrg return 0xf; 266b8e80941Smrg case QPU_PACK_A_8A: 267b8e80941Smrg case QPU_PACK_A_8A_SAT: 268b8e80941Smrg return 0x1; 269b8e80941Smrg case QPU_PACK_A_8B: 270b8e80941Smrg case QPU_PACK_A_8B_SAT: 271b8e80941Smrg return 0x2; 272b8e80941Smrg case QPU_PACK_A_8C: 273b8e80941Smrg case QPU_PACK_A_8C_SAT: 274b8e80941Smrg return 0x4; 275b8e80941Smrg case QPU_PACK_A_8D: 276b8e80941Smrg case QPU_PACK_A_8D_SAT: 277b8e80941Smrg return 0x8; 278b8e80941Smrg case QPU_PACK_A_16A: 279b8e80941Smrg case QPU_PACK_A_16A_SAT: 280b8e80941Smrg return 0x3; 281b8e80941Smrg case QPU_PACK_A_16B: 282b8e80941Smrg case QPU_PACK_A_16B_SAT: 283b8e80941Smrg return 0xc; 284b8e80941Smrg } 285b8e80941Smrg } 286b8e80941Smrg unreachable("Bad pack field"); 287b8e80941Smrg} 288b8e80941Smrg#endif 289b8e80941Smrg 290b8e80941Smrgstruct qreg 291b8e80941Smrgvir_get_temp(struct v3d_compile *c) 292b8e80941Smrg{ 293b8e80941Smrg struct qreg reg; 294b8e80941Smrg 295b8e80941Smrg reg.file = QFILE_TEMP; 296b8e80941Smrg reg.index = c->num_temps++; 297b8e80941Smrg 298b8e80941Smrg if (c->num_temps > c->defs_array_size) { 299b8e80941Smrg uint32_t old_size = c->defs_array_size; 300b8e80941Smrg c->defs_array_size = MAX2(old_size * 2, 16); 301b8e80941Smrg 302b8e80941Smrg c->defs = reralloc(c, c->defs, struct qinst *, 303b8e80941Smrg c->defs_array_size); 304b8e80941Smrg memset(&c->defs[old_size], 0, 305b8e80941Smrg sizeof(c->defs[0]) * (c->defs_array_size - old_size)); 306b8e80941Smrg 307b8e80941Smrg c->spillable = reralloc(c, c->spillable, 308b8e80941Smrg BITSET_WORD, 309b8e80941Smrg BITSET_WORDS(c->defs_array_size)); 310b8e80941Smrg for (int i = old_size; i < c->defs_array_size; i++) 311b8e80941Smrg BITSET_SET(c->spillable, i); 312b8e80941Smrg } 313b8e80941Smrg 314b8e80941Smrg return reg; 315b8e80941Smrg} 316b8e80941Smrg 317b8e80941Smrgstruct qinst * 
318b8e80941Smrgvir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1) 319b8e80941Smrg{ 320b8e80941Smrg struct qinst *inst = calloc(1, sizeof(*inst)); 321b8e80941Smrg 322b8e80941Smrg inst->qpu = v3d_qpu_nop(); 323b8e80941Smrg inst->qpu.alu.add.op = op; 324b8e80941Smrg 325b8e80941Smrg inst->dst = dst; 326b8e80941Smrg inst->src[0] = src0; 327b8e80941Smrg inst->src[1] = src1; 328b8e80941Smrg inst->uniform = ~0; 329b8e80941Smrg 330b8e80941Smrg return inst; 331b8e80941Smrg} 332b8e80941Smrg 333b8e80941Smrgstruct qinst * 334b8e80941Smrgvir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1) 335b8e80941Smrg{ 336b8e80941Smrg struct qinst *inst = calloc(1, sizeof(*inst)); 337b8e80941Smrg 338b8e80941Smrg inst->qpu = v3d_qpu_nop(); 339b8e80941Smrg inst->qpu.alu.mul.op = op; 340b8e80941Smrg 341b8e80941Smrg inst->dst = dst; 342b8e80941Smrg inst->src[0] = src0; 343b8e80941Smrg inst->src[1] = src1; 344b8e80941Smrg inst->uniform = ~0; 345b8e80941Smrg 346b8e80941Smrg return inst; 347b8e80941Smrg} 348b8e80941Smrg 349b8e80941Smrgstruct qinst * 350b8e80941Smrgvir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) 351b8e80941Smrg{ 352b8e80941Smrg struct qinst *inst = calloc(1, sizeof(*inst)); 353b8e80941Smrg 354b8e80941Smrg inst->qpu = v3d_qpu_nop(); 355b8e80941Smrg inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH; 356b8e80941Smrg inst->qpu.branch.cond = cond; 357b8e80941Smrg inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE; 358b8e80941Smrg inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL; 359b8e80941Smrg inst->qpu.branch.ub = true; 360b8e80941Smrg inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL; 361b8e80941Smrg 362b8e80941Smrg inst->dst = vir_nop_reg(); 363b8e80941Smrg inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0); 364b8e80941Smrg 365b8e80941Smrg return inst; 366b8e80941Smrg} 367b8e80941Smrg 368b8e80941Smrgstatic void 369b8e80941Smrgvir_emit(struct v3d_compile *c, struct qinst *inst) 
370b8e80941Smrg{ 371b8e80941Smrg switch (c->cursor.mode) { 372b8e80941Smrg case vir_cursor_add: 373b8e80941Smrg list_add(&inst->link, c->cursor.link); 374b8e80941Smrg break; 375b8e80941Smrg case vir_cursor_addtail: 376b8e80941Smrg list_addtail(&inst->link, c->cursor.link); 377b8e80941Smrg break; 378b8e80941Smrg } 379b8e80941Smrg 380b8e80941Smrg c->cursor = vir_after_inst(inst); 381b8e80941Smrg c->live_intervals_valid = false; 382b8e80941Smrg} 383b8e80941Smrg 384b8e80941Smrg/* Updates inst to write to a new temporary, emits it, and notes the def. */ 385b8e80941Smrgstruct qreg 386b8e80941Smrgvir_emit_def(struct v3d_compile *c, struct qinst *inst) 387b8e80941Smrg{ 388b8e80941Smrg assert(inst->dst.file == QFILE_NULL); 389b8e80941Smrg 390b8e80941Smrg /* If we're emitting an instruction that's a def, it had better be 391b8e80941Smrg * writing a register. 392b8e80941Smrg */ 393b8e80941Smrg if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { 394b8e80941Smrg assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP || 395b8e80941Smrg v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op)); 396b8e80941Smrg assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP || 397b8e80941Smrg v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op)); 398b8e80941Smrg } 399b8e80941Smrg 400b8e80941Smrg inst->dst = vir_get_temp(c); 401b8e80941Smrg 402b8e80941Smrg if (inst->dst.file == QFILE_TEMP) 403b8e80941Smrg c->defs[inst->dst.index] = inst; 404b8e80941Smrg 405b8e80941Smrg vir_emit(c, inst); 406b8e80941Smrg 407b8e80941Smrg return inst->dst; 408b8e80941Smrg} 409b8e80941Smrg 410b8e80941Smrgstruct qinst * 411b8e80941Smrgvir_emit_nondef(struct v3d_compile *c, struct qinst *inst) 412b8e80941Smrg{ 413b8e80941Smrg if (inst->dst.file == QFILE_TEMP) 414b8e80941Smrg c->defs[inst->dst.index] = NULL; 415b8e80941Smrg 416b8e80941Smrg vir_emit(c, inst); 417b8e80941Smrg 418b8e80941Smrg return inst; 419b8e80941Smrg} 420b8e80941Smrg 421b8e80941Smrgstruct qblock * 422b8e80941Smrgvir_new_block(struct v3d_compile *c) 423b8e80941Smrg{ 424b8e80941Smrg struct 
qblock *block = rzalloc(c, struct qblock); 425b8e80941Smrg 426b8e80941Smrg list_inithead(&block->instructions); 427b8e80941Smrg 428b8e80941Smrg block->predecessors = _mesa_set_create(block, 429b8e80941Smrg _mesa_hash_pointer, 430b8e80941Smrg _mesa_key_pointer_equal); 431b8e80941Smrg 432b8e80941Smrg block->index = c->next_block_index++; 433b8e80941Smrg 434b8e80941Smrg return block; 435b8e80941Smrg} 436b8e80941Smrg 437b8e80941Smrgvoid 438b8e80941Smrgvir_set_emit_block(struct v3d_compile *c, struct qblock *block) 439b8e80941Smrg{ 440b8e80941Smrg c->cur_block = block; 441b8e80941Smrg c->cursor = vir_after_block(block); 442b8e80941Smrg list_addtail(&block->link, &c->blocks); 443b8e80941Smrg} 444b8e80941Smrg 445b8e80941Smrgstruct qblock * 446b8e80941Smrgvir_entry_block(struct v3d_compile *c) 447b8e80941Smrg{ 448b8e80941Smrg return list_first_entry(&c->blocks, struct qblock, link); 449b8e80941Smrg} 450b8e80941Smrg 451b8e80941Smrgstruct qblock * 452b8e80941Smrgvir_exit_block(struct v3d_compile *c) 453b8e80941Smrg{ 454b8e80941Smrg return list_last_entry(&c->blocks, struct qblock, link); 455b8e80941Smrg} 456b8e80941Smrg 457b8e80941Smrgvoid 458b8e80941Smrgvir_link_blocks(struct qblock *predecessor, struct qblock *successor) 459b8e80941Smrg{ 460b8e80941Smrg _mesa_set_add(successor->predecessors, predecessor); 461b8e80941Smrg if (predecessor->successors[0]) { 462b8e80941Smrg assert(!predecessor->successors[1]); 463b8e80941Smrg predecessor->successors[1] = successor; 464b8e80941Smrg } else { 465b8e80941Smrg predecessor->successors[0] = successor; 466b8e80941Smrg } 467b8e80941Smrg} 468b8e80941Smrg 469b8e80941Smrgconst struct v3d_compiler * 470b8e80941Smrgv3d_compiler_init(const struct v3d_device_info *devinfo) 471b8e80941Smrg{ 472b8e80941Smrg struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler); 473b8e80941Smrg if (!compiler) 474b8e80941Smrg return NULL; 475b8e80941Smrg 476b8e80941Smrg compiler->devinfo = devinfo; 477b8e80941Smrg 478b8e80941Smrg if 
(!vir_init_reg_sets(compiler)) { 479b8e80941Smrg ralloc_free(compiler); 480b8e80941Smrg return NULL; 481b8e80941Smrg } 482b8e80941Smrg 483b8e80941Smrg return compiler; 484b8e80941Smrg} 485b8e80941Smrg 486b8e80941Smrgvoid 487b8e80941Smrgv3d_compiler_free(const struct v3d_compiler *compiler) 488b8e80941Smrg{ 489b8e80941Smrg ralloc_free((void *)compiler); 490b8e80941Smrg} 491b8e80941Smrg 492b8e80941Smrgstatic struct v3d_compile * 493b8e80941Smrgvir_compile_init(const struct v3d_compiler *compiler, 494b8e80941Smrg struct v3d_key *key, 495b8e80941Smrg nir_shader *s, 496b8e80941Smrg void (*debug_output)(const char *msg, 497b8e80941Smrg void *debug_output_data), 498b8e80941Smrg void *debug_output_data, 499b8e80941Smrg int program_id, int variant_id) 500b8e80941Smrg{ 501b8e80941Smrg struct v3d_compile *c = rzalloc(NULL, struct v3d_compile); 502b8e80941Smrg 503b8e80941Smrg c->compiler = compiler; 504b8e80941Smrg c->devinfo = compiler->devinfo; 505b8e80941Smrg c->key = key; 506b8e80941Smrg c->program_id = program_id; 507b8e80941Smrg c->variant_id = variant_id; 508b8e80941Smrg c->threads = 4; 509b8e80941Smrg c->debug_output = debug_output; 510b8e80941Smrg c->debug_output_data = debug_output_data; 511b8e80941Smrg 512b8e80941Smrg s = nir_shader_clone(c, s); 513b8e80941Smrg c->s = s; 514b8e80941Smrg 515b8e80941Smrg list_inithead(&c->blocks); 516b8e80941Smrg vir_set_emit_block(c, vir_new_block(c)); 517b8e80941Smrg 518b8e80941Smrg c->output_position_index = -1; 519b8e80941Smrg c->output_sample_mask_index = -1; 520b8e80941Smrg 521b8e80941Smrg c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer, 522b8e80941Smrg _mesa_key_pointer_equal); 523b8e80941Smrg 524b8e80941Smrg return c; 525b8e80941Smrg} 526b8e80941Smrg 527b8e80941Smrgstatic int 528b8e80941Smrgtype_size_vec4(const struct glsl_type *type, bool bindless) 529b8e80941Smrg{ 530b8e80941Smrg return glsl_count_attribute_slots(type, false); 531b8e80941Smrg} 532b8e80941Smrg 533b8e80941Smrgstatic void 
534b8e80941Smrgv3d_lower_nir(struct v3d_compile *c) 535b8e80941Smrg{ 536b8e80941Smrg struct nir_lower_tex_options tex_options = { 537b8e80941Smrg .lower_txd = true, 538b8e80941Smrg .lower_tg4_broadcom_swizzle = true, 539b8e80941Smrg 540b8e80941Smrg .lower_rect = false, /* XXX: Use this on V3D 3.x */ 541b8e80941Smrg .lower_txp = ~0, 542b8e80941Smrg /* Apply swizzles to all samplers. */ 543b8e80941Smrg .swizzle_result = ~0, 544b8e80941Smrg }; 545b8e80941Smrg 546b8e80941Smrg /* Lower the format swizzle and (for 32-bit returns) 547b8e80941Smrg * ARB_texture_swizzle-style swizzle. 548b8e80941Smrg */ 549b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) { 550b8e80941Smrg for (int j = 0; j < 4; j++) 551b8e80941Smrg tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j]; 552b8e80941Smrg 553b8e80941Smrg if (c->key->tex[i].clamp_s) 554b8e80941Smrg tex_options.saturate_s |= 1 << i; 555b8e80941Smrg if (c->key->tex[i].clamp_t) 556b8e80941Smrg tex_options.saturate_t |= 1 << i; 557b8e80941Smrg if (c->key->tex[i].clamp_r) 558b8e80941Smrg tex_options.saturate_r |= 1 << i; 559b8e80941Smrg if (c->key->tex[i].return_size == 16) { 560b8e80941Smrg tex_options.lower_tex_packing[i] = 561b8e80941Smrg nir_lower_tex_packing_16; 562b8e80941Smrg } 563b8e80941Smrg } 564b8e80941Smrg 565b8e80941Smrg /* CS textures may not have return_size reflecting the shadow state. 
*/ 566b8e80941Smrg nir_foreach_variable(var, &c->s->uniforms) { 567b8e80941Smrg const struct glsl_type *type = glsl_without_array(var->type); 568b8e80941Smrg unsigned array_len = MAX2(glsl_get_length(var->type), 1); 569b8e80941Smrg 570b8e80941Smrg if (!glsl_type_is_sampler(type) || 571b8e80941Smrg !glsl_sampler_type_is_shadow(type)) 572b8e80941Smrg continue; 573b8e80941Smrg 574b8e80941Smrg for (int i = 0; i < array_len; i++) { 575b8e80941Smrg tex_options.lower_tex_packing[var->data.binding + i] = 576b8e80941Smrg nir_lower_tex_packing_16; 577b8e80941Smrg } 578b8e80941Smrg } 579b8e80941Smrg 580b8e80941Smrg NIR_PASS_V(c->s, nir_lower_tex, &tex_options); 581b8e80941Smrg NIR_PASS_V(c->s, nir_lower_system_values); 582b8e80941Smrg 583b8e80941Smrg NIR_PASS_V(c->s, nir_lower_vars_to_scratch, 584b8e80941Smrg nir_var_function_temp, 585b8e80941Smrg 0, 586b8e80941Smrg glsl_get_natural_size_align_bytes); 587b8e80941Smrg NIR_PASS_V(c->s, v3d_nir_lower_scratch); 588b8e80941Smrg} 589b8e80941Smrg 590b8e80941Smrgstatic void 591b8e80941Smrgv3d_set_prog_data_uniforms(struct v3d_compile *c, 592b8e80941Smrg struct v3d_prog_data *prog_data) 593b8e80941Smrg{ 594b8e80941Smrg int count = c->num_uniforms; 595b8e80941Smrg struct v3d_uniform_list *ulist = &prog_data->uniforms; 596b8e80941Smrg 597b8e80941Smrg ulist->count = count; 598b8e80941Smrg ulist->data = ralloc_array(prog_data, uint32_t, count); 599b8e80941Smrg memcpy(ulist->data, c->uniform_data, 600b8e80941Smrg count * sizeof(*ulist->data)); 601b8e80941Smrg ulist->contents = ralloc_array(prog_data, enum quniform_contents, count); 602b8e80941Smrg memcpy(ulist->contents, c->uniform_contents, 603b8e80941Smrg count * sizeof(*ulist->contents)); 604b8e80941Smrg} 605b8e80941Smrg 606b8e80941Smrgstatic void 607b8e80941Smrgv3d_vs_set_prog_data(struct v3d_compile *c, 608b8e80941Smrg struct v3d_vs_prog_data *prog_data) 609b8e80941Smrg{ 610b8e80941Smrg /* The vertex data gets format converted by the VPM so that 611b8e80941Smrg * each attribute 
channel takes up a VPM column. Precompute 612b8e80941Smrg * the sizes for the shader record. 613b8e80941Smrg */ 614b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) { 615b8e80941Smrg prog_data->vattr_sizes[i] = c->vattr_sizes[i]; 616b8e80941Smrg prog_data->vpm_input_size += c->vattr_sizes[i]; 617b8e80941Smrg } 618b8e80941Smrg 619b8e80941Smrg prog_data->uses_vid = (c->s->info.system_values_read & 620b8e80941Smrg (1ull << SYSTEM_VALUE_VERTEX_ID)); 621b8e80941Smrg prog_data->uses_iid = (c->s->info.system_values_read & 622b8e80941Smrg (1ull << SYSTEM_VALUE_INSTANCE_ID)); 623b8e80941Smrg 624b8e80941Smrg if (prog_data->uses_vid) 625b8e80941Smrg prog_data->vpm_input_size++; 626b8e80941Smrg if (prog_data->uses_iid) 627b8e80941Smrg prog_data->vpm_input_size++; 628b8e80941Smrg 629b8e80941Smrg /* Input/output segment size are in sectors (8 rows of 32 bits per 630b8e80941Smrg * channel). 631b8e80941Smrg */ 632b8e80941Smrg prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8; 633b8e80941Smrg prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8; 634b8e80941Smrg 635b8e80941Smrg /* Set us up for shared input/output segments. This is apparently 636b8e80941Smrg * necessary for our VCM setup to avoid varying corruption. 637b8e80941Smrg */ 638b8e80941Smrg prog_data->separate_segments = false; 639b8e80941Smrg prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size, 640b8e80941Smrg prog_data->vpm_input_size); 641b8e80941Smrg prog_data->vpm_input_size = 0; 642b8e80941Smrg 643b8e80941Smrg /* Compute VCM cache size. We set up our program to take up less than 644b8e80941Smrg * half of the VPM, so that any set of bin and render programs won't 645b8e80941Smrg * run out of space. We need space for at least one input segment, 646b8e80941Smrg * and then allocate the rest to output segments (one for the current 647b8e80941Smrg * program, the rest to VCM). 
The valid range of the VCM cache size 648b8e80941Smrg * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4 649b8e80941Smrg * batches. 650b8e80941Smrg */ 651b8e80941Smrg assert(c->devinfo->vpm_size); 652b8e80941Smrg int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8; 653b8e80941Smrg int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size; 654b8e80941Smrg int half_vpm = vpm_size_in_sectors / 2; 655b8e80941Smrg int vpm_output_sectors = half_vpm - prog_data->vpm_input_size; 656b8e80941Smrg int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size; 657b8e80941Smrg assert(vpm_output_batches >= 2); 658b8e80941Smrg prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4); 659b8e80941Smrg} 660b8e80941Smrg 661b8e80941Smrgstatic void 662b8e80941Smrgv3d_set_fs_prog_data_inputs(struct v3d_compile *c, 663b8e80941Smrg struct v3d_fs_prog_data *prog_data) 664b8e80941Smrg{ 665b8e80941Smrg prog_data->num_inputs = c->num_inputs; 666b8e80941Smrg memcpy(prog_data->input_slots, c->input_slots, 667b8e80941Smrg c->num_inputs * sizeof(*c->input_slots)); 668b8e80941Smrg 669b8e80941Smrg STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) > 670b8e80941Smrg (V3D_MAX_FS_INPUTS - 1) / 24); 671b8e80941Smrg for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) { 672b8e80941Smrg if (BITSET_TEST(c->flat_shade_flags, i)) 673b8e80941Smrg prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24); 674b8e80941Smrg 675b8e80941Smrg if (BITSET_TEST(c->noperspective_flags, i)) 676b8e80941Smrg prog_data->noperspective_flags[i / 24] |= 1 << (i % 24); 677b8e80941Smrg 678b8e80941Smrg if (BITSET_TEST(c->centroid_flags, i)) 679b8e80941Smrg prog_data->centroid_flags[i / 24] |= 1 << (i % 24); 680b8e80941Smrg } 681b8e80941Smrg} 682b8e80941Smrg 683b8e80941Smrgstatic void 684b8e80941Smrgv3d_fs_set_prog_data(struct v3d_compile *c, 685b8e80941Smrg struct v3d_fs_prog_data *prog_data) 686b8e80941Smrg{ 687b8e80941Smrg v3d_set_fs_prog_data_inputs(c, prog_data); 688b8e80941Smrg prog_data->writes_z 
= c->writes_z; 689b8e80941Smrg prog_data->disable_ez = !c->s->info.fs.early_fragment_tests; 690b8e80941Smrg prog_data->uses_center_w = c->uses_center_w; 691b8e80941Smrg} 692b8e80941Smrg 693b8e80941Smrgstatic void 694b8e80941Smrgv3d_cs_set_prog_data(struct v3d_compile *c, 695b8e80941Smrg struct v3d_compute_prog_data *prog_data) 696b8e80941Smrg{ 697b8e80941Smrg prog_data->shared_size = c->s->info.cs.shared_size; 698b8e80941Smrg} 699b8e80941Smrg 700b8e80941Smrgstatic void 701b8e80941Smrgv3d_set_prog_data(struct v3d_compile *c, 702b8e80941Smrg struct v3d_prog_data *prog_data) 703b8e80941Smrg{ 704b8e80941Smrg prog_data->threads = c->threads; 705b8e80941Smrg prog_data->single_seg = !c->last_thrsw; 706b8e80941Smrg prog_data->spill_size = c->spill_size; 707b8e80941Smrg 708b8e80941Smrg v3d_set_prog_data_uniforms(c, prog_data); 709b8e80941Smrg 710b8e80941Smrg if (c->s->info.stage == MESA_SHADER_COMPUTE) { 711b8e80941Smrg v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data); 712b8e80941Smrg } else if (c->s->info.stage == MESA_SHADER_VERTEX) { 713b8e80941Smrg v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data); 714b8e80941Smrg } else { 715b8e80941Smrg assert(c->s->info.stage == MESA_SHADER_FRAGMENT); 716b8e80941Smrg v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data); 717b8e80941Smrg } 718b8e80941Smrg} 719b8e80941Smrg 720b8e80941Smrgstatic uint64_t * 721b8e80941Smrgv3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size) 722b8e80941Smrg{ 723b8e80941Smrg *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t); 724b8e80941Smrg 725b8e80941Smrg uint64_t *qpu_insts = malloc(*final_assembly_size); 726b8e80941Smrg if (!qpu_insts) 727b8e80941Smrg return NULL; 728b8e80941Smrg 729b8e80941Smrg memcpy(qpu_insts, c->qpu_insts, *final_assembly_size); 730b8e80941Smrg 731b8e80941Smrg vir_compile_destroy(c); 732b8e80941Smrg 733b8e80941Smrg return qpu_insts; 734b8e80941Smrg} 735b8e80941Smrg 736b8e80941Smrgstatic void 
737b8e80941Smrgv3d_nir_lower_vs_early(struct v3d_compile *c) 738b8e80941Smrg{ 739b8e80941Smrg /* Split our I/O vars and dead code eliminate the unused 740b8e80941Smrg * components. 741b8e80941Smrg */ 742b8e80941Smrg NIR_PASS_V(c->s, nir_lower_io_to_scalar_early, 743b8e80941Smrg nir_var_shader_in | nir_var_shader_out); 744b8e80941Smrg uint64_t used_outputs[4] = {0}; 745b8e80941Smrg for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { 746b8e80941Smrg int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]); 747b8e80941Smrg int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]); 748b8e80941Smrg used_outputs[comp] |= 1ull << slot; 749b8e80941Smrg } 750b8e80941Smrg NIR_PASS_V(c->s, nir_remove_unused_io_vars, 751b8e80941Smrg &c->s->outputs, used_outputs, NULL); /* demotes to globals */ 752b8e80941Smrg NIR_PASS_V(c->s, nir_lower_global_vars_to_local); 753b8e80941Smrg v3d_optimize_nir(c->s); 754b8e80941Smrg NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in); 755b8e80941Smrg NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 756b8e80941Smrg type_size_vec4, 757b8e80941Smrg (nir_lower_io_options)0); 758b8e80941Smrg} 759b8e80941Smrg 760b8e80941Smrgstatic void 761b8e80941Smrgv3d_fixup_fs_output_types(struct v3d_compile *c) 762b8e80941Smrg{ 763b8e80941Smrg nir_foreach_variable(var, &c->s->outputs) { 764b8e80941Smrg uint32_t mask = 0; 765b8e80941Smrg 766b8e80941Smrg switch (var->data.location) { 767b8e80941Smrg case FRAG_RESULT_COLOR: 768b8e80941Smrg mask = ~0; 769b8e80941Smrg break; 770b8e80941Smrg case FRAG_RESULT_DATA0: 771b8e80941Smrg case FRAG_RESULT_DATA1: 772b8e80941Smrg case FRAG_RESULT_DATA2: 773b8e80941Smrg case FRAG_RESULT_DATA3: 774b8e80941Smrg mask = 1 << (var->data.location - FRAG_RESULT_DATA0); 775b8e80941Smrg break; 776b8e80941Smrg } 777b8e80941Smrg 778b8e80941Smrg if (c->fs_key->int_color_rb & mask) { 779b8e80941Smrg var->type = 780b8e80941Smrg glsl_vector_type(GLSL_TYPE_INT, 781b8e80941Smrg glsl_get_components(var->type)); 
782b8e80941Smrg } else if (c->fs_key->uint_color_rb & mask) { 783b8e80941Smrg var->type = 784b8e80941Smrg glsl_vector_type(GLSL_TYPE_UINT, 785b8e80941Smrg glsl_get_components(var->type)); 786b8e80941Smrg } 787b8e80941Smrg } 788b8e80941Smrg} 789b8e80941Smrg 790b8e80941Smrgstatic void 791b8e80941Smrgv3d_nir_lower_fs_early(struct v3d_compile *c) 792b8e80941Smrg{ 793b8e80941Smrg if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb) 794b8e80941Smrg v3d_fixup_fs_output_types(c); 795b8e80941Smrg 796b8e80941Smrg /* If the shader has no non-TLB side effects, we can promote it to 797b8e80941Smrg * enabling early_fragment_tests even if the user didn't. 798b8e80941Smrg */ 799b8e80941Smrg if (!(c->s->info.num_images || 800b8e80941Smrg c->s->info.num_ssbos || 801b8e80941Smrg c->s->info.num_abos)) { 802b8e80941Smrg c->s->info.fs.early_fragment_tests = true; 803b8e80941Smrg } 804b8e80941Smrg} 805b8e80941Smrg 806b8e80941Smrgstatic void 807b8e80941Smrgv3d_nir_lower_vs_late(struct v3d_compile *c) 808b8e80941Smrg{ 809b8e80941Smrg if (c->vs_key->clamp_color) 810b8e80941Smrg NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); 811b8e80941Smrg 812b8e80941Smrg if (c->key->ucp_enables) { 813b8e80941Smrg NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables, 814b8e80941Smrg false); 815b8e80941Smrg NIR_PASS_V(c->s, nir_lower_io_to_scalar, 816b8e80941Smrg nir_var_shader_out); 817b8e80941Smrg } 818b8e80941Smrg 819b8e80941Smrg /* Note: VS output scalarizing must happen after nir_lower_clip_vs. 
*/ 820b8e80941Smrg NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); 821b8e80941Smrg} 822b8e80941Smrg 823b8e80941Smrgstatic void 824b8e80941Smrgv3d_nir_lower_fs_late(struct v3d_compile *c) 825b8e80941Smrg{ 826b8e80941Smrg if (c->fs_key->light_twoside) 827b8e80941Smrg NIR_PASS_V(c->s, nir_lower_two_sided_color); 828b8e80941Smrg 829b8e80941Smrg if (c->fs_key->clamp_color) 830b8e80941Smrg NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); 831b8e80941Smrg 832b8e80941Smrg if (c->fs_key->alpha_test) { 833b8e80941Smrg NIR_PASS_V(c->s, nir_lower_alpha_test, 834b8e80941Smrg c->fs_key->alpha_test_func, 835b8e80941Smrg false); 836b8e80941Smrg } 837b8e80941Smrg 838b8e80941Smrg if (c->key->ucp_enables) 839b8e80941Smrg NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); 840b8e80941Smrg 841b8e80941Smrg /* Note: FS input scalarizing must happen after 842b8e80941Smrg * nir_lower_two_sided_color, which only handles a vec4 at a time. 843b8e80941Smrg */ 844b8e80941Smrg NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in); 845b8e80941Smrg} 846b8e80941Smrg 847b8e80941Smrgstatic uint32_t 848b8e80941Smrgvir_get_max_temps(struct v3d_compile *c) 849b8e80941Smrg{ 850b8e80941Smrg int max_ip = 0; 851b8e80941Smrg vir_for_each_inst_inorder(inst, c) 852b8e80941Smrg max_ip++; 853b8e80941Smrg 854b8e80941Smrg uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip); 855b8e80941Smrg 856b8e80941Smrg for (int t = 0; t < c->num_temps; t++) { 857b8e80941Smrg for (int i = c->temp_start[t]; (i < c->temp_end[t] && 858b8e80941Smrg i < max_ip); i++) { 859b8e80941Smrg if (i > max_ip) 860b8e80941Smrg break; 861b8e80941Smrg pressure[i]++; 862b8e80941Smrg } 863b8e80941Smrg } 864b8e80941Smrg 865b8e80941Smrg uint32_t max_temps = 0; 866b8e80941Smrg for (int i = 0; i < max_ip; i++) 867b8e80941Smrg max_temps = MAX2(max_temps, pressure[i]); 868b8e80941Smrg 869b8e80941Smrg ralloc_free(pressure); 870b8e80941Smrg 871b8e80941Smrg return max_temps; 872b8e80941Smrg} 873b8e80941Smrg 
/**
 * Entry point: compiles a NIR shader to a malloc'd QPU instruction buffer.
 *
 * Allocates the stage-specific prog_data (returned through *out_prog_data;
 * ownership passes to the caller), runs stage-specific and shared NIR
 * lowering, translates to VIR and then to QPU code.  Returns the QPU
 * instructions with their size in *final_assembly_size.
 */
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_prog_data *prog_data;
        struct v3d_compile *c = vir_compile_init(compiler, key, s,
                                                 debug_output, debug_output_data,
                                                 program_id, variant_id);

        /* Stash the stage-specific view of the key and allocate the
         * matching prog_data subclass (zero-initialized).
         */
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                break;
        case MESA_SHADER_COMPUTE:
                prog_data = rzalloc_size(NULL,
                                         sizeof(struct v3d_compute_prog_data));
                break;
        default:
                unreachable("unsupported shader stage");
        }

        /* Stage-specific lowering that must run before the shared
         * v3d_lower_nir() pass.
         */
        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_early(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_early(c);
        }

        v3d_lower_nir(c);

        /* Stage-specific lowering that depends on the shared lowering
         * having already run.
         */
        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_late(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                assert(c->s->info.stage ==
                       MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_late(c);
        }

        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv);

        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        /* Emit a shader-db statistics line through the debug callback
         * (and to stderr when V3D_DEBUG_SHADERDB is set).  asprintf
         * returns < 0 on allocation failure, in which case the line is
         * silently skipped.
         */
        char *shaderdb;
        int ret = asprintf(&shaderdb,
                           "%s shader: %d inst, %d threads, %d loops, "
                           "%d uniforms, %d max-temps, %d:%d spills:fills",
                           vir_get_stage_name(c),
                           c->qpu_inst_count,
                           c->threads,
                           c->loops,
                           c->num_uniforms,
                           vir_get_max_temps(c),
                           c->spills,
                           c->fills);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

        return v3d_return_qpu_insts(c, final_assembly_size);
}

/**
 * Unlinks and frees an instruction, clearing its SSA-style def entry and
 * invalidating the cached live intervals.  Must not be called on the
 * instruction the compile cursor currently points at.
 */
void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

/**
 * Would chase a chain of MOVs back to the original source register; the
 * VIR port of this VC4 optimization is still disabled (see the XXX block
 * below), so it currently returns reg unchanged.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        /* Everything else (defs, uniforms, temp arrays) is ralloc'd off c. */
        ralloc_free(c);
}

/**
 * Returns the index of the (contents, data) uniform in the shader's
 * uniform stream, deduplicating against existing entries and growing the
 * arrays (doubling, minimum 16) when full.
 */
uint32_t
vir_get_uniform_index(struct v3d_compile *c,
                      enum quniform_contents contents,
                      uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return i;
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return uniform;
}

/**
 * Emits a NOP with the ldunif signal to load the given uniform into a
 * fresh temp, and returns that temp.
 */
struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        struct qinst *inst = vir_NOP(c);
        inst->qpu.sig.ldunif = true;
        inst->uniform = vir_get_uniform_index(c, contents, data);
        inst->dst = vir_get_temp(c);
        c->defs[inst->dst.index] = inst;
        return inst->dst;
}

/* Runs one optimization pass, accumulating progress and optionally
 * printing which pass made progress.  Uses `c`, `progress`, `pass` and
 * `print_opt_debug` from the enclosing scope.
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)

/**
 * Runs the VIR optimization loop to a fixed point (until no pass makes
 * progress).
 */
void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_redundant_flags);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        /* Coordinate-shader VS variants get a non-gl name of their own. */
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_COORD";
        else
                return gl_shader_stage_name(c->s->info.stage);
}