1/* 2 * Copyright (c) 2019 Zodiac Inflight Innovations 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 */ 26 27#include "etnaviv_compiler_nir.h" 28#include "util/compiler.h" 29 30/* to map nir srcs should to etna_inst srcs */ 31enum { 32 SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4), 33 SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4), 34 SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4), 35 SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4), 36 SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4), 37 SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4), 38 SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4), 39}; 40 41/* info to translate a nir op to etna_inst */ 42struct etna_op_info { 43 uint8_t opcode; /* INST_OPCODE_ */ 44 uint8_t src; /* SRC_ enum */ 45 uint8_t cond; /* INST_CONDITION_ */ 46 uint8_t type; /* INST_TYPE_ */ 47}; 48 49static const struct etna_op_info etna_ops[] = { 50 [0 ... nir_num_opcodes - 1] = {0xff}, 51#undef TRUE 52#undef FALSE 53#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \ 54 INST_OPCODE_##op, \ 55 SRC_##src, \ 56 INST_CONDITION_##cond, \ 57 INST_TYPE_##type \ 58} 59#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32) 60#define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32) 61#define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32) 62#define OP(nir, op, src) OPC(nir, op, src, TRUE) 63#define IOP(nir, op, src) IOPC(nir, op, src, TRUE) 64#define UOP(nir, op, src) UOPC(nir, op, src, TRUE) 65 OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0), 66 OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2), 67 OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X), 68 OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT), 69 OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0), 70 OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0), 71 OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0), 72 OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0), 73 OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT), 74 OPC(fcsel, SELECT, 0_1_2, NZ), 75 OP(fdiv, DIV, 0_1_X), 76 OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0), 77 78 /* type convert */ 79 IOP(i2f32, I2F, 0_X_X), 80 UOP(u2f32, I2F, 0_X_X), 81 IOP(f2i32, F2I, 0_X_X), 82 UOP(f2u32, F2I, 0_X_X), 83 UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */ 84 UOP(b2i32, AND, 0_X_X), /* AND with 1 */ 85 OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */ 86 UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */ 87 88 /* arithmetic */ 89 IOP(iadd, ADD, 0_X_1), 90 IOP(imul, IMULLO0, 0_1_X), 91 /* IOP(imad, IMADLO0, 0_1_2), */ 92 IOP(ineg, ADD, X_X_0), /* ADD 0, -x */ 93 IOP(iabs, IABS, X_X_0), 94 IOP(isign, SIGN, X_X_0), 95 IOPC(imin, SELECT, 0_1_0, GT), 96 IOPC(imax, SELECT, 0_1_0, LT), 97 UOPC(umin, SELECT, 0_1_0, GT), 98 UOPC(umax, SELECT, 0_1_0, LT), 99 100 /* select */ 101 UOPC(b32csel, SELECT, 0_1_2, NZ), 102 103 /* compare with int result */ 104 OPC(feq32, CMP, 0_1_X, EQ), 105 OPC(fneu32, CMP, 0_1_X, NE), 106 OPC(fge32, CMP, 0_1_X, GE), 107 OPC(flt32, CMP, 0_1_X, LT), 108 IOPC(ieq32, CMP, 0_1_X, EQ), 109 IOPC(ine32, CMP, 0_1_X, NE), 110 IOPC(ige32, CMP, 0_1_X, GE), 111 IOPC(ilt32, CMP, 0_1_X, LT), 112 UOPC(uge32, CMP, 0_1_X, GE), 113 UOPC(ult32, CMP, 0_1_X, LT), 114 115 /* bit ops */ 116 IOP(ior, OR, 0_X_1), 117 IOP(iand, AND, 0_X_1), 118 IOP(ixor, XOR, 0_X_1), 119 IOP(inot, NOT, X_X_0), 120 IOP(ishl, LSHIFT, 0_X_1), 121 IOP(ishr, RSHIFT, 0_X_1), 122 UOP(ushr, RSHIFT, 0_X_1), 123}; 124 125void 126etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst, 127 struct etna_inst_src src[3], bool saturate) 128{ 129 struct etna_op_info ei = etna_ops[op]; 130 unsigned swiz_scalar = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1); 131 132 if (ei.opcode == 0xff) 133 compile_error(c, "Unhandled ALU op: %s\n", nir_op_infos[op].name); 134 135 struct etna_inst inst = { 136 .opcode = ei.opcode, 137 .type = ei.type, 138 .cond = ei.cond, 139 .dst = dst, 140 .sat = saturate, 141 }; 142 143 switch (op) { 144 case nir_op_fdiv: 145 case nir_op_flog2: 146 case nir_op_fsin: 147 case nir_op_fcos: 148 if (c->specs->has_new_transcendentals) 149 inst.tex.amode = 1; 150 FALLTHROUGH; 151 case nir_op_frsq: 152 case nir_op_frcp: 153 case nir_op_fexp2: 154 case nir_op_fsqrt: 155 case nir_op_imul: 156 /* scalar instructions we want src to be in x component */ 157 src[0].swiz = inst_swiz_compose(src[0].swiz, swiz_scalar); 158 src[1].swiz = inst_swiz_compose(src[1].swiz, swiz_scalar); 159 break; 160 /* deal with instructions which don't have 1:1 mapping */ 161 case nir_op_b2f32: 162 inst.src[2] = etna_immediate_float(1.0f); 163 break; 164 case nir_op_b2i32: 165 inst.src[2] = etna_immediate_int(1); 166 break; 167 case nir_op_f2b32: 168 inst.src[1] = etna_immediate_float(0.0f); 169 break; 170 case nir_op_i2b32: 171 inst.src[1] = etna_immediate_int(0); 172 break; 173 case nir_op_ineg: 174 inst.src[0] = etna_immediate_int(0); 175 src[0].neg = 1; 176 break; 177 default: 178 break; 179 } 180 181 /* set the "true" value for CMP instructions */ 182 if (inst.opcode == INST_OPCODE_CMP) 183 inst.src[2] = etna_immediate_int(-1); 184 185 for (unsigned j = 0; j < 3; j++) { 186 unsigned i = ((ei.src >> j*2) & 3); 187 if (i < 3) 188 inst.src[j] = src[i]; 189 } 190 191 emit_inst(c, &inst); 192} 193 194void 195etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz, 196 struct etna_inst_dst dst, struct etna_inst_src coord, 197 struct etna_inst_src lod_bias, struct etna_inst_src compare) 198{ 199 struct etna_inst inst = { 200 .dst = dst, 201 .tex.id = texid + (is_fs(c) ? 0 : c->specs->vertex_sampler_offset), 202 .tex.swiz = dst_swiz, 203 .src[0] = coord, 204 }; 205 206 if (lod_bias.use) 207 inst.src[1] = lod_bias; 208 209 if (compare.use) 210 inst.src[2] = compare; 211 212 switch (op) { 213 case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break; 214 case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break; 215 case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break; 216 default: 217 compile_error(c, "Unhandled NIR tex type: %d\n", op); 218 } 219 220 emit_inst(c, &inst); 221} 222 223void 224etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition) 225{ 226 if (!condition.use) { 227 emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block }); 228 return; 229 } 230 231 struct etna_inst inst = { 232 .opcode = INST_OPCODE_BRANCH, 233 .cond = INST_CONDITION_NOT, 234 .type = INST_TYPE_U32, 235 .src[0] = condition, 236 .imm = block, 237 }; 238 inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); 239 emit_inst(c, &inst); 240} 241 242void 243etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition) 244{ 245 if (!condition.use) { 246 emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL }); 247 return; 248 } 249 250 struct etna_inst inst = { 251 .opcode = INST_OPCODE_TEXKILL, 252 .cond = INST_CONDITION_NZ, 253 .type = (c->specs->halti < 2) ? INST_TYPE_F32 : INST_TYPE_U32, 254 .src[0] = condition, 255 }; 256 inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); 257 emit_inst(c, &inst); 258} 259