1/* 2 * Copyright © 2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "compiler/v3d_compiler.h" 25#include "compiler/nir/nir_builder.h" 26 27/** 28 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io 29 * intrinsics into something amenable to the V3D architecture. 30 * 31 * Most of the work is turning the VS's store_output intrinsics from working 32 * on a base representing the gallium-level vec4 driver_location to an offset 33 * within the VPM, and emitting the header that's read by the fixed function 34 * hardware between the VS and FS. 35 * 36 * We also adjust the offsets on uniform loads to be in bytes, since that's 37 * what we need for indirect addressing with general TMU access. 38 */ 39 40struct v3d_nir_lower_io_state { 41 int pos_vpm_offset; 42 int vp_vpm_offset; 43 int zs_vpm_offset; 44 int rcp_wc_vpm_offset; 45 int psiz_vpm_offset; 46 int varyings_vpm_offset; 47 48 BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 49 50 nir_ssa_def *pos[4]; 51}; 52 53static void 54v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *chan) 55{ 56 nir_intrinsic_instr *intr = 57 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); 58 nir_ssa_dest_init(&intr->instr, &intr->dest, 59 1, intr->dest.ssa.bit_size, NULL); 60 intr->num_components = 1; 61 62 intr->src[0] = nir_src_for_ssa(chan); 63 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 64 65 nir_intrinsic_set_base(intr, base); 66 nir_intrinsic_set_write_mask(intr, 0x1); 67 nir_intrinsic_set_component(intr, 0); 68 69 nir_builder_instr_insert(b, &intr->instr); 70} 71 72/* Convert the uniform offset to bytes. If it happens to be a constant, 73 * constant-folding will clean up the shift for us. 74 */ 75static void 76v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b, 77 nir_intrinsic_instr *intr) 78{ 79 b->cursor = nir_before_instr(&intr->instr); 80 81 nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16); 82 83 nir_instr_rewrite_src(&intr->instr, 84 &intr->src[0], 85 nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, 86 nir_imm_int(b, 4)))); 87} 88 89static int 90v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan) 91{ 92 int component = var->data.location_frac + chan; 93 94 for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { 95 struct v3d_varying_slot slot = c->vs_key->fs_inputs[i]; 96 97 if (v3d_slot_get_slot(slot) == var->data.location && 98 v3d_slot_get_component(slot) == component) { 99 return i; 100 } 101 } 102 103 return -1; 104} 105 106/* Lowers a store_output(gallium driver location) to a series of store_outputs 107 * with a driver_location equal to the offset in the VPM. 108 */ 109static void 110v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b, 111 nir_intrinsic_instr *intr, 112 struct v3d_nir_lower_io_state *state) 113{ 114 b->cursor = nir_before_instr(&intr->instr); 115 116 int start_comp = nir_intrinsic_component(intr); 117 nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0], 118 intr->num_components); 119 120 nir_variable *var = NULL; 121 nir_foreach_variable(scan_var, &c->s->outputs) { 122 if (scan_var->data.driver_location != nir_intrinsic_base(intr) || 123 start_comp < scan_var->data.location_frac || 124 start_comp >= scan_var->data.location_frac + 125 glsl_get_components(scan_var->type)) { 126 continue; 127 } 128 var = scan_var; 129 } 130 131 /* Save off the components of the position for the setup of VPM inputs 132 * read by fixed function HW. 133 */ 134 if (var->data.location == VARYING_SLOT_POS) { 135 for (int i = 0; i < intr->num_components; i++) { 136 state->pos[start_comp + i] = nir_channel(b, src, i); 137 } 138 } 139 140 /* Just psiz to the position in the FF header right now. */ 141 if (var->data.location == VARYING_SLOT_PSIZ && 142 state->psiz_vpm_offset != -1) { 143 v3d_nir_store_output(b, state->psiz_vpm_offset, src); 144 } 145 146 /* Scalarize outputs if it hasn't happened already, since we want to 147 * schedule each VPM write individually. We can skip any outut 148 * components not read by the FS. 149 */ 150 for (int i = 0; i < intr->num_components; i++) { 151 int vpm_offset = 152 v3d_varying_slot_vpm_offset(c, var, 153 i + 154 start_comp - 155 var->data.location_frac); 156 157 if (vpm_offset == -1) 158 continue; 159 160 BITSET_SET(state->varyings_stored, vpm_offset); 161 162 v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset, 163 nir_channel(b, src, i)); 164 } 165 166 nir_instr_remove(&intr->instr); 167} 168 169static void 170v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b, 171 struct nir_instr *instr, 172 struct v3d_nir_lower_io_state *state) 173{ 174 if (instr->type != nir_instr_type_intrinsic) 175 return; 176 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 177 178 switch (intr->intrinsic) { 179 case nir_intrinsic_load_uniform: 180 v3d_nir_lower_uniform(c, b, intr); 181 break; 182 183 case nir_intrinsic_store_output: 184 if (c->s->info.stage == MESA_SHADER_VERTEX) 185 v3d_nir_lower_vpm_output(c, b, intr, state); 186 break; 187 188 default: 189 break; 190 } 191} 192 193/* Remap the output var's .driver_location. This is purely for 194 * nir_print_shader() so that store_output can map back to a variable name. 195 */ 196static void 197v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c, 198 struct v3d_nir_lower_io_state *state) 199{ 200 nir_foreach_variable_safe(var, &c->s->outputs) { 201 if (var->data.location == VARYING_SLOT_POS && 202 state->pos_vpm_offset != -1) { 203 var->data.driver_location = state->pos_vpm_offset; 204 continue; 205 } 206 207 if (var->data.location == VARYING_SLOT_PSIZ && 208 state->psiz_vpm_offset != -1) { 209 var->data.driver_location = state->psiz_vpm_offset; 210 continue; 211 } 212 213 int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0); 214 if (vpm_offset != -1) { 215 var->data.driver_location = 216 state->varyings_vpm_offset + vpm_offset; 217 } else { 218 /* If we couldn't find a mapping for the var, delete 219 * it so that its old .driver_location doesn't confuse 220 * nir_print_shader(). 221 */ 222 exec_node_remove(&var->node); 223 } 224 } 225} 226 227static void 228v3d_nir_setup_vpm_layout(struct v3d_compile *c, 229 struct v3d_nir_lower_io_state *state) 230{ 231 uint32_t vpm_offset = 0; 232 233 if (c->vs_key->is_coord) { 234 state->pos_vpm_offset = vpm_offset; 235 vpm_offset += 4; 236 } else { 237 state->pos_vpm_offset = -1; 238 } 239 240 state->vp_vpm_offset = vpm_offset; 241 vpm_offset += 2; 242 243 if (!c->vs_key->is_coord) { 244 state->zs_vpm_offset = vpm_offset++; 245 state->rcp_wc_vpm_offset = vpm_offset++; 246 } else { 247 state->zs_vpm_offset = -1; 248 state->rcp_wc_vpm_offset = -1; 249 } 250 251 if (c->vs_key->per_vertex_point_size) 252 state->psiz_vpm_offset = vpm_offset++; 253 else 254 state->psiz_vpm_offset = -1; 255 256 state->varyings_vpm_offset = vpm_offset; 257 258 c->vpm_output_size = vpm_offset + c->vs_key->num_fs_inputs; 259} 260 261static void 262v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b, 263 struct v3d_nir_lower_io_state *state) 264{ 265 for (int i = 0; i < 4; i++) { 266 if (!state->pos[i]) 267 state->pos[i] = nir_ssa_undef(b, 1, 32); 268 } 269 270 nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]); 271 272 if (state->pos_vpm_offset != -1) { 273 for (int i = 0; i < 4; i++) { 274 v3d_nir_store_output(b, state->pos_vpm_offset + i, 275 state->pos[i]); 276 } 277 } 278 279 for (int i = 0; i < 2; i++) { 280 nir_ssa_def *pos; 281 nir_ssa_def *scale; 282 pos = state->pos[i]; 283 if (i == 0) 284 scale = nir_load_viewport_x_scale(b); 285 else 286 scale = nir_load_viewport_y_scale(b); 287 pos = nir_fmul(b, pos, scale); 288 pos = nir_fmul(b, pos, rcp_wc); 289 pos = nir_f2i32(b, nir_fround_even(b, pos)); 290 v3d_nir_store_output(b, state->vp_vpm_offset + i, 291 pos); 292 } 293 294 if (state->zs_vpm_offset != -1) { 295 nir_ssa_def *z = state->pos[2]; 296 z = nir_fmul(b, z, nir_load_viewport_z_scale(b)); 297 z = nir_fmul(b, z, rcp_wc); 298 z = nir_fadd(b, z, nir_load_viewport_z_offset(b)); 299 v3d_nir_store_output(b, state->zs_vpm_offset, z); 300 } 301 302 if (state->rcp_wc_vpm_offset != -1) 303 v3d_nir_store_output(b, state->rcp_wc_vpm_offset, rcp_wc); 304 305 /* Store 0 to varyings requested by the FS but not stored in the VS. 306 * This should be undefined behavior, but glsl-routing seems to rely 307 * on it. 308 */ 309 for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { 310 if (!BITSET_TEST(state->varyings_stored, i)) { 311 v3d_nir_store_output(b, state->varyings_vpm_offset + i, 312 nir_imm_int(b, 0)); 313 } 314 } 315} 316 317void 318v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c) 319{ 320 struct v3d_nir_lower_io_state state = { 0 }; 321 322 /* Set up the layout of the VPM outputs. */ 323 if (s->info.stage == MESA_SHADER_VERTEX) 324 v3d_nir_setup_vpm_layout(c, &state); 325 326 nir_foreach_function(function, s) { 327 if (function->impl) { 328 nir_builder b; 329 nir_builder_init(&b, function->impl); 330 331 nir_foreach_block(block, function->impl) { 332 nir_foreach_instr_safe(instr, block) 333 v3d_nir_lower_io_instr(c, &b, instr, 334 &state); 335 } 336 337 nir_block *last = nir_impl_last_block(function->impl); 338 b.cursor = nir_after_block(last); 339 if (s->info.stage == MESA_SHADER_VERTEX) 340 v3d_nir_emit_ff_vpm_outputs(c, &b, &state); 341 342 nir_metadata_preserve(function->impl, 343 nir_metadata_block_index | 344 nir_metadata_dominance); 345 } 346 } 347 348 if (s->info.stage == MESA_SHADER_VERTEX) 349 v3d_nir_lower_io_update_output_var_base(c, &state); 350} 351