1/* -*- mesa-c++ -*- 2 * 3 * Copyright (c) 2018 Collabora LTD 4 * 5 * Author: Gert Wollny <gert.wollny@collabora.com> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * on the rights to use, copy, modify, merge, publish, distribute, sub 11 * license, and/or sell copies of the Software, and to permit persons to whom 12 * the Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#include "sfn_shader_geometry.h" 28#include "sfn_instruction_misc.h" 29#include "sfn_instruction_fetch.h" 30#include "sfn_shaderio.h" 31 32namespace r600 { 33 34GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh, 35 r600_pipe_shader_selector &sel, 36 const r600_shader_key &key, 37 enum chip_class chip_class): 38 VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader, 39 sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter), 40 m_pipe_shader(sh), 41 m_so_info(&sel.so), 42 m_first_vertex_emitted(false), 43 m_offset(0), 44 m_next_input_ring_offset(0), 45 m_key(key), 46 m_clip_dist_mask(0), 47 m_cur_ring_output(0), 48 m_gs_tri_strip_adj_fix(false), 49 m_input_mask(0) 50{ 51 sh_info().atomic_base = key.gs.first_atomic_counter; 52} 53 54bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr) 55{ 56 auto location = nir_intrinsic_io_semantics(instr).location; 57 auto index = nir_src_as_const_value(instr->src[1]); 58 assert(index); 59 auto driver_location = nir_intrinsic_base(instr) + index->u32; 60 61 uint32_t write_mask = nir_intrinsic_write_mask(instr); 62 GPRVector::Swizzle swz = swizzle_from_mask(write_mask); 63 64 auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true); 65 66 sh_info().output[driver_location].write_mask = write_mask; 67 68 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value, 69 4 * driver_location, 70 instr->num_components, m_export_base[0]); 71 streamout_data[location] = ir; 72 73 return true; 74} 75 76bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) 77{ 78 if (instr->type != nir_instr_type_intrinsic) 79 return true; 80 81 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); 82 83 switch (ii->intrinsic) { 84 case nir_intrinsic_store_output: 85 return process_store_output(ii); 86 case nir_intrinsic_load_input: 87 case nir_intrinsic_load_per_vertex_input: 88 return process_load_input(ii); 89 default: 90 return true; 91 } 92} 93 94bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr) 95{ 96 auto location = nir_intrinsic_io_semantics(instr).location; 97 auto index = nir_src_as_const_value(instr->src[1]); 98 assert(index); 99 100 auto driver_location = nir_intrinsic_base(instr) + index->u32; 101 102 if (location == VARYING_SLOT_COL0 || 103 location == VARYING_SLOT_COL1 || 104 (location >= VARYING_SLOT_VAR0 && 105 location <= VARYING_SLOT_VAR31) || 106 (location >= VARYING_SLOT_TEX0 && 107 location <= VARYING_SLOT_TEX7) || 108 location == VARYING_SLOT_BFC0 || 109 location == VARYING_SLOT_BFC1 || 110 location == VARYING_SLOT_PNTC || 111 location == VARYING_SLOT_CLIP_VERTEX || 112 location == VARYING_SLOT_CLIP_DIST0 || 113 location == VARYING_SLOT_CLIP_DIST1 || 114 location == VARYING_SLOT_PRIMITIVE_ID || 115 location == VARYING_SLOT_POS || 116 location == VARYING_SLOT_PSIZ || 117 location == VARYING_SLOT_LAYER || 118 location == VARYING_SLOT_VIEWPORT || 119 location == VARYING_SLOT_FOGC) { 120 r600_shader_io& io = sh_info().output[driver_location]; 121 122 auto semantic = r600_get_varying_semantic(location); 123 io.name = semantic.first; 124 io.sid = semantic.second; 125 126 evaluate_spi_sid(io); 127 128 if (sh_info().noutput <= driver_location) 129 sh_info().noutput = driver_location + 1; 130 131 if (location == VARYING_SLOT_CLIP_DIST0 || 132 location == VARYING_SLOT_CLIP_DIST1) { 133 m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0); 134 } 135 136 if (location == VARYING_SLOT_VIEWPORT) { 137 sh_info().vs_out_viewport = 1; 138 sh_info().vs_out_misc_write = 1; 139 } 140 return true; 141 } 142 return false; 143} 144 145bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr) 146{ 147 auto location = nir_intrinsic_io_semantics(instr).location; 148 auto index = nir_src_as_const_value(instr->src[1]); 149 assert(index); 150 151 auto driver_location = nir_intrinsic_base(instr) + index->u32; 152 153 if (location == VARYING_SLOT_POS || 154 location == VARYING_SLOT_PSIZ || 155 location == VARYING_SLOT_FOGC || 156 location == VARYING_SLOT_CLIP_VERTEX || 157 location == VARYING_SLOT_CLIP_DIST0 || 158 location == VARYING_SLOT_CLIP_DIST1 || 159 location == VARYING_SLOT_COL0 || 160 location == VARYING_SLOT_COL1 || 161 location == VARYING_SLOT_BFC0 || 162 location == VARYING_SLOT_BFC1 || 163 location == VARYING_SLOT_PNTC || 164 (location >= VARYING_SLOT_VAR0 && 165 location <= VARYING_SLOT_VAR31) || 166 (location >= VARYING_SLOT_TEX0 && 167 location <= VARYING_SLOT_TEX7)) { 168 169 uint64_t bit = 1ull << location; 170 if (!(bit & m_input_mask)) { 171 r600_shader_io& io = sh_info().input[driver_location]; 172 auto semantic = r600_get_varying_semantic(location); 173 io.name = semantic.first; 174 io.sid = semantic.second; 175 176 io.ring_offset = 16 * driver_location; 177 ++sh_info().ninput; 178 m_next_input_ring_offset += 16; 179 m_input_mask |= bit; 180 } 181 return true; 182 } 183 return false; 184} 185 186bool GeometryShaderFromNir::do_allocate_reserved_registers() 187{ 188 const int sel[6] = {0, 0 ,0, 1, 1, 1}; 189 const int chan[6] = {0, 1 ,3, 0, 1, 2}; 190 191 increment_reserved_registers(); 192 increment_reserved_registers(); 193 194 /* Reserve registers used by the shaders (should check how many 195 * components are actually used */ 196 for (int i = 0; i < 6; ++i) { 197 auto reg = new GPRValue(sel[i], chan[i]); 198 reg->set_as_input(); 199 m_per_vertex_offsets[i].reset(reg); 200 inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false); 201 } 202 auto reg = new GPRValue(0, 2); 203 reg->set_as_input(); 204 m_primitive_id.reset(reg); 205 inject_register(0, 2, m_primitive_id, false); 206 207 reg = new GPRValue(1, 3); 208 reg->set_as_input(); 209 m_invocation_id.reset(reg); 210 inject_register(1, 3, m_invocation_id, false); 211 212 m_export_base[0] = get_temp_register(0); 213 m_export_base[1] = get_temp_register(0); 214 m_export_base[2] = get_temp_register(0); 215 m_export_base[3] = get_temp_register(0); 216 emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr})); 217 emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr})); 218 emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr})); 219 emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr})); 220 221 sh_info().ring_item_sizes[0] = m_next_input_ring_offset; 222 223 if (m_key.gs.tri_strip_adj_fix) 224 emit_adj_fix(); 225 226 return true; 227} 228 229void GeometryShaderFromNir::emit_adj_fix() 230{ 231 PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1)); 232 emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr}); 233 234 int reg_indices[6]; 235 int reg_chanels[6] = {1, 2, 3, 1, 2, 3}; 236 237 int rotate_indices[6] = {4, 5, 0, 1, 2, 3}; 238 239 reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel(); 240 reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel(); 241 242 std::array<PValue, 6> adjhelp; 243 244 AluInstruction *ir = nullptr; 245 for (int i = 0; i < 6; i++) { 246 adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i])); 247 ir = new AluInstruction(op3_cnde_int, adjhelp[i], 248 {adjhelp0, m_per_vertex_offsets[i], 249 m_per_vertex_offsets[rotate_indices[i]]}, 250 {alu_write}); 251 if ((get_chip_class() == CAYMAN && i == 2) || (i == 3)) 252 ir->set_flag(alu_last_instr); 253 emit_instruction(ir); 254 } 255 ir->set_flag(alu_last_instr); 256 257 for (int i = 0; i < 6; i++) 258 m_per_vertex_offsets[i] = adjhelp[i]; 259} 260 261 262bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) 263{ 264 switch (instr->intrinsic) { 265 case nir_intrinsic_emit_vertex: 266 return emit_vertex(instr, false); 267 case nir_intrinsic_end_primitive: 268 return emit_vertex(instr, true); 269 case nir_intrinsic_load_primitive_id: 270 return load_preloaded_value(instr->dest, 0, m_primitive_id); 271 case nir_intrinsic_load_invocation_id: 272 return load_preloaded_value(instr->dest, 0, m_invocation_id); 273 case nir_intrinsic_store_output: 274 return emit_store(instr); 275 case nir_intrinsic_load_per_vertex_input: 276 return emit_load_per_vertex_input(instr); 277 default: 278 ; 279 } 280 return false; 281} 282 283bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut) 284{ 285 int stream = nir_intrinsic_stream_id(instr); 286 assert(stream < 4); 287 288 for(auto v: streamout_data) { 289 if (stream == 0 || v.first != VARYING_SLOT_POS) { 290 v.second->patch_ring(stream, m_export_base[stream]); 291 emit_instruction(v.second); 292 } else 293 delete v.second; 294 } 295 streamout_data.clear(); 296 emit_instruction(new EmitVertex(stream, cut)); 297 298 if (!cut) 299 emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream], 300 PValue(new LiteralValue(sh_info().noutput)), 301 {alu_write, alu_last_instr})); 302 303 return true; 304} 305 306bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr) 307{ 308 auto dest = vec_from_nir(instr->dest, 4); 309 310 std::array<int, 4> swz = {7,7,7,7}; 311 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 312 swz[i] = i + nir_intrinsic_component(instr); 313 } 314 315 auto literal_index = nir_src_as_const_value(instr->src[0]); 316 317 if (!literal_index) { 318 sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n"; 319 return false; 320 } 321 assert(literal_index->u32 < 6); 322 assert(nir_intrinsic_io_semantics(instr).num_slots == 1); 323 324 PValue addr = m_per_vertex_offsets[literal_index->u32]; 325 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr, 326 16 * nir_intrinsic_base(instr), 327 R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true); 328 fetch->set_dest_swizzle(swz); 329 330 emit_instruction(fetch); 331 return true; 332} 333 334void GeometryShaderFromNir::do_finalize() 335{ 336 if (m_clip_dist_mask) { 337 int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask); 338 sh_info().cc_dist_mask = (1 << num_clip_dist) - 1; 339 sh_info().clip_dist_write = (1 << num_clip_dist) - 1; 340 } 341} 342 343} 344