1#include "sfn_emitssboinstruction.h" 2 3#include "sfn_instruction_fetch.h" 4#include "sfn_instruction_gds.h" 5#include "sfn_instruction_misc.h" 6#include "sfn_instruction_tex.h" 7#include "../r600_pipe.h" 8#include "../r600_asm.h" 9 10namespace r600 { 11 12#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 13 14EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor): 15 EmitInstruction(processor), 16 m_require_rat_return_address(false), 17 m_ssbo_image_offset(0) 18{ 19} 20 21void EmitSSBOInstruction::set_ssbo_offset(int offset) 22{ 23 m_ssbo_image_offset = offset; 24} 25 26 27void EmitSSBOInstruction::set_require_rat_return_address() 28{ 29 m_require_rat_return_address = true; 30} 31 32bool 33EmitSSBOInstruction::load_rat_return_address() 34{ 35 if (m_require_rat_return_address) { 36 m_rat_return_address = get_temp_vec4(); 37 emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write})); 38 emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write})); 39 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)), 40 literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr})); 41 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1), 42 m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0), 43 {alu_write, alu_last_instr})); 44 m_require_rat_return_address = false; 45 } 46 return true; 47} 48 49 50bool EmitSSBOInstruction::do_emit(nir_instr* instr) 51{ 52 const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 53 switch (intr->intrinsic) { 54 case nir_intrinsic_atomic_counter_add: 55 case nir_intrinsic_atomic_counter_and: 56 case nir_intrinsic_atomic_counter_exchange: 57 case nir_intrinsic_atomic_counter_max: 58 case nir_intrinsic_atomic_counter_min: 59 case nir_intrinsic_atomic_counter_or: 60 case nir_intrinsic_atomic_counter_xor: 61 case nir_intrinsic_atomic_counter_comp_swap: 62 return emit_atomic(intr); 63 case nir_intrinsic_atomic_counter_read: 64 case nir_intrinsic_atomic_counter_post_dec: 65 return emit_unary_atomic(intr); 66 case nir_intrinsic_atomic_counter_inc: 67 return emit_atomic_inc(intr); 68 case nir_intrinsic_atomic_counter_pre_dec: 69 return emit_atomic_pre_dec(intr); 70 case nir_intrinsic_load_ssbo: 71 return emit_load_ssbo(intr); 72 case nir_intrinsic_store_ssbo: 73 return emit_store_ssbo(intr); 74 case nir_intrinsic_ssbo_atomic_add: 75 case nir_intrinsic_ssbo_atomic_comp_swap: 76 case nir_intrinsic_ssbo_atomic_or: 77 case nir_intrinsic_ssbo_atomic_xor: 78 case nir_intrinsic_ssbo_atomic_imax: 79 case nir_intrinsic_ssbo_atomic_imin: 80 case nir_intrinsic_ssbo_atomic_umax: 81 case nir_intrinsic_ssbo_atomic_umin: 82 case nir_intrinsic_ssbo_atomic_and: 83 case nir_intrinsic_ssbo_atomic_exchange: 84 return emit_ssbo_atomic_op(intr); 85 case nir_intrinsic_image_store: 86 return emit_image_store(intr); 87 case nir_intrinsic_image_load: 88 case nir_intrinsic_image_atomic_add: 89 case nir_intrinsic_image_atomic_and: 90 case nir_intrinsic_image_atomic_or: 91 case nir_intrinsic_image_atomic_xor: 92 case nir_intrinsic_image_atomic_exchange: 93 case nir_intrinsic_image_atomic_comp_swap: 94 case nir_intrinsic_image_atomic_umin: 95 case nir_intrinsic_image_atomic_umax: 96 case nir_intrinsic_image_atomic_imin: 97 case nir_intrinsic_image_atomic_imax: 98 return emit_image_load(intr); 99 case nir_intrinsic_image_size: 100 return emit_image_size(intr); 101 case nir_intrinsic_get_ssbo_size: 102 return emit_buffer_size(intr); 103 case nir_intrinsic_memory_barrier: 104 case nir_intrinsic_memory_barrier_image: 105 case nir_intrinsic_memory_barrier_buffer: 106 case nir_intrinsic_group_memory_barrier: 107 return make_stores_ack_and_waitack(); 108 default: 109 return false; 110 } 111} 112 113bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr) 114{ 115 bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); 116 117 ESDOp op = read_result ? get_opcode(instr->intrinsic) : 118 get_opcode_wo(instr->intrinsic); 119 120 if (DS_OP_INVALID == op) 121 return false; 122 123 124 125 GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7}); 126 127 int base = remap_atomic_base(nir_intrinsic_base(instr)); 128 129 PValue uav_id = from_nir(instr->src[0], 0); 130 131 PValue value = from_nir_with_fetch_constant(instr->src[1], 0); 132 133 GDSInstr *ir = nullptr; 134 if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) { 135 PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0); 136 ir = new GDSInstr(op, dest, value, value2, uav_id, base); 137 } else { 138 ir = new GDSInstr(op, dest, value, uav_id, base); 139 } 140 141 emit_instruction(ir); 142 return true; 143} 144 145bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr) 146{ 147 bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); 148 149 ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic); 150 151 if (DS_OP_INVALID == op) 152 return false; 153 154 GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7}); 155 156 PValue uav_id = from_nir(instr->src[0], 0); 157 158 auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr))); 159 160 emit_instruction(ir); 161 return true; 162} 163 164ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const 165{ 166 switch (opcode) { 167 case nir_intrinsic_atomic_counter_add: 168 return DS_OP_ADD_RET; 169 case nir_intrinsic_atomic_counter_and: 170 return DS_OP_AND_RET; 171 case nir_intrinsic_atomic_counter_exchange: 172 return DS_OP_XCHG_RET; 173 case nir_intrinsic_atomic_counter_inc: 174 return DS_OP_INC_RET; 175 case nir_intrinsic_atomic_counter_max: 176 return DS_OP_MAX_UINT_RET; 177 case nir_intrinsic_atomic_counter_min: 178 return DS_OP_MIN_UINT_RET; 179 case nir_intrinsic_atomic_counter_or: 180 return DS_OP_OR_RET; 181 case nir_intrinsic_atomic_counter_read: 182 return DS_OP_READ_RET; 183 case nir_intrinsic_atomic_counter_xor: 184 return DS_OP_XOR_RET; 185 case nir_intrinsic_atomic_counter_post_dec: 186 return DS_OP_DEC_RET; 187 case nir_intrinsic_atomic_counter_comp_swap: 188 return DS_OP_CMP_XCHG_RET; 189 case nir_intrinsic_atomic_counter_pre_dec: 190 default: 191 return DS_OP_INVALID; 192 } 193} 194 195ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const 196{ 197 switch (opcode) { 198 case nir_intrinsic_atomic_counter_add: 199 return DS_OP_ADD; 200 case nir_intrinsic_atomic_counter_and: 201 return DS_OP_AND; 202 case nir_intrinsic_atomic_counter_inc: 203 return DS_OP_INC; 204 case nir_intrinsic_atomic_counter_max: 205 return DS_OP_MAX_UINT; 206 case nir_intrinsic_atomic_counter_min: 207 return DS_OP_MIN_UINT; 208 case nir_intrinsic_atomic_counter_or: 209 return DS_OP_OR; 210 case nir_intrinsic_atomic_counter_xor: 211 return DS_OP_XOR; 212 case nir_intrinsic_atomic_counter_post_dec: 213 return DS_OP_DEC; 214 case nir_intrinsic_atomic_counter_comp_swap: 215 return DS_OP_CMP_XCHG_RET; 216 case nir_intrinsic_atomic_counter_exchange: 217 return DS_OP_XCHG_RET; 218 case nir_intrinsic_atomic_counter_pre_dec: 219 default: 220 return DS_OP_INVALID; 221 } 222} 223 224RatInstruction::ERatOp 225EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const 226{ 227 switch (opcode) { 228 case nir_intrinsic_ssbo_atomic_add: 229 case nir_intrinsic_image_atomic_add: 230 return RatInstruction::ADD_RTN; 231 case nir_intrinsic_ssbo_atomic_and: 232 case nir_intrinsic_image_atomic_and: 233 return RatInstruction::AND_RTN; 234 case nir_intrinsic_ssbo_atomic_exchange: 235 case nir_intrinsic_image_atomic_exchange: 236 return RatInstruction::XCHG_RTN; 237 case nir_intrinsic_ssbo_atomic_or: 238 case nir_intrinsic_image_atomic_or: 239 return RatInstruction::OR_RTN; 240 case nir_intrinsic_ssbo_atomic_imin: 241 case nir_intrinsic_image_atomic_imin: 242 return RatInstruction::MIN_INT_RTN; 243 case nir_intrinsic_ssbo_atomic_imax: 244 case nir_intrinsic_image_atomic_imax: 245 return RatInstruction::MAX_INT_RTN; 246 case nir_intrinsic_ssbo_atomic_umin: 247 case nir_intrinsic_image_atomic_umin: 248 return RatInstruction::MIN_UINT_RTN; 249 case nir_intrinsic_ssbo_atomic_umax: 250 case nir_intrinsic_image_atomic_umax: 251 return RatInstruction::MAX_UINT_RTN; 252 case nir_intrinsic_ssbo_atomic_xor: 253 case nir_intrinsic_image_atomic_xor: 254 return RatInstruction::XOR_RTN; 255 case nir_intrinsic_ssbo_atomic_comp_swap: 256 case nir_intrinsic_image_atomic_comp_swap: 257 if (util_format_is_float(format)) 258 return RatInstruction::CMPXCHG_FLT_RTN; 259 else 260 return RatInstruction::CMPXCHG_INT_RTN; 261 case nir_intrinsic_image_load: 262 return RatInstruction::NOP_RTN; 263 default: 264 unreachable("Unsupported RAT instruction"); 265 } 266} 267 268RatInstruction::ERatOp 269EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const 270{ 271 switch (opcode) { 272 case nir_intrinsic_ssbo_atomic_add: 273 case nir_intrinsic_image_atomic_add: 274 return RatInstruction::ADD; 275 case nir_intrinsic_ssbo_atomic_and: 276 case nir_intrinsic_image_atomic_and: 277 return RatInstruction::AND; 278 case nir_intrinsic_ssbo_atomic_or: 279 case nir_intrinsic_image_atomic_or: 280 return RatInstruction::OR; 281 case nir_intrinsic_ssbo_atomic_imin: 282 case nir_intrinsic_image_atomic_imin: 283 return RatInstruction::MIN_INT; 284 case nir_intrinsic_ssbo_atomic_imax: 285 case nir_intrinsic_image_atomic_imax: 286 return RatInstruction::MAX_INT; 287 case nir_intrinsic_ssbo_atomic_umin: 288 case nir_intrinsic_image_atomic_umin: 289 return RatInstruction::MIN_UINT; 290 case nir_intrinsic_ssbo_atomic_umax: 291 case nir_intrinsic_image_atomic_umax: 292 return RatInstruction::MAX_UINT; 293 case nir_intrinsic_ssbo_atomic_xor: 294 case nir_intrinsic_image_atomic_xor: 295 return RatInstruction::XOR; 296 case nir_intrinsic_ssbo_atomic_comp_swap: 297 case nir_intrinsic_image_atomic_comp_swap: 298 if (util_format_is_float(format)) 299 return RatInstruction::CMPXCHG_FLT; 300 else 301 return RatInstruction::CMPXCHG_INT; 302 default: 303 unreachable("Unsupported WO RAT instruction"); 304 } 305} 306 307bool EmitSSBOInstruction::load_atomic_inc_limits() 308{ 309 m_atomic_update = get_temp_register(); 310 m_atomic_update->set_keep_alive(); 311 emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1), 312 {alu_write, alu_last_instr})); 313 return true; 314} 315 316bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr) 317{ 318 bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses); 319 PValue uav_id = from_nir(instr->src[0], 0); 320 GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7}); 321 auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, 322 m_atomic_update, uav_id, 323 remap_atomic_base(nir_intrinsic_base(instr))); 324 emit_instruction(ir); 325 return true; 326} 327 328bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr) 329{ 330 GPRVector dest = make_dest(instr); 331 332 PValue uav_id = from_nir(instr->src[0], 0); 333 334 auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id, 335 remap_atomic_base(nir_intrinsic_base(instr))); 336 emit_instruction(ir); 337 338 emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write)); 339 340 return true; 341} 342 343bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr) 344{ 345 GPRVector dest = make_dest(instr); 346 347 /** src0 not used, should be some offset */ 348 auto addr = from_nir(instr->src[1], 0); 349 PValue addr_temp = create_register_from_nir_src(instr->src[1], 1); 350 351 /** Should be lowered in nir */ 352 emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))}, 353 {alu_write, alu_last_instr})); 354 355 const EVTXDataFormat formats[4] = { 356 fmt_32, 357 fmt_32_32, 358 fmt_32_32_32, 359 fmt_32_32_32_32 360 }; 361 362 const std::array<int,4> dest_swt[4] = { 363 {0,7,7,7}, 364 {0,1,7,7}, 365 {0,1,2,7}, 366 {0,1,2,3} 367 }; 368 369 /* TODO fix resource index */ 370 auto ir = new FetchInstruction(dest, addr_temp, 371 R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset 372 , from_nir(instr->src[0], 0), 373 formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int); 374 ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]); 375 ir->set_flag(vtx_use_tc); 376 377 emit_instruction(ir); 378 return true; 379} 380 381bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr) 382{ 383 384 GPRVector::Swizzle swz = {7,7,7,7}; 385 for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) 386 swz[i] = i; 387 388 auto orig_addr = from_nir(instr->src[2], 0); 389 390 GPRVector addr_vec = get_temp_vec4({0,1,2,7}); 391 392 auto temp2 = get_temp_vec4(); 393 394 auto rat_id = from_nir(instr->src[1], 0); 395 396 emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr, 397 PValue(new LiteralValue(2)), write)); 398 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write)); 399 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write)); 400 401 402 auto values = vec_from_nir_with_fetch_constant(instr->src[0], 403 (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true); 404 405 auto cf_op = cf_mem_rat; 406 //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; 407 auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, 408 values, addr_vec, m_ssbo_image_offset, rat_id, 1, 409 1, 0, false); 410 emit_instruction(store); 411 m_store_ops.push_back(store); 412 413 for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) { 414 emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN ? last_write : write)); 415 emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0), 416 {addr_vec.reg_i(0), Value::one_i}, last_write)); 417 store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, 418 temp2, addr_vec, m_ssbo_image_offset, rat_id, 1, 419 1, 0, false); 420 emit_instruction(store); 421 if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT)) 422 m_store_ops.push_back(store); 423 } 424 425 return true; 426} 427 428bool 429EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin) 430{ 431 int imageid = 0; 432 PValue image_offset; 433 434 if (nir_src_is_const(intrin->src[0])) 435 imageid = nir_src_as_int(intrin->src[0]); 436 else 437 image_offset = from_nir(intrin->src[0], 0); 438 439 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3}); 440 auto undef = from_nir(intrin->src[2], 0); 441 auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3}); 442 auto unknown = from_nir(intrin->src[4], 0); 443 444 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && 445 nir_intrinsic_image_array(intrin)) { 446 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); 447 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); 448 } 449 450 auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; 451 auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid, 452 image_offset, 1, 0xf, 0, false); 453 454 //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT)) 455 m_store_ops.push_back(store); 456 457 emit_instruction(store); 458 return true; 459} 460 461bool 462EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin) 463{ 464 int imageid = 0; 465 PValue image_offset; 466 467 if (nir_src_is_const(intrin->src[0])) 468 imageid = nir_src_as_int(intrin->src[0]); 469 else 470 image_offset = from_nir(intrin->src[0], 0); 471 472 bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses); 473 auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) : 474 get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT); 475 476 auto coord_orig = from_nir(intrin->src[1], 0, 0); 477 auto coord = get_temp_register(0); 478 479 emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write)); 480 481 if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { 482 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), 483 from_nir(intrin->src[3], 0), {alu_write})); 484 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3), 485 from_nir(intrin->src[2], 0), {alu_last_instr, alu_write})); 486 } else { 487 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), 488 from_nir(intrin->src[2], 0), {alu_write})); 489 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write)); 490 } 491 492 493 GPRVector out_vec({coord, coord, coord, coord}); 494 495 auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset, 496 image_offset, 1, 0xf, 0, true); 497 emit_instruction(atomic); 498 499 if (read_result) { 500 emit_instruction(new WaitAck(0)); 501 502 GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components); 503 auto fetch = new FetchInstruction(vc_fetch, 504 no_index_offset, 505 fmt_32, 506 vtx_nf_int, 507 vtx_es_none, 508 m_rat_return_address.reg_i(1), 509 dest, 510 0, 511 false, 512 0xf, 513 R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, 514 0, 515 bim_none, 516 false, 517 false, 518 0, 519 0, 520 0, 521 image_offset, 522 {0,7,7,7}); 523 fetch->set_flag(vtx_srf_mode); 524 fetch->set_flag(vtx_use_tc); 525 fetch->set_flag(vtx_vpm); 526 emit_instruction(fetch); 527 } 528 529 return true; 530 531} 532 533bool 534EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin) 535{ 536 int imageid = 0; 537 PValue image_offset; 538 539 if (nir_src_is_const(intrin->src[0])) 540 imageid = nir_src_as_int(intrin->src[0]); 541 else 542 image_offset = from_nir(intrin->src[0], 0); 543 544 bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses); 545 auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)): 546 get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin)); 547 548 GPRVector::Swizzle swz = {0,1,2,3}; 549 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz); 550 551 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && 552 nir_intrinsic_image_array(intrin)) { 553 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); 554 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); 555 } 556 557 if (intrin->intrinsic != nir_intrinsic_image_load) { 558 if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) { 559 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), 560 from_nir(intrin->src[4], 0), {alu_write})); 561 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3), 562 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); 563 } else { 564 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), 565 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); 566 } 567 } 568 auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; 569 570 auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid, 571 image_offset, 1, 0xf, 0, true); 572 emit_instruction(store); 573 return read_retvalue ? fetch_return_value(intrin) : true; 574} 575 576bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin) 577{ 578 emit_instruction(new WaitAck(0)); 579 580 pipe_format format = nir_intrinsic_format(intrin); 581 unsigned fmt = fmt_32; 582 unsigned num_format = 0; 583 unsigned format_comp = 0; 584 unsigned endian = 0; 585 586 int imageid = 0; 587 PValue image_offset; 588 589 if (nir_src_is_const(intrin->src[0])) 590 imageid = nir_src_as_int(intrin->src[0]); 591 else 592 image_offset = from_nir(intrin->src[0], 0); 593 594 r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian); 595 596 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); 597 598 auto fetch = new FetchInstruction(vc_fetch, 599 no_index_offset, 600 (EVTXDataFormat)fmt, 601 (EVFetchNumFormat)num_format, 602 (EVFetchEndianSwap)endian, 603 m_rat_return_address.reg_i(1), 604 dest, 605 0, 606 false, 607 0x3, 608 R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, 609 0, 610 bim_none, 611 false, 612 false, 613 0, 614 0, 615 0, 616 image_offset, {0,1,2,3}); 617 fetch->set_flag(vtx_srf_mode); 618 fetch->set_flag(vtx_use_tc); 619 fetch->set_flag(vtx_vpm); 620 if (format_comp) 621 fetch->set_flag(vtx_format_comp_signed); 622 623 emit_instruction(fetch); 624 return true; 625} 626 627bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin) 628{ 629 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); 630 GPRVector src{0,{4,4,4,4}}; 631 632 assert(nir_src_as_uint(intrin->src[1]) == 0); 633 634 auto const_offset = nir_src_as_const_value(intrin->src[0]); 635 auto dyn_offset = PValue(); 636 int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; 637 if (const_offset) 638 res_id += const_offset[0].u32; 639 else 640 dyn_offset = from_nir(intrin->src[0], 0); 641 642 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) { 643 emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)), 644 res_id, 645 bim_none)); 646 return true; 647 } else { 648 emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src, 649 0/* ?? */, 650 res_id, dyn_offset)); 651 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && 652 nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) { 653 /* Need to load the layers from a const buffer */ 654 655 set_has_txs_cube_array_comp(); 656 657 if (const_offset) { 658 unsigned lookup_resid = const_offset[0].u32; 659 emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2), 660 PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, 661 R600_BUFFER_INFO_CONST_BUFFER)), 662 EmitInstruction::last_write)); 663 } else { 664 /* If the adressing is indirect we have to get the z-value by using a binary search */ 665 GPRVector trgt; 666 GPRVector help; 667 668 auto addr = help.reg_i(0); 669 auto comp = help.reg_i(1); 670 auto low_bit = help.reg_i(2); 671 auto high_bit = help.reg_i(3); 672 673 emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0), 674 literal(2), EmitInstruction::write)); 675 emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0), 676 literal(3), EmitInstruction::last_write)); 677 678 emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL, 679 R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none)); 680 681 emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2), 682 EmitInstruction::write)); 683 emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3), 684 EmitInstruction::last_write)); 685 686 emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write)); 687 } 688 } 689 } 690 return true; 691} 692 693bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr) 694{ 695 std::array<PValue,4> dst_elms; 696 697 698 for (uint16_t i = 0; i < 4; ++i) { 699 dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7); 700 } 701 702 GPRVector dst(dst_elms); 703 GPRVector src(0,{4,4,4,4}); 704 705 auto const_offset = nir_src_as_const_value(intr->src[0]); 706 auto dyn_offset = PValue(); 707 int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; 708 if (const_offset) 709 res_id += const_offset[0].u32; 710 else 711 assert(0 && "dynamic buffer offset not supported in buffer_size"); 712 713 emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), 714 res_id, bim_none)); 715 716 return true; 717} 718 719bool EmitSSBOInstruction::make_stores_ack_and_waitack() 720{ 721 for (auto&& store: m_store_ops) 722 store->set_ack(); 723 724 if (!m_store_ops.empty()) 725 emit_instruction(new WaitAck(0)); 726 727 m_store_ops.clear(); 728 729 return true; 730} 731 732GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir) 733{ 734 GPRVector::Values v; 735 int i; 736 for (i = 0; i < 4; ++i) 737 v[i] = from_nir(ir->dest, i); 738 return GPRVector(v); 739} 740 741} 742