1/* -*- mesa-c++ -*- 2 * 3 * Copyright (c) 2018 Collabora LTD 4 * 5 * Author: Gert Wollny <gert.wollny@collabora.com> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * on the rights to use, copy, modify, merge, publish, distribute, sub 11 * license, and/or sell copies of the Software, and to permit persons to whom 12 * the Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#include "../r600_pipe.h" 28#include "../r600_shader.h" 29#include "sfn_shader_vertex.h" 30 31#include "sfn_shader_compute.h" 32#include "sfn_shader_fragment.h" 33#include "sfn_shader_geometry.h" 34#include "sfn_liverange.h" 35#include "sfn_ir_to_assembly.h" 36#include "sfn_nir.h" 37#include "sfn_instruction_misc.h" 38#include "sfn_instruction_fetch.h" 39#include "sfn_instruction_lds.h" 40 41#include <iostream> 42 43#define ENABLE_DEBUG 1 44 45#ifdef ENABLE_DEBUG 46#define DEBUG_SFN(X) \ 47 do {\ 48 X; \ 49 } while (0) 50#else 51#define DEBUG_SFN(X) 52#endif 53 54namespace r600 { 55 56using namespace std; 57 58 59ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, 60 r600_pipe_shader_selector& sel, 61 r600_shader &sh_info, int scratch_size, 62 enum chip_class chip_class, 63 int atomic_base): 64 m_processor_type(ptype), 65 m_nesting_depth(0), 66 m_block_number(0), 67 m_export_output(0, -1), 68 m_sh_info(sh_info), 69 m_chip_class(chip_class), 70 m_tex_instr(*this), 71 m_alu_instr(*this), 72 m_ssbo_instr(*this), 73 m_pending_else(nullptr), 74 m_scratch_size(scratch_size), 75 m_next_hwatomic_loc(0), 76 m_sel(sel), 77 m_atomic_base(atomic_base), 78 m_image_count(0), 79 last_emitted_alu(nullptr) 80{ 81 m_sh_info.processor_type = ptype; 82 83} 84 85 86ShaderFromNirProcessor::~ShaderFromNirProcessor() 87{ 88} 89 90bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) 91{ 92 switch (instr->type) { 93 case nir_instr_type_tex: { 94 nir_tex_instr *t = nir_instr_as_tex(instr); 95 if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF) 96 sh_info().uses_tex_buffers = true; 97 if (t->op == nir_texop_txs && 98 t->sampler_dim == GLSL_SAMPLER_DIM_CUBE && 99 t->is_array) 100 sh_info().has_txq_cube_array_z_comp = true; 101 break; 102 } 103 case nir_instr_type_intrinsic: { 104 auto *i = nir_instr_as_intrinsic(instr); 105 switch (i->intrinsic) { 106 case nir_intrinsic_ssbo_atomic_add: 107 case nir_intrinsic_image_atomic_add: 108 case nir_intrinsic_ssbo_atomic_and: 109 case nir_intrinsic_image_atomic_and: 110 case nir_intrinsic_ssbo_atomic_or: 111 case nir_intrinsic_image_atomic_or: 112 case nir_intrinsic_ssbo_atomic_imin: 113 case nir_intrinsic_image_atomic_imin: 114 case nir_intrinsic_ssbo_atomic_imax: 115 case nir_intrinsic_image_atomic_imax: 116 case nir_intrinsic_ssbo_atomic_umin: 117 case nir_intrinsic_image_atomic_umin: 118 case nir_intrinsic_ssbo_atomic_umax: 119 case nir_intrinsic_image_atomic_umax: 120 case nir_intrinsic_ssbo_atomic_xor: 121 case nir_intrinsic_image_atomic_xor: 122 case nir_intrinsic_ssbo_atomic_exchange: 123 case nir_intrinsic_image_atomic_exchange: 124 case nir_intrinsic_image_atomic_comp_swap: 125 case nir_intrinsic_ssbo_atomic_comp_swap: 126 m_sel.info.writes_memory = 1; 127 FALLTHROUGH; 128 case nir_intrinsic_image_load: 129 m_ssbo_instr.set_require_rat_return_address(); 130 break; 131 case nir_intrinsic_image_size: { 132 if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE && 133 nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2) 134 sh_info().has_txq_cube_array_z_comp = true; 135 } 136 137 138 139 default: 140 ; 141 } 142 143 144 } 145 default: 146 ; 147 } 148 149 return scan_sysvalue_access(instr); 150} 151 152enum chip_class ShaderFromNirProcessor::get_chip_class(void) const 153{ 154 return m_chip_class; 155} 156 157bool ShaderFromNirProcessor::allocate_reserved_registers() 158{ 159 bool retval = do_allocate_reserved_registers(); 160 m_ssbo_instr.load_rat_return_address(); 161 if (sh_info().uses_atomics) 162 m_ssbo_instr.load_atomic_inc_limits(); 163 m_ssbo_instr.set_ssbo_offset(m_image_count); 164 return retval; 165} 166 167static void remap_shader_info(r600_shader& sh_info, 168 std::vector<rename_reg_pair>& map, 169 UNUSED ValueMap& values) 170{ 171 for (unsigned i = 0; i < sh_info.num_arrays; ++i) { 172 auto new_index = map[sh_info.arrays[i].gpr_start]; 173 if (new_index.valid) 174 sh_info.arrays[i].gpr_start = new_index.new_reg; 175 map[sh_info.arrays[i].gpr_start].used = true; 176 } 177 178 for (unsigned i = 0; i < sh_info.ninput; ++i) { 179 sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr 180 << " of map.size()\n"; 181 182 assert(sh_info.input[i].gpr < map.size()); 183 auto new_index = map[sh_info.input[i].gpr]; 184 if (new_index.valid) 185 sh_info.input[i].gpr = new_index.new_reg; 186 map[sh_info.input[i].gpr].used = true; 187 } 188 189 for (unsigned i = 0; i < sh_info.noutput; ++i) { 190 assert(sh_info.output[i].gpr < map.size()); 191 auto new_index = map[sh_info.output[i].gpr]; 192 if (new_index.valid) 193 sh_info.output[i].gpr = new_index.new_reg; 194 map[sh_info.output[i].gpr].used = true; 195 } 196} 197 198void ShaderFromNirProcessor::remap_registers() 199{ 200 // register renumbering 201 auto rc = register_count(); 202 if (!rc) 203 return; 204 205 std::vector<register_live_range> register_live_ranges(rc); 206 207 auto temp_register_map = get_temp_registers(); 208 209 Shader sh{m_output, temp_register_map}; 210 LiverangeEvaluator().run(sh, register_live_ranges); 211 auto register_map = get_temp_registers_remapping(register_live_ranges); 212 213 sfn_log << SfnLog::merge << "=========Mapping===========\n"; 214 for (size_t i = 0; i < register_map.size(); ++i) 215 if (register_map[i].valid) 216 sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n"; 217 218 ValueRemapper vmap0(register_map, temp_register_map); 219 for (auto& block: m_output) 220 block.remap_registers(vmap0); 221 222 remap_shader_info(m_sh_info, register_map, temp_register_map); 223 224 /* Mark inputs as used registers, these registers should no be remapped */ 225 for (auto& v: sh.m_temp) { 226 if (v.second->type() == Value::gpr) { 227 const auto& g = static_cast<const GPRValue&>(*v.second); 228 if (g.is_input()) 229 register_map[g.sel()].used = true; 230 } 231 } 232 233 int new_index = 0; 234 for (auto& i : register_map) { 235 i.valid = i.used; 236 if (i.used) 237 i.new_reg = new_index++; 238 } 239 240 ValueRemapper vmap1(register_map, temp_register_map); 241 for (auto& ir: m_output) 242 ir.remap_registers(vmap1); 243 244 remap_shader_info(m_sh_info, register_map, temp_register_map); 245} 246 247bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) 248{ 249 // m_uniform_type_map 250 m_uniform_type_map[uniform->data.location] = uniform->type; 251 252 if (uniform->type->contains_atomic()) { 253 int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; 254 sh_info().nhwatomic += natomics; 255 256 if (uniform->type->is_array()) 257 sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; 258 259 sh_info().uses_atomics = 1; 260 261 struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges]; 262 ++sh_info().nhwatomic_ranges; 263 atom.buffer_id = uniform->data.binding; 264 atom.hw_idx = m_atomic_base + m_next_hwatomic_loc; 265 266 atom.start = uniform->data.offset >> 2; 267 atom.end = atom.start + natomics - 1; 268 269 if (m_atomic_base_map.find(uniform->data.binding) == 270 m_atomic_base_map.end()) 271 m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc; 272 273 m_next_hwatomic_loc += natomics; 274 275 m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1; 276 277 sfn_log << SfnLog::io << "HW_ATOMIC file count: " 278 << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n"; 279 } 280 281 auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type; 282 if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { 283 sh_info().uses_images = 1; 284 if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo)) 285 sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE; 286 } 287 288 return true; 289} 290 291void ShaderFromNirProcessor::set_shader_info(const nir_shader *sh) 292{ 293 m_image_count = sh->info.num_images; 294 do_set_shader_info(sh); 295} 296 297void ShaderFromNirProcessor::do_set_shader_info(const nir_shader *sh) 298{ 299 (void)sh; 300} 301 302bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh) 303{ 304 return true; 305} 306 307void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr) 308{ 309 auto& dest = instr->dest; 310 unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index; 311 assert(util_bitcount(instr->modes) == 1); 312 m_var_mode[instr->var] = instr->modes; 313 m_var_derefs[index] = instr->var; 314 315 sfn_log << SfnLog::io << "Add var deref:" << index 316 << " with DDL:" << instr->var->data.driver_location << "\n"; 317} 318 319void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io) 320{ 321 switch (io.name) { 322 case TGSI_SEMANTIC_POSITION: 323 case TGSI_SEMANTIC_PSIZE: 324 case TGSI_SEMANTIC_EDGEFLAG: 325 case TGSI_SEMANTIC_FACE: 326 case TGSI_SEMANTIC_SAMPLEMASK: 327 case TGSI_SEMANTIC_CLIPVERTEX: 328 io.spi_sid = 0; 329 break; 330 case TGSI_SEMANTIC_GENERIC: 331 case TGSI_SEMANTIC_TEXCOORD: 332 case TGSI_SEMANTIC_PCOORD: 333 io.spi_sid = io.sid + 1; 334 break; 335 default: 336 /* For non-generic params - pack name and sid into 8 bits */ 337 io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1; 338 } 339} 340 341const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const 342{ 343 unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index; 344 345 sfn_log << SfnLog::io << "Search for deref:" << index << "\n"; 346 347 auto v = m_var_derefs.find(index); 348 if (v != m_var_derefs.end()) 349 return v->second; 350 351 fprintf(stderr, "R600: could not find deref with index %d\n", index); 352 353 return nullptr; 354 355 /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr); 356 return nir_deref_instr_get_variable(deref); */ 357} 358 359bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) 360{ 361 return m_tex_instr.emit(instr); 362} 363 364void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir) 365{ 366 if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) { 367 for (unsigned i = 0; i < ir->n_sources(); ++i) { 368 auto& s = ir->src(i); 369 if (s.type() == Value::kconst) { 370 auto& c = static_cast<UniformValue&>(s); 371 if (c.addr()) { 372 last_emitted_alu->set_flag(alu_last_instr); 373 break; 374 } 375 } 376 } 377 } 378 last_emitted_alu = ir; 379 emit_instruction_internal(ir); 380} 381 382 383void ShaderFromNirProcessor::emit_instruction(Instruction *ir) 384{ 385 386 emit_instruction_internal(ir); 387 last_emitted_alu = nullptr; 388} 389 390void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir) 391{ 392 if (m_pending_else) { 393 append_block(-1); 394 m_output.back().emit(PInstruction(m_pending_else)); 395 append_block(1); 396 m_pending_else = nullptr; 397 } 398 399 r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; 400 if (m_output.empty()) 401 append_block(0); 402 403 m_output.back().emit(Instruction::Pointer(ir)); 404} 405 406void ShaderFromNirProcessor::emit_shader_start() 407{ 408 /* placeholder, may become an abstract method */ 409 m_ssbo_instr.set_ssbo_offset(m_image_count); 410} 411 412bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr) 413{ 414 switch (instr->type) { 415 case nir_jump_break: { 416 auto b = new LoopBreakInstruction(); 417 emit_instruction(b); 418 return true; 419 } 420 case nir_jump_continue: { 421 auto b = new LoopContInstruction(); 422 emit_instruction(b); 423 return true; 424 } 425 default: { 426 nir_instr *i = reinterpret_cast<nir_instr*>(instr); 427 sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n"; 428 return false; 429 } 430 } 431 return true; 432} 433 434bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr) 435{ 436 return m_alu_instr.emit(instr); 437} 438 439bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr) 440{ 441 return false; 442} 443 444bool ShaderFromNirProcessor::emit_loop_start(int loop_id) 445{ 446 LoopBeginInstruction *loop = new LoopBeginInstruction(); 447 emit_instruction(loop); 448 m_loop_begin_block_map[loop_id] = loop; 449 append_block(1); 450 return true; 451} 452bool ShaderFromNirProcessor::emit_loop_end(int loop_id) 453{ 454 auto start = m_loop_begin_block_map.find(loop_id); 455 if (start == m_loop_begin_block_map.end()) { 456 sfn_log << SfnLog::err << "End loop: Loop start for " 457 << loop_id << " not found\n"; 458 return false; 459 } 460 m_nesting_depth--; 461 m_block_number++; 462 m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number)); 463 LoopEndInstruction *loop = new LoopEndInstruction(start->second); 464 emit_instruction(loop); 465 466 m_loop_begin_block_map.erase(start); 467 return true; 468} 469 470bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt) 471{ 472 473 auto value = from_nir(if_stmt->condition, 0, 0); 474 AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)), 475 value, Value::zero, EmitInstruction::last); 476 pred->set_flag(alu_update_exec); 477 pred->set_flag(alu_update_pred); 478 pred->set_cf_type(cf_alu_push_before); 479 480 append_block(1); 481 482 IfInstruction *ir = new IfInstruction(pred); 483 emit_instruction(ir); 484 assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end()); 485 m_if_block_start_map[if_id] = ir; 486 return true; 487} 488 489bool ShaderFromNirProcessor::emit_else_start(int if_id) 490{ 491 auto iif = m_if_block_start_map.find(if_id); 492 if (iif == m_if_block_start_map.end()) { 493 std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n"; 494 return false; 495 } 496 497 if (iif->second->type() != Instruction::cond_if) { 498 std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n"; 499 return false; 500 } 501 IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second); 502 ElseInstruction *ir = new ElseInstruction(if_instr); 503 m_if_block_start_map[if_id] = ir; 504 m_pending_else = ir; 505 506 return true; 507} 508 509bool ShaderFromNirProcessor::emit_ifelse_end(int if_id) 510{ 511 auto ifelse = m_if_block_start_map.find(if_id); 512 if (ifelse == m_if_block_start_map.end()) { 513 std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n"; 514 return false; 515 } 516 517 if (ifelse->second->type() != Instruction::cond_if && 518 ifelse->second->type() != Instruction::cond_else) { 519 std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n"; 520 return false; 521 } 522 /* Clear pending else, if the else branch was empty, non will be emitted */ 523 524 m_pending_else = nullptr; 525 526 append_block(-1); 527 IfElseEndInstruction *ir = new IfElseEndInstruction(); 528 emit_instruction(ir); 529 530 return true; 531} 532 533bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset) 534{ 535 PValue src = get_temp_register(); 536 emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr})); 537 538 GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); 539 emit_instruction(new FetchTCSIOParam(dest, src, offset)); 540 541 return true; 542 543} 544 545bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr) 546{ 547 auto address = varvec_from_nir(instr->src[0], instr->num_components); 548 auto dest_value = varvec_from_nir(instr->dest, instr->num_components); 549 550 emit_instruction(new LDSReadInstruction(address, dest_value)); 551 return true; 552} 553 554static unsigned 555lds_op_from_intrinsic(nir_intrinsic_op op) { 556 switch (op) { 557 case nir_intrinsic_shared_atomic_add: 558 return LDS_OP2_LDS_ADD_RET; 559 case nir_intrinsic_shared_atomic_and: 560 return LDS_OP2_LDS_AND_RET; 561 case nir_intrinsic_shared_atomic_or: 562 return LDS_OP2_LDS_OR_RET; 563 case nir_intrinsic_shared_atomic_imax: 564 return LDS_OP2_LDS_MAX_INT_RET; 565 case nir_intrinsic_shared_atomic_umax: 566 return LDS_OP2_LDS_MAX_UINT_RET; 567 case nir_intrinsic_shared_atomic_imin: 568 return LDS_OP2_LDS_MIN_INT_RET; 569 case nir_intrinsic_shared_atomic_umin: 570 return LDS_OP2_LDS_MIN_UINT_RET; 571 case nir_intrinsic_shared_atomic_xor: 572 return LDS_OP2_LDS_XOR_RET; 573 case nir_intrinsic_shared_atomic_exchange: 574 return LDS_OP2_LDS_XCHG_RET; 575 case nir_intrinsic_shared_atomic_comp_swap: 576 return LDS_OP3_LDS_CMP_XCHG_RET; 577 default: 578 unreachable("Unsupported shared atomic opcode"); 579 } 580} 581 582bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr) 583{ 584 auto address = from_nir(instr->src[0], 0); 585 auto dest_value = from_nir(instr->dest, 0); 586 auto value = from_nir(instr->src[1], 0); 587 auto op = lds_op_from_intrinsic(instr->intrinsic); 588 589 if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) { 590 auto value2 = from_nir(instr->src[2], 0); 591 emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op)); 592 } else { 593 emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op)); 594 } 595 return true; 596} 597 598 599bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr) 600{ 601 unsigned write_mask = nir_intrinsic_write_mask(instr); 602 603 auto address = from_nir(instr->src[1], 0); 604 int swizzle_base = (write_mask & 0x3) ? 0 : 2; 605 write_mask |= write_mask >> 2; 606 607 auto value = from_nir(instr->src[0], swizzle_base); 608 if (!(write_mask & 2)) { 609 emit_instruction(new LDSWriteInstruction(address, 0, value)); 610 } else { 611 auto value1 = from_nir(instr->src[0], swizzle_base + 1); 612 emit_instruction(new LDSWriteInstruction(address, 0, value, value1)); 613 } 614 615 return true; 616} 617 618bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr) 619{ 620 r600::sfn_log << SfnLog::instr << "emit '" 621 << *reinterpret_cast<nir_instr*>(instr) 622 << "' (" << __func__ << ")\n"; 623 624 if (emit_intrinsic_instruction_override(instr)) 625 return true; 626 627 if (m_ssbo_instr.emit(&instr->instr)) { 628 m_sel.info.writes_memory = true; 629 return true; 630 } 631 632 switch (instr->intrinsic) { 633 case nir_intrinsic_load_deref: { 634 auto var = get_deref_location(instr->src[0]); 635 if (!var) 636 return false; 637 auto mode_helper = m_var_mode.find(var); 638 if (mode_helper == m_var_mode.end()) { 639 cerr << "r600-nir: variable '" << var->name << "' not found\n"; 640 return false; 641 } 642 switch (mode_helper->second) { 643 case nir_var_function_temp: 644 return emit_load_function_temp(var, instr); 645 default: 646 cerr << "r600-nir: Unsupported mode" << mode_helper->second 647 << "for src variable\n"; 648 return false; 649 } 650 } 651 case nir_intrinsic_store_scratch: 652 return emit_store_scratch(instr); 653 case nir_intrinsic_load_scratch: 654 return emit_load_scratch(instr); 655 case nir_intrinsic_load_uniform: 656 return load_uniform(instr); 657 case nir_intrinsic_discard: 658 case nir_intrinsic_discard_if: 659 return emit_discard_if(instr); 660 case nir_intrinsic_load_ubo_vec4: 661 return emit_load_ubo_vec4(instr); 662 case nir_intrinsic_load_tcs_in_param_base_r600: 663 return emit_load_tcs_param_base(instr, 0); 664 case nir_intrinsic_load_tcs_out_param_base_r600: 665 return emit_load_tcs_param_base(instr, 16); 666 case nir_intrinsic_load_local_shared_r600: 667 case nir_intrinsic_load_shared: 668 return emit_load_local_shared(instr); 669 case nir_intrinsic_store_local_shared_r600: 670 case nir_intrinsic_store_shared: 671 return emit_store_local_shared(instr); 672 case nir_intrinsic_control_barrier: 673 case nir_intrinsic_memory_barrier_tcs_patch: 674 case nir_intrinsic_memory_barrier_shared: 675 case nir_intrinsic_memory_barrier_buffer: 676 case nir_intrinsic_memory_barrier: 677 case nir_intrinsic_memory_barrier_image: 678 case nir_intrinsic_group_memory_barrier: 679 return emit_barrier(instr); 680 case nir_intrinsic_memory_barrier_atomic_counter: 681 return true; 682 case nir_intrinsic_shared_atomic_add: 683 case nir_intrinsic_shared_atomic_and: 684 case nir_intrinsic_shared_atomic_or: 685 case nir_intrinsic_shared_atomic_imax: 686 case nir_intrinsic_shared_atomic_umax: 687 case nir_intrinsic_shared_atomic_imin: 688 case nir_intrinsic_shared_atomic_umin: 689 case nir_intrinsic_shared_atomic_xor: 690 case nir_intrinsic_shared_atomic_exchange: 691 case nir_intrinsic_shared_atomic_comp_swap: 692 return emit_atomic_local_shared(instr); 693 case nir_intrinsic_shader_clock: 694 return emit_shader_clock(instr); 695 case nir_intrinsic_copy_deref: 696 case nir_intrinsic_load_constant: 697 case nir_intrinsic_load_input: 698 case nir_intrinsic_store_output: 699 700 default: 701 fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic); 702 return false; 703 } 704 return false; 705} 706 707bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr) 708{ 709 return false; 710} 711 712bool 713ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr) 714{ 715 return false; 716} 717 718bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr) 719{ 720 AluInstruction *ir = new AluInstruction(op0_group_barrier); 721 ir->set_flag(alu_last_instr); 722 emit_instruction(ir); 723 return true; 724} 725 726 727bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last) 728{ 729 if (!dest.is_ssa) { 730 auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write}); 731 if (as_last) 732 ir->set_flag(alu_last_instr); 733 emit_instruction(ir); 734 } else { 735 inject_register(dest.ssa.index, chan, value, true); 736 } 737 return true; 738} 739 740bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr) 741{ 742 PValue address = from_nir(instr->src[1], 0, 0); 743 744 auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1, 745 swizzle_from_comps(instr->num_components)); 746 747 int writemask = nir_intrinsic_write_mask(instr); 748 int align = nir_intrinsic_align_mul(instr); 749 int align_offset = nir_intrinsic_align_offset(instr); 750 751 WriteScratchInstruction *ir = nullptr; 752 if (address->type() == Value::literal) { 753 const auto& lv = static_cast<const LiteralValue&>(*address); 754 ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask); 755 } else { 756 address = from_nir_with_fetch_constant(instr->src[1], 0); 757 ir = new WriteScratchInstruction(address, value, align, align_offset, 758 writemask, m_scratch_size); 759 } 760 emit_instruction(ir); 761 sh_info().needs_scratch_space = 1; 762 return true; 763} 764 765bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr) 766{ 767 PValue address = from_nir_with_fetch_constant(instr->src[0], 0); 768 std::array<PValue, 4> dst_val; 769 for (int i = 0; i < 4; ++i) 770 dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7); 771 772 GPRVector dst(dst_val); 773 auto ir = new LoadFromScratch(dst, address, m_scratch_size); 774 ir->prelude_append(new WaitAck(0)); 775 emit_instruction(ir); 776 sh_info().needs_scratch_space = 1; 777 return true; 778} 779 780bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr) 781{ 782 emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0), 783 PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write)); 784 emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1), 785 PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write)); 786 return true; 787} 788 789GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, 790 unsigned mask, 791 const GPRVector::Swizzle& swizzle, 792 bool match) 793{ 794 bool use_same = true; 795 GPRVector::Values v; 796 797 std::array<bool,4> used_swizzles = {false, false, false, false}; 798 799 /* Check whether all sources come from a GPR, and, 800 * if requested, whether they are swizzled as expected */ 801 802 for (int i = 0; i < 4 && use_same; ++i) { 803 if ((1 << i) & mask) { 804 if (swizzle[i] < 4) { 805 v[i] = from_nir(src, swizzle[i]); 806 assert(v[i]); 807 use_same &= (v[i]->type() == Value::gpr); 808 if (match) { 809 use_same &= (v[i]->chan() == swizzle[i]); 810 } 811 used_swizzles[v[i]->chan()] = true; 812 } 813 } 814 } 815 816 817 /* Now check whether all inputs come from the same GPR, and fill 818 * empty slots in the vector with unused swizzles, bail out if 819 * the sources are not from the same GPR 820 */ 821 822 if (use_same) { 823 int next_free_swizzle = 0; 824 while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4) 825 next_free_swizzle++; 826 827 /* Find the first GPR index used */ 828 int i = 0; 829 while (!v[i] && i < 4) ++i; 830 assert(i < 4); 831 unsigned sel = v[i]->sel(); 832 833 834 for (i = 0; i < 4 && use_same; ++i) { 835 if (!v[i]) { 836 if (swizzle[i] >= 4) 837 v[i] = PValue(new GPRValue(sel, swizzle[i])); 838 else { 839 assert(next_free_swizzle < 4); 840 v[i] = PValue(new GPRValue(sel, next_free_swizzle)); 841 used_swizzles[next_free_swizzle] = true; 842 while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle]) 843 next_free_swizzle++; 844 } 845 } 846 else 847 use_same &= v[i]->sel() == sel; 848 } 849 } 850 851 /* We can't re-use the source data because they either need re-swizzling, or 852 * they didn't come all from a GPR or the same GPR, so copy to a new vector 853 */ 854 if (!use_same) { 855 AluInstruction *ir = nullptr; 856 GPRVector result = get_temp_vec4(swizzle); 857 for (int i = 0; i < 4; ++i) { 858 if (swizzle[i] < 4 && (mask & (1 << i))) { 859 ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]), 860 EmitInstruction::write); 861 emit_instruction(ir); 862 } 863 } 864 if (ir) 865 ir->set_flag(alu_last_instr); 866 return result; 867 } else 868 return GPRVector(v);; 869} 870 871bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) 872{ 873 auto bufid = nir_src_as_const_value(instr->src[0]); 874 auto buf_offset = nir_src_as_const_value(instr->src[1]); 875 876 if (!buf_offset) { 877 /* TODO: if buf_offset is constant then this can also be solved by using the CF indes 878 * on the ALU block, and this would probably make sense when there are more then one 879 * loads with the same buffer ID. */ 880 881 PValue addr = from_nir_with_fetch_constant(instr->src[1], 0); 882 GPRVector trgt; 883 std::array<int, 4> swz = {7,7,7,7}; 884 for (unsigned i = 0; i < 4; ++i) { 885 if (i < nir_dest_num_components(instr->dest)) { 886 trgt.set_reg_i(i, from_nir(instr->dest, i)); 887 swz[i] = i + nir_intrinsic_component(instr); 888 } else { 889 trgt.set_reg_i(i, from_nir(instr->dest, 7)); 890 } 891 } 892 893 FetchInstruction *ir; 894 if (bufid) { 895 ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, 896 1 + bufid->u32, nullptr, bim_none); 897 } else { 898 PValue bufid = from_nir(instr->src[0], 0, 0); 899 ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, 900 1, bufid, bim_zero); 901 } 902 ir->set_dest_swizzle(swz); 903 emit_instruction(ir); 904 m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; 905 return true; 906 } 907 908 909 if (bufid) { 910 int buf_cmp = nir_intrinsic_component(instr); 911 AluInstruction *ir = nullptr; 912 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 913 int cmp = buf_cmp + i; 914 assert(cmp < 4); 915 auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1)); 916 if (instr->dest.is_ssa) 917 load_preloaded_value(instr->dest, i, u); 918 else { 919 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); 920 emit_instruction(ir); 921 } 922 } 923 if (ir) 924 ir->set_flag(alu_last_instr); 925 return true; 926 927 } else { 928 int buf_cmp = nir_intrinsic_component(instr); 929 AluInstruction *ir = nullptr; 930 auto kc_id = from_nir(instr->src[0], 0); 931 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 932 int cmp = buf_cmp + i; 933 auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id)); 934 if (instr->dest.is_ssa) 935 load_preloaded_value(instr->dest, i, u); 936 else { 937 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); 938 emit_instruction(ir); 939 } 940 } 941 if (ir) 942 ir->set_flag(alu_last_instr); 943 return true; 944 } 945} 946 947bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) 948{ 949 r600::sfn_log << SfnLog::instr << "emit '" 950 << *reinterpret_cast<nir_instr*>(instr) 951 << "' (" << __func__ << ")\n"; 952 953 if (instr->intrinsic == nir_intrinsic_discard_if) { 954 emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)), 955 {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr})); 956 957 } else { 958 emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)), 959 {Value::zero, Value::zero}, {alu_last_instr})); 960 } 961 m_sh_info.uses_kill = 1; 962 return true; 963} 964 965bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr) 966{ 967 r600::sfn_log << SfnLog::instr << __func__ << ": emit '" 968 << *reinterpret_cast<nir_instr*>(instr) 969 << "'\n"; 970 971 972 /* If the target register is a SSA register and the loading is not 973 * indirect then we can do lazy loading, i.e. the uniform value can 974 * be used directly. Otherwise we have to load the data for real 975 * rigt away. 976 */ 977 auto literal = nir_src_as_const_value(instr->src[0]); 978 int base = nir_intrinsic_base(instr); 979 980 if (literal) { 981 AluInstruction *ir = nullptr; 982 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 983 PValue u = PValue(new UniformValue(512 + literal->u32 + base, i)); 984 sfn_log << SfnLog::io << "uniform " 985 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; 986 987 if (instr->dest.is_ssa) 988 load_preloaded_value(instr->dest, i, u); 989 else { 990 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), 991 u, {alu_write}); 992 emit_instruction(ir); 993 } 994 } 995 if (ir) 996 ir->set_flag(alu_last_instr); 997 } else { 998 PValue addr = from_nir(instr->src[0], 0, 0); 999 return load_uniform_indirect(instr, addr, 16 * base, 0); 1000 } 1001 return true; 1002} 1003 1004bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid) 1005{ 1006 if (!addr) { 1007 std::cerr << "r600-nir: don't know how uniform is addressed\n"; 1008 return false; 1009 } 1010 1011 GPRVector trgt; 1012 std::array<int, 4> swz = {7,7,7,7}; 1013 for (int i = 0; i < 4; ++i) { 1014 trgt.set_reg_i(i, from_nir(instr->dest, i)); 1015 swz[i] = i; 1016 } 1017 1018 if (addr->type() != Value::gpr) { 1019 emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr}); 1020 addr = trgt.reg_i(0); 1021 } 1022 1023 auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest, 1024 bufferid, PValue(), bim_none); 1025 ir->set_dest_swizzle(swz); 1026 emit_instruction(ir); 1027 m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; 1028 return true; 1029} 1030 1031AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask) 1032{ 1033 AluInstruction *ir = nullptr; 1034 for (int i = 0; i < literal->def.num_components ; ++i) { 1035 if (writemask & (1 << i)){ 1036 PValue lsrc; 1037 switch (literal->def.bit_size) { 1038 1039 case 1: 1040 sfn_log << SfnLog::reg << "Got literal of bit size 1\n"; 1041 lsrc = literal->value[i].b ? 1042 PValue(new LiteralValue( 0xffffffff, i)) : 1043 Value::zero; 1044 break; 1045 case 32: 1046 sfn_log << SfnLog::reg << "Got literal of bit size 32\n"; 1047 if (literal->value[i].u32 == 0) 1048 lsrc = Value::zero; 1049 else if (literal->value[i].u32 == 1) 1050 lsrc = Value::one_i; 1051 else if (literal->value[i].f32 == 1.0f) 1052 lsrc = Value::one_f; 1053 else if (literal->value[i].f32 == 0.5f) 1054 lsrc = Value::zero_dot_5; 1055 else 1056 lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); 1057 break; 1058 default: 1059 sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size 1060 << " falling back to 32 bit\n"; 1061 lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); 1062 } 1063 ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write); 1064 1065 emit_instruction(ir); 1066 } 1067 } 1068 return ir; 1069} 1070 1071PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) 1072{ 1073 PValue value = from_nir(src, component); 1074 if (value->type() != Value::gpr && 1075 value->type() != Value::gpr_vector && 1076 value->type() != Value::gpr_array_value) { 1077 PValue retval = get_temp_register(channel); 1078 emit_instruction(new AluInstruction(op1_mov, retval, value, 1079 EmitInstruction::last_write)); 1080 value = retval; 1081 } 1082 return value; 1083} 1084 1085bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr) 1086{ 1087 r600::sfn_log << SfnLog::instr << __func__ << ": emit '" 1088 << *reinterpret_cast<nir_instr*>(instr) 1089 << "'\n"; 1090 1091 /* Give the specific shader type a chance to process this, i.e. Geometry and 1092 * tesselation shaders need specialized deref_array, for the other shaders 1093 * it is lowered. 1094 */ 1095 if (emit_deref_instruction_override(instr)) 1096 return true; 1097 1098 switch (instr->deref_type) { 1099 case nir_deref_type_var: 1100 set_var_address(instr); 1101 return true; 1102 case nir_deref_type_array: 1103 case nir_deref_type_array_wildcard: 1104 case nir_deref_type_struct: 1105 case nir_deref_type_cast: 1106 default: 1107 fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type); 1108 } 1109 return false; 1110} 1111 1112bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest, 1113 std::vector<PValue> srcs, 1114 const std::set<AluModifiers>& m_flags) 1115{ 1116 AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags); 1117 emit_instruction(ir); 1118 return true; 1119} 1120 1121void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr) 1122{ 1123 m_output_register_map[loc] = gpr; 1124} 1125 1126void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir) 1127{ 1128 r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; 1129 m_export_output.emit(PInstruction(ir)); 1130} 1131 1132const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const 1133{ 1134 const GPRVector *retval = nullptr; 1135 auto val = m_output_register_map.find(location); 1136 if (val != m_output_register_map.end()) 1137 retval = val->second; 1138 return retval; 1139} 1140 1141void ShaderFromNirProcessor::set_input(unsigned pos, PValue var) 1142{ 1143 r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n"; 1144 m_inputs[pos] = var; 1145} 1146 1147void ShaderFromNirProcessor::set_output(unsigned pos, int sel) 1148{ 1149 r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n"; 1150 m_outputs[pos] = sel; 1151} 1152 1153void ShaderFromNirProcessor::append_block(int nesting_change) 1154{ 1155 m_nesting_depth += nesting_change; 1156 m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++)); 1157} 1158 1159void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const 1160{ 1161 shader.num_arrays = m_reg_arrays.size(); 1162 if (shader.num_arrays) { 1163 shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array)); 1164 for (unsigned i = 0; i < shader.num_arrays; ++i) { 1165 shader.arrays[i].comp_mask = m_reg_arrays[i]->mask(); 1166 shader.arrays[i].gpr_start = m_reg_arrays[i]->sel(); 1167 shader.arrays[i].gpr_count = m_reg_arrays[i]->size(); 1168 } 1169 shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY); 1170 } 1171} 1172 1173void ShaderFromNirProcessor::finalize() 1174{ 1175 do_finalize(); 1176 1177 for (auto& i : m_inputs) 1178 m_sh_info.input[i.first].gpr = i.second->sel(); 1179 1180 for (auto& i : m_outputs) 1181 m_sh_info.output[i.first].gpr = i.second; 1182 1183 m_output.push_back(m_export_output); 1184} 1185 1186} 1187