1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#include "sb_bc.h" 28#include "sb_shader.h" 29#include "sb_pass.h" 30 31namespace r600_sb { 32 33bc_builder::bc_builder(shader &s) 34 : sh(s), ctx(s.get_ctx()), bb(ctx.hw_class_bit()), error(0) {} 35 36int bc_builder::build() { 37 38 container_node *root = sh.root; 39 int cf_cnt = 0; 40 41 // FIXME reserve total size to avoid reallocs 42 43 for (node_iterator it = root->begin(), end = root->end(); 44 it != end; ++it) { 45 46 cf_node *cf = static_cast<cf_node*>(*it); 47 assert(cf->is_cf_inst() || cf->is_alu_clause() || cf->is_fetch_clause()); 48 49 cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; 50 51 cf->bc.id = cf_cnt++; 52 53 if (flags & CF_ALU) { 54 if (cf->bc.is_alu_extended()) 55 cf_cnt++; 56 } 57 } 58 59 bb.set_size(cf_cnt << 1); 60 bb.seek(cf_cnt << 1); 61 62 unsigned cf_pos = 0; 63 64 for (node_iterator I = root->begin(), end = root->end(); 65 I != end; ++I) { 66 67 cf_node *cf = static_cast<cf_node*>(*I); 68 cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; 69 70 if (flags & CF_ALU) { 71 bb.seek(bb.ndw()); 72 cf->bc.addr = bb.ndw() >> 1; 73 build_alu_clause(cf); 74 cf->bc.count = (bb.ndw() >> 1) - cf->bc.addr - 1; 75 } else if (flags & CF_FETCH) { 76 bb.align(4); 77 bb.seek(bb.ndw()); 78 cf->bc.addr = bb.ndw() >> 1; 79 build_fetch_clause(cf); 80 cf->bc.count = (((bb.ndw() >> 1) - cf->bc.addr) >> 1) - 1; 81 } else if (cf->jump_target) { 82 cf->bc.addr = cf->jump_target->bc.id; 83 if (cf->jump_after_target) 84 cf->bc.addr += 1; 85 } 86 87 bb.seek(cf_pos); 88 build_cf(cf); 89 cf_pos = bb.get_pos(); 90 } 91 92 return 0; 93} 94 95int bc_builder::build_alu_clause(cf_node* n) { 96 for (node_iterator I = n->begin(), E = n->end(); 97 I != E; ++I) { 98 99 alu_group_node *g = static_cast<alu_group_node*>(*I); 100 assert(g->is_valid()); 101 102 build_alu_group(g); 103 } 104 return 0; 105} 106 107int bc_builder::build_alu_group(alu_group_node* n) { 108 109 for (node_iterator I = n->begin(), E = n->end(); 110 I != E; ++I) { 111 112 alu_node *a = static_cast<alu_node*>(*I); 113 assert(a->is_valid()); 114 build_alu(a); 115 } 116 117 for(int i = 0, ls = n->literals.size(); i < ls; ++i) { 118 bb << n->literals.at(i).u; 119 } 120 121 bb.align(2); 122 bb.seek(bb.ndw()); 123 124 return 0; 125} 126 127int bc_builder::build_fetch_clause(cf_node* n) { 128 for (node_iterator I = n->begin(), E = n->end(); 129 I != E; ++I) { 130 fetch_node *f = static_cast<fetch_node*>(*I); 131 132 if (f->bc.op_ptr->flags & FF_GDS) 133 build_fetch_gds(f); 134 else if (f->bc.op_ptr->flags & FF_MEM) 135 build_fetch_mem(f); 136 else if (f->bc.op_ptr->flags & FF_VTX) 137 build_fetch_vtx(f); 138 else 139 build_fetch_tex(f); 140 } 141 return 0; 142} 143 144 145int bc_builder::build_cf(cf_node* n) { 146 const bc_cf &bc = n->bc; 147 const cf_op_info *cfop = bc.op_ptr; 148 149 if (cfop->flags & CF_ALU) 150 return build_cf_alu(n); 151 if (cfop->flags & (CF_EXP | CF_MEM)) 152 return build_cf_exp(n); 153 154 if (ctx.is_egcm()) { 155 bb << CF_WORD0_EGCM() 156 .ADDR(bc.addr) 157 .JUMPTABLE_SEL(bc.jumptable_sel); 158 159 if (ctx.is_evergreen()) 160 161 bb << CF_WORD1_EG() 162 .BARRIER(bc.barrier) 163 .CF_CONST(bc.cf_const) 164 .CF_INST(ctx.cf_opcode(bc.op)) 165 .COND(bc.cond) 166 .COUNT(bc.count) 167 .END_OF_PROGRAM(bc.end_of_program) 168 .POP_COUNT(bc.pop_count) 169 .VALID_PIXEL_MODE(bc.valid_pixel_mode) 170 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 171 172 else //cayman 173 174 bb << CF_WORD1_CM() 175 .BARRIER(bc.barrier) 176 .CF_CONST(bc.cf_const) 177 .CF_INST(ctx.cf_opcode(bc.op)) 178 .COND(bc.cond) 179 .COUNT(bc.count) 180 .POP_COUNT(bc.pop_count) 181 .VALID_PIXEL_MODE(bc.valid_pixel_mode); 182 } else { 183 bb << CF_WORD0_R6R7() 184 .ADDR(bc.addr); 185 186 assert(bc.count < ctx.max_fetch); 187 188 bb << CF_WORD1_R6R7() 189 .BARRIER(bc.barrier) 190 .CALL_COUNT(bc.call_count) 191 .CF_CONST(bc.cf_const) 192 .CF_INST(ctx.cf_opcode(bc.op)) 193 .COND(bc.cond) 194 .COUNT(bc.count & 7) 195 .COUNT_3(bc.count >> 3) 196 .END_OF_PROGRAM(bc.end_of_program) 197 .POP_COUNT(bc.pop_count) 198 .VALID_PIXEL_MODE(bc.valid_pixel_mode) 199 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 200 } 201 202 return 0; 203} 204 205int bc_builder::build_cf_alu(cf_node* n) { 206 const bc_cf &bc = n->bc; 207 208 assert(bc.count < 128); 209 210 if (n->bc.is_alu_extended()) { 211 assert(ctx.is_egcm()); 212 213 bb << CF_ALU_WORD0_EXT_EGCM() 214 .KCACHE_BANK2(bc.kc[2].bank) 215 .KCACHE_BANK3(bc.kc[3].bank) 216 .KCACHE_BANK_INDEX_MODE0(bc.kc[0].index_mode) 217 .KCACHE_BANK_INDEX_MODE1(bc.kc[1].index_mode) 218 .KCACHE_BANK_INDEX_MODE2(bc.kc[2].index_mode) 219 .KCACHE_BANK_INDEX_MODE3(bc.kc[3].index_mode) 220 .KCACHE_MODE2(bc.kc[2].mode); 221 222 bb << CF_ALU_WORD1_EXT_EGCM() 223 .BARRIER(bc.barrier) 224 .CF_INST(ctx.cf_opcode(CF_OP_ALU_EXT)) 225 .KCACHE_ADDR2(bc.kc[2].addr) 226 .KCACHE_ADDR3(bc.kc[3].addr) 227 .KCACHE_MODE3(bc.kc[3].mode); 228 } 229 230 bb << CF_ALU_WORD0_ALL() 231 .ADDR(bc.addr) 232 .KCACHE_BANK0(bc.kc[0].bank) 233 .KCACHE_BANK1(bc.kc[1].bank) 234 .KCACHE_MODE0(bc.kc[0].mode); 235 236 assert(bc.count < 128); 237 238 if (ctx.is_r600()) 239 bb << CF_ALU_WORD1_R6() 240 .BARRIER(bc.barrier) 241 .CF_INST(ctx.cf_opcode(bc.op)) 242 .COUNT(bc.count) 243 .KCACHE_ADDR0(bc.kc[0].addr) 244 .KCACHE_ADDR1(bc.kc[1].addr) 245 .KCACHE_MODE1(bc.kc[1].mode) 246 .USES_WATERFALL(bc.uses_waterfall) 247 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 248 else 249 bb << CF_ALU_WORD1_R7EGCM() 250 .ALT_CONST(bc.alt_const) 251 .BARRIER(bc.barrier) 252 .CF_INST(ctx.cf_opcode(bc.op)) 253 .COUNT(bc.count) 254 .KCACHE_ADDR0(bc.kc[0].addr) 255 .KCACHE_ADDR1(bc.kc[1].addr) 256 .KCACHE_MODE1(bc.kc[1].mode) 257 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 258 259 return 0; 260} 261 262int bc_builder::build_cf_exp(cf_node* n) { 263 const bc_cf &bc = n->bc; 264 const cf_op_info *cfop = bc.op_ptr; 265 266 if (cfop->flags & CF_RAT) { 267 assert(ctx.is_egcm()); 268 269 bb << CF_ALLOC_EXPORT_WORD0_RAT_EGCM() 270 .ELEM_SIZE(bc.elem_size) 271 .INDEX_GPR(bc.index_gpr) 272 .RAT_ID(bc.rat_id) 273 .RAT_INDEX_MODE(bc.rat_index_mode) 274 .RAT_INST(bc.rat_inst) 275 .RW_GPR(bc.rw_gpr) 276 .RW_REL(bc.rw_rel) 277 .TYPE(bc.type); 278 } else { 279 280 bb << CF_ALLOC_EXPORT_WORD0_ALL() 281 .ARRAY_BASE(bc.array_base) 282 .ELEM_SIZE(bc.elem_size) 283 .INDEX_GPR(bc.index_gpr) 284 .RW_GPR(bc.rw_gpr) 285 .RW_REL(bc.rw_rel) 286 .TYPE(bc.type); 287 } 288 289 if (cfop->flags & CF_EXP) { 290 291 if (!ctx.is_egcm()) 292 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_R6R7() 293 .BARRIER(bc.barrier) 294 .BURST_COUNT(bc.burst_count) 295 .CF_INST(ctx.cf_opcode(bc.op)) 296 .END_OF_PROGRAM(bc.end_of_program) 297 .SEL_X(bc.sel[0]) 298 .SEL_Y(bc.sel[1]) 299 .SEL_Z(bc.sel[2]) 300 .SEL_W(bc.sel[3]) 301 .VALID_PIXEL_MODE(bc.valid_pixel_mode) 302 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 303 304 else if (ctx.is_evergreen()) 305 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_EG() 306 .BARRIER(bc.barrier) 307 .BURST_COUNT(bc.burst_count) 308 .CF_INST(ctx.cf_opcode(bc.op)) 309 .END_OF_PROGRAM(bc.end_of_program) 310 .MARK(bc.mark) 311 .SEL_X(bc.sel[0]) 312 .SEL_Y(bc.sel[1]) 313 .SEL_Z(bc.sel[2]) 314 .SEL_W(bc.sel[3]) 315 .VALID_PIXEL_MODE(bc.valid_pixel_mode); 316 317 else // cayman 318 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_CM() 319 .BARRIER(bc.barrier) 320 .BURST_COUNT(bc.burst_count) 321 .CF_INST(ctx.cf_opcode(bc.op)) 322 .MARK(bc.mark) 323 .SEL_X(bc.sel[0]) 324 .SEL_Y(bc.sel[1]) 325 .SEL_Z(bc.sel[2]) 326 .SEL_W(bc.sel[3]) 327 .VALID_PIXEL_MODE(bc.valid_pixel_mode); 328 329 } else if (cfop->flags & CF_MEM) { 330 return build_cf_mem(n); 331 } 332 333 return 0; 334} 335 336int bc_builder::build_cf_mem(cf_node* n) { 337 const bc_cf &bc = n->bc; 338 339 if (!ctx.is_egcm()) 340 bb << CF_ALLOC_EXPORT_WORD1_BUF_R6R7() 341 .ARR_SIZE(bc.array_size) 342 .BARRIER(bc.barrier) 343 .BURST_COUNT(bc.burst_count) 344 .CF_INST(ctx.cf_opcode(bc.op)) 345 .COMP_MASK(bc.comp_mask) 346 .END_OF_PROGRAM(bc.end_of_program) 347 .VALID_PIXEL_MODE(bc.valid_pixel_mode) 348 .WHOLE_QUAD_MODE(bc.whole_quad_mode); 349 350 else if (ctx.is_evergreen()) 351 bb << CF_ALLOC_EXPORT_WORD1_BUF_EG() 352 .ARR_SIZE(bc.array_size) 353 .BARRIER(bc.barrier) 354 .BURST_COUNT(bc.burst_count) 355 .CF_INST(ctx.cf_opcode(bc.op)) 356 .COMP_MASK(bc.comp_mask) 357 .END_OF_PROGRAM(bc.end_of_program) 358 .MARK(bc.mark) 359 .VALID_PIXEL_MODE(bc.valid_pixel_mode); 360 361 else // cayman 362 bb << CF_ALLOC_EXPORT_WORD1_BUF_CM() 363 .ARR_SIZE(bc.array_size) 364 .BARRIER(bc.barrier) 365 .BURST_COUNT(bc.burst_count) 366 .CF_INST(ctx.cf_opcode(bc.op)) 367 .COMP_MASK(bc.comp_mask) 368 .MARK(bc.mark) 369 .VALID_PIXEL_MODE(bc.valid_pixel_mode); 370 371 return 0; 372} 373 374int bc_builder::build_alu(alu_node* n) { 375 const bc_alu &bc = n->bc; 376 const alu_op_info *aop = bc.op_ptr; 377 378 if (n->bc.op_ptr->flags & AF_LDS) { 379 assert(ctx.is_egcm()); 380 bb << ALU_WORD0_LDS_IDX_OP_EGCM() 381 .SRC0_SEL(bc.src[0].sel) 382 .SRC0_REL(bc.src[0].rel) 383 .SRC0_CHAN(bc.src[0].chan) 384 .IDX_OFFSET_4((bc.lds_idx_offset >> 4) & 1) 385 .SRC1_SEL(bc.src[1].sel) 386 .SRC1_REL(bc.src[1].rel) 387 .SRC1_CHAN(bc.src[1].chan) 388 .IDX_OFFSET_5((bc.lds_idx_offset >> 5) & 1) 389 .INDEX_MODE(bc.index_mode) 390 .PRED_SEL(bc.pred_sel) 391 .LAST(bc.last); 392 393 bb << ALU_WORD1_LDS_IDX_OP_EGCM() 394 .SRC2_SEL(bc.src[2].sel) 395 .SRC2_REL(bc.src[2].rel) 396 .SRC2_CHAN(bc.src[2].chan) 397 .IDX_OFFSET_1((bc.lds_idx_offset >> 1) & 1) 398 .ALU_INST(ctx.alu_opcode(ALU_OP3_LDS_IDX_OP)) 399 .BANK_SWIZZLE(bc.bank_swizzle) 400 .LDS_OP((bc.op_ptr->opcode[1] >> 8) & 0xff) 401 .IDX_OFFSET_0((bc.lds_idx_offset >> 0) & 1) 402 .IDX_OFFSET_2((bc.lds_idx_offset >> 2) & 1) 403 .DST_CHAN(bc.dst_chan) 404 .IDX_OFFSET_3((bc.lds_idx_offset >> 3) & 1); 405 406 return 0; 407 } 408 409 bb << ALU_WORD0_ALL() 410 .INDEX_MODE(bc.index_mode) 411 .LAST(bc.last) 412 .PRED_SEL(bc.pred_sel) 413 .SRC0_SEL(bc.src[0].sel) 414 .SRC0_CHAN(bc.src[0].chan) 415 .SRC0_NEG(bc.src[0].neg) 416 .SRC0_REL(bc.src[0].rel) 417 .SRC1_SEL(bc.src[1].sel) 418 .SRC1_CHAN(bc.src[1].chan) 419 .SRC1_NEG(bc.src[1].neg) 420 .SRC1_REL(bc.src[1].rel); 421 422 if (aop->src_count<3) { 423 if (ctx.is_r600()) 424 bb << ALU_WORD1_OP2_R6() 425 .ALU_INST(ctx.alu_opcode(bc.op)) 426 .BANK_SWIZZLE(bc.bank_swizzle) 427 .CLAMP(bc.clamp) 428 .DST_GPR(bc.dst_gpr) 429 .DST_CHAN(bc.dst_chan) 430 .DST_REL(bc.dst_rel) 431 .FOG_MERGE(bc.fog_merge) 432 .OMOD(bc.omod) 433 .SRC0_ABS(bc.src[0].abs) 434 .SRC1_ABS(bc.src[1].abs) 435 .UPDATE_EXEC_MASK(bc.update_exec_mask) 436 .UPDATE_PRED(bc.update_pred) 437 .WRITE_MASK(bc.write_mask); 438 else { 439 440 if (ctx.is_cayman() && (aop->flags & AF_MOVA)) { 441 442 bb << ALU_WORD1_OP2_MOVA_CM() 443 .ALU_INST(ctx.alu_opcode(bc.op)) 444 .BANK_SWIZZLE(bc.bank_swizzle) 445 .CLAMP(bc.clamp) 446 .MOVA_DST(bc.dst_gpr) 447 .DST_CHAN(bc.dst_chan) 448 .DST_REL(bc.dst_rel) 449 .OMOD(bc.omod) 450 .UPDATE_EXEC_MASK(bc.update_exec_mask) 451 .UPDATE_PRED(bc.update_pred) 452 .WRITE_MASK(bc.write_mask) 453 .SRC0_ABS(bc.src[0].abs) 454 .SRC1_ABS(bc.src[1].abs); 455 456 } else if (ctx.is_cayman() && (aop->flags & (AF_PRED|AF_KILL))) { 457 bb << ALU_WORD1_OP2_EXEC_MASK_CM() 458 .ALU_INST(ctx.alu_opcode(bc.op)) 459 .BANK_SWIZZLE(bc.bank_swizzle) 460 .CLAMP(bc.clamp) 461 .DST_CHAN(bc.dst_chan) 462 .DST_REL(bc.dst_rel) 463 .EXECUTE_MASK_OP(bc.omod) 464 .UPDATE_EXEC_MASK(bc.update_exec_mask) 465 .UPDATE_PRED(bc.update_pred) 466 .WRITE_MASK(bc.write_mask) 467 .SRC0_ABS(bc.src[0].abs) 468 .SRC1_ABS(bc.src[1].abs); 469 470 } else 471 bb << ALU_WORD1_OP2_R7EGCM() 472 .ALU_INST(ctx.alu_opcode(bc.op)) 473 .BANK_SWIZZLE(bc.bank_swizzle) 474 .CLAMP(bc.clamp) 475 .DST_GPR(bc.dst_gpr) 476 .DST_CHAN(bc.dst_chan) 477 .DST_REL(bc.dst_rel) 478 .OMOD(bc.omod) 479 .UPDATE_EXEC_MASK(bc.update_exec_mask) 480 .UPDATE_PRED(bc.update_pred) 481 .WRITE_MASK(bc.write_mask) 482 .SRC0_ABS(bc.src[0].abs) 483 .SRC1_ABS(bc.src[1].abs); 484 485 } 486 } else 487 bb << ALU_WORD1_OP3_ALL() 488 .ALU_INST(ctx.alu_opcode(bc.op)) 489 .BANK_SWIZZLE(bc.bank_swizzle) 490 .CLAMP(bc.clamp) 491 .DST_GPR(bc.dst_gpr) 492 .DST_CHAN(bc.dst_chan) 493 .DST_REL(bc.dst_rel) 494 .SRC2_SEL(bc.src[2].sel) 495 .SRC2_CHAN(bc.src[2].chan) 496 .SRC2_NEG(bc.src[2].neg) 497 .SRC2_REL(bc.src[2].rel); 498 return 0; 499} 500 501int bc_builder::build_fetch_tex(fetch_node* n) { 502 const bc_fetch &bc = n->bc; 503 const fetch_op_info *fop = bc.op_ptr; 504 505 assert(!(fop->flags & FF_VTX)); 506 507 if (ctx.is_r600()) 508 bb << TEX_WORD0_R6() 509 .BC_FRAC_MODE(bc.bc_frac_mode) 510 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 511 .RESOURCE_ID(bc.resource_id) 512 .SRC_GPR(bc.src_gpr) 513 .SRC_REL(bc.src_rel) 514 .TEX_INST(ctx.fetch_opcode(bc.op)); 515 516 else if (ctx.is_r700()) 517 bb << TEX_WORD0_R7() 518 .ALT_CONST(bc.alt_const) 519 .BC_FRAC_MODE(bc.bc_frac_mode) 520 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 521 .RESOURCE_ID(bc.resource_id) 522 .SRC_GPR(bc.src_gpr) 523 .SRC_REL(bc.src_rel) 524 .TEX_INST(ctx.fetch_opcode(bc.op)); 525 526 else 527 bb << TEX_WORD0_EGCM() 528 .ALT_CONST(bc.alt_const) 529 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 530 .INST_MOD(bc.inst_mod) 531 .RESOURCE_ID(bc.resource_id) 532 .RESOURCE_INDEX_MODE(bc.resource_index_mode) 533 .SAMPLER_INDEX_MODE(bc.sampler_index_mode) 534 .SRC_GPR(bc.src_gpr) 535 .SRC_REL(bc.src_rel) 536 .TEX_INST(ctx.fetch_opcode(bc.op)); 537 538 bb << TEX_WORD1_ALL() 539 .COORD_TYPE_X(bc.coord_type[0]) 540 .COORD_TYPE_Y(bc.coord_type[1]) 541 .COORD_TYPE_Z(bc.coord_type[2]) 542 .COORD_TYPE_W(bc.coord_type[3]) 543 .DST_GPR(bc.dst_gpr) 544 .DST_REL(bc.dst_rel) 545 .DST_SEL_X(bc.dst_sel[0]) 546 .DST_SEL_Y(bc.dst_sel[1]) 547 .DST_SEL_Z(bc.dst_sel[2]) 548 .DST_SEL_W(bc.dst_sel[3]) 549 .LOD_BIAS(bc.lod_bias); 550 551 bb << TEX_WORD2_ALL() 552 .OFFSET_X(bc.offset[0]) 553 .OFFSET_Y(bc.offset[1]) 554 .OFFSET_Z(bc.offset[2]) 555 .SAMPLER_ID(bc.sampler_id) 556 .SRC_SEL_X(bc.src_sel[0]) 557 .SRC_SEL_Y(bc.src_sel[1]) 558 .SRC_SEL_Z(bc.src_sel[2]) 559 .SRC_SEL_W(bc.src_sel[3]); 560 561 bb << 0; 562 return 0; 563} 564 565int bc_builder::build_fetch_gds(fetch_node *n) { 566 const bc_fetch &bc = n->bc; 567 const fetch_op_info *fop = bc.op_ptr; 568 unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; 569 unsigned mem_op = 4; 570 assert(fop->flags & FF_GDS); 571 572 if (bc.op == FETCH_OP_TF_WRITE) { 573 mem_op = 5; 574 gds_op = 0; 575 } 576 577 bb << MEM_GDS_WORD0_EGCM() 578 .MEM_INST(2) 579 .MEM_OP(mem_op) 580 .SRC_GPR(bc.src_gpr) 581 .SRC_SEL_X(bc.src_sel[0]) 582 .SRC_SEL_Y(bc.src_sel[1]) 583 .SRC_SEL_Z(bc.src_sel[2]); 584 585 bb << MEM_GDS_WORD1_EGCM() 586 .DST_GPR(bc.dst_gpr) 587 .DST_REL_MODE(bc.dst_rel) 588 .GDS_OP(gds_op) 589 .SRC_GPR(bc.src2_gpr) 590 .UAV_INDEX_MODE(bc.uav_index_mode) 591 .UAV_ID(bc.uav_id) 592 .ALLOC_CONSUME(bc.alloc_consume) 593 .BCAST_FIRST_REQ(bc.bcast_first_req); 594 595 bb << MEM_GDS_WORD2_EGCM() 596 .DST_SEL_X(bc.dst_sel[0]) 597 .DST_SEL_Y(bc.dst_sel[1]) 598 .DST_SEL_Z(bc.dst_sel[2]) 599 .DST_SEL_W(bc.dst_sel[3]); 600 601 bb << 0; 602 return 0; 603} 604 605int bc_builder::build_fetch_vtx(fetch_node* n) { 606 const bc_fetch &bc = n->bc; 607 const fetch_op_info *fop = bc.op_ptr; 608 609 assert(fop->flags & FF_VTX); 610 611 if (!ctx.is_cayman()) 612 bb << VTX_WORD0_R6R7EG() 613 .BUFFER_ID(bc.resource_id) 614 .FETCH_TYPE(bc.fetch_type) 615 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 616 .MEGA_FETCH_COUNT(bc.mega_fetch_count) 617 .SRC_GPR(bc.src_gpr) 618 .SRC_REL(bc.src_rel) 619 .SRC_SEL_X(bc.src_sel[0]) 620 .VC_INST(ctx.fetch_opcode(bc.op)); 621 622 else 623 bb << VTX_WORD0_CM() 624 .BUFFER_ID(bc.resource_id) 625 .COALESCED_READ(bc.coalesced_read) 626 .FETCH_TYPE(bc.fetch_type) 627 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 628 .LDS_REQ(bc.lds_req) 629 .SRC_GPR(bc.src_gpr) 630 .SRC_REL(bc.src_rel) 631 .SRC_SEL_X(bc.src_sel[0]) 632 .SRC_SEL_Y(bc.src_sel[1]) 633 .STRUCTURED_READ(bc.structured_read) 634 .VC_INST(ctx.fetch_opcode(bc.op)); 635 636 if (bc.op == FETCH_OP_SEMFETCH) 637 bb << VTX_WORD1_SEM_ALL() 638 .DATA_FORMAT(bc.data_format) 639 .DST_SEL_X(bc.dst_sel[0]) 640 .DST_SEL_Y(bc.dst_sel[1]) 641 .DST_SEL_Z(bc.dst_sel[2]) 642 .DST_SEL_W(bc.dst_sel[3]) 643 .FORMAT_COMP_ALL(bc.format_comp_all) 644 .NUM_FORMAT_ALL(bc.num_format_all) 645 .SEMANTIC_ID(bc.semantic_id) 646 .SRF_MODE_ALL(bc.srf_mode_all) 647 .USE_CONST_FIELDS(bc.use_const_fields); 648 else 649 bb << VTX_WORD1_GPR_ALL() 650 .DATA_FORMAT(bc.data_format) 651 .DST_GPR(bc.dst_gpr) 652 .DST_REL(bc.dst_rel) 653 .DST_SEL_X(bc.dst_sel[0]) 654 .DST_SEL_Y(bc.dst_sel[1]) 655 .DST_SEL_Z(bc.dst_sel[2]) 656 .DST_SEL_W(bc.dst_sel[3]) 657 .FORMAT_COMP_ALL(bc.format_comp_all) 658 .NUM_FORMAT_ALL(bc.num_format_all) 659 .SRF_MODE_ALL(bc.srf_mode_all) 660 .USE_CONST_FIELDS(bc.use_const_fields); 661 662 switch (ctx.hw_class) { 663 case HW_CLASS_R600: 664 bb << VTX_WORD2_R6() 665 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) 666 .ENDIAN_SWAP(bc.endian_swap) 667 .MEGA_FETCH(bc.mega_fetch) 668 .OFFSET(bc.offset[0]); 669 break; 670 case HW_CLASS_R700: 671 bb << VTX_WORD2_R7() 672 .ALT_CONST(bc.alt_const) 673 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) 674 .ENDIAN_SWAP(bc.endian_swap) 675 .MEGA_FETCH(bc.mega_fetch) 676 .OFFSET(bc.offset[0]); 677 break; 678 case HW_CLASS_EVERGREEN: 679 bb << VTX_WORD2_EG() 680 .ALT_CONST(bc.alt_const) 681 .BUFFER_INDEX_MODE(bc.resource_index_mode) 682 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) 683 .ENDIAN_SWAP(bc.endian_swap) 684 .MEGA_FETCH(bc.mega_fetch) 685 .OFFSET(bc.offset[0]); 686 break; 687 case HW_CLASS_CAYMAN: 688 bb << VTX_WORD2_CM() 689 .ALT_CONST(bc.alt_const) 690 .BUFFER_INDEX_MODE(bc.resource_index_mode) 691 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) 692 .ENDIAN_SWAP(bc.endian_swap) 693 .OFFSET(bc.offset[0]); 694 break; 695 default: 696 assert(!"unknown hw class"); 697 return -1; 698 } 699 700 bb << 0; 701 return 0; 702} 703 704int bc_builder::build_fetch_mem(fetch_node* n) { 705 const bc_fetch &bc = n->bc; 706 const fetch_op_info *fop = bc.op_ptr; 707 708 assert(fop->flags & FF_MEM); 709 710 bb << MEM_RD_WORD0_R7EGCM() 711 .MEM_INST(2) 712 .ELEM_SIZE(bc.elem_size) 713 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) 714 .MEM_OP(0) 715 .UNCACHED(bc.uncached) 716 .INDEXED(bc.indexed) 717 .SRC_SEL_Y(bc.src_sel[1]) 718 .SRC_GPR(bc.src_gpr) 719 .SRC_REL(bc.src_rel) 720 .SRC_SEL_X(bc.src_sel[0]) 721 .BURST_COUNT(bc.burst_count) 722 .LDS_REQ(bc.lds_req) 723 .COALESCED_READ(bc.coalesced_read); 724 725 bb << MEM_RD_WORD1_R7EGCM() 726 .DST_GPR(bc.dst_gpr) 727 .DST_REL(bc.dst_rel) 728 .DST_SEL_X(bc.dst_sel[0]) 729 .DST_SEL_Y(bc.dst_sel[1]) 730 .DST_SEL_Z(bc.dst_sel[2]) 731 .DST_SEL_W(bc.dst_sel[3]) 732 .DATA_FORMAT(bc.data_format) 733 .NUM_FORMAT_ALL(bc.num_format_all) 734 .FORMAT_COMP_ALL(bc.format_comp_all) 735 .SRF_MODE_ALL(bc.srf_mode_all); 736 737 bb << MEM_RD_WORD2_R7EGCM() 738 .ARRAY_BASE(bc.array_base) 739 .ENDIAN_SWAP(bc.endian_swap) 740 .ARR_SIZE(bc.array_size); 741 742 bb << 0; 743 return 0; 744} 745 746} 747