1/* 2 * Copyright (C) 2005 Ben Skeggs. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28/** 29 * \file 30 * 31 * Emit the r300_fragment_program_code that can be understood by the hardware. 32 * Input is a pre-transformed radeon_program. 33 * 34 * \author Ben Skeggs <darktama@iinet.net.au> 35 * 36 * \author Jerome Glisse <j.glisse@gmail.com> 37 */ 38 39#include "r300_fragprog.h" 40 41#include "r300_reg.h" 42 43#include "radeon_program_pair.h" 44#include "r300_fragprog_swizzle.h" 45 46#include "util/compiler.h" 47 48 49struct r300_emit_state { 50 struct r300_fragment_program_compiler * compiler; 51 52 unsigned current_node : 2; 53 unsigned node_first_tex : 8; 54 unsigned node_first_alu : 8; 55 uint32_t node_flags; 56}; 57 58#define PROG_CODE \ 59 struct r300_fragment_program_compiler *c = emit->compiler; \ 60 struct r300_fragment_program_code *code = &c->code->code.r300 61 62#define error(fmt, args...) do { \ 63 rc_error(&c->Base, "%s::%s(): " fmt "\n", \ 64 __FILE__, __FUNCTION__, ##args); \ 65 } while(0) 66 67static unsigned int get_msbs_alu(unsigned int bits) 68{ 69 return (bits >> 6) & 0x7; 70} 71 72/** 73 * @param lsbs The number of least significant bits 74 */ 75static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) 76{ 77 return (bits >> lsbs) & 0x15; 78} 79 80#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) 81 82/** 83 * Mark a temporary register as used. 84 */ 85static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) 86{ 87 if (index > code->pixsize) 88 code->pixsize = index; 89} 90 91static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) 92{ 93 if (!src.Used) 94 return 0; 95 96 if (src.File == RC_FILE_CONSTANT) { 97 return src.Index | (1 << 5); 98 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { 99 use_temporary(code, src.Index); 100 return src.Index & 0x1f; 101 } 102 103 return 0; 104} 105 106 107static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) 108{ 109 switch(opcode) { 110 case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; 111 case RC_OPCODE_CND: return R300_ALU_OUTC_CND; 112 case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; 113 case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; 114 case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; 115 default: 116 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); 117 FALLTHROUGH; 118 case RC_OPCODE_NOP: 119 FALLTHROUGH; 120 case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; 121 case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; 122 case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; 123 case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; 124 } 125} 126 127static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) 128{ 129 switch(opcode) { 130 case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; 131 case RC_OPCODE_CND: return R300_ALU_OUTA_CND; 132 case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; 133 case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; 134 case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; 135 case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; 136 case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; 137 default: 138 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); 139 FALLTHROUGH; 140 case RC_OPCODE_NOP: 141 FALLTHROUGH; 142 case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; 143 case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; 144 case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; 145 case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; 146 case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; 147 } 148} 149 150/** 151 * Emit one paired ALU instruction. 152 */ 153static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) 154{ 155 int ip; 156 int j; 157 PROG_CODE; 158 159 if (code->alu.length >= c->Base.max_alu_insts) { 160 error("Too many ALU instructions"); 161 return 0; 162 } 163 164 ip = code->alu.length++; 165 166 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); 167 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); 168 169 for(j = 0; j < 3; ++j) { 170 /* Set the RGB address */ 171 unsigned int src = use_source(code, inst->RGB.Src[j]); 172 unsigned int arg; 173 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) 174 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); 175 176 code->alu.inst[ip].rgb_addr |= src << (6*j); 177 178 /* Set the Alpha address */ 179 src = use_source(code, inst->Alpha.Src[j]); 180 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) 181 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); 182 183 code->alu.inst[ip].alpha_addr |= src << (6*j); 184 185 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); 186 arg |= inst->RGB.Arg[j].Abs << 6; 187 arg |= inst->RGB.Arg[j].Negate << 5; 188 code->alu.inst[ip].rgb_inst |= arg << (7*j); 189 190 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); 191 arg |= inst->Alpha.Arg[j].Abs << 6; 192 arg |= inst->Alpha.Arg[j].Negate << 5; 193 code->alu.inst[ip].alpha_inst |= arg << (7*j); 194 } 195 196 /* Presubtract */ 197 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 198 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { 199 case RC_PRESUB_BIAS: 200 code->alu.inst[ip].rgb_inst |= 201 R300_ALU_SRCP_1_MINUS_2_SRC0; 202 break; 203 case RC_PRESUB_ADD: 204 code->alu.inst[ip].rgb_inst |= 205 R300_ALU_SRCP_SRC1_PLUS_SRC0; 206 break; 207 case RC_PRESUB_SUB: 208 code->alu.inst[ip].rgb_inst |= 209 R300_ALU_SRCP_SRC1_MINUS_SRC0; 210 break; 211 case RC_PRESUB_INV: 212 code->alu.inst[ip].rgb_inst |= 213 R300_ALU_SRCP_1_MINUS_SRC0; 214 break; 215 default: 216 break; 217 } 218 } 219 220 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { 221 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { 222 case RC_PRESUB_BIAS: 223 code->alu.inst[ip].alpha_inst |= 224 R300_ALU_SRCP_1_MINUS_2_SRC0; 225 break; 226 case RC_PRESUB_ADD: 227 code->alu.inst[ip].alpha_inst |= 228 R300_ALU_SRCP_SRC1_PLUS_SRC0; 229 break; 230 case RC_PRESUB_SUB: 231 code->alu.inst[ip].alpha_inst |= 232 R300_ALU_SRCP_SRC1_MINUS_SRC0; 233 break; 234 case RC_PRESUB_INV: 235 code->alu.inst[ip].alpha_inst |= 236 R300_ALU_SRCP_1_MINUS_SRC0; 237 break; 238 default: 239 break; 240 } 241 } 242 243 if (inst->RGB.Saturate) 244 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; 245 if (inst->Alpha.Saturate) 246 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; 247 248 if (inst->RGB.WriteMask) { 249 use_temporary(code, inst->RGB.DestIndex); 250 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) 251 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; 252 code->alu.inst[ip].rgb_addr |= 253 ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | 254 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); 255 } 256 if (inst->RGB.OutputWriteMask) { 257 code->alu.inst[ip].rgb_addr |= 258 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | 259 R300_RGB_TARGET(inst->RGB.Target); 260 emit->node_flags |= R300_RGBA_OUT; 261 } 262 263 if (inst->Alpha.WriteMask) { 264 use_temporary(code, inst->Alpha.DestIndex); 265 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) 266 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; 267 code->alu.inst[ip].alpha_addr |= 268 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | 269 R300_ALU_DSTA_REG; 270 } 271 if (inst->Alpha.OutputWriteMask) { 272 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | 273 R300_ALPHA_TARGET(inst->Alpha.Target); 274 emit->node_flags |= R300_RGBA_OUT; 275 } 276 if (inst->Alpha.DepthWriteMask) { 277 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; 278 emit->node_flags |= R300_W_OUT; 279 c->code->writes_depth = 1; 280 } 281 if (inst->Nop) 282 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; 283 284 /* Handle Output Modifier 285 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */ 286 if (inst->RGB.Omod) { 287 if (inst->RGB.Omod == RC_OMOD_DISABLE) { 288 rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); 289 } 290 code->alu.inst[ip].rgb_inst |= 291 (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT); 292 } 293 if (inst->Alpha.Omod) { 294 if (inst->Alpha.Omod == RC_OMOD_DISABLE) { 295 rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); 296 } 297 code->alu.inst[ip].alpha_inst |= 298 (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT); 299 } 300 return 1; 301} 302 303 304/** 305 * Finish the current node without advancing to the next one. 306 */ 307static int finish_node(struct r300_emit_state * emit) 308{ 309 struct r300_fragment_program_compiler * c = emit->compiler; 310 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; 311 unsigned alu_offset; 312 unsigned alu_end; 313 unsigned tex_offset; 314 unsigned tex_end; 315 316 unsigned int alu_offset_msbs, alu_end_msbs; 317 318 if (code->alu.length == emit->node_first_alu) { 319 /* Generate a single NOP for this node */ 320 struct rc_pair_instruction inst; 321 memset(&inst, 0, sizeof(inst)); 322 if (!emit_alu(emit, &inst)) 323 return 0; 324 } 325 326 alu_offset = emit->node_first_alu; 327 alu_end = code->alu.length - alu_offset - 1; 328 tex_offset = emit->node_first_tex; 329 tex_end = code->tex.length - tex_offset - 1; 330 331 if (code->tex.length == emit->node_first_tex) { 332 if (emit->current_node > 0) { 333 error("Node %i has no TEX instructions", emit->current_node); 334 return 0; 335 } 336 337 tex_end = 0; 338 } else { 339 if (emit->current_node == 0) 340 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; 341 } 342 343 /* Write the config register. 344 * Note: The order in which the words for each node are written 345 * is not correct here and needs to be fixed up once we're entirely 346 * done 347 * 348 * Also note that the register specification from AMD is slightly 349 * incorrect in its description of this register. */ 350 code->code_addr[emit->current_node] = 351 ((alu_offset << R300_ALU_START_SHIFT) 352 & R300_ALU_START_MASK) 353 | ((alu_end << R300_ALU_SIZE_SHIFT) 354 & R300_ALU_SIZE_MASK) 355 | ((tex_offset << R300_TEX_START_SHIFT) 356 & R300_TEX_START_MASK) 357 | ((tex_end << R300_TEX_SIZE_SHIFT) 358 & R300_TEX_SIZE_MASK) 359 | emit->node_flags 360 | (get_msbs_tex(tex_offset, 5) 361 << R400_TEX_START_MSB_SHIFT) 362 | (get_msbs_tex(tex_end, 5) 363 << R400_TEX_SIZE_MSB_SHIFT) 364 ; 365 366 /* Write r400 extended instruction fields. These will be ignored on 367 * r300 cards. */ 368 alu_offset_msbs = get_msbs_alu(alu_offset); 369 alu_end_msbs = get_msbs_alu(alu_end); 370 switch(emit->current_node) { 371 case 0: 372 code->r400_code_offset_ext |= 373 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT 374 | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; 375 break; 376 case 1: 377 code->r400_code_offset_ext |= 378 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT 379 | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; 380 break; 381 case 2: 382 code->r400_code_offset_ext |= 383 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT 384 | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; 385 break; 386 case 3: 387 code->r400_code_offset_ext |= 388 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT 389 | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; 390 break; 391 } 392 return 1; 393} 394 395 396/** 397 * Begin a block of texture instructions. 398 * Create the necessary indirection. 399 */ 400static int begin_tex(struct r300_emit_state * emit) 401{ 402 PROG_CODE; 403 404 if (code->alu.length == emit->node_first_alu && 405 code->tex.length == emit->node_first_tex) { 406 return 1; 407 } 408 409 if (emit->current_node == 3) { 410 error("Too many texture indirections"); 411 return 0; 412 } 413 414 if (!finish_node(emit)) 415 return 0; 416 417 emit->current_node++; 418 emit->node_first_tex = code->tex.length; 419 emit->node_first_alu = code->alu.length; 420 emit->node_flags = 0; 421 return 1; 422} 423 424 425static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) 426{ 427 unsigned int unit; 428 unsigned int dest; 429 unsigned int opcode; 430 PROG_CODE; 431 432 if (code->tex.length >= emit->compiler->Base.max_tex_insts) { 433 error("Too many TEX instructions"); 434 return 0; 435 } 436 437 unit = inst->U.I.TexSrcUnit; 438 dest = inst->U.I.DstReg.Index; 439 440 switch(inst->U.I.Opcode) { 441 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; 442 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; 443 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; 444 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; 445 default: 446 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); 447 return 0; 448 } 449 450 if (inst->U.I.Opcode == RC_OPCODE_KIL) { 451 unit = 0; 452 dest = 0; 453 } else { 454 use_temporary(code, dest); 455 } 456 457 use_temporary(code, inst->U.I.SrcReg[0].Index); 458 459 code->tex.inst[code->tex.length++] = 460 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) 461 & R300_SRC_ADDR_MASK) 462 | ((dest << R300_DST_ADDR_SHIFT) 463 & R300_DST_ADDR_MASK) 464 | (unit << R300_TEX_ID_SHIFT) 465 | (opcode << R300_TEX_INST_SHIFT) 466 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? 467 R400_SRC_ADDR_EXT_BIT : 0) 468 | (dest >= R300_PFS_NUM_TEMP_REGS ? 469 R400_DST_ADDR_EXT_BIT : 0) 470 ; 471 return 1; 472} 473 474 475/** 476 * Final compilation step: Turn the intermediate radeon_program into 477 * machine-readable instructions. 478 */ 479void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) 480{ 481 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; 482 struct r300_emit_state emit; 483 struct r300_fragment_program_code *code = &compiler->code->code.r300; 484 unsigned int tex_end; 485 486 memset(&emit, 0, sizeof(emit)); 487 emit.compiler = compiler; 488 489 memset(code, 0, sizeof(struct r300_fragment_program_code)); 490 491 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; 492 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; 493 inst = inst->Next) { 494 if (inst->Type == RC_INSTRUCTION_NORMAL) { 495 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { 496 begin_tex(&emit); 497 continue; 498 } 499 500 emit_tex(&emit, inst); 501 } else { 502 emit_alu(&emit, &inst->U.P); 503 } 504 } 505 506 if (code->pixsize >= compiler->Base.max_temp_regs) 507 rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); 508 509 if (compiler->Base.Error) 510 return; 511 512 /* Finish the program */ 513 finish_node(&emit); 514 515 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ 516 517 /* Set r400 extended instruction fields. These values will be ignored 518 * on r300 cards. */ 519 code->r400_code_offset_ext |= 520 (get_msbs_alu(0) 521 << R400_ALU_OFFSET_MSB_SHIFT) 522 | (get_msbs_alu(code->alu.length - 1) 523 << R400_ALU_SIZE_MSB_SHIFT); 524 525 tex_end = code->tex.length ? code->tex.length - 1 : 0; 526 code->code_offset = 527 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) 528 & R300_PFS_CNTL_ALU_OFFSET_MASK) 529 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) 530 & R300_PFS_CNTL_ALU_END_MASK) 531 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) 532 & R300_PFS_CNTL_TEX_OFFSET_MASK) 533 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) 534 & R300_PFS_CNTL_TEX_END_MASK) 535 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) 536 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) 537 ; 538 539 if (emit.current_node < 3) { 540 int shift = 3 - emit.current_node; 541 int i; 542 for(i = emit.current_node; i >= 0; --i) 543 code->code_addr[shift + i] = code->code_addr[i]; 544 for(i = 0; i < shift; ++i) 545 code->code_addr[i] = 0; 546 } 547 548 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS 549 || code->alu.length > R300_PFS_MAX_ALU_INST 550 || code->tex.length > R300_PFS_MAX_TEX_INST) { 551 552 code->r390_mode = 1; 553 } 554} 555