1/* 2 * Copyright (C) 2004 David Airlie All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 */ 21 22#include "main/glheader.h" 23#include "main/macros.h" 24#include "main/atifragshader.h" 25#include "main/samplerobj.h" 26#include "swrast/s_atifragshader.h" 27#include "swrast/s_context.h" 28 29#define ATI_FS_INPUT_PRIMARY 0 30#define ATI_FS_INPUT_SECONDARY 1 31 32/** 33 * State for executing ATI fragment shader. 34 */ 35struct atifs_machine 36{ 37 GLfloat Registers[6][4]; /** six temporary registers */ 38 GLfloat PrevPassRegisters[6][4]; 39 GLfloat Inputs[2][4]; /** Primary, secondary input colors */ 40}; 41 42 43 44/** 45 * Fetch a texel. 46 */ 47static void 48fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda, 49 GLuint unit, GLfloat color[4]) 50{ 51 SWcontext *swrast = SWRAST_CONTEXT(ctx); 52 53 /* XXX use a float-valued TextureSample routine here!!! */ 54 swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit), 55 ctx->Texture.Unit[unit]._Current, 56 1, (const GLfloat(*)[4]) texcoord, 57 &lambda, (GLfloat (*)[4]) color); 58} 59 60static void 61apply_swizzle(GLfloat values[4], GLuint swizzle) 62{ 63 GLfloat s, t, r, q; 64 65 s = values[0]; 66 t = values[1]; 67 r = values[2]; 68 q = values[3]; 69 70 switch (swizzle) { 71 case GL_SWIZZLE_STR_ATI: 72 values[0] = s; 73 values[1] = t; 74 values[2] = r; 75 break; 76 case GL_SWIZZLE_STQ_ATI: 77 values[0] = s; 78 values[1] = t; 79 values[2] = q; 80 break; 81 case GL_SWIZZLE_STR_DR_ATI: 82 values[0] = s / r; 83 values[1] = t / r; 84 values[2] = 1 / r; 85 break; 86 case GL_SWIZZLE_STQ_DQ_ATI: 87/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */ 88 if (q == 0.0F) 89 q = 0.000000001F; 90 values[0] = s / q; 91 values[1] = t / q; 92 values[2] = 1.0F / q; 93 break; 94 } 95 values[3] = 0.0; 96} 97 98static void 99apply_src_rep(GLint optype, GLuint rep, GLfloat * val) 100{ 101 GLint i; 102 GLint start, end; 103 if (!rep) 104 return; 105 106 start = optype ? 3 : 0; 107 end = 4; 108 109 for (i = start; i < end; i++) { 110 switch (rep) { 111 case GL_RED: 112 val[i] = val[0]; 113 break; 114 case GL_GREEN: 115 val[i] = val[1]; 116 break; 117 case GL_BLUE: 118 val[i] = val[2]; 119 break; 120 case GL_ALPHA: 121 val[i] = val[3]; 122 break; 123 } 124 } 125} 126 127static void 128apply_src_mod(GLint optype, GLuint mod, GLfloat * val) 129{ 130 GLint i; 131 GLint start, end; 132 133 if (!mod) 134 return; 135 136 start = optype ? 3 : 0; 137 end = 4; 138 139 for (i = start; i < end; i++) { 140 if (mod & GL_COMP_BIT_ATI) 141 val[i] = 1 - val[i]; 142 143 if (mod & GL_BIAS_BIT_ATI) 144 val[i] = val[i] - 0.5F; 145 146 if (mod & GL_2X_BIT_ATI) 147 val[i] = 2 * val[i]; 148 149 if (mod & GL_NEGATE_BIT_ATI) 150 val[i] = -val[i]; 151 } 152} 153 154static void 155apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val) 156{ 157 GLint i; 158 GLint has_sat = mod & GL_SATURATE_BIT_ATI; 159 GLint start, end; 160 161 mod &= ~GL_SATURATE_BIT_ATI; 162 163 start = optype ? 3 : 0; 164 end = optype ? 4 : 3; 165 166 for (i = start; i < end; i++) { 167 switch (mod) { 168 case GL_2X_BIT_ATI: 169 val[i] = 2 * val[i]; 170 break; 171 case GL_4X_BIT_ATI: 172 val[i] = 4 * val[i]; 173 break; 174 case GL_8X_BIT_ATI: 175 val[i] = 8 * val[i]; 176 break; 177 case GL_HALF_BIT_ATI: 178 val[i] = val[i] * 0.5F; 179 break; 180 case GL_QUARTER_BIT_ATI: 181 val[i] = val[i] * 0.25F; 182 break; 183 case GL_EIGHTH_BIT_ATI: 184 val[i] = val[i] * 0.125F; 185 break; 186 } 187 188 if (has_sat) { 189 if (val[i] < 0.0F) 190 val[i] = 0.0F; 191 else if (val[i] > 1.0F) 192 val[i] = 1.0F; 193 } 194 else { 195 if (val[i] < -8.0F) 196 val[i] = -8.0F; 197 else if (val[i] > 8.0F) 198 val[i] = 8.0F; 199 } 200 } 201} 202 203 204static void 205write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src, 206 GLfloat * dst) 207{ 208 GLint i; 209 apply_dst_mod(optype, mod, src); 210 211 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) { 212 if (mask) { 213 if (mask & GL_RED_BIT_ATI) 214 dst[0] = src[0]; 215 216 if (mask & GL_GREEN_BIT_ATI) 217 dst[1] = src[1]; 218 219 if (mask & GL_BLUE_BIT_ATI) 220 dst[2] = src[2]; 221 } 222 else { 223 for (i = 0; i < 3; i++) 224 dst[i] = src[i]; 225 } 226 } 227 else 228 dst[3] = src[3]; 229} 230 231static void 232finish_pass(struct atifs_machine *machine) 233{ 234 GLint i; 235 236 for (i = 0; i < 6; i++) { 237 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]); 238 } 239} 240 241 242static void 243handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst, 244 const SWspan *span, GLuint column, GLuint idx) 245{ 246 GLuint swizzle = texinst->swizzle; 247 GLuint pass_tex = texinst->src; 248 249 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 250 pass_tex -= GL_TEXTURE0_ARB; 251 COPY_4V(machine->Registers[idx], 252 span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]); 253 } 254 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { 255 pass_tex -= GL_REG_0_ATI; 256 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]); 257 } 258 apply_swizzle(machine->Registers[idx], swizzle); 259 260} 261 262static void 263handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine, 264 struct atifs_setupinst *texinst, const SWspan *span, 265 GLuint column, GLuint idx) 266{ 267/* sample from unit idx using texinst->src as coords */ 268 GLuint swizzle = texinst->swizzle; 269 GLuint coord_source = texinst->src; 270 GLfloat tex_coords[4] = { 0 }; 271 272 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) { 273 coord_source -= GL_TEXTURE0_ARB; 274 COPY_4V(tex_coords, 275 span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]); 276 } 277 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) { 278 coord_source -= GL_REG_0_ATI; 279 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]); 280 } 281 apply_swizzle(tex_coords, swizzle); 282 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]); 283} 284 285#define SETUP_SRC_REG(optype, i, x) \ 286do { \ 287 COPY_4V(src[optype][i], x); \ 288} while (0) 289 290 291 292/** 293 * Execute the given fragment shader. 294 * NOTE: we do everything in single-precision floating point 295 * \param ctx - rendering context 296 * \param shader - the shader to execute 297 * \param machine - virtual machine state 298 * \param span - the SWspan we're operating on 299 * \param column - which pixel [i] we're operating on in the span 300 */ 301static void 302execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader, 303 struct atifs_machine *machine, const SWspan *span, 304 GLuint column) 305{ 306 GLuint pc; 307 struct atifs_instruction *inst; 308 struct atifs_setupinst *texinst; 309 GLint optype; 310 GLuint i; 311 GLint j, pass; 312 GLint dstreg; 313 GLfloat src[2][3][4]; 314 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 }; 315 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 }; 316 GLfloat dst[2][4], *dstp; 317 318 for (pass = 0; pass < shader->NumPasses; pass++) { 319 if (pass > 0) 320 finish_pass(machine); 321 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) { 322 texinst = &shader->SetupInst[pass][j]; 323 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) 324 handle_pass_op(machine, texinst, span, column, j); 325 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) 326 handle_sample_op(ctx, machine, texinst, span, column, j); 327 } 328 329 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { 330 inst = &shader->Instructions[pass][pc]; 331 332 /* setup the source registers for color and alpha ops */ 333 for (optype = 0; optype < 2; optype++) { 334 for (i = 0; i < inst->ArgCount[optype]; i++) { 335 GLint index = inst->SrcReg[optype][i].Index; 336 337 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) 338 SETUP_SRC_REG(optype, i, 339 machine->Registers[index - GL_REG_0_ATI]); 340 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { 341 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) { 342 SETUP_SRC_REG(optype, i, 343 shader->Constants[index - GL_CON_0_ATI]); 344 } else { 345 SETUP_SRC_REG(optype, i, 346 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]); 347 } 348 } 349 else if (index == GL_ONE) 350 SETUP_SRC_REG(optype, i, ones); 351 else if (index == GL_ZERO) 352 SETUP_SRC_REG(optype, i, zeros); 353 else if (index == GL_PRIMARY_COLOR_EXT) 354 SETUP_SRC_REG(optype, i, 355 machine->Inputs[ATI_FS_INPUT_PRIMARY]); 356 else if (index == GL_SECONDARY_INTERPOLATOR_ATI) 357 SETUP_SRC_REG(optype, i, 358 machine->Inputs[ATI_FS_INPUT_SECONDARY]); 359 360 apply_src_rep(optype, inst->SrcReg[optype][i].argRep, 361 src[optype][i]); 362 apply_src_mod(optype, inst->SrcReg[optype][i].argMod, 363 src[optype][i]); 364 } 365 } 366 367 /* Execute the operations - color then alpha */ 368 for (optype = 0; optype < 2; optype++) { 369 if (inst->Opcode[optype]) { 370 switch (inst->Opcode[optype]) { 371 case GL_ADD_ATI: 372 if (!optype) 373 for (i = 0; i < 3; i++) { 374 dst[optype][i] = 375 src[optype][0][i] + src[optype][1][i]; 376 } 377 else 378 dst[optype][3] = src[optype][0][3] + src[optype][1][3]; 379 break; 380 case GL_SUB_ATI: 381 if (!optype) 382 for (i = 0; i < 3; i++) { 383 dst[optype][i] = 384 src[optype][0][i] - src[optype][1][i]; 385 } 386 else 387 dst[optype][3] = src[optype][0][3] - src[optype][1][3]; 388 break; 389 case GL_MUL_ATI: 390 if (!optype) 391 for (i = 0; i < 3; i++) { 392 dst[optype][i] = 393 src[optype][0][i] * src[optype][1][i]; 394 } 395 else 396 dst[optype][3] = src[optype][0][3] * src[optype][1][3]; 397 break; 398 case GL_MAD_ATI: 399 if (!optype) 400 for (i = 0; i < 3; i++) { 401 dst[optype][i] = 402 src[optype][0][i] * src[optype][1][i] + 403 src[optype][2][i]; 404 } 405 else 406 dst[optype][3] = 407 src[optype][0][3] * src[optype][1][3] + 408 src[optype][2][3]; 409 break; 410 case GL_LERP_ATI: 411 if (!optype) 412 for (i = 0; i < 3; i++) { 413 dst[optype][i] = 414 src[optype][0][i] * src[optype][1][i] + (1 - 415 src 416 [optype] 417 [0][i]) * 418 src[optype][2][i]; 419 } 420 else 421 dst[optype][3] = 422 src[optype][0][3] * src[optype][1][3] + (1 - 423 src[optype] 424 [0][3]) * 425 src[optype][2][3]; 426 break; 427 428 case GL_MOV_ATI: 429 if (!optype) 430 for (i = 0; i < 3; i++) { 431 dst[optype][i] = src[optype][0][i]; 432 } 433 else 434 dst[optype][3] = src[optype][0][3]; 435 break; 436 case GL_CND_ATI: 437 if (!optype) { 438 for (i = 0; i < 3; i++) { 439 dst[optype][i] = 440 (src[optype][2][i] > 441 0.5F) ? src[optype][0][i] : src[optype][1][i]; 442 } 443 } 444 else { 445 dst[optype][3] = 446 (src[optype][2][3] > 447 0.5F) ? src[optype][0][3] : src[optype][1][3]; 448 } 449 break; 450 451 case GL_CND0_ATI: 452 if (!optype) 453 for (i = 0; i < 3; i++) { 454 dst[optype][i] = 455 (src[optype][2][i] >= 456 0) ? src[optype][0][i] : src[optype][1][i]; 457 } 458 else { 459 dst[optype][3] = 460 (src[optype][2][3] >= 461 0) ? src[optype][0][3] : src[optype][1][3]; 462 } 463 break; 464 case GL_DOT2_ADD_ATI: 465 { 466 GLfloat result; 467 468 /* DOT 2 always uses the source from the color op */ 469 /* could save recalculation of dot products for alpha inst */ 470 result = src[0][0][0] * src[0][1][0] + 471 src[0][0][1] * src[0][1][1] + src[0][2][2]; 472 if (!optype) { 473 for (i = 0; i < 3; i++) { 474 dst[optype][i] = result; 475 } 476 } 477 else 478 dst[optype][3] = result; 479 } 480 break; 481 case GL_DOT3_ATI: 482 { 483 GLfloat result; 484 485 /* DOT 3 always uses the source from the color op */ 486 result = src[0][0][0] * src[0][1][0] + 487 src[0][0][1] * src[0][1][1] + 488 src[0][0][2] * src[0][1][2]; 489 490 if (!optype) { 491 for (i = 0; i < 3; i++) { 492 dst[optype][i] = result; 493 } 494 } 495 else 496 dst[optype][3] = result; 497 } 498 break; 499 case GL_DOT4_ATI: 500 { 501 GLfloat result; 502 503 /* DOT 4 always uses the source from the color op */ 504 result = src[0][0][0] * src[0][1][0] + 505 src[0][0][1] * src[0][1][1] + 506 src[0][0][2] * src[0][1][2] + 507 src[0][0][3] * src[0][1][3]; 508 if (!optype) { 509 for (i = 0; i < 3; i++) { 510 dst[optype][i] = result; 511 } 512 } 513 else 514 dst[optype][3] = result; 515 } 516 break; 517 518 } 519 } 520 } 521 522 /* write out the destination registers */ 523 for (optype = 0; optype < 2; optype++) { 524 if (inst->Opcode[optype]) { 525 dstreg = inst->DstReg[optype].Index; 526 dstp = machine->Registers[dstreg - GL_REG_0_ATI]; 527 528 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) && 529 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI))) 530 write_dst_addr(optype, inst->DstReg[optype].dstMod, 531 inst->DstReg[optype].dstMask, dst[optype], 532 dstp); 533 else 534 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp); 535 } 536 } 537 } 538 } 539} 540 541 542/** 543 * Init fragment shader virtual machine state. 544 */ 545static void 546init_machine(struct gl_context * ctx, struct atifs_machine *machine, 547 const struct ati_fragment_shader *shader, 548 const SWspan *span, GLuint col) 549{ 550 GLfloat (*inputs)[4] = machine->Inputs; 551 GLint i, j; 552 553 for (i = 0; i < 6; i++) { 554 for (j = 0; j < 4; j++) 555 machine->Registers[i][j] = 0.0; 556 } 557 558 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]); 559 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]); 560} 561 562 563 564/** 565 * Execute the current ATI shader program, operating on the given span. 566 */ 567void 568_swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span) 569{ 570 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; 571 struct atifs_machine machine; 572 GLuint i; 573 574 /* incoming colors should be floats */ 575 assert(span->array->ChanType == GL_FLOAT); 576 577 for (i = 0; i < span->end; i++) { 578 if (span->array->mask[i]) { 579 init_machine(ctx, &machine, shader, span, i); 580 581 execute_shader(ctx, shader, &machine, span, i); 582 583 /* store result color */ 584 { 585 const GLfloat *colOut = machine.Registers[0]; 586 /*fprintf(stderr,"outputs %f %f %f %f\n", 587 colOut[0], colOut[1], colOut[2], colOut[3]); */ 588 COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut); 589 } 590 } 591 } 592} 593