1/************************************************************************** 2 * 3 * Copyright 2003 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28#include "main/glheader.h" 29#include "main/macros.h" 30#include "main/enums.h" 31 32#include "program/prog_instruction.h" 33#include "program/prog_parameter.h" 34#include "program/program.h" 35#include "program/programopt.h" 36#include "program/prog_print.h" 37 38#include "tnl/tnl.h" 39#include "tnl/t_context.h" 40 41#include "intel_batchbuffer.h" 42 43#include "i915_reg.h" 44#include "i915_context.h" 45#include "i915_program.h" 46 47static const GLfloat sin_quad_constants[2][4] = { 48 { 49 2.0, 50 -1.0, 51 .5, 52 .75 53 }, 54 { 55 4.0, 56 -4.0, 57 1.0 / (2.0 * M_PI), 58 .2225 59 } 60}; 61 62static const GLfloat sin_constants[4] = { 1.0, 63 -1.0 / (3 * 2 * 1), 64 1.0 / (5 * 4 * 3 * 2 * 1), 65 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) 66}; 67 68/* 1, -1/2!, 1/4!, -1/6! */ 69static const GLfloat cos_constants[4] = { 1.0, 70 -1.0 / (2 * 1), 71 1.0 / (4 * 3 * 2 * 1), 72 -1.0 / (6 * 5 * 4 * 3 * 2 * 1) 73}; 74 75/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */ 76#define TEXCOORD_TEX (0<<7) 77#define TEXCOORD_VAR (1<<7) 78 79static unsigned 80get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord) 81{ 82 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 83 if (p->texcoord_mapping[i] == texcoord) 84 return i; 85 } 86 87 /* blah */ 88 return p->ctx->Const.MaxTextureCoordUnits - 1; 89} 90 91/** 92 * Retrieve a ureg for the given source register. Will emit 93 * constants, apply swizzling and negation as needed. 
94 */ 95static GLuint 96src_vector(struct i915_fragment_program *p, 97 const struct prog_src_register *source, 98 const struct gl_program *program) 99{ 100 GLuint src; 101 unsigned unit; 102 103 switch (source->File) { 104 105 /* Registers: 106 */ 107 case PROGRAM_TEMPORARY: 108 if (source->Index >= I915_MAX_TEMPORARY) { 109 i915_program_error(p, "Exceeded max temporary reg: %d/%d", 110 source->Index, I915_MAX_TEMPORARY); 111 return 0; 112 } 113 src = UREG(REG_TYPE_R, source->Index); 114 break; 115 case PROGRAM_INPUT: 116 switch (source->Index) { 117 case VARYING_SLOT_POS: 118 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); 119 break; 120 case VARYING_SLOT_COL0: 121 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 122 break; 123 case VARYING_SLOT_COL1: 124 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 125 src = swizzle(src, X, Y, Z, ONE); 126 break; 127 case VARYING_SLOT_FOGC: 128 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 129 src = swizzle(src, W, ZERO, ZERO, ONE); 130 break; 131 case VARYING_SLOT_TEX0: 132 case VARYING_SLOT_TEX1: 133 case VARYING_SLOT_TEX2: 134 case VARYING_SLOT_TEX3: 135 case VARYING_SLOT_TEX4: 136 case VARYING_SLOT_TEX5: 137 case VARYING_SLOT_TEX6: 138 case VARYING_SLOT_TEX7: 139 unit = get_texcoord_mapping(p, (source->Index - 140 VARYING_SLOT_TEX0) | TEXCOORD_TEX); 141 src = i915_emit_decl(p, REG_TYPE_T, 142 T_TEX0 + unit, 143 D0_CHANNEL_ALL); 144 break; 145 146 case VARYING_SLOT_VAR0: 147 case VARYING_SLOT_VAR0 + 1: 148 case VARYING_SLOT_VAR0 + 2: 149 case VARYING_SLOT_VAR0 + 3: 150 case VARYING_SLOT_VAR0 + 4: 151 case VARYING_SLOT_VAR0 + 5: 152 case VARYING_SLOT_VAR0 + 6: 153 case VARYING_SLOT_VAR0 + 7: 154 unit = get_texcoord_mapping(p, (source->Index - 155 VARYING_SLOT_VAR0) | TEXCOORD_VAR); 156 src = i915_emit_decl(p, REG_TYPE_T, 157 T_TEX0 + unit, 158 D0_CHANNEL_ALL); 159 break; 160 161 default: 162 i915_program_error(p, "Bad source->Index: %d", source->Index); 163 
return 0; 164 } 165 break; 166 167 case PROGRAM_OUTPUT: 168 switch (source->Index) { 169 case FRAG_RESULT_COLOR: 170 case FRAG_RESULT_DATA0: 171 src = UREG(REG_TYPE_OC, 0); 172 break; 173 case FRAG_RESULT_DEPTH: 174 src = UREG(REG_TYPE_OD, 0); 175 break; 176 default: 177 i915_program_error(p, "Bad source->Index: %d", source->Index); 178 return 0; 179 } 180 break; 181 182 /* Various paramters and env values. All emitted to 183 * hardware as program constants. 184 */ 185 case PROGRAM_CONSTANT: 186 case PROGRAM_STATE_VAR: 187 case PROGRAM_UNIFORM: { 188 struct gl_program_parameter_list *params = program->Parameters; 189 unsigned offset = params->Parameters[source->Index].ValueOffset; 190 src = i915_emit_param4fv(p, ¶ms->ParameterValues[offset].f); 191 break; 192 } 193 default: 194 i915_program_error(p, "Bad source->File: %d", source->File); 195 return 0; 196 } 197 198 src = swizzle(src, 199 GET_SWZ(source->Swizzle, 0), 200 GET_SWZ(source->Swizzle, 1), 201 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); 202 203 if (source->Negate) 204 src = negate(src, 205 GET_BIT(source->Negate, 0), 206 GET_BIT(source->Negate, 1), 207 GET_BIT(source->Negate, 2), 208 GET_BIT(source->Negate, 3)); 209 210 return src; 211} 212 213 214static GLuint 215get_result_vector(struct i915_fragment_program *p, 216 const struct prog_instruction *inst) 217{ 218 switch (inst->DstReg.File) { 219 case PROGRAM_OUTPUT: 220 switch (inst->DstReg.Index) { 221 case FRAG_RESULT_COLOR: 222 case FRAG_RESULT_DATA0: 223 return UREG(REG_TYPE_OC, 0); 224 case FRAG_RESULT_DEPTH: 225 p->depth_written = 1; 226 return UREG(REG_TYPE_OD, 0); 227 default: 228 i915_program_error(p, "Bad inst->DstReg.Index: %d", 229 inst->DstReg.Index); 230 return 0; 231 } 232 case PROGRAM_TEMPORARY: 233 return UREG(REG_TYPE_R, inst->DstReg.Index); 234 default: 235 i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); 236 return 0; 237 } 238} 239 240static GLuint 241get_result_flags(const struct prog_instruction 
*inst) 242{ 243 GLuint flags = 0; 244 245 if (inst->Saturate) 246 flags |= A0_DEST_SATURATE; 247 if (inst->DstReg.WriteMask & WRITEMASK_X) 248 flags |= A0_DEST_CHANNEL_X; 249 if (inst->DstReg.WriteMask & WRITEMASK_Y) 250 flags |= A0_DEST_CHANNEL_Y; 251 if (inst->DstReg.WriteMask & WRITEMASK_Z) 252 flags |= A0_DEST_CHANNEL_Z; 253 if (inst->DstReg.WriteMask & WRITEMASK_W) 254 flags |= A0_DEST_CHANNEL_W; 255 256 return flags; 257} 258 259static GLuint 260translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) 261{ 262 switch (bit) { 263 case TEXTURE_1D_INDEX: 264 return D0_SAMPLE_TYPE_2D; 265 case TEXTURE_2D_INDEX: 266 return D0_SAMPLE_TYPE_2D; 267 case TEXTURE_RECT_INDEX: 268 return D0_SAMPLE_TYPE_2D; 269 case TEXTURE_3D_INDEX: 270 return D0_SAMPLE_TYPE_VOLUME; 271 case TEXTURE_CUBE_INDEX: 272 return D0_SAMPLE_TYPE_CUBE; 273 default: 274 i915_program_error(p, "TexSrcBit: %d", bit); 275 return 0; 276 } 277} 278 279#define EMIT_TEX( OP ) \ 280do { \ 281 GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget ); \ 282 const struct gl_program *program = &p->FragProg; \ 283 GLuint unit = program->SamplerUnits[inst->TexSrcUnit]; \ 284 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \ 285 unit, dim); \ 286 GLuint coord = src_vector( p, &inst->SrcReg[0], program); \ 287 /* Texel lookup */ \ 288 \ 289 i915_emit_texld( p, get_live_regs(p, inst), \ 290 get_result_vector( p, inst ), \ 291 get_result_flags( inst ), \ 292 sampler, \ 293 coord, \ 294 OP); \ 295} while (0) 296 297#define EMIT_ARITH( OP, N ) \ 298do { \ 299 i915_emit_arith( p, \ 300 OP, \ 301 get_result_vector( p, inst ), \ 302 get_result_flags( inst ), 0, \ 303 (N<1)?0:src_vector( p, &inst->SrcReg[0], program), \ 304 (N<2)?0:src_vector( p, &inst->SrcReg[1], program), \ 305 (N<3)?0:src_vector( p, &inst->SrcReg[2], program)); \ 306} while (0) 307 308#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 ) 309#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 ) 310#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 
3 ) 311 312/* 313 * TODO: consider moving this into core 314 */ 315static bool calc_live_regs( struct i915_fragment_program *p ) 316{ 317 const struct gl_program *program = &p->FragProg; 318 GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1); 319 uint8_t live_components[I915_MAX_TEMPORARY] = { 0, }; 320 GLint i; 321 322 for (i = program->arb.NumInstructions - 1; i >= 0; i--) { 323 struct prog_instruction *inst = &program->arb.Instructions[i]; 324 int opArgs = _mesa_num_inst_src_regs(inst->Opcode); 325 int a; 326 327 /* Register is written to: unmark as live for this and preceeding ops */ 328 if (inst->DstReg.File == PROGRAM_TEMPORARY) { 329 if (inst->DstReg.Index >= I915_MAX_TEMPORARY) 330 return false; 331 332 live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; 333 if (live_components[inst->DstReg.Index] == 0) 334 regsUsed &= ~(1 << inst->DstReg.Index); 335 } 336 337 for (a = 0; a < opArgs; a++) { 338 /* Register is read from: mark as live for this and preceeding ops */ 339 if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { 340 unsigned c; 341 342 if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY) 343 return false; 344 345 regsUsed |= 1 << inst->SrcReg[a].Index; 346 347 for (c = 0; c < 4; c++) { 348 const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c); 349 350 if (field <= SWIZZLE_W) 351 live_components[inst->SrcReg[a].Index] |= (1U << field); 352 } 353 } 354 } 355 356 p->usedRegs[i] = regsUsed; 357 } 358 359 return true; 360} 361 362static GLuint get_live_regs( struct i915_fragment_program *p, 363 const struct prog_instruction *inst ) 364{ 365 const struct gl_program *program = &p->FragProg; 366 GLuint nr = inst - program->arb.Instructions; 367 368 return p->usedRegs[nr]; 369} 370 371 372/* Possible concerns: 373 * 374 * SIN, COS -- could use another taylor step? 375 * LIT -- results seem a little different to sw mesa 376 * LOG -- different to mesa on negative numbers, but this is conformant. 
377 * 378 * Parse failures -- Mesa doesn't currently give a good indication 379 * internally whether a particular program string parsed or not. This 380 * can lead to confusion -- hopefully we cope with it ok now. 381 * 382 */ 383static void 384upload_program(struct i915_fragment_program *p) 385{ 386 const struct gl_program *program = &p->FragProg; 387 const struct prog_instruction *inst = program->arb.Instructions; 388 389 if (INTEL_DEBUG & DEBUG_WM) 390 _mesa_print_program(program); 391 392 /* Is this a parse-failed program? Ensure a valid program is 393 * loaded, as the flagging of an error isn't sufficient to stop 394 * this being uploaded to hardware. 395 */ 396 if (inst[0].Opcode == OPCODE_END) { 397 GLuint tmp = i915_get_utemp(p); 398 i915_emit_arith(p, 399 A0_MOV, 400 UREG(REG_TYPE_OC, 0), 401 A0_DEST_CHANNEL_ALL, 0, 402 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0); 403 return; 404 } 405 406 if (program->arb.NumInstructions > I915_MAX_INSN) { 407 i915_program_error(p, "Exceeded max instructions (%d out of %d)", 408 program->arb.NumInstructions, I915_MAX_INSN); 409 return; 410 } 411 412 /* Not always needed: 413 */ 414 if (!calc_live_regs(p)) { 415 i915_program_error(p, "Could not allocate registers"); 416 return; 417 } 418 419 while (1) { 420 GLuint src0, src1, src2, flags; 421 GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; 422 423 switch (inst->Opcode) { 424 case OPCODE_ABS: 425 src0 = src_vector(p, &inst->SrcReg[0], program); 426 i915_emit_arith(p, 427 A0_MAX, 428 get_result_vector(p, inst), 429 get_result_flags(inst), 0, 430 src0, negate(src0, 1, 1, 1, 1), 0); 431 break; 432 433 case OPCODE_ADD: 434 EMIT_2ARG_ARITH(A0_ADD); 435 break; 436 437 case OPCODE_CMP: 438 src0 = src_vector(p, &inst->SrcReg[0], program); 439 src1 = src_vector(p, &inst->SrcReg[1], program); 440 src2 = src_vector(p, &inst->SrcReg[2], program); 441 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 442 
break; 443 444 case OPCODE_COS: 445 src0 = src_vector(p, &inst->SrcReg[0], program); 446 tmp = i915_get_utemp(p); 447 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 448 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 449 450 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 451 i915_emit_arith(p, 452 A0_MAD, 453 tmp, A0_DEST_CHANNEL_X, 0, 454 src0, 455 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 456 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ 457 458 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 459 460 i915_emit_arith(p, 461 A0_MAD, 462 tmp, A0_DEST_CHANNEL_X, 0, 463 tmp, 464 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ 465 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 466 467 /* Compute COS with the same calculation used for SIN, but a 468 * different source range has been mapped to [-1,1] this time. 469 */ 470 471 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 472 i915_emit_arith(p, 473 A0_MAX, 474 tmp, A0_DEST_CHANNEL_Y, 0, 475 swizzle(tmp, ZERO, X, ZERO, ZERO), 476 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 477 0); 478 479 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 480 i915_emit_arith(p, 481 A0_MUL, 482 tmp, A0_DEST_CHANNEL_Y, 0, 483 swizzle(tmp, ZERO, X, ZERO, ZERO), 484 tmp, 485 0); 486 487 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 488 i915_emit_arith(p, 489 A0_DP3, 490 tmp, A0_DEST_CHANNEL_X, 0, 491 tmp, 492 swizzle(consts1, X, Y, ZERO, ZERO), 493 0); 494 495 /* tmp.x now contains a first approximation (y). Now, weight it 496 * against tmp.y**2 to get closer. 
497 */ 498 i915_emit_arith(p, 499 A0_MAX, 500 tmp, A0_DEST_CHANNEL_Y, 0, 501 swizzle(tmp, ZERO, X, ZERO, ZERO), 502 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 503 0); 504 505 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 506 i915_emit_arith(p, 507 A0_MAD, 508 tmp, A0_DEST_CHANNEL_Y, 0, 509 swizzle(tmp, ZERO, X, ZERO, ZERO), 510 swizzle(tmp, ZERO, Y, ZERO, ZERO), 511 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 512 513 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 514 i915_emit_arith(p, 515 A0_MAD, 516 get_result_vector(p, inst), 517 get_result_flags(inst), 0, 518 swizzle(consts1, W, W, W, W), 519 swizzle(tmp, Y, Y, Y, Y), 520 swizzle(tmp, X, X, X, X)); 521 break; 522 523 case OPCODE_DP2: 524 src0 = src_vector(p, &inst->SrcReg[0], program); 525 src1 = src_vector(p, &inst->SrcReg[1], program); 526 i915_emit_arith(p, 527 A0_DP3, 528 get_result_vector(p, inst), 529 get_result_flags(inst), 0, 530 swizzle(src0, X, Y, ZERO, ZERO), 531 swizzle(src1, X, Y, ZERO, ZERO), 532 0); 533 break; 534 535 case OPCODE_DP3: 536 EMIT_2ARG_ARITH(A0_DP3); 537 break; 538 539 case OPCODE_DP4: 540 EMIT_2ARG_ARITH(A0_DP4); 541 break; 542 543 case OPCODE_DPH: 544 src0 = src_vector(p, &inst->SrcReg[0], program); 545 src1 = src_vector(p, &inst->SrcReg[1], program); 546 547 i915_emit_arith(p, 548 A0_DP4, 549 get_result_vector(p, inst), 550 get_result_flags(inst), 0, 551 swizzle(src0, X, Y, Z, ONE), src1, 0); 552 break; 553 554 case OPCODE_DST: 555 src0 = src_vector(p, &inst->SrcReg[0], program); 556 src1 = src_vector(p, &inst->SrcReg[1], program); 557 558 /* result[0] = 1 * 1; 559 * result[1] = a[1] * b[1]; 560 * result[2] = a[2] * 1; 561 * result[3] = 1 * b[3]; 562 */ 563 i915_emit_arith(p, 564 A0_MUL, 565 get_result_vector(p, inst), 566 get_result_flags(inst), 0, 567 swizzle(src0, ONE, Y, Z, ONE), 568 swizzle(src1, ONE, Y, ONE, W), 0); 569 break; 570 571 case OPCODE_EX2: 572 src0 = src_vector(p, &inst->SrcReg[0], program); 573 574 
i915_emit_arith(p, 575 A0_EXP, 576 get_result_vector(p, inst), 577 get_result_flags(inst), 0, 578 swizzle(src0, X, X, X, X), 0, 0); 579 break; 580 581 case OPCODE_FLR: 582 EMIT_1ARG_ARITH(A0_FLR); 583 break; 584 585 case OPCODE_TRUNC: 586 EMIT_1ARG_ARITH(A0_TRC); 587 break; 588 589 case OPCODE_FRC: 590 EMIT_1ARG_ARITH(A0_FRC); 591 break; 592 593 case OPCODE_KIL: 594 src0 = src_vector(p, &inst->SrcReg[0], program); 595 tmp = i915_get_utemp(p); 596 597 i915_emit_texld(p, get_live_regs(p, inst), 598 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ 599 0, src0, T0_TEXKILL); 600 break; 601 602 case OPCODE_LG2: 603 src0 = src_vector(p, &inst->SrcReg[0], program); 604 605 i915_emit_arith(p, 606 A0_LOG, 607 get_result_vector(p, inst), 608 get_result_flags(inst), 0, 609 swizzle(src0, X, X, X, X), 0, 0); 610 break; 611 612 case OPCODE_LIT: 613 src0 = src_vector(p, &inst->SrcReg[0], program); 614 tmp = i915_get_utemp(p); 615 616 /* tmp = max( a.xyzw, a.00zw ) 617 * XXX: Clamp tmp.w to -128..128 618 * tmp.y = log(tmp.y) 619 * tmp.y = tmp.w * tmp.y 620 * tmp.y = exp(tmp.y) 621 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 622 */ 623 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 624 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 625 626 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 627 swizzle(tmp, Y, Y, Y, Y), 0, 0); 628 629 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 630 swizzle(tmp, ZERO, Y, ZERO, ZERO), 631 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 632 633 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 634 swizzle(tmp, Y, Y, Y, Y), 0, 0); 635 636 i915_emit_arith(p, A0_CMP, 637 get_result_vector(p, inst), 638 get_result_flags(inst), 0, 639 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 640 swizzle(tmp, ONE, X, ZERO, ONE), 641 swizzle(tmp, ONE, X, Y, ONE)); 642 643 break; 644 645 case OPCODE_LRP: 646 src0 = src_vector(p, &inst->SrcReg[0], program); 647 src1 = src_vector(p, &inst->SrcReg[1], program); 648 src2 = src_vector(p, &inst->SrcReg[2], 
program); 649 flags = get_result_flags(inst); 650 tmp = i915_get_utemp(p); 651 652 /* b*a + c*(1-a) 653 * 654 * b*a + c - ca 655 * 656 * tmp = b*a + c, 657 * result = (-c)*a + tmp 658 */ 659 i915_emit_arith(p, A0_MAD, tmp, 660 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 661 662 i915_emit_arith(p, A0_MAD, 663 get_result_vector(p, inst), 664 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 665 break; 666 667 case OPCODE_MAD: 668 EMIT_3ARG_ARITH(A0_MAD); 669 break; 670 671 case OPCODE_MAX: 672 EMIT_2ARG_ARITH(A0_MAX); 673 break; 674 675 case OPCODE_MIN: 676 EMIT_2ARG_ARITH(A0_MIN); 677 break; 678 679 case OPCODE_MOV: 680 EMIT_1ARG_ARITH(A0_MOV); 681 break; 682 683 case OPCODE_MUL: 684 EMIT_2ARG_ARITH(A0_MUL); 685 break; 686 687 case OPCODE_POW: 688 src0 = src_vector(p, &inst->SrcReg[0], program); 689 src1 = src_vector(p, &inst->SrcReg[1], program); 690 tmp = i915_get_utemp(p); 691 flags = get_result_flags(inst); 692 693 /* XXX: masking on intermediate values, here and elsewhere. 694 */ 695 i915_emit_arith(p, 696 A0_LOG, 697 tmp, A0_DEST_CHANNEL_X, 0, 698 swizzle(src0, X, X, X, X), 0, 0); 699 700 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 701 702 703 i915_emit_arith(p, 704 A0_EXP, 705 get_result_vector(p, inst), 706 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 707 708 break; 709 710 case OPCODE_RCP: 711 src0 = src_vector(p, &inst->SrcReg[0], program); 712 713 i915_emit_arith(p, 714 A0_RCP, 715 get_result_vector(p, inst), 716 get_result_flags(inst), 0, 717 swizzle(src0, X, X, X, X), 0, 0); 718 break; 719 720 case OPCODE_RSQ: 721 722 src0 = src_vector(p, &inst->SrcReg[0], program); 723 724 i915_emit_arith(p, 725 A0_RSQ, 726 get_result_vector(p, inst), 727 get_result_flags(inst), 0, 728 swizzle(src0, X, X, X, X), 0, 0); 729 break; 730 731 case OPCODE_SCS: 732 src0 = src_vector(p, &inst->SrcReg[0], program); 733 tmp = i915_get_utemp(p); 734 735 /* 736 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 737 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 
x 738 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 739 * scs.x = DP4 t1, sin_constants 740 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 741 * scs.y = DP4 t1, cos_constants 742 */ 743 i915_emit_arith(p, 744 A0_MUL, 745 tmp, A0_DEST_CHANNEL_XY, 0, 746 swizzle(src0, X, X, ONE, ONE), 747 swizzle(src0, X, ONE, ONE, ONE), 0); 748 749 i915_emit_arith(p, 750 A0_MUL, 751 tmp, A0_DEST_CHANNEL_ALL, 0, 752 swizzle(tmp, X, Y, X, Y), 753 swizzle(tmp, X, X, ONE, ONE), 0); 754 755 if (inst->DstReg.WriteMask & WRITEMASK_Y) { 756 GLuint tmp1; 757 758 if (inst->DstReg.WriteMask & WRITEMASK_X) 759 tmp1 = i915_get_utemp(p); 760 else 761 tmp1 = tmp; 762 763 i915_emit_arith(p, 764 A0_MUL, 765 tmp1, A0_DEST_CHANNEL_ALL, 0, 766 swizzle(tmp, X, Y, Y, W), 767 swizzle(tmp, X, Z, ONE, ONE), 0); 768 769 i915_emit_arith(p, 770 A0_DP4, 771 get_result_vector(p, inst), 772 A0_DEST_CHANNEL_Y, 0, 773 swizzle(tmp1, W, Z, Y, X), 774 i915_emit_const4fv(p, sin_constants), 0); 775 } 776 777 if (inst->DstReg.WriteMask & WRITEMASK_X) { 778 i915_emit_arith(p, 779 A0_MUL, 780 tmp, A0_DEST_CHANNEL_XYZ, 0, 781 swizzle(tmp, X, X, Z, ONE), 782 swizzle(tmp, Z, ONE, ONE, ONE), 0); 783 784 i915_emit_arith(p, 785 A0_DP4, 786 get_result_vector(p, inst), 787 A0_DEST_CHANNEL_X, 0, 788 swizzle(tmp, ONE, Z, Y, X), 789 i915_emit_const4fv(p, cos_constants), 0); 790 } 791 break; 792 793 case OPCODE_SIN: 794 src0 = src_vector(p, &inst->SrcReg[0], program); 795 tmp = i915_get_utemp(p); 796 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 797 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 798 799 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 800 i915_emit_arith(p, 801 A0_MAD, 802 tmp, A0_DEST_CHANNEL_X, 0, 803 src0, 804 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 805 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ 806 807 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 808 809 i915_emit_arith(p, 810 A0_MAD, 811 tmp, A0_DEST_CHANNEL_X, 0, 812 tmp, 813 swizzle(consts0, X, ZERO, 
ZERO, ZERO), /* 2 */ 814 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 815 816 /* Compute sin using a quadratic and quartic. It gives continuity 817 * that repeating the Taylor series lacks every 2*pi, and has 818 * reduced error. 819 * 820 * The idea was described at: 821 * http://www.devmaster.net/forums/showthread.php?t=5784 822 */ 823 824 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 825 i915_emit_arith(p, 826 A0_MAX, 827 tmp, A0_DEST_CHANNEL_Y, 0, 828 swizzle(tmp, ZERO, X, ZERO, ZERO), 829 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 830 0); 831 832 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 833 i915_emit_arith(p, 834 A0_MUL, 835 tmp, A0_DEST_CHANNEL_Y, 0, 836 swizzle(tmp, ZERO, X, ZERO, ZERO), 837 tmp, 838 0); 839 840 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 841 i915_emit_arith(p, 842 A0_DP3, 843 tmp, A0_DEST_CHANNEL_X, 0, 844 tmp, 845 swizzle(consts1, X, Y, ZERO, ZERO), 846 0); 847 848 /* tmp.x now contains a first approximation (y). Now, weight it 849 * against tmp.y**2 to get closer. 
850 */ 851 i915_emit_arith(p, 852 A0_MAX, 853 tmp, A0_DEST_CHANNEL_Y, 0, 854 swizzle(tmp, ZERO, X, ZERO, ZERO), 855 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 856 0); 857 858 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 859 i915_emit_arith(p, 860 A0_MAD, 861 tmp, A0_DEST_CHANNEL_Y, 0, 862 swizzle(tmp, ZERO, X, ZERO, ZERO), 863 swizzle(tmp, ZERO, Y, ZERO, ZERO), 864 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 865 866 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 867 i915_emit_arith(p, 868 A0_MAD, 869 get_result_vector(p, inst), 870 get_result_flags(inst), 0, 871 swizzle(consts1, W, W, W, W), 872 swizzle(tmp, Y, Y, Y, Y), 873 swizzle(tmp, X, X, X, X)); 874 875 break; 876 877 case OPCODE_SGE: 878 EMIT_2ARG_ARITH(A0_SGE); 879 break; 880 881 case OPCODE_SLT: 882 EMIT_2ARG_ARITH(A0_SLT); 883 break; 884 885 case OPCODE_SSG: 886 dst = get_result_vector(p, inst); 887 flags = get_result_flags(inst); 888 src0 = src_vector(p, &inst->SrcReg[0], program); 889 tmp = i915_get_utemp(p); 890 891 /* tmp = (src < 0.0) */ 892 i915_emit_arith(p, 893 A0_SLT, 894 tmp, 895 flags, 0, 896 src0, 897 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 898 0); 899 900 /* dst = (0.0 < src) */ 901 i915_emit_arith(p, 902 A0_SLT, 903 dst, 904 flags, 0, 905 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 906 src0, 907 0); 908 909 /* dst = (src > 0.0) - (src < 0.0) */ 910 i915_emit_arith(p, 911 A0_ADD, 912 dst, 913 flags, 0, 914 dst, 915 negate(tmp, 1, 1, 1, 1), 916 0); 917 918 break; 919 920 case OPCODE_SUB: 921 src0 = src_vector(p, &inst->SrcReg[0], program); 922 src1 = src_vector(p, &inst->SrcReg[1], program); 923 924 i915_emit_arith(p, 925 A0_ADD, 926 get_result_vector(p, inst), 927 get_result_flags(inst), 0, 928 src0, negate(src1, 1, 1, 1, 1), 0); 929 break; 930 931 case OPCODE_SWZ: 932 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */ 933 break; 934 935 case OPCODE_TEX: 936 EMIT_TEX(T0_TEXLD); 937 break; 938 939 case OPCODE_TXB: 940 
EMIT_TEX(T0_TEXLDB); 941 break; 942 943 case OPCODE_TXP: 944 EMIT_TEX(T0_TEXLDP); 945 break; 946 947 case OPCODE_XPD: 948 /* Cross product: 949 * result.x = src0.y * src1.z - src0.z * src1.y; 950 * result.y = src0.z * src1.x - src0.x * src1.z; 951 * result.z = src0.x * src1.y - src0.y * src1.x; 952 * result.w = undef; 953 */ 954 src0 = src_vector(p, &inst->SrcReg[0], program); 955 src1 = src_vector(p, &inst->SrcReg[1], program); 956 tmp = i915_get_utemp(p); 957 958 i915_emit_arith(p, 959 A0_MUL, 960 tmp, A0_DEST_CHANNEL_ALL, 0, 961 swizzle(src0, Z, X, Y, ONE), 962 swizzle(src1, Y, Z, X, ONE), 0); 963 964 i915_emit_arith(p, 965 A0_MAD, 966 get_result_vector(p, inst), 967 get_result_flags(inst), 0, 968 swizzle(src0, Y, Z, X, ONE), 969 swizzle(src1, Z, X, Y, ONE), 970 negate(tmp, 1, 1, 1, 0)); 971 break; 972 973 case OPCODE_END: 974 return; 975 976 case OPCODE_BGNLOOP: 977 case OPCODE_BGNSUB: 978 case OPCODE_BRK: 979 case OPCODE_CAL: 980 case OPCODE_CONT: 981 case OPCODE_DDX: 982 case OPCODE_DDY: 983 case OPCODE_ELSE: 984 case OPCODE_ENDIF: 985 case OPCODE_ENDLOOP: 986 case OPCODE_ENDSUB: 987 case OPCODE_IF: 988 case OPCODE_RET: 989 p->error = 1; 990 i915_program_error(p, "Unsupported opcode: %s", 991 _mesa_opcode_string(inst->Opcode)); 992 return; 993 994 case OPCODE_EXP: 995 case OPCODE_LOG: 996 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in 997 * prog_instruction.h, but apparently GLSL doesn't ever emit them. 998 * Instead, it translates to EX2 or LG2. 999 */ 1000 case OPCODE_TXD: 1001 case OPCODE_TXL: 1002 /* These opcodes are claimed by GLSL in prog_instruction.h, but 1003 * only NV_vp/fp appears to emit them. 
1004 */ 1005 default: 1006 i915_program_error(p, "bad opcode: %s", 1007 _mesa_opcode_string(inst->Opcode)); 1008 return; 1009 } 1010 1011 inst++; 1012 i915_release_utemps(p); 1013 } 1014} 1015 1016/* Rather than trying to intercept and jiggle depth writes during 1017 * emit, just move the value into its correct position at the end of 1018 * the program: 1019 */ 1020static void 1021fixup_depth_write(struct i915_fragment_program *p) 1022{ 1023 if (p->depth_written) { 1024 GLuint depth = UREG(REG_TYPE_OD, 0); 1025 1026 i915_emit_arith(p, 1027 A0_MOV, 1028 depth, A0_DEST_CHANNEL_W, 0, 1029 swizzle(depth, X, Y, Z, Z), 0, 0); 1030 } 1031} 1032 1033static void 1034check_texcoord_mapping(struct i915_fragment_program *p) 1035{ 1036 GLbitfield64 inputs = p->FragProg.info.inputs_read; 1037 unsigned unit = 0; 1038 1039 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1040 if (inputs & VARYING_BIT_TEX(i)) { 1041 if (unit >= p->ctx->Const.MaxTextureCoordUnits) { 1042 unit++; 1043 break; 1044 } 1045 p->texcoord_mapping[unit++] = i | TEXCOORD_TEX; 1046 } 1047 if (inputs & VARYING_BIT_VAR(i)) { 1048 if (unit >= p->ctx->Const.MaxTextureCoordUnits) { 1049 unit++; 1050 break; 1051 } 1052 p->texcoord_mapping[unit++] = i | TEXCOORD_VAR; 1053 } 1054 } 1055 1056 if (unit > p->ctx->Const.MaxTextureCoordUnits) 1057 i915_program_error(p, "Too many texcoord units"); 1058} 1059 1060static void 1061check_wpos(struct i915_fragment_program *p) 1062{ 1063 GLbitfield64 inputs = p->FragProg.info.inputs_read; 1064 GLint i; 1065 unsigned unit = 0; 1066 1067 p->wpos_tex = I915_WPOS_TEX_INVALID; 1068 1069 if ((inputs & VARYING_BIT_POS) == 0) 1070 return; 1071 1072 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1073 unit += !!(inputs & VARYING_BIT_TEX(i)); 1074 unit += !!(inputs & VARYING_BIT_VAR(i)); 1075 } 1076 1077 if (unit < p->ctx->Const.MaxTextureCoordUnits) 1078 p->wpos_tex = unit; 1079 else 1080 i915_program_error(p, "No free texcoord for wpos value"); 1081} 1082 

/**
 * Run the full translation of p->FragProg: initialise the program
 * state, build the texcoord and wpos mappings, emit the instructions,
 * fix up the depth write and finish the program.  Marks the program as
 * translated afterwards, whether or not an error was reported.
 */
static void
translate_program(struct i915_fragment_program *p)
{
   struct i915_context *i915 = I915_CONTEXT(p->ctx);

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("fp:\n");
      _mesa_print_program(&p->FragProg);
      printf("\n");
   }

   i915_init_program(i915, p);
   check_texcoord_mapping(p);
   check_wpos(p);
   upload_program(p);
   fixup_depth_write(p);
   i915_fini_program(p);

   p->translated = 1;
}


/**
 * Refresh the program's constants from its tracked parameters.
 *
 * Reloads the state parameters when the program has any tracked
 * params, copies each p->param[i].values into p->constant[reg], then
 * marks the params up to date and the program as not on hardware.
 */
static void
track_params(struct i915_fragment_program *p)
{
   GLint i;

   if (p->nr_params)
      _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters);

   for (i = 0; i < p->nr_params; i++) {
      GLint reg = p->param[i].reg;
      COPY_4V(p->constant[reg], p->param[i].values);
   }

   p->params_uptodate = 1;
   p->on_hardware = 0;          /* overkill */
}

/**
 * Allocate a program object for \p stage.
 *
 * Vertex programs get a plain gl_program.  Fragment programs get an
 * i915_fragment_program, which is initialised with i915_init_program()
 * and returned through its embedded FragProg member; NULL is returned
 * if that allocation fails.  Other stages go to _mesa_new_program().
 */
static struct gl_program *
i915NewProgram(struct gl_context * ctx, gl_shader_stage stage, GLuint id,
               bool is_arb_asm)
{
   switch (stage) {
   case MESA_SHADER_VERTEX: {
      struct gl_program *prog = rzalloc(NULL, struct gl_program);
      return _mesa_init_gl_program(prog, stage, id, is_arb_asm);
   }

   case MESA_SHADER_FRAGMENT:{
      struct i915_fragment_program *prog =
         rzalloc(NULL, struct i915_fragment_program);
      if (prog) {
         i915_init_program(I915_CONTEXT(ctx), prog);

         return _mesa_init_gl_program(&prog->FragProg, stage, id,
                                      is_arb_asm);
      }
      else
         return NULL;
   }

   default:
      /* Just fallback:
       */
      return _mesa_new_program(ctx, stage, id, is_arb_asm);
   }
}

/**
 * Delete a program.  If it is the fragment program currently recorded
 * in i915->current_program, that pointer is cleared first so it does
 * not dangle.
 */
static void
i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
{
   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
      struct i915_context *i915 = I915_CONTEXT(ctx);
      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;

      if (i915->current_program == p)
         i915->current_program = 0;
   }

   _mesa_delete_program(ctx, prog);
}


/**
 * Report whether a program can run natively.
 *
 * Fragment programs are translated on demand and are native when the
 * translation left p->error clear.  Every other target is reported as
 * native.
 */
static GLboolean
i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
{
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;

      if (!p->translated)
         translate_program(p);

      return !p->error;
   }
   else
      return true;
}

/**
 * Called when a program's string changes.  Fragment programs are
 * marked untranslated so the next user retranslates them; the tnl
 * module is then notified.  Always returns true.
 */
static GLboolean
i915ProgramStringNotify(struct gl_context * ctx,
                        GLenum target, struct gl_program *prog)
{
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
      p->translated = 0;
   }

   (void) _tnl_program_string(ctx, target, prog);

   /* XXX check if program is legal, within limits */
   return true;
}

/**
 * Sampler uniform changes are handled exactly like a program string
 * change.
 */
static void
i915SamplerUniformChange(struct gl_context *ctx,
                         GLenum target, struct gl_program *prog)
{
   i915ProgramStringNotify(ctx, target, prog);
}

/**
 * Make ctx->FragmentProgram._Current the driver's current program.
 *
 * When the program changes, the previous one is marked as neither on
 * hardware nor having up-to-date params.  The new program is translated
 * if needed and the I915_FALLBACK_PROGRAM fallback is updated from its
 * error flag.
 */
void
i915_update_program(struct gl_context *ctx)
{
   struct intel_context *intel = intel_context(ctx);
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_fragment_program *fp =
      (struct i915_fragment_program *) ctx->FragmentProgram._Current;

   if (i915->current_program != fp) {
      if (i915->current_program) {
         i915->current_program->on_hardware = 0;
         i915->current_program->params_uptodate = 0;
      }

      i915->current_program = fp;
   }

   if (!fp->translated)
      translate_program(fp);

   FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
}

/**
 * Set up the vertex layout and LIS2/LIS3/LIS4 state for the current
 * fragment program, then refresh its params and upload it if needed.
 *
 * NOTE(review): EMIT_ATTR and EMIT_PAD are macros defined outside this
 * file; they presumably append to intel->vertex_attrs and update the
 * local `offset` and `s4` -- confirm against their definitions before
 * renaming any local here.
 */
void
i915ValidateFragmentProgram(struct i915_context *i915)
{
   struct gl_context *ctx = &i915->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;

   struct i915_fragment_program *p =
      (struct i915_fragment_program *) ctx->FragmentProgram._Current;

   const GLbitfield64 inputsRead = p->FragProg.info.inputs_read;
   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
   GLuint s2 = S2_TEXCOORD_NONE;
   GLuint s3 = 0;
   int i, offset = 0;

   /* Important:
    */
   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;

   if (!p->translated)
      translate_program(p);

   intel->vertex_attr_count = 0;
   intel->wpos_offset = 0;
   intel->coloroffset = 0;
   intel->specoffset = 0;

   /* Always emit W to get consistent perspective
    * correct interpolation of primary/secondary colors.
    */
   EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);

   /* Handle gl_PointSize builtin var here */
   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);

   if (inputsRead & VARYING_BIT_COL0) {
      intel->coloroffset = offset / 4;
      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
   }

   if (inputsRead & VARYING_BIT_COL1) {
      intel->specoffset = offset / 4;
      EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
   }

   if ((inputsRead & VARYING_BIT_FOGC)) {
      EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
   }

   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
      if (inputsRead & VARYING_BIT_TEX(i)) {
         /* Hardware texcoord slot assigned by check_texcoord_mapping(). */
         int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      if (inputsRead & VARYING_BIT_VAR(i)) {
         int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
         int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      if (i == p->wpos_tex) {
         int wpos_size = 4 * sizeof(float);
         /* If WPOS is required, duplicate the XYZ position data in an
          * unused texture coordinate:
          */
         /* NOTE(review): SZ_TO_HW() is given a component count in the
          * two branches above but a byte size here -- confirm this is
          * intended.
          */
         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
         s3 |= S3_TEXCOORD_PERSPECTIVE_DISABLE(i);

         intel->wpos_offset = offset;
         EMIT_PAD(wpos_size);
      }
   }

   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
       s3 != i915->state.Ctx[I915_CTXREG_LIS3] ||
       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
      I915_STATECHANGE(i915, I915_UPLOAD_CTX);

      /* Must do this *after* statechange, so as not to affect
       * buffered vertices reliant on the old state:
       */
      intel->vertex_size = _tnl_install_attrs(&intel->ctx,
                                              intel->vertex_attrs,
                                              intel->vertex_attr_count,
                                              intel->ViewportMatrix.m, 0);

      assert(intel->prim.current_offset == intel->prim.start_offset);
      /* Round the start offset up to a multiple of the new vertex size. */
      intel->prim.start_offset =
         (intel->prim.current_offset + intel->vertex_size-1) /
         intel->vertex_size * intel->vertex_size;
      intel->prim.current_offset = intel->prim.start_offset;

      /* Divide by four: vertex_size was in bytes if _tnl_install_attrs()
       * returns bytes -- presumably it is kept in dwords from here on.
       */
      intel->vertex_size >>= 2;

      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
      i915->state.Ctx[I915_CTXREG_LIS3] = s3;
      i915->state.Ctx[I915_CTXREG_LIS4] = s4;

      assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
   }

   if (!p->params_uptodate)
      track_params(p);

   if (!p->on_hardware)
      i915_upload_program(i915, p);

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("i915:\n");
      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
   }
}

void
i915InitFragProgFuncs(struct dd_function_table *functions)
{
   functions->NewProgram = i915NewProgram;
1355 functions->DeleteProgram = i915DeleteProgram; 1356 functions->IsProgramNative = i915IsProgramNative; 1357 functions->ProgramStringNotify = i915ProgramStringNotify; 1358 functions->SamplerUniformChange = i915SamplerUniformChange; 1359} 1360