1848b8605Smrg/* 2848b8605Smrg * Copyright (C) 2004 David Airlie All Rights Reserved. 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 9848b8605Smrg * Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice shall be included 12848b8605Smrg * in all copies or substantial portions of the Software. 13848b8605Smrg * 14848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17848b8605Smrg * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18848b8605Smrg * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19848b8605Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20848b8605Smrg */ 21848b8605Smrg 22848b8605Smrg#include "main/glheader.h" 23848b8605Smrg#include "main/macros.h" 24848b8605Smrg#include "main/atifragshader.h" 25848b8605Smrg#include "main/samplerobj.h" 26848b8605Smrg#include "swrast/s_atifragshader.h" 27848b8605Smrg#include "swrast/s_context.h" 28848b8605Smrg 29b8e80941Smrg#define ATI_FS_INPUT_PRIMARY 0 30b8e80941Smrg#define ATI_FS_INPUT_SECONDARY 1 31848b8605Smrg 32848b8605Smrg/** 33848b8605Smrg * State for executing ATI fragment shader. 34848b8605Smrg */ 35848b8605Smrgstruct atifs_machine 36848b8605Smrg{ 37848b8605Smrg GLfloat Registers[6][4]; /** six temporary registers */ 38848b8605Smrg GLfloat PrevPassRegisters[6][4]; 39848b8605Smrg GLfloat Inputs[2][4]; /** Primary, secondary input colors */ 40848b8605Smrg}; 41848b8605Smrg 42848b8605Smrg 43848b8605Smrg 44848b8605Smrg/** 45848b8605Smrg * Fetch a texel. 46848b8605Smrg */ 47848b8605Smrgstatic void 48848b8605Smrgfetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda, 49848b8605Smrg GLuint unit, GLfloat color[4]) 50848b8605Smrg{ 51848b8605Smrg SWcontext *swrast = SWRAST_CONTEXT(ctx); 52848b8605Smrg 53848b8605Smrg /* XXX use a float-valued TextureSample routine here!!! */ 54848b8605Smrg swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit), 55848b8605Smrg ctx->Texture.Unit[unit]._Current, 56848b8605Smrg 1, (const GLfloat(*)[4]) texcoord, 57848b8605Smrg &lambda, (GLfloat (*)[4]) color); 58848b8605Smrg} 59848b8605Smrg 60848b8605Smrgstatic void 61848b8605Smrgapply_swizzle(GLfloat values[4], GLuint swizzle) 62848b8605Smrg{ 63848b8605Smrg GLfloat s, t, r, q; 64848b8605Smrg 65848b8605Smrg s = values[0]; 66848b8605Smrg t = values[1]; 67848b8605Smrg r = values[2]; 68848b8605Smrg q = values[3]; 69848b8605Smrg 70848b8605Smrg switch (swizzle) { 71848b8605Smrg case GL_SWIZZLE_STR_ATI: 72848b8605Smrg values[0] = s; 73848b8605Smrg values[1] = t; 74848b8605Smrg values[2] = r; 75848b8605Smrg break; 76848b8605Smrg case GL_SWIZZLE_STQ_ATI: 77848b8605Smrg values[0] = s; 78848b8605Smrg values[1] = t; 79848b8605Smrg values[2] = q; 80848b8605Smrg break; 81848b8605Smrg case GL_SWIZZLE_STR_DR_ATI: 82848b8605Smrg values[0] = s / r; 83848b8605Smrg values[1] = t / r; 84848b8605Smrg values[2] = 1 / r; 85848b8605Smrg break; 86848b8605Smrg case GL_SWIZZLE_STQ_DQ_ATI: 87848b8605Smrg/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */ 88848b8605Smrg if (q == 0.0F) 89848b8605Smrg q = 0.000000001F; 90848b8605Smrg values[0] = s / q; 91848b8605Smrg values[1] = t / q; 92848b8605Smrg values[2] = 1.0F / q; 93848b8605Smrg break; 94848b8605Smrg } 95848b8605Smrg values[3] = 0.0; 96848b8605Smrg} 97848b8605Smrg 98848b8605Smrgstatic void 99848b8605Smrgapply_src_rep(GLint optype, GLuint rep, GLfloat * val) 100848b8605Smrg{ 101848b8605Smrg GLint i; 102848b8605Smrg GLint start, end; 103848b8605Smrg if (!rep) 104848b8605Smrg return; 105848b8605Smrg 106848b8605Smrg start = optype ? 3 : 0; 107848b8605Smrg end = 4; 108848b8605Smrg 109848b8605Smrg for (i = start; i < end; i++) { 110848b8605Smrg switch (rep) { 111848b8605Smrg case GL_RED: 112848b8605Smrg val[i] = val[0]; 113848b8605Smrg break; 114848b8605Smrg case GL_GREEN: 115848b8605Smrg val[i] = val[1]; 116848b8605Smrg break; 117848b8605Smrg case GL_BLUE: 118848b8605Smrg val[i] = val[2]; 119848b8605Smrg break; 120848b8605Smrg case GL_ALPHA: 121848b8605Smrg val[i] = val[3]; 122848b8605Smrg break; 123848b8605Smrg } 124848b8605Smrg } 125848b8605Smrg} 126848b8605Smrg 127848b8605Smrgstatic void 128848b8605Smrgapply_src_mod(GLint optype, GLuint mod, GLfloat * val) 129848b8605Smrg{ 130848b8605Smrg GLint i; 131848b8605Smrg GLint start, end; 132848b8605Smrg 133848b8605Smrg if (!mod) 134848b8605Smrg return; 135848b8605Smrg 136848b8605Smrg start = optype ? 3 : 0; 137848b8605Smrg end = 4; 138848b8605Smrg 139848b8605Smrg for (i = start; i < end; i++) { 140848b8605Smrg if (mod & GL_COMP_BIT_ATI) 141848b8605Smrg val[i] = 1 - val[i]; 142848b8605Smrg 143848b8605Smrg if (mod & GL_BIAS_BIT_ATI) 144848b8605Smrg val[i] = val[i] - 0.5F; 145848b8605Smrg 146848b8605Smrg if (mod & GL_2X_BIT_ATI) 147848b8605Smrg val[i] = 2 * val[i]; 148848b8605Smrg 149848b8605Smrg if (mod & GL_NEGATE_BIT_ATI) 150848b8605Smrg val[i] = -val[i]; 151848b8605Smrg } 152848b8605Smrg} 153848b8605Smrg 154848b8605Smrgstatic void 155848b8605Smrgapply_dst_mod(GLuint optype, GLuint mod, GLfloat * val) 156848b8605Smrg{ 157848b8605Smrg GLint i; 158848b8605Smrg GLint has_sat = mod & GL_SATURATE_BIT_ATI; 159848b8605Smrg GLint start, end; 160848b8605Smrg 161848b8605Smrg mod &= ~GL_SATURATE_BIT_ATI; 162848b8605Smrg 163848b8605Smrg start = optype ? 3 : 0; 164848b8605Smrg end = optype ? 4 : 3; 165848b8605Smrg 166848b8605Smrg for (i = start; i < end; i++) { 167848b8605Smrg switch (mod) { 168848b8605Smrg case GL_2X_BIT_ATI: 169848b8605Smrg val[i] = 2 * val[i]; 170848b8605Smrg break; 171848b8605Smrg case GL_4X_BIT_ATI: 172848b8605Smrg val[i] = 4 * val[i]; 173848b8605Smrg break; 174848b8605Smrg case GL_8X_BIT_ATI: 175848b8605Smrg val[i] = 8 * val[i]; 176848b8605Smrg break; 177848b8605Smrg case GL_HALF_BIT_ATI: 178848b8605Smrg val[i] = val[i] * 0.5F; 179848b8605Smrg break; 180848b8605Smrg case GL_QUARTER_BIT_ATI: 181848b8605Smrg val[i] = val[i] * 0.25F; 182848b8605Smrg break; 183848b8605Smrg case GL_EIGHTH_BIT_ATI: 184848b8605Smrg val[i] = val[i] * 0.125F; 185848b8605Smrg break; 186848b8605Smrg } 187848b8605Smrg 188848b8605Smrg if (has_sat) { 189848b8605Smrg if (val[i] < 0.0F) 190848b8605Smrg val[i] = 0.0F; 191848b8605Smrg else if (val[i] > 1.0F) 192848b8605Smrg val[i] = 1.0F; 193848b8605Smrg } 194848b8605Smrg else { 195848b8605Smrg if (val[i] < -8.0F) 196848b8605Smrg val[i] = -8.0F; 197848b8605Smrg else if (val[i] > 8.0F) 198848b8605Smrg val[i] = 8.0F; 199848b8605Smrg } 200848b8605Smrg } 201848b8605Smrg} 202848b8605Smrg 203848b8605Smrg 204848b8605Smrgstatic void 205848b8605Smrgwrite_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src, 206848b8605Smrg GLfloat * dst) 207848b8605Smrg{ 208848b8605Smrg GLint i; 209848b8605Smrg apply_dst_mod(optype, mod, src); 210848b8605Smrg 211848b8605Smrg if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) { 212848b8605Smrg if (mask) { 213848b8605Smrg if (mask & GL_RED_BIT_ATI) 214848b8605Smrg dst[0] = src[0]; 215848b8605Smrg 216848b8605Smrg if (mask & GL_GREEN_BIT_ATI) 217848b8605Smrg dst[1] = src[1]; 218848b8605Smrg 219848b8605Smrg if (mask & GL_BLUE_BIT_ATI) 220848b8605Smrg dst[2] = src[2]; 221848b8605Smrg } 222848b8605Smrg else { 223848b8605Smrg for (i = 0; i < 3; i++) 224848b8605Smrg dst[i] = src[i]; 225848b8605Smrg } 226848b8605Smrg } 227848b8605Smrg else 228848b8605Smrg dst[3] = src[3]; 229848b8605Smrg} 230848b8605Smrg 231848b8605Smrgstatic void 232848b8605Smrgfinish_pass(struct atifs_machine *machine) 233848b8605Smrg{ 234848b8605Smrg GLint i; 235848b8605Smrg 236848b8605Smrg for (i = 0; i < 6; i++) { 237848b8605Smrg COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]); 238848b8605Smrg } 239848b8605Smrg} 240848b8605Smrg 241848b8605Smrg 242848b8605Smrgstatic void 243848b8605Smrghandle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst, 244848b8605Smrg const SWspan *span, GLuint column, GLuint idx) 245848b8605Smrg{ 246848b8605Smrg GLuint swizzle = texinst->swizzle; 247848b8605Smrg GLuint pass_tex = texinst->src; 248848b8605Smrg 249848b8605Smrg if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 250848b8605Smrg pass_tex -= GL_TEXTURE0_ARB; 251848b8605Smrg COPY_4V(machine->Registers[idx], 252848b8605Smrg span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]); 253848b8605Smrg } 254848b8605Smrg else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { 255848b8605Smrg pass_tex -= GL_REG_0_ATI; 256848b8605Smrg COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]); 257848b8605Smrg } 258848b8605Smrg apply_swizzle(machine->Registers[idx], swizzle); 259848b8605Smrg 260848b8605Smrg} 261848b8605Smrg 262848b8605Smrgstatic void 263848b8605Smrghandle_sample_op(struct gl_context * ctx, struct atifs_machine *machine, 264848b8605Smrg struct atifs_setupinst *texinst, const SWspan *span, 265848b8605Smrg GLuint column, GLuint idx) 266848b8605Smrg{ 267848b8605Smrg/* sample from unit idx using texinst->src as coords */ 268848b8605Smrg GLuint swizzle = texinst->swizzle; 269848b8605Smrg GLuint coord_source = texinst->src; 270848b8605Smrg GLfloat tex_coords[4] = { 0 }; 271848b8605Smrg 272848b8605Smrg if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) { 273848b8605Smrg coord_source -= GL_TEXTURE0_ARB; 274848b8605Smrg COPY_4V(tex_coords, 275848b8605Smrg span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]); 276848b8605Smrg } 277848b8605Smrg else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) { 278848b8605Smrg coord_source -= GL_REG_0_ATI; 279848b8605Smrg COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]); 280848b8605Smrg } 281848b8605Smrg apply_swizzle(tex_coords, swizzle); 282848b8605Smrg fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]); 283848b8605Smrg} 284848b8605Smrg 285848b8605Smrg#define SETUP_SRC_REG(optype, i, x) \ 286848b8605Smrgdo { \ 287848b8605Smrg COPY_4V(src[optype][i], x); \ 288848b8605Smrg} while (0) 289848b8605Smrg 290848b8605Smrg 291848b8605Smrg 292848b8605Smrg/** 293848b8605Smrg * Execute the given fragment shader. 294848b8605Smrg * NOTE: we do everything in single-precision floating point 295848b8605Smrg * \param ctx - rendering context 296848b8605Smrg * \param shader - the shader to execute 297848b8605Smrg * \param machine - virtual machine state 298848b8605Smrg * \param span - the SWspan we're operating on 299848b8605Smrg * \param column - which pixel [i] we're operating on in the span 300848b8605Smrg */ 301848b8605Smrgstatic void 302848b8605Smrgexecute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader, 303848b8605Smrg struct atifs_machine *machine, const SWspan *span, 304848b8605Smrg GLuint column) 305848b8605Smrg{ 306848b8605Smrg GLuint pc; 307848b8605Smrg struct atifs_instruction *inst; 308848b8605Smrg struct atifs_setupinst *texinst; 309848b8605Smrg GLint optype; 310848b8605Smrg GLuint i; 311848b8605Smrg GLint j, pass; 312848b8605Smrg GLint dstreg; 313848b8605Smrg GLfloat src[2][3][4]; 314848b8605Smrg GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 }; 315848b8605Smrg GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 }; 316848b8605Smrg GLfloat dst[2][4], *dstp; 317848b8605Smrg 318848b8605Smrg for (pass = 0; pass < shader->NumPasses; pass++) { 319848b8605Smrg if (pass > 0) 320848b8605Smrg finish_pass(machine); 321848b8605Smrg for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) { 322848b8605Smrg texinst = &shader->SetupInst[pass][j]; 323848b8605Smrg if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) 324848b8605Smrg handle_pass_op(machine, texinst, span, column, j); 325848b8605Smrg else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) 326848b8605Smrg handle_sample_op(ctx, machine, texinst, span, column, j); 327848b8605Smrg } 328848b8605Smrg 329848b8605Smrg for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { 330848b8605Smrg inst = &shader->Instructions[pass][pc]; 331848b8605Smrg 332848b8605Smrg /* setup the source registers for color and alpha ops */ 333848b8605Smrg for (optype = 0; optype < 2; optype++) { 334848b8605Smrg for (i = 0; i < inst->ArgCount[optype]; i++) { 335848b8605Smrg GLint index = inst->SrcReg[optype][i].Index; 336848b8605Smrg 337848b8605Smrg if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) 338848b8605Smrg SETUP_SRC_REG(optype, i, 339848b8605Smrg machine->Registers[index - GL_REG_0_ATI]); 340848b8605Smrg else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { 341848b8605Smrg if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) { 342848b8605Smrg SETUP_SRC_REG(optype, i, 343848b8605Smrg shader->Constants[index - GL_CON_0_ATI]); 344848b8605Smrg } else { 345848b8605Smrg SETUP_SRC_REG(optype, i, 346848b8605Smrg ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]); 347848b8605Smrg } 348848b8605Smrg } 349848b8605Smrg else if (index == GL_ONE) 350848b8605Smrg SETUP_SRC_REG(optype, i, ones); 351848b8605Smrg else if (index == GL_ZERO) 352848b8605Smrg SETUP_SRC_REG(optype, i, zeros); 353848b8605Smrg else if (index == GL_PRIMARY_COLOR_EXT) 354848b8605Smrg SETUP_SRC_REG(optype, i, 355848b8605Smrg machine->Inputs[ATI_FS_INPUT_PRIMARY]); 356848b8605Smrg else if (index == GL_SECONDARY_INTERPOLATOR_ATI) 357848b8605Smrg SETUP_SRC_REG(optype, i, 358848b8605Smrg machine->Inputs[ATI_FS_INPUT_SECONDARY]); 359848b8605Smrg 360848b8605Smrg apply_src_rep(optype, inst->SrcReg[optype][i].argRep, 361848b8605Smrg src[optype][i]); 362848b8605Smrg apply_src_mod(optype, inst->SrcReg[optype][i].argMod, 363848b8605Smrg src[optype][i]); 364848b8605Smrg } 365848b8605Smrg } 366848b8605Smrg 367848b8605Smrg /* Execute the operations - color then alpha */ 368848b8605Smrg for (optype = 0; optype < 2; optype++) { 369848b8605Smrg if (inst->Opcode[optype]) { 370848b8605Smrg switch (inst->Opcode[optype]) { 371848b8605Smrg case GL_ADD_ATI: 372848b8605Smrg if (!optype) 373848b8605Smrg for (i = 0; i < 3; i++) { 374848b8605Smrg dst[optype][i] = 375848b8605Smrg src[optype][0][i] + src[optype][1][i]; 376848b8605Smrg } 377848b8605Smrg else 378848b8605Smrg dst[optype][3] = src[optype][0][3] + src[optype][1][3]; 379848b8605Smrg break; 380848b8605Smrg case GL_SUB_ATI: 381848b8605Smrg if (!optype) 382848b8605Smrg for (i = 0; i < 3; i++) { 383848b8605Smrg dst[optype][i] = 384848b8605Smrg src[optype][0][i] - src[optype][1][i]; 385848b8605Smrg } 386848b8605Smrg else 387848b8605Smrg dst[optype][3] = src[optype][0][3] - src[optype][1][3]; 388848b8605Smrg break; 389848b8605Smrg case GL_MUL_ATI: 390848b8605Smrg if (!optype) 391848b8605Smrg for (i = 0; i < 3; i++) { 392848b8605Smrg dst[optype][i] = 393848b8605Smrg src[optype][0][i] * src[optype][1][i]; 394848b8605Smrg } 395848b8605Smrg else 396848b8605Smrg dst[optype][3] = src[optype][0][3] * src[optype][1][3]; 397848b8605Smrg break; 398848b8605Smrg case GL_MAD_ATI: 399848b8605Smrg if (!optype) 400848b8605Smrg for (i = 0; i < 3; i++) { 401848b8605Smrg dst[optype][i] = 402848b8605Smrg src[optype][0][i] * src[optype][1][i] + 403848b8605Smrg src[optype][2][i]; 404848b8605Smrg } 405848b8605Smrg else 406848b8605Smrg dst[optype][3] = 407848b8605Smrg src[optype][0][3] * src[optype][1][3] + 408848b8605Smrg src[optype][2][3]; 409848b8605Smrg break; 410848b8605Smrg case GL_LERP_ATI: 411848b8605Smrg if (!optype) 412848b8605Smrg for (i = 0; i < 3; i++) { 413848b8605Smrg dst[optype][i] = 414848b8605Smrg src[optype][0][i] * src[optype][1][i] + (1 - 415848b8605Smrg src 416848b8605Smrg [optype] 417848b8605Smrg [0][i]) * 418848b8605Smrg src[optype][2][i]; 419848b8605Smrg } 420848b8605Smrg else 421848b8605Smrg dst[optype][3] = 422848b8605Smrg src[optype][0][3] * src[optype][1][3] + (1 - 423848b8605Smrg src[optype] 424848b8605Smrg [0][3]) * 425848b8605Smrg src[optype][2][3]; 426848b8605Smrg break; 427848b8605Smrg 428848b8605Smrg case GL_MOV_ATI: 429848b8605Smrg if (!optype) 430848b8605Smrg for (i = 0; i < 3; i++) { 431848b8605Smrg dst[optype][i] = src[optype][0][i]; 432848b8605Smrg } 433848b8605Smrg else 434848b8605Smrg dst[optype][3] = src[optype][0][3]; 435848b8605Smrg break; 436848b8605Smrg case GL_CND_ATI: 437848b8605Smrg if (!optype) { 438848b8605Smrg for (i = 0; i < 3; i++) { 439848b8605Smrg dst[optype][i] = 440848b8605Smrg (src[optype][2][i] > 441b8e80941Smrg 0.5F) ? src[optype][0][i] : src[optype][1][i]; 442848b8605Smrg } 443848b8605Smrg } 444848b8605Smrg else { 445848b8605Smrg dst[optype][3] = 446848b8605Smrg (src[optype][2][3] > 447b8e80941Smrg 0.5F) ? src[optype][0][3] : src[optype][1][3]; 448848b8605Smrg } 449848b8605Smrg break; 450848b8605Smrg 451848b8605Smrg case GL_CND0_ATI: 452848b8605Smrg if (!optype) 453848b8605Smrg for (i = 0; i < 3; i++) { 454848b8605Smrg dst[optype][i] = 455848b8605Smrg (src[optype][2][i] >= 456848b8605Smrg 0) ? src[optype][0][i] : src[optype][1][i]; 457848b8605Smrg } 458848b8605Smrg else { 459848b8605Smrg dst[optype][3] = 460848b8605Smrg (src[optype][2][3] >= 461848b8605Smrg 0) ? src[optype][0][3] : src[optype][1][3]; 462848b8605Smrg } 463848b8605Smrg break; 464848b8605Smrg case GL_DOT2_ADD_ATI: 465848b8605Smrg { 466848b8605Smrg GLfloat result; 467848b8605Smrg 468848b8605Smrg /* DOT 2 always uses the source from the color op */ 469848b8605Smrg /* could save recalculation of dot products for alpha inst */ 470848b8605Smrg result = src[0][0][0] * src[0][1][0] + 471848b8605Smrg src[0][0][1] * src[0][1][1] + src[0][2][2]; 472848b8605Smrg if (!optype) { 473848b8605Smrg for (i = 0; i < 3; i++) { 474848b8605Smrg dst[optype][i] = result; 475848b8605Smrg } 476848b8605Smrg } 477848b8605Smrg else 478848b8605Smrg dst[optype][3] = result; 479848b8605Smrg } 480848b8605Smrg break; 481848b8605Smrg case GL_DOT3_ATI: 482848b8605Smrg { 483848b8605Smrg GLfloat result; 484848b8605Smrg 485848b8605Smrg /* DOT 3 always uses the source from the color op */ 486848b8605Smrg result = src[0][0][0] * src[0][1][0] + 487848b8605Smrg src[0][0][1] * src[0][1][1] + 488848b8605Smrg src[0][0][2] * src[0][1][2]; 489848b8605Smrg 490848b8605Smrg if (!optype) { 491848b8605Smrg for (i = 0; i < 3; i++) { 492848b8605Smrg dst[optype][i] = result; 493848b8605Smrg } 494848b8605Smrg } 495848b8605Smrg else 496848b8605Smrg dst[optype][3] = result; 497848b8605Smrg } 498848b8605Smrg break; 499848b8605Smrg case GL_DOT4_ATI: 500848b8605Smrg { 501848b8605Smrg GLfloat result; 502848b8605Smrg 503848b8605Smrg /* DOT 4 always uses the source from the color op */ 504848b8605Smrg result = src[0][0][0] * src[0][1][0] + 505848b8605Smrg src[0][0][1] * src[0][1][1] + 506848b8605Smrg src[0][0][2] * src[0][1][2] + 507848b8605Smrg src[0][0][3] * src[0][1][3]; 508848b8605Smrg if (!optype) { 509848b8605Smrg for (i = 0; i < 3; i++) { 510848b8605Smrg dst[optype][i] = result; 511848b8605Smrg } 512848b8605Smrg } 513848b8605Smrg else 514848b8605Smrg dst[optype][3] = result; 515848b8605Smrg } 516848b8605Smrg break; 517848b8605Smrg 518848b8605Smrg } 519848b8605Smrg } 520848b8605Smrg } 521848b8605Smrg 522848b8605Smrg /* write out the destination registers */ 523848b8605Smrg for (optype = 0; optype < 2; optype++) { 524848b8605Smrg if (inst->Opcode[optype]) { 525848b8605Smrg dstreg = inst->DstReg[optype].Index; 526848b8605Smrg dstp = machine->Registers[dstreg - GL_REG_0_ATI]; 527848b8605Smrg 528848b8605Smrg if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) && 529848b8605Smrg (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI))) 530848b8605Smrg write_dst_addr(optype, inst->DstReg[optype].dstMod, 531848b8605Smrg inst->DstReg[optype].dstMask, dst[optype], 532848b8605Smrg dstp); 533848b8605Smrg else 534848b8605Smrg write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp); 535848b8605Smrg } 536848b8605Smrg } 537848b8605Smrg } 538848b8605Smrg } 539848b8605Smrg} 540848b8605Smrg 541848b8605Smrg 542848b8605Smrg/** 543848b8605Smrg * Init fragment shader virtual machine state. 544848b8605Smrg */ 545848b8605Smrgstatic void 546848b8605Smrginit_machine(struct gl_context * ctx, struct atifs_machine *machine, 547848b8605Smrg const struct ati_fragment_shader *shader, 548848b8605Smrg const SWspan *span, GLuint col) 549848b8605Smrg{ 550848b8605Smrg GLfloat (*inputs)[4] = machine->Inputs; 551848b8605Smrg GLint i, j; 552848b8605Smrg 553848b8605Smrg for (i = 0; i < 6; i++) { 554848b8605Smrg for (j = 0; j < 4; j++) 555848b8605Smrg machine->Registers[i][j] = 0.0; 556848b8605Smrg } 557848b8605Smrg 558848b8605Smrg COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]); 559848b8605Smrg COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]); 560848b8605Smrg} 561848b8605Smrg 562848b8605Smrg 563848b8605Smrg 564848b8605Smrg/** 565848b8605Smrg * Execute the current ATI shader program, operating on the given span. 566848b8605Smrg */ 567848b8605Smrgvoid 568848b8605Smrg_swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span) 569848b8605Smrg{ 570848b8605Smrg const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; 571848b8605Smrg struct atifs_machine machine; 572848b8605Smrg GLuint i; 573848b8605Smrg 574848b8605Smrg /* incoming colors should be floats */ 575b8e80941Smrg assert(span->array->ChanType == GL_FLOAT); 576848b8605Smrg 577848b8605Smrg for (i = 0; i < span->end; i++) { 578848b8605Smrg if (span->array->mask[i]) { 579848b8605Smrg init_machine(ctx, &machine, shader, span, i); 580848b8605Smrg 581848b8605Smrg execute_shader(ctx, shader, &machine, span, i); 582848b8605Smrg 583848b8605Smrg /* store result color */ 584848b8605Smrg { 585848b8605Smrg const GLfloat *colOut = machine.Registers[0]; 586848b8605Smrg /*fprintf(stderr,"outputs %f %f %f %f\n", 587848b8605Smrg colOut[0], colOut[1], colOut[2], colOut[3]); */ 588848b8605Smrg COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut); 589848b8605Smrg } 590848b8605Smrg } 591848b8605Smrg } 592848b8605Smrg} 593