radeon_textured_videofuncs.c revision b7e1c893
1/* 2 * Copyright 2008 Alex Deucher 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * 24 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 25 * 26 */ 27 28#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 29#error Cannot define both MMIO and CP acceleration! 30#endif 31 32#if !defined(UNIXCPP) || defined(ANSICPP) 33#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 34#else 35#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 36#endif 37 38#ifdef ACCEL_MMIO 39#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 40#else 41#ifdef ACCEL_CP 42#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 43#else 44#error No accel type defined! 
45#endif 46#endif 47 48#ifdef ACCEL_CP 49 50#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 51do { \ 52 OUT_RING_F(_dstX); \ 53 OUT_RING_F(_dstY); \ 54 OUT_RING_F(_srcX); \ 55 OUT_RING_F(_srcY); \ 56 OUT_RING_F(_maskX); \ 57 OUT_RING_F(_maskY); \ 58} while (0) 59 60#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ 61do { \ 62 OUT_RING_F(_dstX); \ 63 OUT_RING_F(_dstY); \ 64 OUT_RING_F(_srcX); \ 65 OUT_RING_F(_srcY); \ 66} while (0) 67 68#else /* ACCEL_CP */ 69 70#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 71do { \ 72 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 73 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 74 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 75 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 76 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX); \ 77 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ 78} while (0) 79 80#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ 81do { \ 82 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 83 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 84 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 85 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 86} while (0) 87 88#endif /* !ACCEL_CP */ 89 90static void 91FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 92{ 93 RADEONInfoPtr info = RADEONPTR(pScrn); 94 PixmapPtr pPixmap = pPriv->pPixmap; 95 uint32_t txformat; 96 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 97 uint32_t dst_offset, dst_pitch, dst_format; 98 uint32_t txenable, colorpitch; 99 uint32_t blendcntl; 100 Bool isplanar = FALSE; 101 int dstxoff, dstyoff, pixel_shift, vtx_count; 102 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 103 int nBox = REGION_NUM_RECTS(&pPriv->clip); 104 ACCEL_PREAMBLE(); 105 106 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 107 108#ifdef USE_EXA 109 if (info->useEXA) { 110 dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; 111 dst_pitch = exaGetPixmapPitch(pPixmap); 112 } else 
113#endif 114 { 115 dst_offset = (pPixmap->devPrivate.ptr - info->FB) + 116 info->fbLocation + pScrn->fbOffset; 117 dst_pitch = pPixmap->devKind; 118 } 119 120#ifdef COMPOSITE 121 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 122 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 123#else 124 dstxoff = 0; 125 dstyoff = 0; 126#endif 127 128#ifdef USE_EXA 129 if (info->useEXA) { 130 RADEON_SWITCH_TO_3D(); 131 } else 132#endif 133 { 134 BEGIN_ACCEL(2); 135 if (IS_R300_3D || IS_R500_3D) 136 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 137 else 138 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 139 /* We must wait for 3d to idle, in case source was just written as a dest. */ 140 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 141 RADEON_WAIT_HOST_IDLECLEAN | 142 RADEON_WAIT_2D_IDLECLEAN | 143 RADEON_WAIT_3D_IDLECLEAN | 144 RADEON_WAIT_DMA_GUI_IDLE); 145 FINISH_ACCEL(); 146 147 if (!info->accel_state->XInited3D) 148 RADEONInit3DEngine(pScrn); 149 } 150 151 if (pPriv->bicubic_enabled) 152 vtx_count = 6; 153 else 154 vtx_count = 4; 155 156 if (IS_R300_3D || IS_R500_3D) { 157 uint32_t output_fmt; 158 159 switch (pPixmap->drawable.bitsPerPixel) { 160 case 16: 161 if (pPixmap->drawable.depth == 15) 162 dst_format = R300_COLORFORMAT_ARGB1555; 163 else 164 dst_format = R300_COLORFORMAT_RGB565; 165 break; 166 case 32: 167 dst_format = R300_COLORFORMAT_ARGB8888; 168 break; 169 default: 170 return; 171 } 172 173 output_fmt = (R300_OUT_FMT_C4_8 | 174 R300_OUT_FMT_C0_SEL_BLUE | 175 R300_OUT_FMT_C1_SEL_GREEN | 176 R300_OUT_FMT_C2_SEL_RED | 177 R300_OUT_FMT_C3_SEL_ALPHA); 178 179 colorpitch = dst_pitch >> pixel_shift; 180 colorpitch |= dst_format; 181 182 if (RADEONTilingEnabled(pScrn, pPixmap)) 183 colorpitch |= R300_COLORTILE; 184 185 if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { 186 isplanar = TRUE; 187 } 188 189 if (isplanar) { 190 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 191 txpitch = 
pPriv->src_pitch; 192 } else { 193 if (pPriv->id == FOURCC_UYVY) 194 txformat1 = R300_TX_FORMAT_YVYU422; 195 else 196 txformat1 = R300_TX_FORMAT_VYUY422; 197 198 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 199 200 /* pitch is in pixels */ 201 txpitch = pPriv->src_pitch / 2; 202 } 203 txpitch -= 1; 204 205 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 206 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 207 R300_TXPITCH_EN); 208 209 info->accel_state->texW[0] = pPriv->w; 210 info->accel_state->texH[0] = pPriv->h; 211 212 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 213 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 214 R300_TX_MAG_FILTER_LINEAR | 215 R300_TX_MIN_FILTER_LINEAR | 216 (0 << R300_TX_ID_SHIFT)); 217 218 219 if (IS_R500_3D && ((pPriv->w - 1) & 0x800)) 220 txpitch |= R500_TXWIDTH_11; 221 222 if (IS_R500_3D && ((pPriv->h - 1) & 0x800)) 223 txpitch |= R500_TXHEIGHT_11; 224 225 txoffset = pPriv->src_offset; 226 227 BEGIN_ACCEL(6); 228 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 229 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 230 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 231 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 232 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 233 OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset); 234 FINISH_ACCEL(); 235 236 txenable = R300_TEX_0_ENABLE; 237 238 if (isplanar) { 239 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 240 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 241 R300_TXPITCH_EN); 242 txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; 243 txpitch -= 1; 244 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 245 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 246 R300_TX_MIN_FILTER_LINEAR | 247 R300_TX_MAG_FILTER_LINEAR); 248 249 BEGIN_ACCEL(12); 250 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 251 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 252 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 253 OUT_ACCEL_REG(R300_TX_FORMAT1_1, 
R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 254 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 255 OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset); 256 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 257 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 258 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 259 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 260 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 261 OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset); 262 FINISH_ACCEL(); 263 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 264 } 265 266 if (pPriv->bicubic_enabled) { 267 /* Size is 128x1 */ 268 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 269 (0x0 << R300_TXHEIGHT_SHIFT) | 270 R300_TXPITCH_EN); 271 /* Format is 32-bit floats, 4bpp */ 272 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 273 /* Pitch is 127 (128-1) */ 274 txpitch = 0x7f; 275 /* Tex filter */ 276 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 277 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 278 R300_TX_MIN_FILTER_NEAREST | 279 R300_TX_MAG_FILTER_NEAREST | 280 (1 << R300_TX_ID_SHIFT)); 281 282 BEGIN_ACCEL(6); 283 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 284 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 285 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 286 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 287 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 288 OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset); 289 FINISH_ACCEL(); 290 291 /* Enable tex 1 */ 292 txenable |= R300_TEX_1_ENABLE; 293 } 294 295 /* setup the VAP */ 296 if (info->accel_state->has_tcl) { 297 if (pPriv->bicubic_enabled) 298 BEGIN_ACCEL(7); 299 else 300 BEGIN_ACCEL(6); 301 } else { 302 if (pPriv->bicubic_enabled) 303 BEGIN_ACCEL(5); 304 else 305 BEGIN_ACCEL(4); 306 } 307 308 /* These registers define the number, type, and location of data submitted 309 * to the PVS unit of GA input (when PVS is disabled) 310 * DST_VEC_LOC is the slot 
in the PVS input vector memory when PVS/TCL is 311 * enabled. This memory provides the inputs to the vertex shader program 312 * and ordering is not important. When PVS/TCL is disabled, this field maps 313 * directly to the GA input memory and the order is significant. In 314 * PVS_BYPASS mode the order is as follows: 315 * Position 316 * Point Size 317 * Color 0-3 318 * Textures 0-7 319 * Fog 320 */ 321 if (pPriv->bicubic_enabled) { 322 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 323 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 324 (0 << R300_SKIP_DWORDS_0_SHIFT) | 325 (0 << R300_DST_VEC_LOC_0_SHIFT) | 326 R300_SIGNED_0 | 327 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 328 (0 << R300_SKIP_DWORDS_1_SHIFT) | 329 (6 << R300_DST_VEC_LOC_1_SHIFT) | 330 R300_SIGNED_1)); 331 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 332 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 333 (0 << R300_SKIP_DWORDS_2_SHIFT) | 334 (7 << R300_DST_VEC_LOC_2_SHIFT) | 335 R300_LAST_VEC_2 | 336 R300_SIGNED_2)); 337 } else { 338 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 339 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 340 (0 << R300_SKIP_DWORDS_0_SHIFT) | 341 (0 << R300_DST_VEC_LOC_0_SHIFT) | 342 R300_SIGNED_0 | 343 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 344 (0 << R300_SKIP_DWORDS_1_SHIFT) | 345 (6 << R300_DST_VEC_LOC_1_SHIFT) | 346 R300_LAST_VEC_1 | 347 R300_SIGNED_1)); 348 } 349 350 /* load the vertex shader 351 * We pre-load vertex programs in RADEONInit3DEngine(): 352 * - exa mask/Xv bicubic 353 * - exa no mask 354 * - Xv 355 * Here we select the offset of the vertex program we want to use 356 */ 357 if (info->accel_state->has_tcl) { 358 if (pPriv->bicubic_enabled) { 359 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 360 ((0 << R300_PVS_FIRST_INST_SHIFT) | 361 (2 << R300_PVS_XYZW_VALID_INST_SHIFT) | 362 (2 << R300_PVS_LAST_INST_SHIFT))); 363 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 364 (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 365 } else { 366 
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 367 ((5 << R300_PVS_FIRST_INST_SHIFT) | 368 (6 << R300_PVS_XYZW_VALID_INST_SHIFT) | 369 (6 << R300_PVS_LAST_INST_SHIFT))); 370 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 371 (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 372 } 373 } 374 375 /* Position and one set of 2 texture coordinates */ 376 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 377 if (pPriv->bicubic_enabled) 378 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 379 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 380 else 381 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 382 383 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 384 FINISH_ACCEL(); 385 386 /* setup pixel shader */ 387 if (IS_R300_3D) { 388 if (pPriv->bicubic_enabled) { 389 BEGIN_ACCEL(79); 390 391 /* 4 components: 2 for tex0 and 2 for tex1 */ 392 OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 393 R300_RS_COUNT_HIRES_EN)); 394 395 /* R300_INST_COUNT_RS - highest RS instruction used */ 396 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); 397 398 /* Pixel stack frame size. */ 399 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 400 401 /* Indirection levels */ 402 OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 403 R300_FIRST_TEX)); 404 405 /* Set nodes. 
*/ 406 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 407 R300_ALU_CODE_SIZE(14) | 408 R300_TEX_CODE_OFFSET(0) | 409 R300_TEX_CODE_SIZE(6))); 410 411 /* Nodes are allocated highest first, but executed lowest first */ 412 OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); 413 OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 414 R300_ALU_SIZE(0) | 415 R300_TEX_START(0) | 416 R300_TEX_SIZE(0))); 417 OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 418 R300_ALU_SIZE(9) | 419 R300_TEX_START(1) | 420 R300_TEX_SIZE(0))); 421 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 422 R300_ALU_SIZE(2) | 423 R300_TEX_START(2) | 424 R300_TEX_SIZE(3) | 425 R300_RGBA_OUT)); 426 427 /* ** BICUBIC FP ** */ 428 429 /* texcoord0 => temp0 430 * texcoord1 => temp1 */ 431 432 // first node 433 /* TEX temp2, temp1.rrr0, tex1, 1D */ 434 OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 435 R300_TEX_ID(1) | 436 R300_TEX_SRC_ADDR(1) | 437 R300_TEX_DST_ADDR(2))); 438 439 /* MOV temp1.r, temp1.ggg0 */ 440 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 441 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 442 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 443 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 444 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 445 R300_ALU_RGB_ADDRD(1) | 446 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 447 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 448 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 449 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 450 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 451 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 452 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 453 454 455 // second node 456 /* TEX temp1, temp1, tex1, 1D */ 457 OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 458 R300_TEX_ID(1) | 459 R300_TEX_SRC_ADDR(1) | 460 R300_TEX_DST_ADDR(1))); 461 462 /* MUL temp3.rg, temp2.ggg0, 
const0.rgb0 */ 463 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 464 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 465 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 466 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 467 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 468 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 469 R300_ALU_RGB_ADDRD(3) | 470 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 471 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 472 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 473 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 474 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 475 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 476 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 477 478 479 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 480 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 481 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 482 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 483 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 484 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 485 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 486 R300_ALU_RGB_ADDRD(2) | 487 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 488 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 489 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 490 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 491 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 492 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 493 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 494 495 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 496 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 497 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 498 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 499 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 500 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), 
(R300_ALU_RGB_ADDR0(1) | 501 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 502 R300_ALU_RGB_ADDR2(3) | 503 R300_ALU_RGB_ADDRD(4) | 504 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 505 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 506 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 507 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 508 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 509 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 510 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 511 512 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 513 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 514 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 515 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 516 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 517 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 518 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 519 R300_ALU_RGB_ADDR2(2) | 520 R300_ALU_RGB_ADDRD(5) | 521 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 522 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 523 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 524 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 525 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 526 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 527 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 528 529 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 530 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 531 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 532 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 533 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 534 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 535 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 536 R300_ALU_RGB_ADDR2(3) | 537 R300_ALU_RGB_ADDRD(3) | 538 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 539 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 540 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 541 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 542 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 543 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 544 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 545 546 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 547 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 548 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 549 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 550 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 551 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 552 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 553 R300_ALU_RGB_ADDR2(2) | 554 R300_ALU_RGB_ADDRD(1) | 555 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 556 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 557 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 558 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 559 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 560 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 561 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 562 563 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 564 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 565 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 566 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 567 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 568 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 569 R300_ALU_RGB_ADDR2(1) | 570 R300_ALU_RGB_ADDRD(1) | 571 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 572 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 573 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 574 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 575 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 576 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), 
(R300_ALU_ALPHA_ADDRD(1) | 577 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 578 579 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 580 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 581 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 582 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 583 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 584 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 585 R300_ALU_RGB_ADDR2(3) | 586 R300_ALU_RGB_ADDRD(2) | 587 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 588 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 589 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 590 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 591 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 592 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 593 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 594 595 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 596 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 597 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 598 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 599 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 600 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 601 R300_ALU_RGB_ADDR2(5) | 602 R300_ALU_RGB_ADDRD(3) | 603 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 604 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 605 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 606 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 607 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 608 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 609 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 610 611 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 612 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 613 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 614 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 615 
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 616 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 617 R300_ALU_RGB_ADDR2(4) | 618 R300_ALU_RGB_ADDRD(0) | 619 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 620 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 621 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 622 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 623 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 624 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 625 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 626 627 628 // third node 629 /* TEX temp4, temp1.rg--, tex0, 1D */ 630 OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 631 R300_TEX_ID(0) | 632 R300_TEX_SRC_ADDR(1) | 633 R300_TEX_DST_ADDR(4))); 634 635 /* TEX temp3, temp3.rg--, tex0, 1D */ 636 OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 637 R300_TEX_ID(0) | 638 R300_TEX_SRC_ADDR(3) | 639 R300_TEX_DST_ADDR(3))); 640 641 /* TEX temp5, temp2.rg--, tex0, 1D */ 642 OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 643 R300_TEX_ID(0) | 644 R300_TEX_SRC_ADDR(2) | 645 R300_TEX_DST_ADDR(5))); 646 647 /* TEX temp0, temp0.rg--, tex0, 1D */ 648 OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 649 R300_TEX_ID(0) | 650 R300_TEX_SRC_ADDR(0) | 651 R300_TEX_DST_ADDR(0))); 652 653 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 654 * - PRESUB temps, temp4 - temp3 655 * - MAD temp3, temp1.bbbb, temps, temp3 */ 656 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 657 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 658 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 659 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 660 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 661 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 662 R300_ALU_RGB_ADDR1(4) | 663 R300_ALU_RGB_ADDR2(1) | 664 R300_ALU_RGB_ADDRD(3) | 665 
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 666 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 667 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 668 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 669 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 670 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 671 R300_ALU_ALPHA_ADDR1(4) | 672 R300_ALU_ALPHA_ADDR2(1) | 673 R300_ALU_ALPHA_ADDRD(3) | 674 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 675 676 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 677 * - PRESUB temps, temp5 - temp0 678 * - MAD temp0, temp1.bbbb, temps, temp0 */ 679 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 680 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 681 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 682 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 683 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 684 R300_ALU_RGB_INSERT_NOP)); 685 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 686 R300_ALU_RGB_ADDR1(5) | 687 R300_ALU_RGB_ADDR2(1) | 688 R300_ALU_RGB_ADDRD(0) | 689 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 690 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 691 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 692 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 693 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 694 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 695 R300_ALU_ALPHA_ADDR1(5) | 696 R300_ALU_ALPHA_ADDR2(1) | 697 R300_ALU_ALPHA_ADDRD(0) | 698 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 699 700 /* LRP output, temp2.bbbb, temp3, temp0 -> 701 * - PRESUB temps, temp3 - temp0 702 * - MAD output, temp2.bbbb, temps, temp0 */ 703 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 704 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 705 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 706 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 707 
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 708 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 709 R300_ALU_RGB_ADDR1(3) | 710 R300_ALU_RGB_ADDR2(2) | 711 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 712 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 713 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 714 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 715 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 716 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 717 R300_ALU_ALPHA_ADDR1(3) | 718 R300_ALU_ALPHA_ADDR2(2) | 719 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 720 721 /* Shader constants. */ 722 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 723 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); 724 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); 725 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); 726 727 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); 728 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 729 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); 730 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); 731 732 FINISH_ACCEL(); 733 } else if (isplanar) { 734 /* 735 * y' = y - .0625 736 * u' = u - .5 737 * v' = v - .5; 738 * 739 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 740 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 741 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 742 * 743 * DP3 might look like the straightforward solution 744 * but we'd need to move the texture yuv values in 745 * the same reg for this to work. Therefore use MADs. 746 * Without changing the shader at all (only the constants) 747 * could also provide hue/saturation/brightness/contrast control. 
748 * 749 * yco = 1.1643 750 * uco = 0, -0.39173, 2.017 751 * vco = 1.5958, -0.8129, 0 752 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 753 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 754 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 755 * 756 * temp = MAD(yco, yuv.yyyy, off) 757 * temp = MAD(uco, yuv.uuuu, temp) 758 * result = MAD(vco, yuv.vvvv, temp) 759 */ 760 float yco = 1.1643; 761 float uco[3] = {0.0, -0.39173, 2.018}; 762 float vco[3] = {1.5958, -0.8129, 0.0}; 763 float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0], 764 -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1], 765 -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]}; 766 767 BEGIN_ACCEL(33); 768 /* 2 components: same 2 for tex0/1/2 */ 769 OUT_ACCEL_REG(R300_RS_COUNT, 770 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 771 R300_RS_COUNT_HIRES_EN)); 772 /* R300_INST_COUNT_RS - highest RS instruction used */ 773 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); 774 775 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 776 777 /* Indirection levels */ 778 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 779 R300_FIRST_TEX)); 780 781 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 782 R300_ALU_CODE_SIZE(3) | 783 R300_TEX_CODE_OFFSET(0) | 784 R300_TEX_CODE_SIZE(3))); 785 786 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 787 R300_ALU_SIZE(2) | 788 R300_TEX_START(0) | 789 R300_TEX_SIZE(2) | 790 R300_RGBA_OUT)); 791 792 /* tex inst */ 793 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 794 R300_TEX_DST_ADDR(0) | 795 R300_TEX_ID(0) | 796 R300_TEX_INST(R300_TEX_INST_LD))); 797 OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 798 R300_TEX_DST_ADDR(1) | 799 R300_TEX_ID(1) | 800 R300_TEX_INST(R300_TEX_INST_LD))); 801 OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 802 R300_TEX_DST_ADDR(2) | 803 R300_TEX_ID(2) | 804 R300_TEX_INST(R300_TEX_INST_LD))); 805 806 /* ALU inst */ 807 /* MAD temp0, const0.a, 
temp0, const0.rgb */ 808 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 809 R300_ALU_RGB_ADDR1(0) | 810 R300_ALU_RGB_ADDR2(0) | 811 R300_ALU_RGB_ADDRD(0) | 812 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 813 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 814 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 815 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 816 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 817 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 818 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 819 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 820 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 821 /* alpha nop, but need to set up alpha source for rgb usage */ 822 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 823 R300_ALU_ALPHA_ADDR1(0) | 824 R300_ALU_ALPHA_ADDR2(0) | 825 R300_ALU_ALPHA_ADDRD(0) | 826 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 827 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 828 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 829 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 830 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 831 832 /* MAD const1, temp1, temp0 */ 833 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 834 R300_ALU_RGB_ADDR1(1) | 835 R300_ALU_RGB_ADDR2(0) | 836 R300_ALU_RGB_ADDRD(0) | 837 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 838 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 839 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 840 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 841 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 842 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 843 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 844 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 845 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 846 /* alpha nop */ 847 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 848 
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 849 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 850 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 851 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 852 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 853 854 /* MAD result, const2, temp2, temp0 */ 855 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 856 R300_ALU_RGB_ADDR1(2) | 857 R300_ALU_RGB_ADDR2(0) | 858 R300_ALU_RGB_ADDRD(0) | 859 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 860 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 861 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 862 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 863 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 864 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 865 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 866 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 867 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 868 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 869 R300_ALU_RGB_CLAMP)); 870 /* write alpha 1 */ 871 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 872 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 873 R300_ALU_ALPHA_TARGET_A)); 874 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 875 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 876 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 877 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 878 879 /* Shader constants. 
*/ 880 /* constant 0: off, yco */ 881 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 882 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 883 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 884 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 885 /* constant 1: uco */ 886 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 887 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 888 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 889 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0)); 890 /* constant 2: vco */ 891 OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 892 OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 893 OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 894 OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 895 896 FINISH_ACCEL(); 897 898 } else { 899 BEGIN_ACCEL(11); 900 /* 2 components: 2 for tex0 */ 901 OUT_ACCEL_REG(R300_RS_COUNT, 902 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 903 R300_RS_COUNT_HIRES_EN)); 904 /* R300_INST_COUNT_RS - highest RS instruction used */ 905 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); 906 907 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 908 909 /* Indirection levels */ 910 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 911 R300_FIRST_TEX)); 912 913 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 914 R300_ALU_CODE_SIZE(1) | 915 R300_TEX_CODE_OFFSET(0) | 916 R300_TEX_CODE_SIZE(1))); 917 918 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 919 R300_ALU_SIZE(0) | 920 R300_TEX_START(0) | 921 R300_TEX_SIZE(0) | 922 R300_RGBA_OUT)); 923 924 /* tex inst */ 925 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 926 R300_TEX_DST_ADDR(0) | 927 R300_TEX_ID(0) | 928 R300_TEX_INST(R300_TEX_INST_LD))); 929 930 /* ALU inst */ 931 /* RGB */ 932 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 933 R300_ALU_RGB_ADDR1(0) | 934 R300_ALU_RGB_ADDR2(0) | 935 R300_ALU_RGB_ADDRD(0) | 
936 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 937 R300_ALU_RGB_MASK_G | 938 R300_ALU_RGB_MASK_B)) | 939 R300_ALU_RGB_TARGET_A)); 940 OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 941 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 942 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 943 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 944 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 945 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 946 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 947 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 948 R300_ALU_RGB_CLAMP)); 949 /* Alpha */ 950 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 951 R300_ALU_ALPHA_ADDR1(0) | 952 R300_ALU_ALPHA_ADDR2(0) | 953 R300_ALU_ALPHA_ADDRD(0) | 954 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 955 R300_ALU_ALPHA_TARGET_A | 956 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 957 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 958 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 959 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 960 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 961 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 962 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 963 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 964 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 965 R300_ALU_ALPHA_CLAMP)); 966 FINISH_ACCEL(); 967 } 968 } else { 969 if (pPriv->bicubic_enabled) { 970 BEGIN_ACCEL(7); 971 972 /* 4 components: 2 for tex0 and 2 for tex1 */ 973 OUT_ACCEL_REG(R300_RS_COUNT, 974 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 975 R300_RS_COUNT_HIRES_EN)); 976 977 /* R300_INST_COUNT_RS - highest RS instruction used */ 978 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); 979 980 /* Pixel stack frame size. */ 981 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 982 983 /* FP length. 
*/ 984 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 985 R500_US_CODE_END_ADDR(13))); 986 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 987 R500_US_CODE_RANGE_SIZE(13))); 988 989 /* Prepare for FP emission. */ 990 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 991 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 992 FINISH_ACCEL(); 993 994 BEGIN_ACCEL(89); 995 /* Pixel shader. 996 * I've gone ahead and annotated each instruction, since this 997 * thing is MASSIVE. :3 998 * Note: In order to avoid buggies with temps and multiple 999 * inputs, all temps are offset by 2. temp0 -> register2. */ 1000 1001 /* TEX temp2, input1.xxxx, tex1, 1D */ 1002 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1003 R500_INST_RGB_WMASK_R | 1004 R500_INST_RGB_WMASK_G | 1005 R500_INST_RGB_WMASK_B)); 1006 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 1007 R500_TEX_INST_LD | 1008 R500_TEX_IGNORE_UNCOVERED)); 1009 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 1010 R500_TEX_SRC_S_SWIZ_R | 1011 R500_TEX_SRC_T_SWIZ_R | 1012 R500_TEX_SRC_R_SWIZ_R | 1013 R500_TEX_SRC_Q_SWIZ_R | 1014 R500_TEX_DST_ADDR(2) | 1015 R500_TEX_DST_R_SWIZ_R | 1016 R500_TEX_DST_G_SWIZ_G | 1017 R500_TEX_DST_B_SWIZ_B | 1018 R500_TEX_DST_A_SWIZ_A)); 1019 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1020 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1021 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1022 1023 /* TEX temp5, input1.yyyy, tex1, 1D */ 1024 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1025 R500_INST_TEX_SEM_WAIT | 1026 R500_INST_RGB_WMASK_R | 1027 R500_INST_RGB_WMASK_G | 1028 R500_INST_RGB_WMASK_B)); 1029 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 1030 R500_TEX_INST_LD | 1031 R500_TEX_SEM_ACQUIRE | 1032 R500_TEX_IGNORE_UNCOVERED)); 1033 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 1034 R500_TEX_SRC_S_SWIZ_G | 1035 R500_TEX_SRC_T_SWIZ_G | 1036 R500_TEX_SRC_R_SWIZ_G | 
1037 R500_TEX_SRC_Q_SWIZ_G | 1038 R500_TEX_DST_ADDR(5) | 1039 R500_TEX_DST_R_SWIZ_R | 1040 R500_TEX_DST_G_SWIZ_G | 1041 R500_TEX_DST_B_SWIZ_B | 1042 R500_TEX_DST_A_SWIZ_A)); 1043 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1044 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1045 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1046 1047 /* MUL temp4, const0.x0x0, temp2.yyxx */ 1048 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1049 R500_INST_TEX_SEM_WAIT | 1050 R500_INST_RGB_WMASK_R | 1051 R500_INST_RGB_WMASK_G | 1052 R500_INST_RGB_WMASK_B | 1053 R500_INST_ALPHA_WMASK)); 1054 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 1055 R500_RGB_ADDR0_CONST | 1056 R500_RGB_ADDR1(2))); 1057 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 1058 R500_ALPHA_ADDR0_CONST | 1059 R500_ALPHA_ADDR1(2))); 1060 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 1061 R500_ALU_RGB_R_SWIZ_A_R | 1062 R500_ALU_RGB_G_SWIZ_A_0 | 1063 R500_ALU_RGB_B_SWIZ_A_R | 1064 R500_ALU_RGB_SEL_B_SRC1 | 1065 R500_ALU_RGB_R_SWIZ_B_G | 1066 R500_ALU_RGB_G_SWIZ_B_G | 1067 R500_ALU_RGB_B_SWIZ_B_R)); 1068 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 1069 R500_ALPHA_OP_MAD | 1070 R500_ALPHA_SEL_A_SRC0 | 1071 R500_ALPHA_SWIZ_A_0 | 1072 R500_ALPHA_SEL_B_SRC1 | 1073 R500_ALPHA_SWIZ_B_R)); 1074 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 1075 R500_ALU_RGBA_OP_MAD | 1076 R500_ALU_RGBA_R_SWIZ_0 | 1077 R500_ALU_RGBA_G_SWIZ_0 | 1078 R500_ALU_RGBA_B_SWIZ_0 | 1079 R500_ALU_RGBA_A_SWIZ_0)); 1080 1081 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 1082 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1083 R500_INST_RGB_WMASK_R | 1084 R500_INST_RGB_WMASK_G | 1085 R500_INST_RGB_WMASK_B | 1086 R500_INST_ALPHA_WMASK)); 1087 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 1088 R500_RGB_ADDR0_CONST | 1089 R500_RGB_ADDR1(5) | 1090 R500_RGB_ADDR2(4))); 1091 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 
(R500_ALPHA_ADDR0(0) | 1092 R500_ALPHA_ADDR0_CONST | 1093 R500_ALPHA_ADDR1(5) | 1094 R500_ALPHA_ADDR2(4))); 1095 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 1096 R500_ALU_RGB_R_SWIZ_A_0 | 1097 R500_ALU_RGB_G_SWIZ_A_G | 1098 R500_ALU_RGB_B_SWIZ_A_0 | 1099 R500_ALU_RGB_SEL_B_SRC1 | 1100 R500_ALU_RGB_R_SWIZ_B_R | 1101 R500_ALU_RGB_G_SWIZ_B_R | 1102 R500_ALU_RGB_B_SWIZ_B_R)); 1103 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 1104 R500_ALPHA_OP_MAD | 1105 R500_ALPHA_SEL_A_SRC0 | 1106 R500_ALPHA_SWIZ_A_G | 1107 R500_ALPHA_SEL_B_SRC1 | 1108 R500_ALPHA_SWIZ_B_R)); 1109 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 1110 R500_ALU_RGBA_OP_MAD | 1111 R500_ALU_RGBA_SEL_C_SRC2 | 1112 R500_ALU_RGBA_R_SWIZ_R | 1113 R500_ALU_RGBA_G_SWIZ_G | 1114 R500_ALU_RGBA_B_SWIZ_B | 1115 R500_ALU_RGBA_A_SWIZ_A)); 1116 1117 /* ADD temp3, temp3, input0.xyxy */ 1118 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1119 R500_INST_RGB_WMASK_R | 1120 R500_INST_RGB_WMASK_G | 1121 R500_INST_RGB_WMASK_B | 1122 R500_INST_ALPHA_WMASK)); 1123 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 1124 R500_RGB_ADDR2(0))); 1125 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 1126 R500_ALPHA_ADDR2(0))); 1127 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 1128 R500_ALU_RGB_G_SWIZ_A_1 | 1129 R500_ALU_RGB_B_SWIZ_A_1 | 1130 R500_ALU_RGB_SEL_B_SRC1 | 1131 R500_ALU_RGB_R_SWIZ_B_R | 1132 R500_ALU_RGB_G_SWIZ_B_G | 1133 R500_ALU_RGB_B_SWIZ_B_B)); 1134 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 1135 R500_ALPHA_OP_MAD | 1136 R500_ALPHA_SWIZ_A_1 | 1137 R500_ALPHA_SEL_B_SRC1 | 1138 R500_ALPHA_SWIZ_B_A)); 1139 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 1140 R500_ALU_RGBA_OP_MAD | 1141 R500_ALU_RGBA_SEL_C_SRC2 | 1142 R500_ALU_RGBA_R_SWIZ_R | 1143 R500_ALU_RGBA_G_SWIZ_G | 1144 R500_ALU_RGBA_B_SWIZ_R | 1145 R500_ALU_RGBA_A_SWIZ_G)); 1146 1147 /* TEX temp1, temp3.zwxy, tex0, 2D */ 1148 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1149 R500_INST_RGB_WMASK_R | 1150 R500_INST_RGB_WMASK_G | 1151 R500_INST_RGB_WMASK_B | 1152 R500_INST_ALPHA_WMASK)); 1153 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 1154 R500_TEX_INST_LD | 1155 R500_TEX_IGNORE_UNCOVERED)); 1156 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 1157 R500_TEX_SRC_S_SWIZ_B | 1158 R500_TEX_SRC_T_SWIZ_A | 1159 R500_TEX_SRC_R_SWIZ_R | 1160 R500_TEX_SRC_Q_SWIZ_G | 1161 R500_TEX_DST_ADDR(1) | 1162 R500_TEX_DST_R_SWIZ_R | 1163 R500_TEX_DST_G_SWIZ_G | 1164 R500_TEX_DST_B_SWIZ_B | 1165 R500_TEX_DST_A_SWIZ_A)); 1166 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1167 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1168 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1169 1170 /* TEX temp3, temp3.xyzw, tex0, 2D */ 1171 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1172 R500_INST_TEX_SEM_WAIT | 1173 R500_INST_RGB_WMASK_R | 1174 R500_INST_RGB_WMASK_G | 1175 R500_INST_RGB_WMASK_B | 1176 R500_INST_ALPHA_WMASK)); 1177 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 1178 R500_TEX_INST_LD | 1179 R500_TEX_SEM_ACQUIRE | 1180 R500_TEX_IGNORE_UNCOVERED)); 1181 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 1182 R500_TEX_SRC_S_SWIZ_R | 1183 R500_TEX_SRC_T_SWIZ_G | 1184 R500_TEX_SRC_R_SWIZ_B | 1185 R500_TEX_SRC_Q_SWIZ_A | 1186 R500_TEX_DST_ADDR(3) | 1187 R500_TEX_DST_R_SWIZ_R | 1188 R500_TEX_DST_G_SWIZ_G | 1189 R500_TEX_DST_B_SWIZ_B | 1190 R500_TEX_DST_A_SWIZ_A)); 1191 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1192 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1193 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1194 1195 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 1196 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1197 R500_INST_RGB_WMASK_R | 1198 R500_INST_RGB_WMASK_G | 1199 R500_INST_RGB_WMASK_B | 1200 R500_INST_ALPHA_WMASK)); 1201 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 
1202 R500_RGB_ADDR0_CONST | 1203 R500_RGB_ADDR1(5) | 1204 R500_RGB_ADDR2(4))); 1205 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 1206 R500_ALPHA_ADDR0_CONST | 1207 R500_ALPHA_ADDR1(5) | 1208 R500_ALPHA_ADDR2(4))); 1209 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 1210 R500_ALU_RGB_R_SWIZ_A_0 | 1211 R500_ALU_RGB_G_SWIZ_A_G | 1212 R500_ALU_RGB_B_SWIZ_A_0 | 1213 R500_ALU_RGB_SEL_B_SRC1 | 1214 R500_ALU_RGB_R_SWIZ_B_G | 1215 R500_ALU_RGB_G_SWIZ_B_G | 1216 R500_ALU_RGB_B_SWIZ_B_G)); 1217 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 1218 R500_ALPHA_OP_MAD | 1219 R500_ALPHA_SEL_A_SRC0 | 1220 R500_ALPHA_SWIZ_A_G | 1221 R500_ALPHA_SEL_B_SRC1 | 1222 R500_ALPHA_SWIZ_B_G)); 1223 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 1224 R500_ALU_RGBA_OP_MAD | 1225 R500_ALU_RGBA_SEL_C_SRC2 | 1226 R500_ALU_RGBA_R_SWIZ_R | 1227 R500_ALU_RGBA_G_SWIZ_G | 1228 R500_ALU_RGBA_B_SWIZ_B | 1229 R500_ALU_RGBA_A_SWIZ_A)); 1230 1231 /* ADD temp0, temp4, input0.xyxy */ 1232 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1233 R500_INST_RGB_WMASK_R | 1234 R500_INST_RGB_WMASK_G | 1235 R500_INST_RGB_WMASK_B | 1236 R500_INST_ALPHA_WMASK)); 1237 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 1238 R500_RGB_ADDR2(0))); 1239 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 1240 R500_ALPHA_ADDR2(0))); 1241 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 1242 R500_ALU_RGB_G_SWIZ_A_1 | 1243 R500_ALU_RGB_B_SWIZ_A_1 | 1244 R500_ALU_RGB_SEL_B_SRC1 | 1245 R500_ALU_RGB_R_SWIZ_B_R | 1246 R500_ALU_RGB_G_SWIZ_B_G | 1247 R500_ALU_RGB_B_SWIZ_B_B)); 1248 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 1249 R500_ALPHA_OP_MAD | 1250 R500_ALPHA_SWIZ_A_1 | 1251 R500_ALPHA_SEL_B_SRC1 | 1252 R500_ALPHA_SWIZ_B_A)); 1253 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 1254 R500_ALU_RGBA_OP_MAD | 1255 R500_ALU_RGBA_SEL_C_SRC2 | 1256 R500_ALU_RGBA_R_SWIZ_R | 1257 
R500_ALU_RGBA_G_SWIZ_G | 1258 R500_ALU_RGBA_B_SWIZ_R | 1259 R500_ALU_RGBA_A_SWIZ_G)); 1260 1261 /* TEX temp4, temp0.zwzw, tex0, 2D */ 1262 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1263 R500_INST_TEX_SEM_WAIT | 1264 R500_INST_RGB_WMASK_R | 1265 R500_INST_RGB_WMASK_G | 1266 R500_INST_RGB_WMASK_B | 1267 R500_INST_ALPHA_WMASK)); 1268 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 1269 R500_TEX_INST_LD | 1270 R500_TEX_IGNORE_UNCOVERED)); 1271 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 1272 R500_TEX_SRC_S_SWIZ_B | 1273 R500_TEX_SRC_T_SWIZ_A | 1274 R500_TEX_SRC_R_SWIZ_B | 1275 R500_TEX_SRC_Q_SWIZ_A | 1276 R500_TEX_DST_ADDR(4) | 1277 R500_TEX_DST_R_SWIZ_R | 1278 R500_TEX_DST_G_SWIZ_G | 1279 R500_TEX_DST_B_SWIZ_B | 1280 R500_TEX_DST_A_SWIZ_A)); 1281 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1282 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1283 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1284 1285 /* TEX temp0, temp0.xyzw, tex0, 2D */ 1286 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1287 R500_INST_TEX_SEM_WAIT | 1288 R500_INST_RGB_WMASK_R | 1289 R500_INST_RGB_WMASK_G | 1290 R500_INST_RGB_WMASK_B | 1291 R500_INST_ALPHA_WMASK)); 1292 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 1293 R500_TEX_INST_LD | 1294 R500_TEX_SEM_ACQUIRE | 1295 R500_TEX_IGNORE_UNCOVERED)); 1296 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 1297 R500_TEX_SRC_S_SWIZ_R | 1298 R500_TEX_SRC_T_SWIZ_G | 1299 R500_TEX_SRC_R_SWIZ_B | 1300 R500_TEX_SRC_Q_SWIZ_A | 1301 R500_TEX_DST_ADDR(0) | 1302 R500_TEX_DST_R_SWIZ_R | 1303 R500_TEX_DST_G_SWIZ_G | 1304 R500_TEX_DST_B_SWIZ_B | 1305 R500_TEX_DST_A_SWIZ_A)); 1306 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1307 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1308 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1309 1310 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 1311 * - PRESUB temps, temp1 - temp3 1312 * - MAD temp2.zzzz, temps, temp3 */ 1313 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1314 R500_INST_RGB_WMASK_R | 1315 R500_INST_RGB_WMASK_G | 1316 R500_INST_RGB_WMASK_B | 1317 R500_INST_ALPHA_WMASK)); 1318 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 1319 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 1320 R500_RGB_ADDR1(1) | 1321 R500_RGB_ADDR2(2))); 1322 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 1323 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 1324 R500_ALPHA_ADDR1(1) | 1325 R500_ALPHA_ADDR2(2))); 1326 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 1327 R500_ALU_RGB_R_SWIZ_A_B | 1328 R500_ALU_RGB_G_SWIZ_A_B | 1329 R500_ALU_RGB_B_SWIZ_A_B | 1330 R500_ALU_RGB_SEL_B_SRCP | 1331 R500_ALU_RGB_R_SWIZ_B_R | 1332 R500_ALU_RGB_G_SWIZ_B_G | 1333 R500_ALU_RGB_B_SWIZ_B_B)); 1334 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 1335 R500_ALPHA_OP_MAD | 1336 R500_ALPHA_SEL_A_SRC2 | 1337 R500_ALPHA_SWIZ_A_B | 1338 R500_ALPHA_SEL_B_SRCP | 1339 R500_ALPHA_SWIZ_B_A)); 1340 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 1341 R500_ALU_RGBA_OP_MAD | 1342 R500_ALU_RGBA_SEL_C_SRC0 | 1343 R500_ALU_RGBA_R_SWIZ_R | 1344 R500_ALU_RGBA_G_SWIZ_G | 1345 R500_ALU_RGBA_B_SWIZ_B | 1346 R500_ALU_RGBA_A_SWIZ_A)); 1347 1348 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 1349 * - PRESUB temps, temp4 - temp0 1350 * - MAD temp2.zzzz, temps, temp0 */ 1351 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 1352 R500_INST_TEX_SEM_WAIT | 1353 R500_INST_RGB_WMASK_R | 1354 R500_INST_RGB_WMASK_G | 1355 R500_INST_RGB_WMASK_B | 1356 R500_INST_ALPHA_WMASK)); 1357 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 1358 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 1359 R500_RGB_ADDR1(4) | 1360 R500_RGB_ADDR2(2))); 1361 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 1362 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 1363 R500_ALPHA_ADDR1(4) | 1364 R500_ALPHA_ADDR2(2))); 1365 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 1366 R500_ALU_RGB_R_SWIZ_A_B | 1367
R500_ALU_RGB_G_SWIZ_A_B | 1368 R500_ALU_RGB_B_SWIZ_A_B | 1369 R500_ALU_RGB_SEL_B_SRCP | 1370 R500_ALU_RGB_R_SWIZ_B_R | 1371 R500_ALU_RGB_G_SWIZ_B_G | 1372 R500_ALU_RGB_B_SWIZ_B_B)); 1373 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 1374 R500_ALPHA_OP_MAD | 1375 R500_ALPHA_SEL_A_SRC2 | 1376 R500_ALPHA_SWIZ_A_B | 1377 R500_ALPHA_SEL_B_SRCP | 1378 R500_ALPHA_SWIZ_B_A)); 1379 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 1380 R500_ALU_RGBA_OP_MAD | 1381 R500_ALU_RGBA_SEL_C_SRC0 | 1382 R500_ALU_RGBA_R_SWIZ_R | 1383 R500_ALU_RGBA_G_SWIZ_G | 1384 R500_ALU_RGBA_B_SWIZ_B | 1385 R500_ALU_RGBA_A_SWIZ_A)); 1386 1387 /* LRP output, temp5.zzzz, temp3, temp0 -> 1388 * - PRESUB temps, temp3 - temp0 1389 * - MAD temp5.zzzz, temps, temp0 */ 1390 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 1391 R500_INST_LAST | 1392 R500_INST_TEX_SEM_WAIT | 1393 R500_INST_RGB_WMASK_R | 1394 R500_INST_RGB_WMASK_G | 1395 R500_INST_RGB_WMASK_B | 1396 R500_INST_ALPHA_WMASK | 1397 R500_INST_RGB_OMASK_R | 1398 R500_INST_RGB_OMASK_G | 1399 R500_INST_RGB_OMASK_B | 1400 R500_INST_ALPHA_OMASK)); 1401 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 1402 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 1403 R500_RGB_ADDR1(3) | 1404 R500_RGB_ADDR2(5))); 1405 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 1406 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 1407 R500_ALPHA_ADDR1(3) | 1408 R500_ALPHA_ADDR2(5))); 1409 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 1410 R500_ALU_RGB_R_SWIZ_A_B | 1411 R500_ALU_RGB_G_SWIZ_A_B | 1412 R500_ALU_RGB_B_SWIZ_A_B | 1413 R500_ALU_RGB_SEL_B_SRCP | 1414 R500_ALU_RGB_R_SWIZ_B_R | 1415 R500_ALU_RGB_G_SWIZ_B_G | 1416 R500_ALU_RGB_B_SWIZ_B_B)); 1417 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 1418 R500_ALPHA_OP_MAD | 1419 R500_ALPHA_SEL_A_SRC2 | 1420 R500_ALPHA_SWIZ_A_B | 1421 R500_ALPHA_SEL_B_SRCP | 1422 R500_ALPHA_SWIZ_B_A)); 1423 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 1424 
R500_ALU_RGBA_OP_MAD | 1425 R500_ALU_RGBA_SEL_C_SRC0 | 1426 R500_ALU_RGBA_R_SWIZ_R | 1427 R500_ALU_RGBA_G_SWIZ_G | 1428 R500_ALU_RGBA_B_SWIZ_B | 1429 R500_ALU_RGBA_A_SWIZ_A)); 1430 1431 /* Shader constants. */ 1432 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 1433 1434 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 1435 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 1436 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 1437 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 1438 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 1439 1440 FINISH_ACCEL(); 1441 1442 } else { 1443 BEGIN_ACCEL(19); 1444 /* 2 components: 2 for tex0 */ 1445 OUT_ACCEL_REG(R300_RS_COUNT, 1446 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1447 R300_RS_COUNT_HIRES_EN)); 1448 1449 /* R300_INST_COUNT_RS - highest RS instruction used */ 1450 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); 1451 1452 /* Pixel stack frame size. */ 1453 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1454 1455 /* FP length. */ 1456 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 1457 R500_US_CODE_END_ADDR(1))); 1458 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 1459 R500_US_CODE_RANGE_SIZE(1))); 1460 1461 /* Prepare for FP emission. 
*/ 1462 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 1463 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 1464 1465 /* tex inst */ 1466 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 1467 R500_INST_TEX_SEM_WAIT | 1468 R500_INST_RGB_WMASK_R | 1469 R500_INST_RGB_WMASK_G | 1470 R500_INST_RGB_WMASK_B | 1471 R500_INST_ALPHA_WMASK | 1472 R500_INST_RGB_CLAMP | 1473 R500_INST_ALPHA_CLAMP)); 1474 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 1475 R500_TEX_INST_LD | 1476 R500_TEX_SEM_ACQUIRE | 1477 R500_TEX_IGNORE_UNCOVERED)); 1478 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 1479 R500_TEX_SRC_S_SWIZ_R | 1480 R500_TEX_SRC_T_SWIZ_G | 1481 R500_TEX_DST_ADDR(0) | 1482 R500_TEX_DST_R_SWIZ_R | 1483 R500_TEX_DST_G_SWIZ_G | 1484 R500_TEX_DST_B_SWIZ_B | 1485 R500_TEX_DST_A_SWIZ_A)); 1486 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 1487 R500_DX_S_SWIZ_R | 1488 R500_DX_T_SWIZ_R | 1489 R500_DX_R_SWIZ_R | 1490 R500_DX_Q_SWIZ_R | 1491 R500_DY_ADDR(0) | 1492 R500_DY_S_SWIZ_R | 1493 R500_DY_T_SWIZ_R | 1494 R500_DY_R_SWIZ_R | 1495 R500_DY_Q_SWIZ_R)); 1496 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1497 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 1498 1499 /* ALU inst */ 1500 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 1501 R500_INST_TEX_SEM_WAIT | 1502 R500_INST_LAST | 1503 R500_INST_RGB_OMASK_R | 1504 R500_INST_RGB_OMASK_G | 1505 R500_INST_RGB_OMASK_B | 1506 R500_INST_ALPHA_OMASK | 1507 R500_INST_RGB_CLAMP | 1508 R500_INST_ALPHA_CLAMP)); 1509 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 1510 R500_RGB_ADDR1(0) | 1511 R500_RGB_ADDR1_CONST | 1512 R500_RGB_ADDR2(0) | 1513 R500_RGB_ADDR2_CONST)); 1514 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 1515 R500_ALPHA_ADDR1(0) | 1516 R500_ALPHA_ADDR1_CONST | 1517 R500_ALPHA_ADDR2(0) | 1518 R500_ALPHA_ADDR2_CONST)); 1519 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 1520 R500_ALU_RGB_R_SWIZ_A_R | 1521 
R500_ALU_RGB_G_SWIZ_A_G | 1522 R500_ALU_RGB_B_SWIZ_A_B | 1523 R500_ALU_RGB_SEL_B_SRC0 | 1524 R500_ALU_RGB_R_SWIZ_B_1 | 1525 R500_ALU_RGB_B_SWIZ_B_1 | 1526 R500_ALU_RGB_G_SWIZ_B_1)); 1527 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 1528 R500_ALPHA_SWIZ_A_A | 1529 R500_ALPHA_SWIZ_B_1)); 1530 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 1531 R500_ALU_RGBA_R_SWIZ_0 | 1532 R500_ALU_RGBA_G_SWIZ_0 | 1533 R500_ALU_RGBA_B_SWIZ_0 | 1534 R500_ALU_RGBA_A_SWIZ_0)); 1535 FINISH_ACCEL(); 1536 } 1537 } 1538 1539 BEGIN_ACCEL(6); 1540 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 1541 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 1542 1543 OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset); 1544 OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch); 1545 1546 blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO; 1547 /* no need to enable blending */ 1548 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl); 1549 1550 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); 1551 FINISH_ACCEL(); 1552 1553 } else { 1554 1555 /* Same for R100/R200 */ 1556 switch (pPixmap->drawable.bitsPerPixel) { 1557 case 16: 1558 if (pPixmap->drawable.depth == 15) 1559 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 1560 else 1561 dst_format = RADEON_COLOR_FORMAT_RGB565; 1562 break; 1563 case 32: 1564 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 1565 break; 1566 default: 1567 return; 1568 } 1569 1570 if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { 1571 isplanar = TRUE; 1572 } 1573 1574 if (isplanar) { 1575 txformat = RADEON_TXFORMAT_I8; 1576 } else { 1577 if (pPriv->id == FOURCC_UYVY) 1578 txformat = RADEON_TXFORMAT_YVYU422; 1579 else 1580 txformat = RADEON_TXFORMAT_VYUY422; 1581 } 1582 1583 txformat |= RADEON_TXFORMAT_NON_POWER2; 1584 1585 colorpitch = dst_pitch >> pixel_shift; 1586 1587 if (RADEONTilingEnabled(pScrn, pPixmap)) 1588 colorpitch |= RADEON_COLOR_TILE_ENABLE; 1589 1590 BEGIN_ACCEL(4); 1591 1592 OUT_ACCEL_REG(RADEON_RB3D_CNTL, 1593 dst_format /*| 
RADEON_ALPHA_BLEND_ENABLE*/); 1594 OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); 1595 1596 OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); 1597 1598 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 1599 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 1600 1601 FINISH_ACCEL(); 1602 1603 1604 if ((info->ChipFamily == CHIP_FAMILY_RV250) || 1605 (info->ChipFamily == CHIP_FAMILY_RV280) || 1606 (info->ChipFamily == CHIP_FAMILY_RS300) || 1607 (info->ChipFamily == CHIP_FAMILY_R200)) { 1608 1609 info->accel_state->texW[0] = pPriv->w; 1610 info->accel_state->texH[0] = pPriv->h; 1611 1612 if (isplanar) { 1613 /* note: in contrast to r300, use input biasing on uv components */ 1614 float yco = 1.1643; 1615 float yoff = -0.0625 * yco; 1616 float uco[3] = {0.0, -0.39173, 2.018}; 1617 float vco[3] = {1.5958, -0.8129, 0.0}; 1618 1619 /* need 2 texcoord sets (even though they are identical) due 1620 to denormalization! hw apparently can't premultiply 1621 same coord set by different texture size */ 1622 vtx_count = 6; 1623 1624 txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 1625 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 1626 txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; 1627 txpitch -= 32; 1628 txfilter = R200_MAG_FILTER_LINEAR | 1629 R200_MIN_FILTER_LINEAR | 1630 R200_CLAMP_S_CLAMP_LAST | 1631 R200_CLAMP_T_CLAMP_LAST; 1632 1633 BEGIN_ACCEL(36); 1634 1635 OUT_ACCEL_REG(RADEON_PP_CNTL, 1636 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 1637 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 1638 RADEON_TEX_BLEND_2_ENABLE); 1639 1640 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 1641 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 1642 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 1643 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 1644 1645 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 1646 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 1647 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 1648 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 1649 (pPriv->w - 1) | 1650 ((pPriv->h 
- 1) << RADEON_TEX_VSIZE_SHIFT)); 1651 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 1652 OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); 1653 1654 OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); 1655 OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 1656 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); 1657 OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0); 1658 OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); 1659 OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset); 1660 1661 OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); 1662 OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 1663 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); 1664 OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0); 1665 OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); 1666 OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset); 1667 1668 /* similar to r300 code. Note the big problem is that hardware constants 1669 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 1670 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 1671 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 1672 * the constants not. To get larger range can use output scale, but for 1673 * that 2.018 value we need a total scale by 8, which means the constants 1674 * really have no accuracy whatsoever (5 fractional bits only). 1675 * The only direct way to get high precision "constants" into the fragment 1676 * pipe I know of is to use the texcoord interpolator (not color, this one 1677 * is 8 bit only too), which seems a bit expensive. We're lucky though it 1678 * seems the values we need seem to fit better than worst case (get about 1679 * 6 fractional bits for this instead of 5, at least when not correcting for 1680 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 1681 * yoff get 8 fractional bits). 
1682 * 1683 * A higher precision (8 fractional bits) version might just put uco into 1684 * a texcoord, and calculate a new vcoconst in the shader, like so: 1685 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 1686 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 1687 * vcocalc = ADD temp, bias/scale(cohelper), vco 1688 * would in total use 4 tex units, 4 instructions which seems fairly 1689 * balanced for this architecture (instead of 3 + 3 for the solution here) 1690 * 1691 * temp = MAD(yco, yuv.yyyy, yoff) 1692 * temp = MAD(uco, yuv.uuuu, temp) 1693 * result = MAD(vco, yuv.vvvv, temp) 1694 * 1695 * note first mad produces actually scalar, hence we transform 1696 * it into a dp2a to get 8 bit precision of yco instead of 7 - 1697 * That's assuming hw correctly expands consts to internal precision. 1698 * (y * 1 + y * (yco - 1) + yoff) 1699 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 1700 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 1701 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 1702 * 1703 * vco, uco need bias (and hence scale too) 1704 * 1705 */ 1706 1707 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 1708 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 1709 R200_TXC_ARG_A_TFACTOR_COLOR | 1710 R200_TXC_ARG_B_R0_COLOR | 1711 R200_TXC_ARG_C_TFACTOR_COLOR | 1712 R200_TXC_NEG_ARG_C | 1713 R200_TXC_OP_DOT2_ADD); 1714 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 1715 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 1716 R200_TXC_SCALE_INV2 | 1717 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 1718 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 1719 R200_TXA_ARG_A_ZERO | 1720 R200_TXA_ARG_B_ZERO | 1721 R200_TXA_ARG_C_ZERO | 1722 R200_TXA_OP_MADD); 1723 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 1724 R200_TXA_OUTPUT_REG_NONE); 1725 1726 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 1727 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 1728 R200_TXC_ARG_A_TFACTOR_COLOR | 1729 R200_TXC_BIAS_ARG_A | 1730 R200_TXC_SCALE_ARG_A | 1731 R200_TXC_ARG_B_R1_COLOR | 1732 R200_TXC_BIAS_ARG_B | 1733 
R200_TXC_SCALE_ARG_B | 1734 R200_TXC_ARG_C_R0_COLOR | 1735 R200_TXC_OP_MADD); 1736 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 1737 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 1738 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 1739 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 1740 R200_TXA_ARG_A_ZERO | 1741 R200_TXA_ARG_B_ZERO | 1742 R200_TXA_ARG_C_ZERO | 1743 R200_TXA_OP_MADD); 1744 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 1745 R200_TXA_OUTPUT_REG_NONE); 1746 1747 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 1748 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 1749 R200_TXC_ARG_A_TFACTOR_COLOR | 1750 R200_TXC_BIAS_ARG_A | 1751 R200_TXC_SCALE_ARG_A | 1752 R200_TXC_ARG_B_R2_COLOR | 1753 R200_TXC_BIAS_ARG_B | 1754 R200_TXC_ARG_C_R0_COLOR | 1755 R200_TXC_OP_MADD); 1756 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 1757 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 1758 R200_TXC_SCALE_2X | 1759 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 1760 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 1761 R200_TXA_ARG_A_ZERO | 1762 R200_TXA_ARG_B_ZERO | 1763 R200_TXA_ARG_C_ZERO | 1764 R200_TXA_COMP_ARG_C | 1765 R200_TXA_OP_MADD); 1766 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 1767 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 1768 1769 /* shader constants */ 1770 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ 1771 yco - 1.0, 1772 -yoff, /* range [-1, 0] */ 1773 0.0)); 1774 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ 1775 uco[1] * 0.125 + 0.5, 1776 uco[2] * 0.125 + 0.5, 1777 0.0)); 1778 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ 1779 vco[1] * 0.25 + 0.5, 1780 vco[2] * 0.25 + 0.5, 1781 0.0)); 1782 1783 FINISH_ACCEL(); 1784 } 1785 else if (info->ChipFamily == CHIP_FAMILY_RV250) { 1786 /* fix up broken packed yuv - shader same as above except 1787 yuv compoents are all in same reg */ 1788 float yco = 1.1643; 1789 float yoff = -0.0625 * yco; 1790 float uco[3] = {0.0, -0.39173, 2.018}; 1791 float vco[3] = {1.5958, -0.8129, 0.0}; 1792 1793 
txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 1794 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 1795 txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; 1796 txpitch -= 32; 1797 txfilter = R200_MAG_FILTER_LINEAR | 1798 R200_MIN_FILTER_LINEAR | 1799 R200_CLAMP_S_CLAMP_LAST | 1800 R200_CLAMP_T_CLAMP_LAST; 1801 1802 BEGIN_ACCEL(24); 1803 1804 OUT_ACCEL_REG(RADEON_PP_CNTL, 1805 RADEON_TEX_0_ENABLE | 1806 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 1807 RADEON_TEX_BLEND_2_ENABLE); 1808 1809 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 1810 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 1811 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 1812 1813 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 1814 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 1815 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 1816 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 1817 (pPriv->w - 1) | 1818 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 1819 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 1820 OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); 1821 1822 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 1823 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 1824 R200_TXC_ARG_A_TFACTOR_COLOR | 1825 R200_TXC_ARG_B_R0_COLOR | 1826 R200_TXC_ARG_C_TFACTOR_COLOR | 1827 R200_TXC_NEG_ARG_C | 1828 R200_TXC_OP_DOT2_ADD); 1829 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 1830 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 1831 R200_TXC_SCALE_INV2 | 1832 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 1833 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 1834 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 1835 R200_TXA_ARG_A_ZERO | 1836 R200_TXA_ARG_B_ZERO | 1837 R200_TXA_ARG_C_ZERO | 1838 R200_TXA_OP_MADD); 1839 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 1840 R200_TXA_OUTPUT_REG_NONE); 1841 1842 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 1843 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 1844 R200_TXC_ARG_A_TFACTOR_COLOR | 1845 R200_TXC_BIAS_ARG_A | 1846 R200_TXC_SCALE_ARG_A | 1847 R200_TXC_ARG_B_R0_COLOR | 1848 R200_TXC_BIAS_ARG_B 
| 1849 R200_TXC_SCALE_ARG_B | 1850 R200_TXC_ARG_C_R1_COLOR | 1851 R200_TXC_OP_MADD); 1852 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 1853 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 1854 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 1855 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 1856 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 1857 R200_TXA_ARG_A_ZERO | 1858 R200_TXA_ARG_B_ZERO | 1859 R200_TXA_ARG_C_ZERO | 1860 R200_TXA_OP_MADD); 1861 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 1862 R200_TXA_OUTPUT_REG_NONE); 1863 1864 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 1865 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 1866 R200_TXC_ARG_A_TFACTOR_COLOR | 1867 R200_TXC_BIAS_ARG_A | 1868 R200_TXC_SCALE_ARG_A | 1869 R200_TXC_ARG_B_R0_COLOR | 1870 R200_TXC_BIAS_ARG_B | 1871 R200_TXC_ARG_C_R1_COLOR | 1872 R200_TXC_OP_MADD); 1873 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 1874 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 1875 R200_TXC_SCALE_2X | 1876 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 1877 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 1878 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 1879 R200_TXA_ARG_A_ZERO | 1880 R200_TXA_ARG_B_ZERO | 1881 R200_TXA_ARG_C_ZERO | 1882 R200_TXA_COMP_ARG_C | 1883 R200_TXA_OP_MADD); 1884 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 1885 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 1886 1887 /* shader constants */ 1888 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ 1889 yco - 1.0, 1890 -yoff, /* range [-1, 0] */ 1891 0.0)); 1892 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ 1893 uco[1] * 0.125 + 0.5, 1894 uco[2] * 0.125 + 0.5, 1895 0.0)); 1896 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ 1897 vco[1] * 0.25 + 0.5, 1898 vco[2] * 0.25 + 0.5, 1899 0.0)); 1900 1901 FINISH_ACCEL(); 1902 } 1903 else { 1904 BEGIN_ACCEL(13); 1905 OUT_ACCEL_REG(RADEON_PP_CNTL, 1906 RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 1907 1908 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 1909 
OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 1910 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 1911 1912 OUT_ACCEL_REG(R200_PP_TXFILTER_0, 1913 R200_MAG_FILTER_LINEAR | 1914 R200_MIN_FILTER_LINEAR | 1915 R200_CLAMP_S_CLAMP_LAST | 1916 R200_CLAMP_T_CLAMP_LAST | 1917 R200_YUV_TO_RGB); 1918 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 1919 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 1920 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 1921 (pPriv->w - 1) | 1922 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 1923 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 1924 1925 OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); 1926 1927 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 1928 R200_TXC_ARG_A_ZERO | 1929 R200_TXC_ARG_B_ZERO | 1930 R200_TXC_ARG_C_R0_COLOR | 1931 R200_TXC_OP_MADD); 1932 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 1933 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 1934 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 1935 R200_TXA_ARG_A_ZERO | 1936 R200_TXA_ARG_B_ZERO | 1937 R200_TXA_ARG_C_R0_ALPHA | 1938 R200_TXA_OP_MADD); 1939 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 1940 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 1941 FINISH_ACCEL(); 1942 } 1943 } else { 1944 1945 info->accel_state->texW[0] = 1; 1946 info->accel_state->texH[0] = 1; 1947 1948 BEGIN_ACCEL(9); 1949 1950 OUT_ACCEL_REG(RADEON_PP_CNTL, 1951 RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 1952 1953 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 1954 RADEON_SE_VTX_FMT_ST0)); 1955 1956 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 1957 RADEON_MAG_FILTER_LINEAR | 1958 RADEON_MIN_FILTER_LINEAR | 1959 RADEON_CLAMP_S_CLAMP_LAST | 1960 RADEON_CLAMP_T_CLAMP_LAST | 1961 RADEON_YUV_TO_RGB); 1962 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat); 1963 OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset); 1964 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 1965 RADEON_COLOR_ARG_A_ZERO | 1966 RADEON_COLOR_ARG_B_ZERO | 1967 RADEON_COLOR_ARG_C_T0_COLOR | 1968 RADEON_BLEND_CTL_ADD | 1969 RADEON_CLAMP_TX); 1970 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 1971 RADEON_ALPHA_ARG_A_ZERO | 1972 
RADEON_ALPHA_ARG_B_ZERO | 1973 RADEON_ALPHA_ARG_C_T0_ALPHA | 1974 RADEON_BLEND_CTL_ADD | 1975 RADEON_CLAMP_TX); 1976 1977 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 1978 (pPriv->w - 1) | 1979 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 1980 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 1981 pPriv->src_pitch - 32); 1982 FINISH_ACCEL(); 1983 } 1984 } 1985 1986 if (pPriv->vsync) { 1987 xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, 1988 pPriv->drw_x, 1989 pPriv->drw_x + pPriv->dst_w, 1990 pPriv->drw_y, 1991 pPriv->drw_y + pPriv->dst_h); 1992 if (crtc) { 1993 RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; 1994 1995 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 1996 radeon_crtc->crtc_id, 1997 pPriv->drw_y - crtc->y, 1998 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 1999 } 2000 } 2001 /* 2002 * Rendering of the actual polygon is done in two different 2003 * ways depending on chip generation: 2004 * 2005 * < R300: 2006 * 2007 * These chips can render a rectangle in one pass, so 2008 * handling is pretty straight-forward. 2009 * 2010 * >= R300: 2011 * 2012 * These chips can accept a quad, but will render it as 2013 * two triangles which results in a diagonal tear. Instead 2014 * We render a single, large triangle and use the scissor 2015 * functionality to restrict it to the desired rectangle. 2016 * Due to guardband limits on r3xx/r4xx, we can only use 2017 * the single triangle up to 2880 pixels; above that we 2018 * render as a quad. 
2019 */ 2020 2021 while (nBox--) { 2022 int srcX, srcY, srcw, srch; 2023 int dstX, dstY, dstw, dsth; 2024 Bool use_quad = FALSE; 2025 dstX = pBox->x1 + dstxoff; 2026 dstY = pBox->y1 + dstyoff; 2027 dstw = pBox->x2 - pBox->x1; 2028 dsth = pBox->y2 - pBox->y1; 2029 2030 srcX = ((pBox->x1 - pPriv->drw_x) * 2031 pPriv->src_w) / pPriv->dst_w; 2032 srcY = ((pBox->y1 - pPriv->drw_y) * 2033 pPriv->src_h) / pPriv->dst_h; 2034 2035 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 2036 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 2037 2038#if 0 2039 ErrorF("dst: %d, %d, %d, %d\n", dstX, dstY, dstw, dsth); 2040 ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch); 2041#endif 2042 2043 if (IS_R300_3D || IS_R500_3D) { 2044 if (IS_R300_3D && ((dstw+dsth) > 2880)) 2045 use_quad = TRUE; 2046 /* 2047 * Set up the scissor area to that of the output size. 2048 */ 2049 BEGIN_ACCEL(2); 2050 if (IS_R300_3D) { 2051 /* R300 has an offset */ 2052 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) | 2053 ((dstY + 1088) << R300_SCISSOR_Y_SHIFT))); 2054 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) | 2055 ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT))); 2056 } else { 2057 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 2058 ((dstY) << R300_SCISSOR_Y_SHIFT))); 2059 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 2060 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 2061 } 2062 FINISH_ACCEL(); 2063 } 2064 2065#ifdef ACCEL_CP 2066 if (info->ChipFamily < CHIP_FAMILY_R200) { 2067 BEGIN_RING(3 * vtx_count + 3); 2068 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 2069 3 * vtx_count + 1)); 2070 OUT_RING(RADEON_CP_VC_FRMT_XY | 2071 RADEON_CP_VC_FRMT_ST0); 2072 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 2073 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2074 RADEON_CP_VC_CNTL_MAOS_ENABLE | 2075 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 2076 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2077 } else if 
(IS_R300_3D || IS_R500_3D) { 2078 if (use_quad) { 2079 BEGIN_RING(4 * vtx_count + 4); 2080 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2081 4 * vtx_count)); 2082 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | 2083 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2084 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2085 } else { 2086 BEGIN_RING(3 * vtx_count + 4); 2087 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2088 3 * vtx_count)); 2089 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 2090 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2091 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2092 } 2093 } else { 2094 BEGIN_RING(3 * vtx_count + 2); 2095 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2096 3 * vtx_count)); 2097 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 2098 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2099 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2100 } 2101#else /* ACCEL_CP */ 2102 if (IS_R300_3D || IS_R500_3D) { 2103 if (use_quad) 2104 BEGIN_ACCEL(2 + vtx_count * 4); 2105 else 2106 BEGIN_ACCEL(2 + vtx_count * 3); 2107 } else 2108 BEGIN_ACCEL(1 + vtx_count * 3); 2109 2110 if (info->ChipFamily < CHIP_FAMILY_R200) 2111 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 2112 RADEON_VF_PRIM_WALK_DATA | 2113 RADEON_VF_RADEON_MODE | 2114 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 2115 else if (IS_R300_3D || IS_R500_3D) { 2116 if (use_quad) 2117 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | 2118 RADEON_VF_PRIM_WALK_DATA | 2119 (4 << RADEON_VF_NUM_VERTICES_SHIFT))); 2120 else 2121 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 2122 RADEON_VF_PRIM_WALK_DATA | 2123 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 2124 } else 2125 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 2126 RADEON_VF_PRIM_WALK_DATA | 2127 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 2128 2129#endif 2130 if (pPriv->bicubic_enabled) { 2131 /* 2132 * This code is only executed on >= R300, so we don't 2133 * have to deal with the legacy handling. 
2134 */ 2135 if (use_quad) { 2136 VTX_OUT_FILTER((float)dstX, (float)dstY, 2137 (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], 2138 (float)srcX + 0.5, (float)srcY + 0.5); 2139 VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth), 2140 (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], 2141 (float)srcX + 0.5, (float)(srcY + srch) + 0.5); 2142 VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth), 2143 (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], 2144 (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); 2145 VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY, 2146 (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], 2147 (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); 2148 } else { 2149 VTX_OUT_FILTER((float)dstX, (float)dstY, 2150 (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], 2151 (float)srcX + 0.5, (float)srcY + 0.5); 2152 VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth), 2153 (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0], 2154 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 2155 VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY, 2156 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0], 2157 (float)srcY / info->accel_state->texH[0], 2158 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 2159 (float)srcY + 0.5); 2160 } 2161 } else { 2162 if (IS_R300_3D || IS_R500_3D) { 2163 if (use_quad) { 2164 VTX_OUT((float)dstX, (float)dstY, 2165 (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); 2166 VTX_OUT((float)dstX, (float)(dstY + dsth), 2167 (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / 
info->accel_state->texH[0]); 2168 VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth), 2169 (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); 2170 VTX_OUT((float)(dstX + dstw), (float)dstY, 2171 (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); 2172 } else { 2173 /* 2174 * Render a big, scissored triangle. This means 2175 * increasing the triangle size and adjusting 2176 * texture coordinates. 2177 */ 2178 VTX_OUT((float)dstX, (float)dstY, 2179 (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); 2180 VTX_OUT((float)dstX, (float)(dstY + dsth + dstw), 2181 (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]); 2182 2183 VTX_OUT((float)(dstX + dstw + dsth), (float)dstY, 2184 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0], 2185 (float)srcY / info->accel_state->texH[0]); 2186 } 2187 } else if (isplanar) { 2188 /* 2189 * Just render a rect (using three coords). 2190 * Filter is a bit a misnomer, it's just texcoords... 
2191 */ 2192 VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth), 2193 (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], 2194 (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); 2195 VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth), 2196 (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], 2197 (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); 2198 VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY, 2199 (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], 2200 (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); 2201 } else { 2202 /* 2203 * Just render a rect (using three coords). 2204 */ 2205 VTX_OUT((float)dstX, (float)(dstY + dsth), 2206 (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); 2207 VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth), 2208 (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); 2209 VTX_OUT((float)(dstX + dstw), (float)dstY, 2210 (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); 2211 } 2212 } 2213 2214 if (IS_R300_3D || IS_R500_3D) 2215 /* flushing is pipelined, free/finish is not */ 2216 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2217 2218#ifdef ACCEL_CP 2219 ADVANCE_RING(); 2220#else 2221 FINISH_ACCEL(); 2222#endif /* !ACCEL_CP */ 2223 2224 pBox++; 2225 } 2226 2227 if (IS_R300_3D || IS_R500_3D) { 2228 BEGIN_ACCEL(3); 2229 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 2230 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 2231 } else 2232 BEGIN_ACCEL(1); 2233 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 2234 FINISH_ACCEL(); 2235 2236 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 2237} 2238 
/*
 * Remove the three helper macros that this file defines near its top:
 * VTX_OUT, VTX_OUT_FILTER and FUNC_NAME.  Their definitions are selected
 * by whether ACCEL_CP or ACCEL_MMIO is defined.
 * NOTE(review): presumably this lets the file be included once per
 * acceleration mode without macro redefinition -- confirm against the
 * file that includes it.
 */
2239#undef VTX_OUT 2240#undef VTX_OUT_FILTER 2241#undef FUNC_NAME 2242