radeon_textured_videofuncs.c revision 0a1d3ae0
/*
 * Copyright 2008 Alex Deucher
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *
 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
25 * 26 */ 27 28#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 29do { \ 30 OUT_RING(F_TO_DW(_dstX)); \ 31 OUT_RING(F_TO_DW(_dstY)); \ 32 OUT_RING(F_TO_DW(_srcX)); \ 33 OUT_RING(F_TO_DW(_srcY)); \ 34 OUT_RING(F_TO_DW(_maskX)); \ 35 OUT_RING(F_TO_DW(_maskY)); \ 36} while (0) 37 38#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 39do { \ 40 OUT_RING(F_TO_DW(_dstX)); \ 41 OUT_RING(F_TO_DW(_dstY)); \ 42 OUT_RING(F_TO_DW(_srcX)); \ 43 OUT_RING(F_TO_DW(_srcY)); \ 44} while (0) 45 46 47static Bool 48RADEONPrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 49{ 50 RADEONInfoPtr info = RADEONPTR(pScrn); 51 PixmapPtr pPixmap = pPriv->pPixmap; 52 struct radeon_exa_pixmap_priv *driver_priv; 53 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 54 uint32_t txformat, txsize, txpitch; 55 uint32_t dst_pitch, dst_format; 56 uint32_t colorpitch; 57 int pixel_shift; 58 int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 59 int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 60 int ret; 61 62 radeon_cs_space_reset_bos(info->cs); 63 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 64 65 if (pPriv->bicubic_enabled) 66 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 67 RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 68 69 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 70 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 71 RADEON_GEM_DOMAIN_VRAM); 72 73 ret = radeon_cs_space_check(info->cs); 74 if (ret) { 75 ErrorF("Not enough RAM to hw accel xv operation\n"); 76 return FALSE; 77 } 78 79 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 80 81 dst_pitch = exaGetPixmapPitch(pPixmap); 82 RADEON_SWITCH_TO_3D(); 83 84 /* Same for R100/R200 */ 85 switch (pPixmap->drawable.bitsPerPixel) { 86 case 16: 87 if (pPixmap->drawable.depth == 15) 88 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 89 else 90 dst_format = RADEON_COLOR_FORMAT_RGB565; 91 break; 
92 case 32: 93 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 94 break; 95 default: 96 return FALSE; 97 } 98 99 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 100 pPriv->is_planar = TRUE; 101 txformat = RADEON_TXFORMAT_Y8; 102 } else { 103 pPriv->is_planar = FALSE; 104 if (pPriv->id == FOURCC_UYVY) 105 txformat = RADEON_TXFORMAT_YVYU422; 106 else 107 txformat = RADEON_TXFORMAT_VYUY422; 108 } 109 110 txformat |= RADEON_TXFORMAT_NON_POWER2; 111 112 colorpitch = dst_pitch >> pixel_shift; 113 114 if (RADEONTilingEnabled(pScrn, pPixmap)) 115 colorpitch |= RADEON_COLOR_TILE_ENABLE; 116 117 BEGIN_ACCEL_RELOC(4,2); 118 119 OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 120 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 121 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 122 OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 123 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 124 125 ADVANCE_RING(); 126 127 if (pPriv->is_planar) { 128 /* need 2 texcoord sets (even though they are identical) due 129 to denormalization! 
hw apparently can't premultiply 130 same coord set by different texture size */ 131 pPriv->vtx_count = 6; 132 133 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 134 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 135 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 136 txpitch -= 32; 137 138 BEGIN_ACCEL_RELOC(23, 3); 139 140 OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 141 RADEON_SE_VTX_FMT_ST0 | 142 RADEON_SE_VTX_FMT_ST1)); 143 144 OUT_RING_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 145 RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 146 RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 147 RADEON_PLANAR_YUV_ENABLE)); 148 149 /* Y */ 150 OUT_RING_REG(RADEON_PP_TXFILTER_0, 151 RADEON_MAG_FILTER_LINEAR | 152 RADEON_MIN_FILTER_LINEAR | 153 RADEON_CLAMP_S_CLAMP_LAST | 154 RADEON_CLAMP_T_CLAMP_LAST | 155 RADEON_YUV_TO_RGB); 156 OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 157 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 158 OUT_RING_REG(RADEON_PP_TXCBLEND_0, 159 RADEON_COLOR_ARG_A_ZERO | 160 RADEON_COLOR_ARG_B_ZERO | 161 RADEON_COLOR_ARG_C_T0_COLOR | 162 RADEON_BLEND_CTL_ADD | 163 RADEON_CLAMP_TX); 164 OUT_RING_REG(RADEON_PP_TXABLEND_0, 165 RADEON_ALPHA_ARG_A_ZERO | 166 RADEON_ALPHA_ARG_B_ZERO | 167 RADEON_ALPHA_ARG_C_T0_ALPHA | 168 RADEON_BLEND_CTL_ADD | 169 RADEON_CLAMP_TX); 170 171 OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 172 (pPriv->w - 1) | 173 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 174 OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 175 pPriv->src_pitch - 32); 176 177 /* U */ 178 OUT_RING_REG(RADEON_PP_TXFILTER_1, 179 RADEON_MAG_FILTER_LINEAR | 180 RADEON_MIN_FILTER_LINEAR | 181 RADEON_CLAMP_S_CLAMP_LAST | 182 RADEON_CLAMP_T_CLAMP_LAST); 183 OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 184 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 185 OUT_RING_REG(RADEON_PP_TXCBLEND_1, 186 RADEON_COLOR_ARG_A_ZERO | 187 
RADEON_COLOR_ARG_B_ZERO | 188 RADEON_COLOR_ARG_C_T0_COLOR | 189 RADEON_BLEND_CTL_ADD | 190 RADEON_CLAMP_TX); 191 OUT_RING_REG(RADEON_PP_TXABLEND_1, 192 RADEON_ALPHA_ARG_A_ZERO | 193 RADEON_ALPHA_ARG_B_ZERO | 194 RADEON_ALPHA_ARG_C_T0_ALPHA | 195 RADEON_BLEND_CTL_ADD | 196 RADEON_CLAMP_TX); 197 198 OUT_RING_REG(RADEON_PP_TEX_SIZE_1, txsize); 199 OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch); 200 201 /* V */ 202 OUT_RING_REG(RADEON_PP_TXFILTER_2, 203 RADEON_MAG_FILTER_LINEAR | 204 RADEON_MIN_FILTER_LINEAR | 205 RADEON_CLAMP_S_CLAMP_LAST | 206 RADEON_CLAMP_T_CLAMP_LAST); 207 OUT_RING_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 208 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 209 OUT_RING_REG(RADEON_PP_TXCBLEND_2, 210 RADEON_COLOR_ARG_A_ZERO | 211 RADEON_COLOR_ARG_B_ZERO | 212 RADEON_COLOR_ARG_C_T0_COLOR | 213 RADEON_BLEND_CTL_ADD | 214 RADEON_CLAMP_TX); 215 OUT_RING_REG(RADEON_PP_TXABLEND_2, 216 RADEON_ALPHA_ARG_A_ZERO | 217 RADEON_ALPHA_ARG_B_ZERO | 218 RADEON_ALPHA_ARG_C_T0_ALPHA | 219 RADEON_BLEND_CTL_ADD | 220 RADEON_CLAMP_TX); 221 222 OUT_RING_REG(RADEON_PP_TEX_SIZE_2, txsize); 223 OUT_RING_REG(RADEON_PP_TEX_PITCH_2, txpitch); 224 ADVANCE_RING(); 225 } else { 226 pPriv->vtx_count = 4; 227 BEGIN_ACCEL_RELOC(9, 1); 228 229 OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 230 RADEON_SE_VTX_FMT_ST0)); 231 232 OUT_RING_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 233 234 OUT_RING_REG(RADEON_PP_TXFILTER_0, 235 RADEON_MAG_FILTER_LINEAR | 236 RADEON_MIN_FILTER_LINEAR | 237 RADEON_CLAMP_S_CLAMP_LAST | 238 RADEON_CLAMP_T_CLAMP_LAST | 239 RADEON_YUV_TO_RGB); 240 OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 241 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 242 OUT_RING_REG(RADEON_PP_TXCBLEND_0, 243 RADEON_COLOR_ARG_A_ZERO | 244 RADEON_COLOR_ARG_B_ZERO | 245 RADEON_COLOR_ARG_C_T0_COLOR | 246 RADEON_BLEND_CTL_ADD | 247 RADEON_CLAMP_TX); 248 
OUT_RING_REG(RADEON_PP_TXABLEND_0, 249 RADEON_ALPHA_ARG_A_ZERO | 250 RADEON_ALPHA_ARG_B_ZERO | 251 RADEON_ALPHA_ARG_C_T0_ALPHA | 252 RADEON_BLEND_CTL_ADD | 253 RADEON_CLAMP_TX); 254 255 OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 256 (pPriv->w - 1) | 257 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 258 OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 259 pPriv->src_pitch - 32); 260 ADVANCE_RING(); 261 } 262 263 BEGIN_RING(2*2); 264 OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 265 OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 266 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 267 ADVANCE_RING(); 268 269 if (pPriv->vsync) { 270 xf86CrtcPtr crtc; 271 if (pPriv->desired_crtc) 272 crtc = pPriv->desired_crtc; 273 else 274 crtc = radeon_pick_best_crtc(pScrn, FALSE, 275 pPriv->drw_x, 276 pPriv->drw_x + pPriv->dst_w, 277 pPriv->drw_y, 278 pPriv->drw_y + pPriv->dst_h); 279 if (crtc) 280 RADEONWaitForVLine(pScrn, pPixmap, 281 crtc, 282 pPriv->drw_y - crtc->y, 283 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 284 } 285 286 return TRUE; 287} 288 289static void 290RADEONDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 291{ 292 RADEONInfoPtr info = RADEONPTR(pScrn); 293 PixmapPtr pPixmap = pPriv->pPixmap; 294 int dstxoff, dstyoff; 295 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 296 int nBox = REGION_NUM_RECTS(&pPriv->clip); 297 298#ifdef COMPOSITE 299 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 300 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 301#else 302 dstxoff = 0; 303 dstyoff = 0; 304#endif 305 306 if (!RADEONPrepareTexturedVideo(pScrn, pPriv)) 307 return; 308 309 /* 310 * Rendering of the actual polygon is done in two different 311 * ways depending on chip generation: 312 * 313 * < R300: 314 * 315 * These chips can render a rectangle in one pass, so 316 * handling is pretty straight-forward. 317 * 318 * >= R300: 319 * 320 * These chips can accept a quad, but will render it as 321 * two triangles which results in a diagonal tear. 
Instead 322 * We render a single, large triangle and use the scissor 323 * functionality to restrict it to the desired rectangle. 324 * Due to guardband limits on r3xx/r4xx, we can only use 325 * the single triangle up to 2560/4021 pixels; above that we 326 * render as a quad. 327 */ 328 while (nBox) { 329 int draw_size = 3 * pPriv->vtx_count + 5; 330 int loop_boxes; 331 332 if (draw_size > radeon_cs_space_remaining(pScrn)) { 333 radeon_cs_flush_indirect(pScrn); 334 if (!RADEONPrepareTexturedVideo(pScrn, pPriv)) 335 return; 336 } 337 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 338 nBox -= loop_boxes; 339 340 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 341 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 342 loop_boxes * 3 * pPriv->vtx_count + 1)); 343 if (pPriv->is_planar) 344 OUT_RING(RADEON_CP_VC_FRMT_XY | 345 RADEON_CP_VC_FRMT_ST0 | 346 RADEON_CP_VC_FRMT_ST1); 347 else 348 OUT_RING(RADEON_CP_VC_FRMT_XY | 349 RADEON_CP_VC_FRMT_ST0); 350 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 351 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 352 RADEON_CP_VC_CNTL_MAOS_ENABLE | 353 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 354 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 355 356 while (loop_boxes--) { 357 float srcX, srcY, srcw, srch; 358 int dstX, dstY, dstw, dsth; 359 dstX = pBox->x1 + dstxoff; 360 dstY = pBox->y1 + dstyoff; 361 dstw = pBox->x2 - pBox->x1; 362 dsth = pBox->y2 - pBox->y1; 363 364 srcX = pPriv->src_x; 365 srcX += ((pBox->x1 - pPriv->drw_x) * 366 pPriv->src_w) / (float)pPriv->dst_w; 367 srcY = pPriv->src_y; 368 srcY += ((pBox->y1 - pPriv->drw_y) * 369 pPriv->src_h) / (float)pPriv->dst_h; 370 371 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 372 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 373 374 375 if (pPriv->is_planar) { 376 /* 377 * Just render a rect (using three coords). 
378 */ 379 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 380 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 381 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 382 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 383 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 384 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 385 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 386 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 387 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 388 } else { 389 /* 390 * Just render a rect (using three coords). 391 */ 392 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 393 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 394 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 395 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 396 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 397 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 398 } 399 400 pBox++; 401 } 402 403 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 404 ADVANCE_RING(); 405 } 406 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 407} 408 409static Bool 410R200PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 411{ 412 RADEONInfoPtr info = RADEONPTR(pScrn); 413 PixmapPtr pPixmap = pPriv->pPixmap; 414 struct radeon_exa_pixmap_priv *driver_priv; 415 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 416 uint32_t txformat; 417 uint32_t txfilter, txsize, txpitch; 418 uint32_t dst_pitch, dst_format; 419 uint32_t colorpitch; 420 int pixel_shift; 421 int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 422 int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 423 /* note: in contrast to r300, use input biasing on uv components */ 424 const float Loff = -0.0627; 425 float uvcosf, uvsinf; 426 float yco, yoff; 427 float uco[3], vco[3]; 428 float bright, cont, sat; 429 int ref = pPriv->transform_index; 430 float ucscale = 0.25, vcscale = 0.25; 
431 Bool needux8 = FALSE, needvx8 = FALSE; 432 int ret; 433 434 radeon_cs_space_reset_bos(info->cs); 435 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 436 437 if (pPriv->bicubic_enabled) 438 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 439 RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 440 441 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 442 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 443 RADEON_GEM_DOMAIN_VRAM); 444 445 ret = radeon_cs_space_check(info->cs); 446 if (ret) { 447 ErrorF("Not enough RAM to hw accel xv operation\n"); 448 return FALSE; 449 } 450 451 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 452 453 dst_pitch = exaGetPixmapPitch(pPixmap); 454 455 RADEON_SWITCH_TO_3D(); 456 457 /* Same for R100/R200 */ 458 switch (pPixmap->drawable.bitsPerPixel) { 459 case 16: 460 if (pPixmap->drawable.depth == 15) 461 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 462 else 463 dst_format = RADEON_COLOR_FORMAT_RGB565; 464 break; 465 case 32: 466 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 467 break; 468 default: 469 return FALSE; 470 } 471 472 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 473 pPriv->is_planar = TRUE; 474 txformat = RADEON_TXFORMAT_I8; 475 } else { 476 pPriv->is_planar = FALSE; 477 if (pPriv->id == FOURCC_UYVY) 478 txformat = RADEON_TXFORMAT_YVYU422; 479 else 480 txformat = RADEON_TXFORMAT_VYUY422; 481 } 482 483 txformat |= RADEON_TXFORMAT_NON_POWER2; 484 485 colorpitch = dst_pitch >> pixel_shift; 486 487 if (RADEONTilingEnabled(pScrn, pPixmap)) 488 colorpitch |= RADEON_COLOR_TILE_ENABLE; 489 490 BEGIN_ACCEL_RELOC(4,2); 491 492 OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 493 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 494 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 495 496 OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 497 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 498 499 ADVANCE_RING(); 500 501 
txfilter = R200_MAG_FILTER_LINEAR | 502 R200_MIN_FILTER_LINEAR | 503 R200_CLAMP_S_CLAMP_LAST | 504 R200_CLAMP_T_CLAMP_LAST; 505 506 /* contrast can cause constant overflow, clamp */ 507 cont = RTFContrast(pPriv->contrast); 508 if (cont * trans[ref].RefLuma > 2.0) 509 cont = 2.0 / trans[ref].RefLuma; 510 /* brightness is only from -0.5 to 0.5 should be safe */ 511 bright = RTFBrightness(pPriv->brightness); 512 /* saturation can also cause overflow, clamp */ 513 sat = RTFSaturation(pPriv->saturation); 514 if (sat * trans[ref].RefBCb > 4.0) 515 sat = 4.0 / trans[ref].RefBCb; 516 uvcosf = sat * cos(RTFHue(pPriv->hue)); 517 uvsinf = sat * sin(RTFHue(pPriv->hue)); 518 519 yco = trans[ref].RefLuma * cont; 520 uco[0] = -trans[ref].RefRCr * uvsinf; 521 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 522 uco[2] = trans[ref].RefBCb * uvcosf; 523 vco[0] = trans[ref].RefRCr * uvcosf; 524 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 525 vco[2] = trans[ref].RefBCb * uvsinf; 526 yoff = Loff * yco + bright; 527 528 if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 529 needux8 = TRUE; 530 ucscale = 0.125; 531 } 532 if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 533 needvx8 = TRUE; 534 vcscale = 0.125; 535 } 536 537 if (pPriv->is_planar) { 538 /* need 2 texcoord sets (even though they are identical) due 539 to denormalization! 
hw apparently can't premultiply 540 same coord set by different texture size */ 541 pPriv->vtx_count = 6; 542 543 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 544 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 545 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 546 txpitch -= 32; 547 548 BEGIN_ACCEL_RELOC(36, 3); 549 550 OUT_RING_REG(RADEON_PP_CNTL, 551 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 552 RADEON_TEX_BLEND_0_ENABLE | 553 RADEON_TEX_BLEND_1_ENABLE | 554 RADEON_TEX_BLEND_2_ENABLE); 555 556 OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 557 OUT_RING_REG(R200_SE_VTX_FMT_1, 558 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 559 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 560 561 OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 562 OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 563 OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 564 OUT_RING_REG(R200_PP_TXSIZE_0, 565 (pPriv->w - 1) | 566 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 567 OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 568 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 569 570 OUT_RING_REG(R200_PP_TXFILTER_1, txfilter); 571 OUT_RING_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 572 OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0); 573 OUT_RING_REG(R200_PP_TXSIZE_1, txsize); 574 OUT_RING_REG(R200_PP_TXPITCH_1, txpitch); 575 OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 576 577 OUT_RING_REG(R200_PP_TXFILTER_2, txfilter); 578 OUT_RING_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 579 OUT_RING_REG(R200_PP_TXFORMAT_X_2, 0); 580 OUT_RING_REG(R200_PP_TXSIZE_2, txsize); 581 OUT_RING_REG(R200_PP_TXPITCH_2, txpitch); 582 OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 583 584 /* similar to r300 code. Note the big problem is that hardware constants 585 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 586 * + scale) to -1.0-1.0 (but precision will suffer). 
AFAIK the hw actually 587 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 588 * the constants not. To get larger range can use output scale, but for 589 * that 2.018 value we need a total scale by 8, which means the constants 590 * really have no accuracy whatsoever (5 fractional bits only). 591 * The only direct way to get high precision "constants" into the fragment 592 * pipe I know of is to use the texcoord interpolator (not color, this one 593 * is 8 bit only too), which seems a bit expensive. We're lucky though it 594 * seems the values we need seem to fit better than worst case (get about 595 * 6 fractional bits for this instead of 5, at least when not correcting for 596 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 597 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 598 * even with non-default saturation/hue/contrast/brightness adjustments, 599 * it gets a little crazy and ultimately precision might still be lacking. 600 * 601 * A higher precision (8 fractional bits) version might just put uco into 602 * a texcoord, and calculate a new vcoconst in the shader, like so: 603 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 604 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 605 * vcocalc = ADD temp, bias/scale(cohelper), vco 606 * would in total use 4 tex units, 4 instructions which seems fairly 607 * balanced for this architecture (instead of 3 + 3 for the solution here) 608 * 609 * temp = MAD(yco, yuv.yyyy, yoff) 610 * temp = MAD(uco, yuv.uuuu, temp) 611 * result = MAD(vco, yuv.vvvv, temp) 612 * 613 * note first mad produces actually scalar, hence we transform 614 * it into a dp2a to get 8 bit precision of yco instead of 7 - 615 * That's assuming hw correctly expands consts to internal precision. 
616 * (y * 1 + y * (yco - 1) + yoff) 617 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 618 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 619 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 620 * 621 * vco, uco need bias (and hence scale too) 622 * 623 */ 624 625 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 626 OUT_RING_REG(R200_PP_TXCBLEND_0, 627 R200_TXC_ARG_A_TFACTOR_COLOR | 628 R200_TXC_ARG_B_R0_COLOR | 629 R200_TXC_ARG_C_TFACTOR_COLOR | 630 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 631 R200_TXC_OP_DOT2_ADD); 632 OUT_RING_REG(R200_PP_TXCBLEND2_0, 633 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 634 R200_TXC_SCALE_INV2 | 635 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 636 OUT_RING_REG(R200_PP_TXABLEND_0, 637 R200_TXA_ARG_A_ZERO | 638 R200_TXA_ARG_B_ZERO | 639 R200_TXA_ARG_C_ZERO | 640 R200_TXA_OP_MADD); 641 OUT_RING_REG(R200_PP_TXABLEND2_0, 642 R200_TXA_OUTPUT_REG_NONE); 643 644 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 645 OUT_RING_REG(R200_PP_TXCBLEND_1, 646 R200_TXC_ARG_A_TFACTOR_COLOR | 647 R200_TXC_BIAS_ARG_A | 648 R200_TXC_SCALE_ARG_A | 649 R200_TXC_ARG_B_R1_COLOR | 650 R200_TXC_BIAS_ARG_B | 651 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 652 R200_TXC_ARG_C_R0_COLOR | 653 R200_TXC_OP_MADD); 654 OUT_RING_REG(R200_PP_TXCBLEND2_1, 655 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 656 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 657 OUT_RING_REG(R200_PP_TXABLEND_1, 658 R200_TXA_ARG_A_ZERO | 659 R200_TXA_ARG_B_ZERO | 660 R200_TXA_ARG_C_ZERO | 661 R200_TXA_OP_MADD); 662 OUT_RING_REG(R200_PP_TXABLEND2_1, 663 R200_TXA_OUTPUT_REG_NONE); 664 665 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 666 OUT_RING_REG(R200_PP_TXCBLEND_2, 667 R200_TXC_ARG_A_TFACTOR_COLOR | 668 R200_TXC_BIAS_ARG_A | 669 R200_TXC_SCALE_ARG_A | 670 R200_TXC_ARG_B_R2_COLOR | 671 R200_TXC_BIAS_ARG_B | 672 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 673 R200_TXC_ARG_C_R0_COLOR | 674 R200_TXC_OP_MADD); 675 OUT_RING_REG(R200_PP_TXCBLEND2_2, 676 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 677 R200_TXC_SCALE_2X | 678 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 679 OUT_RING_REG(R200_PP_TXABLEND_2, 680 R200_TXA_ARG_A_ZERO | 681 R200_TXA_ARG_B_ZERO | 682 R200_TXA_ARG_C_ZERO | 683 R200_TXA_COMP_ARG_C | 684 R200_TXA_OP_MADD); 685 OUT_RING_REG(R200_PP_TXABLEND2_2, 686 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 687 688 /* shader constants */ 689 OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 690 yco > 1.0 ? yco - 1.0: yco, 691 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 692 0.0)); 693 OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 694 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 695 uco[2] * ucscale + 0.5, 696 0.0)); 697 OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 698 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 699 vco[2] * vcscale + 0.5, 700 0.0)); 701 702 ADVANCE_RING(); 703 } else { 704 pPriv->vtx_count = 4; 705 706 BEGIN_ACCEL_RELOC(24, 1); 707 708 OUT_RING_REG(RADEON_PP_CNTL, 709 RADEON_TEX_0_ENABLE | 710 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 711 RADEON_TEX_BLEND_2_ENABLE); 712 713 OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 714 OUT_RING_REG(R200_SE_VTX_FMT_1, 715 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 716 717 OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 718 OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 719 OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 720 OUT_RING_REG(R200_PP_TXSIZE_0, 721 (pPriv->w - 1) | 722 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 723 OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 724 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 725 726 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 727 OUT_RING_REG(R200_PP_TXCBLEND_0, 728 R200_TXC_ARG_A_TFACTOR_COLOR | 729 R200_TXC_ARG_B_R0_COLOR | 730 
R200_TXC_ARG_C_TFACTOR_COLOR | 731 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 732 R200_TXC_OP_DOT2_ADD); 733 OUT_RING_REG(R200_PP_TXCBLEND2_0, 734 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 735 R200_TXC_SCALE_INV2 | 736 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 737 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 738 OUT_RING_REG(R200_PP_TXABLEND_0, 739 R200_TXA_ARG_A_ZERO | 740 R200_TXA_ARG_B_ZERO | 741 R200_TXA_ARG_C_ZERO | 742 R200_TXA_OP_MADD); 743 OUT_RING_REG(R200_PP_TXABLEND2_0, 744 R200_TXA_OUTPUT_REG_NONE); 745 746 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 747 OUT_RING_REG(R200_PP_TXCBLEND_1, 748 R200_TXC_ARG_A_TFACTOR_COLOR | 749 R200_TXC_BIAS_ARG_A | 750 R200_TXC_SCALE_ARG_A | 751 R200_TXC_ARG_B_R0_COLOR | 752 R200_TXC_BIAS_ARG_B | 753 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 754 R200_TXC_ARG_C_R1_COLOR | 755 R200_TXC_OP_MADD); 756 OUT_RING_REG(R200_PP_TXCBLEND2_1, 757 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 758 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 759 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 760 OUT_RING_REG(R200_PP_TXABLEND_1, 761 R200_TXA_ARG_A_ZERO | 762 R200_TXA_ARG_B_ZERO | 763 R200_TXA_ARG_C_ZERO | 764 R200_TXA_OP_MADD); 765 OUT_RING_REG(R200_PP_TXABLEND2_1, 766 R200_TXA_OUTPUT_REG_NONE); 767 768 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 769 OUT_RING_REG(R200_PP_TXCBLEND_2, 770 R200_TXC_ARG_A_TFACTOR_COLOR | 771 R200_TXC_BIAS_ARG_A | 772 R200_TXC_SCALE_ARG_A | 773 R200_TXC_ARG_B_R0_COLOR | 774 R200_TXC_BIAS_ARG_B | 775 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 776 R200_TXC_ARG_C_R1_COLOR | 777 R200_TXC_OP_MADD); 778 OUT_RING_REG(R200_PP_TXCBLEND2_2, 779 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 780 R200_TXC_SCALE_2X | 781 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 782 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 783 OUT_RING_REG(R200_PP_TXABLEND_2, 784 R200_TXA_ARG_A_ZERO | 785 R200_TXA_ARG_B_ZERO | 786 R200_TXA_ARG_C_ZERO | 787 R200_TXA_COMP_ARG_C | 788 R200_TXA_OP_MADD); 789 OUT_RING_REG(R200_PP_TXABLEND2_2, 790 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 791 792 /* shader constants */ 793 OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 794 yco > 1.0 ? yco - 1.0: yco, 795 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 796 0.0)); 797 OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 798 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 799 uco[2] * ucscale + 0.5, 800 0.0)); 801 OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 802 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 803 vco[2] * vcscale + 0.5, 804 0.0)); 805 806 ADVANCE_RING(); 807 } 808 809 BEGIN_RING(2*2); 810 OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 811 OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 812 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 813 ADVANCE_RING(); 814 815 if (pPriv->vsync) { 816 xf86CrtcPtr crtc; 817 if (pPriv->desired_crtc) 818 crtc = pPriv->desired_crtc; 819 else 820 crtc = radeon_pick_best_crtc(pScrn, FALSE, 821 pPriv->drw_x, 822 pPriv->drw_x + pPriv->dst_w, 823 pPriv->drw_y, 824 pPriv->drw_y + pPriv->dst_h); 825 if (crtc) 826 RADEONWaitForVLine(pScrn, pPixmap, 827 crtc, 828 pPriv->drw_y - crtc->y, 829 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 830 } 831 832 return TRUE; 833} 834 835static void 836R200DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 837{ 838 RADEONInfoPtr info = RADEONPTR(pScrn); 839 PixmapPtr pPixmap = pPriv->pPixmap; 840 int 
dstxoff, dstyoff; 841 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 842 int nBox = REGION_NUM_RECTS(&pPriv->clip); 843 844#ifdef COMPOSITE 845 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 846 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 847#else 848 dstxoff = 0; 849 dstyoff = 0; 850#endif 851 852 if (!R200PrepareTexturedVideo(pScrn, pPriv)) 853 return; 854 855 /* 856 * Rendering of the actual polygon is done in two different 857 * ways depending on chip generation: 858 * 859 * < R300: 860 * 861 * These chips can render a rectangle in one pass, so 862 * handling is pretty straight-forward. 863 * 864 * >= R300: 865 * 866 * These chips can accept a quad, but will render it as 867 * two triangles which results in a diagonal tear. Instead 868 * We render a single, large triangle and use the scissor 869 * functionality to restrict it to the desired rectangle. 870 * Due to guardband limits on r3xx/r4xx, we can only use 871 * the single triangle up to 2560/4021 pixels; above that we 872 * render as a quad. 
873 */ 874 875 while (nBox) { 876 int draw_size = 3 * pPriv->vtx_count + 4; 877 int loop_boxes; 878 879 if (draw_size > radeon_cs_space_remaining(pScrn)) { 880 radeon_cs_flush_indirect(pScrn); 881 if (!R200PrepareTexturedVideo(pScrn, pPriv)) 882 return; 883 } 884 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 885 nBox -= loop_boxes; 886 887 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 4); 888 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 889 loop_boxes * 3 * pPriv->vtx_count)); 890 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 891 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 892 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 893 894 while (loop_boxes--) { 895 float srcX, srcY, srcw, srch; 896 int dstX, dstY, dstw, dsth; 897 dstX = pBox->x1 + dstxoff; 898 dstY = pBox->y1 + dstyoff; 899 dstw = pBox->x2 - pBox->x1; 900 dsth = pBox->y2 - pBox->y1; 901 902 srcX = pPriv->src_x; 903 srcX += ((pBox->x1 - pPriv->drw_x) * 904 pPriv->src_w) / (float)pPriv->dst_w; 905 srcY = pPriv->src_y; 906 srcY += ((pBox->y1 - pPriv->drw_y) * 907 pPriv->src_h) / (float)pPriv->dst_h; 908 909 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 910 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 911 912 if (pPriv->is_planar) { 913 /* 914 * Just render a rect (using three coords). 915 */ 916 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 917 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 918 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 919 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 920 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 921 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 922 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 923 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 924 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 925 } else { 926 /* 927 * Just render a rect (using three coords). 
928 */ 929 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 930 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 931 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 932 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 933 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 934 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 935 } 936 937 pBox++; 938 } 939 940 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 941 ADVANCE_RING(); 942 } 943 944 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 945} 946 947static Bool 948R300PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 949{ 950 RADEONInfoPtr info = RADEONPTR(pScrn); 951 PixmapPtr pPixmap = pPriv->pPixmap; 952 struct radeon_exa_pixmap_priv *driver_priv; 953 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 954 uint32_t txfilter, txformat0, txformat1, txpitch; 955 uint32_t dst_pitch, dst_format; 956 uint32_t txenable, colorpitch; 957 uint32_t output_fmt; 958 int pixel_shift; 959 int ret; 960 961 radeon_cs_space_reset_bos(info->cs); 962 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 963 964 if (pPriv->bicubic_enabled) 965 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 966 RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 967 968 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 969 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 970 RADEON_GEM_DOMAIN_VRAM); 971 972 ret = radeon_cs_space_check(info->cs); 973 if (ret) { 974 ErrorF("Not enough RAM to hw accel xv operation\n"); 975 return FALSE; 976 } 977 978 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 979 980 dst_pitch = exaGetPixmapPitch(pPixmap); 981 RADEON_SWITCH_TO_3D(); 982 983 if (pPriv->bicubic_enabled) 984 pPriv->vtx_count = 6; 985 else 986 pPriv->vtx_count = 4; 987 988 switch (pPixmap->drawable.bitsPerPixel) { 989 case 16: 990 if (pPixmap->drawable.depth == 15) 991 dst_format = 
R300_COLORFORMAT_ARGB1555; 992 else 993 dst_format = R300_COLORFORMAT_RGB565; 994 break; 995 case 32: 996 dst_format = R300_COLORFORMAT_ARGB8888; 997 break; 998 default: 999 return FALSE; 1000 } 1001 1002 output_fmt = (R300_OUT_FMT_C4_8 | 1003 R300_OUT_FMT_C0_SEL_BLUE | 1004 R300_OUT_FMT_C1_SEL_GREEN | 1005 R300_OUT_FMT_C2_SEL_RED | 1006 R300_OUT_FMT_C3_SEL_ALPHA); 1007 1008 colorpitch = dst_pitch >> pixel_shift; 1009 colorpitch |= dst_format; 1010 1011 if (RADEONTilingEnabled(pScrn, pPixmap)) 1012 colorpitch |= R300_COLORTILE; 1013 1014 1015 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1016 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1017 pPriv->is_planar = TRUE; 1018 else 1019 pPriv->is_planar = FALSE; 1020 1021 if (pPriv->is_planar) { 1022 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1023 txpitch = pPriv->src_pitch; 1024 } else { 1025 if (pPriv->id == FOURCC_UYVY) 1026 txformat1 = R300_TX_FORMAT_YVYU422; 1027 else 1028 txformat1 = R300_TX_FORMAT_VYUY422; 1029 1030 if (pPriv->bicubic_state != BICUBIC_OFF) 1031 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1032 1033 /* pitch is in pixels */ 1034 txpitch = pPriv->src_pitch / 2; 1035 } 1036 txpitch -= 1; 1037 1038 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1039 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1040 R300_TXPITCH_EN); 1041 1042 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1043 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1044 R300_TX_MAG_FILTER_LINEAR | 1045 R300_TX_MIN_FILTER_LINEAR | 1046 (0 << R300_TX_ID_SHIFT)); 1047 1048 BEGIN_ACCEL_RELOC(6, 1); 1049 OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 1050 OUT_RING_REG(R300_TX_FILTER1_0, 0); 1051 OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 1052 if (pPriv->is_planar) 1053 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1054 else 1055 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 1056 OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 1057 
OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 1058 ADVANCE_RING(); 1059 1060 txenable = R300_TEX_0_ENABLE; 1061 1062 if (pPriv->is_planar) { 1063 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1064 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1065 R300_TXPITCH_EN); 1066 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1067 txpitch -= 1; 1068 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1069 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1070 R300_TX_MIN_FILTER_LINEAR | 1071 R300_TX_MAG_FILTER_LINEAR); 1072 1073 BEGIN_ACCEL_RELOC(12, 2); 1074 OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 1075 OUT_RING_REG(R300_TX_FILTER1_1, 0); 1076 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 1077 OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 1078 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 1079 OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 1080 OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 1081 OUT_RING_REG(R300_TX_FILTER1_2, 0); 1082 OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 1083 OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 1084 OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 1085 OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 1086 ADVANCE_RING(); 1087 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1088 } 1089 1090 if (pPriv->bicubic_enabled) { 1091 /* Size is 128x1 */ 1092 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 1093 (0x0 << R300_TXHEIGHT_SHIFT) | 1094 R300_TXPITCH_EN); 1095 /* Format is 32-bit floats, 4bpp */ 1096 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1097 /* Pitch is 127 (128-1) */ 1098 txpitch = 0x7f; 1099 /* Tex filter */ 1100 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1101 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1102 R300_TX_MIN_FILTER_NEAREST | 1103 R300_TX_MAG_FILTER_NEAREST | 1104 (1 << R300_TX_ID_SHIFT)); 
1105 1106 BEGIN_ACCEL_RELOC(6, 1); 1107 OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 1108 OUT_RING_REG(R300_TX_FILTER1_1, 0); 1109 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 1110 OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 1111 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 1112 OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 1113 ADVANCE_RING(); 1114 1115 /* Enable tex 1 */ 1116 txenable |= R300_TEX_1_ENABLE; 1117 } 1118 1119 /* setup the VAP */ 1120 if (info->accel_state->has_tcl) { 1121 if (pPriv->bicubic_enabled) 1122 BEGIN_RING(2*7); 1123 else 1124 BEGIN_RING(2*6); 1125 } else { 1126 if (pPriv->bicubic_enabled) 1127 BEGIN_RING(2*5); 1128 else 1129 BEGIN_RING(2*4); 1130 } 1131 1132 /* These registers define the number, type, and location of data submitted 1133 * to the PVS unit of GA input (when PVS is disabled) 1134 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1135 * enabled. This memory provides the imputs to the vertex shader program 1136 * and ordering is not important. When PVS/TCL is disabled, this field maps 1137 * directly to the GA input memory and the order is significant. 
In 1138 * PVS_BYPASS mode the order is as follows: 1139 * Position 1140 * Point Size 1141 * Color 0-3 1142 * Textures 0-7 1143 * Fog 1144 */ 1145 if (pPriv->bicubic_enabled) { 1146 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1147 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1148 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1149 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1150 R300_SIGNED_0 | 1151 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1152 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1153 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1154 R300_SIGNED_1)); 1155 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 1156 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1157 (0 << R300_SKIP_DWORDS_2_SHIFT) | 1158 (7 << R300_DST_VEC_LOC_2_SHIFT) | 1159 R300_LAST_VEC_2 | 1160 R300_SIGNED_2)); 1161 } else { 1162 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1163 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1164 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1165 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1166 R300_SIGNED_0 | 1167 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1168 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1169 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1170 R300_LAST_VEC_1 | 1171 R300_SIGNED_1)); 1172 } 1173 1174 /* load the vertex shader 1175 * We pre-load vertex programs in RADEONInit3DEngine(): 1176 * - exa 1177 * - Xv 1178 * - Xv bicubic 1179 * Here we select the offset of the vertex program we want to use 1180 */ 1181 if (info->accel_state->has_tcl) { 1182 if (pPriv->bicubic_enabled) { 1183 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1184 ((11 << R300_PVS_FIRST_INST_SHIFT) | 1185 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1186 (13 << R300_PVS_LAST_INST_SHIFT))); 1187 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1188 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1189 } else { 1190 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1191 ((9 << R300_PVS_FIRST_INST_SHIFT) | 1192 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1193 (10 << R300_PVS_LAST_INST_SHIFT))); 1194 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1195 (10 << 
R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1196 } 1197 } 1198 1199 /* Position and one set of 2 texture coordinates */ 1200 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1201 if (pPriv->bicubic_enabled) 1202 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1203 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1204 else 1205 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1206 1207 OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 1208 ADVANCE_RING(); 1209 1210 /* setup pixel shader */ 1211 if (pPriv->bicubic_state != BICUBIC_OFF) { 1212 if (pPriv->bicubic_enabled) { 1213 BEGIN_RING(2*79); 1214 1215 /* 4 components: 2 for tex0 and 2 for tex1 */ 1216 OUT_RING_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1217 R300_RS_COUNT_HIRES_EN)); 1218 1219 /* R300_INST_COUNT_RS - highest RS instruction used */ 1220 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1221 1222 /* Pixel stack frame size. */ 1223 OUT_RING_REG(R300_US_PIXSIZE, 5); 1224 1225 /* Indirection levels */ 1226 OUT_RING_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1227 R300_FIRST_TEX)); 1228 1229 /* Set nodes. 
*/ 1230 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1231 R300_ALU_CODE_SIZE(14) | 1232 R300_TEX_CODE_OFFSET(0) | 1233 R300_TEX_CODE_SIZE(6))); 1234 1235 /* Nodes are allocated highest first, but executed lowest first */ 1236 OUT_RING_REG(R300_US_CODE_ADDR_0, 0); 1237 OUT_RING_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1238 R300_ALU_SIZE(0) | 1239 R300_TEX_START(0) | 1240 R300_TEX_SIZE(0))); 1241 OUT_RING_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1242 R300_ALU_SIZE(9) | 1243 R300_TEX_START(1) | 1244 R300_TEX_SIZE(0))); 1245 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1246 R300_ALU_SIZE(2) | 1247 R300_TEX_START(2) | 1248 R300_TEX_SIZE(3) | 1249 R300_RGBA_OUT)); 1250 1251 /* ** BICUBIC FP ** */ 1252 1253 /* texcoord0 => temp0 1254 * texcoord1 => temp1 */ 1255 1256 // first node 1257 /* TEX temp2, temp1.rrr0, tex1, 1D */ 1258 OUT_RING_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1259 R300_TEX_ID(1) | 1260 R300_TEX_SRC_ADDR(1) | 1261 R300_TEX_DST_ADDR(2))); 1262 1263 /* MOV temp1.r, temp1.ggg0 */ 1264 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1265 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1266 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1267 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1268 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1269 R300_ALU_RGB_ADDRD(1) | 1270 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1271 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1272 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1273 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1274 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1275 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1276 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1277 1278 1279 // second node 1280 /* TEX temp1, temp1, tex1, 1D */ 1281 OUT_RING_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1282 R300_TEX_ID(1) | 1283 R300_TEX_SRC_ADDR(1) | 1284 
R300_TEX_DST_ADDR(1))); 1285 1286 /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 1287 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1288 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1289 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1290 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1291 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1292 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1293 R300_ALU_RGB_ADDRD(3) | 1294 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1295 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1296 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1297 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1298 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1299 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1300 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1301 1302 1303 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 1304 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1305 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1306 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1307 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1308 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1309 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1310 R300_ALU_RGB_ADDRD(2) | 1311 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1312 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1313 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1314 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1315 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1316 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1317 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1318 1319 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 1320 OUT_RING_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1321 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1322 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1323 
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1324 OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1325 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1326 R300_ALU_RGB_ADDR2(3) | 1327 R300_ALU_RGB_ADDRD(4) | 1328 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1329 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1330 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1331 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1332 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1333 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1334 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1335 1336 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 1337 OUT_RING_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1338 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1339 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1340 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1341 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1342 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1343 R300_ALU_RGB_ADDR2(2) | 1344 R300_ALU_RGB_ADDRD(5) | 1345 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1346 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1347 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1348 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1349 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1350 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1351 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1352 1353 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 1354 OUT_RING_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1355 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1356 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1357 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1358 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1359 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1360 R300_ALU_RGB_ADDR2(3) | 1361 
R300_ALU_RGB_ADDRD(3) | 1362 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1363 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1364 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1365 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1366 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1367 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1368 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1369 1370 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 1371 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1372 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1373 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1374 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1375 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1376 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1377 R300_ALU_RGB_ADDR2(2) | 1378 R300_ALU_RGB_ADDRD(1) | 1379 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1380 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1381 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1382 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1383 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1384 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1385 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1386 1387 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 1388 OUT_RING_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1389 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1390 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1391 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1392 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1393 R300_ALU_RGB_ADDR2(1) | 1394 R300_ALU_RGB_ADDRD(1) | 1395 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1396 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1397 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1398 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1399 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1400 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1401 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1402 1403 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 1404 OUT_RING_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1405 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1406 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1407 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1408 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1409 R300_ALU_RGB_ADDR2(3) | 1410 R300_ALU_RGB_ADDRD(2) | 1411 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1412 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1413 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1414 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1415 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1416 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1417 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1418 1419 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 1420 OUT_RING_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1421 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1422 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1423 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1424 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1425 R300_ALU_RGB_ADDR2(5) | 1426 R300_ALU_RGB_ADDRD(3) | 1427 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1428 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1429 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1430 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1431 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1432 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1433 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1434 1435 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 1436 OUT_RING_REG(R300_US_ALU_RGB_INST(10), 
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1437 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1438 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1439 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1440 OUT_RING_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1441 R300_ALU_RGB_ADDR2(4) | 1442 R300_ALU_RGB_ADDRD(0) | 1443 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1444 OUT_RING_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1445 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1446 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1447 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1448 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1449 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1450 1451 1452 // third node 1453 /* TEX temp4, temp1.rg--, tex0, 1D */ 1454 OUT_RING_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1455 R300_TEX_ID(0) | 1456 R300_TEX_SRC_ADDR(1) | 1457 R300_TEX_DST_ADDR(4))); 1458 1459 /* TEX temp3, temp3.rg--, tex0, 1D */ 1460 OUT_RING_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1461 R300_TEX_ID(0) | 1462 R300_TEX_SRC_ADDR(3) | 1463 R300_TEX_DST_ADDR(3))); 1464 1465 /* TEX temp5, temp2.rg--, tex0, 1D */ 1466 OUT_RING_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1467 R300_TEX_ID(0) | 1468 R300_TEX_SRC_ADDR(2) | 1469 R300_TEX_DST_ADDR(5))); 1470 1471 /* TEX temp0, temp0.rg--, tex0, 1D */ 1472 OUT_RING_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1473 R300_TEX_ID(0) | 1474 R300_TEX_SRC_ADDR(0) | 1475 R300_TEX_DST_ADDR(0))); 1476 1477 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1478 * - PRESUB temps, temp4 - temp3 1479 * - MAD temp3, temp1.bbbb, temps, temp3 */ 1480 OUT_RING_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1481 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1482 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1483 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1484 
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1485 OUT_RING_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1486 R300_ALU_RGB_ADDR1(4) | 1487 R300_ALU_RGB_ADDR2(1) | 1488 R300_ALU_RGB_ADDRD(3) | 1489 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1490 OUT_RING_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1491 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1492 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1493 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1494 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1495 R300_ALU_ALPHA_ADDR1(4) | 1496 R300_ALU_ALPHA_ADDR2(1) | 1497 R300_ALU_ALPHA_ADDRD(3) | 1498 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1499 1500 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1501 * - PRESUB temps, temp5 - temp0 1502 * - MAD temp0, temp1.bbbb, temps, temp0 */ 1503 OUT_RING_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1504 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1505 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1506 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1507 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1508 R300_ALU_RGB_INSERT_NOP)); 1509 OUT_RING_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1510 R300_ALU_RGB_ADDR1(5) | 1511 R300_ALU_RGB_ADDR2(1) | 1512 R300_ALU_RGB_ADDRD(0) | 1513 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1514 OUT_RING_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1515 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1516 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1517 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1518 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1519 R300_ALU_ALPHA_ADDR1(5) | 1520 R300_ALU_ALPHA_ADDR2(1) | 1521 R300_ALU_ALPHA_ADDRD(0) | 1522 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1523 1524 /* LRP output, temp2.bbbb, temp3, temp0 -> 1525 * - PRESUB temps, temp3 - temp0 1526 * - MAD output, temp2.bbbb, temps, temp0 
*/ 1527 OUT_RING_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1528 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1529 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1530 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1531 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1532 OUT_RING_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1533 R300_ALU_RGB_ADDR1(3) | 1534 R300_ALU_RGB_ADDR2(2) | 1535 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 1536 OUT_RING_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1537 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1538 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1539 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1540 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1541 R300_ALU_ALPHA_ADDR1(3) | 1542 R300_ALU_ALPHA_ADDR2(2) | 1543 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1544 1545 /* Shader constants. */ 1546 OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 1547 OUT_RING_REG(R300_US_ALU_CONST_G(0), 0); 1548 OUT_RING_REG(R300_US_ALU_CONST_B(0), 0); 1549 OUT_RING_REG(R300_US_ALU_CONST_A(0), 0); 1550 1551 OUT_RING_REG(R300_US_ALU_CONST_R(1), 0); 1552 OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 1553 OUT_RING_REG(R300_US_ALU_CONST_B(1), 0); 1554 OUT_RING_REG(R300_US_ALU_CONST_A(1), 0); 1555 1556 ADVANCE_RING(); 1557 } else { 1558 BEGIN_RING(2*11); 1559 /* 2 components: 2 for tex0 */ 1560 OUT_RING_REG(R300_RS_COUNT, 1561 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1562 R300_RS_COUNT_HIRES_EN)); 1563 /* R300_INST_COUNT_RS - highest RS instruction used */ 1564 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1565 1566 OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1567 1568 /* Indirection levels */ 1569 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1570 R300_FIRST_TEX)); 1571 1572 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1573 R300_ALU_CODE_SIZE(1) | 1574 
R300_TEX_CODE_OFFSET(0) | 1575 R300_TEX_CODE_SIZE(1))); 1576 1577 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1578 R300_ALU_SIZE(0) | 1579 R300_TEX_START(0) | 1580 R300_TEX_SIZE(0) | 1581 R300_RGBA_OUT)); 1582 1583 /* tex inst */ 1584 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1585 R300_TEX_DST_ADDR(0) | 1586 R300_TEX_ID(0) | 1587 R300_TEX_INST(R300_TEX_INST_LD))); 1588 1589 /* ALU inst */ 1590 /* RGB */ 1591 OUT_RING_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1592 R300_ALU_RGB_ADDR1(0) | 1593 R300_ALU_RGB_ADDR2(0) | 1594 R300_ALU_RGB_ADDRD(0) | 1595 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1596 R300_ALU_RGB_MASK_G | 1597 R300_ALU_RGB_MASK_B)) | 1598 R300_ALU_RGB_TARGET_A)); 1599 OUT_RING_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1600 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1601 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1602 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1603 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1604 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1605 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1606 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1607 R300_ALU_RGB_CLAMP)); 1608 /* Alpha */ 1609 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1610 R300_ALU_ALPHA_ADDR1(0) | 1611 R300_ALU_ALPHA_ADDR2(0) | 1612 R300_ALU_ALPHA_ADDRD(0) | 1613 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1614 R300_ALU_ALPHA_TARGET_A | 1615 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 1616 OUT_RING_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1617 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1618 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1619 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1620 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1621 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1622 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1623 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1624 R300_ALU_ALPHA_CLAMP)); 1625 ADVANCE_RING(); 1626 } 1627 } else { 1628 
/* 1629 * y' = y - .0625 1630 * u' = u - .5 1631 * v' = v - .5; 1632 * 1633 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1634 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1635 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1636 * 1637 * DP3 might look like the straightforward solution 1638 * but we'd need to move the texture yuv values in 1639 * the same reg for this to work. Therefore use MADs. 1640 * Brightness just adds to the off constant. 1641 * Contrast is multiplication of luminance. 1642 * Saturation and hue change the u and v coeffs. 1643 * Default values (before adjustments - depend on colorspace): 1644 * yco = 1.1643 1645 * uco = 0, -0.39173, 2.017 1646 * vco = 1.5958, -0.8129, 0 1647 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1648 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1649 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1650 * 1651 * temp = MAD(yco, yuv.yyyy, off) 1652 * temp = MAD(uco, yuv.uuuu, temp) 1653 * result = MAD(vco, yuv.vvvv, temp) 1654 */ 1655 /* TODO: don't recalc consts always */ 1656 const float Loff = -0.0627; 1657 const float Coff = -0.502; 1658 float uvcosf, uvsinf; 1659 float yco; 1660 float uco[3], vco[3], off[3]; 1661 float bright, cont, gamma; 1662 int ref = pPriv->transform_index; 1663 Bool needgamma = FALSE; 1664 1665 cont = RTFContrast(pPriv->contrast); 1666 bright = RTFBrightness(pPriv->brightness); 1667 gamma = (float)pPriv->gamma / 1000.0; 1668 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1669 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1670 /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 1671 1672 yco = trans[ref].RefLuma * cont; 1673 uco[0] = -trans[ref].RefRCr * uvsinf; 1674 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1675 uco[2] = trans[ref].RefBCb * uvcosf; 1676 vco[0] = trans[ref].RefRCr * uvcosf; 1677 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1678 vco[2] = trans[ref].RefBCb * uvsinf; 1679 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1680 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1681 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1682 1683 if (gamma != 1.0) { 1684 needgamma = TRUE; 1685 /* note: gamma correction is out = in ^ gamma; 1686 gpu can only do LG2/EX2 therefore we transform into 1687 in ^ gamma = 2 ^ (log2(in) * gamma). 1688 Lots of scalar ops, unfortunately (better solution?) - 1689 without gamma that's 3 inst, with gamma it's 10... 1690 could use different gamma factors per channel, 1691 if that's of any use. */ 1692 } 1693 1694 if (pPriv->is_planar) { 1695 BEGIN_RING(2 * (needgamma ? (28 + 33) : 33)); 1696 /* 2 components: same 2 for tex0/1/2 */ 1697 OUT_RING_REG(R300_RS_COUNT, 1698 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1699 R300_RS_COUNT_HIRES_EN)); 1700 /* R300_INST_COUNT_RS - highest RS instruction used */ 1701 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1702 1703 OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1704 1705 /* Indirection levels */ 1706 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1707 R300_FIRST_TEX)); 1708 1709 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1710 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1711 R300_TEX_CODE_OFFSET(0) | 1712 R300_TEX_CODE_SIZE(3))); 1713 1714 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1715 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 1716 R300_TEX_START(0) | 1717 R300_TEX_SIZE(2) | 1718 R300_RGBA_OUT)); 1719 1720 /* tex inst */ 1721 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1722 R300_TEX_DST_ADDR(2) | 1723 R300_TEX_ID(0) | 1724 R300_TEX_INST(R300_TEX_INST_LD))); 1725 OUT_RING_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1726 R300_TEX_DST_ADDR(1) | 1727 R300_TEX_ID(1) | 1728 R300_TEX_INST(R300_TEX_INST_LD))); 1729 OUT_RING_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 1730 R300_TEX_DST_ADDR(0) | 1731 R300_TEX_ID(2) | 1732 R300_TEX_INST(R300_TEX_INST_LD))); 1733 1734 /* ALU inst */ 1735 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 1736 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1737 R300_ALU_RGB_ADDR1(2) | 1738 R300_ALU_RGB_ADDR2(0) | 1739 R300_ALU_RGB_ADDRD(2) | 1740 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1741 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1742 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1743 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1744 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1745 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1746 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1747 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1748 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1749 /* alpha nop, but need to set up alpha source for rgb usage */ 1750 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1751 R300_ALU_ALPHA_ADDR1(2) | 1752 R300_ALU_ALPHA_ADDR2(0) | 1753 R300_ALU_ALPHA_ADDRD(2) | 1754 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1755 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1756 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1757 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1758 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1759 1760 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 1761 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1762 
R300_ALU_RGB_ADDR1(1) | 1763 R300_ALU_RGB_ADDR2(2) | 1764 R300_ALU_RGB_ADDRD(2) | 1765 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1766 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1767 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1768 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1769 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1770 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1771 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1772 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1773 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1774 /* alpha nop */ 1775 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 1776 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1777 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1778 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1779 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1780 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1781 1782 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 1783 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 1784 R300_ALU_RGB_ADDR1(0) | 1785 R300_ALU_RGB_ADDR2(2) | 1786 R300_ALU_RGB_ADDRD(0) | 1787 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 1788 (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 1789 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1790 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1791 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1792 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1793 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1794 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1795 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1796 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1797 R300_ALU_RGB_CLAMP)); 1798 /* write alpha 1 */ 1799 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 1800 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1801 R300_ALU_ALPHA_TARGET_A)); 1802 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1803 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1804 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1805 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 1806 1807 if (needgamma) { 1808 /* rgb temp0.r = op_sop, set up src0 reg */ 1809 OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 1810 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1811 OUT_RING_REG(R300_US_ALU_RGB_INST(3), 1812 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1813 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1814 /* alpha lg2 temp0, temp0.r */ 1815 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 1816 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1817 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1818 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1819 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1820 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1821 1822 /* rgb temp0.g = op_sop, set up src0 reg */ 1823 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 1824 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 1825 OUT_RING_REG(R300_US_ALU_RGB_INST(4), 1826 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1827 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1828 /* alpha lg2 temp0, temp0.g */ 1829 
OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 1830 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1831 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1832 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1833 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1834 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1835 1836 /* rgb temp0.b = op_sop, set up src0 reg */ 1837 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 1838 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 1839 OUT_RING_REG(R300_US_ALU_RGB_INST(5), 1840 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1841 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1842 /* alpha lg2 temp0, temp0.b */ 1843 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 1844 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1845 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1846 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1847 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1848 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1849 1850 /* MUL const1, temp1, temp0 */ 1851 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 1852 R300_ALU_RGB_ADDR1(0) | 1853 R300_ALU_RGB_ADDR2(0) | 1854 R300_ALU_RGB_ADDRD(0) | 1855 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1856 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1857 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1858 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 1859 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1860 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1861 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1862 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1863 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1864 /* alpha nop, but set up const1 */ 1865 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 1866 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 1867 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1868 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), 
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1869 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1870 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1871 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1872 1873 /* rgb out0.r = op_sop, set up src0 reg */ 1874 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1875 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 1876 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 1877 OUT_RING_REG(R300_US_ALU_RGB_INST(7), 1878 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1879 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1880 /* alpha ex2 temp0, temp0.r */ 1881 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 1882 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1883 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1884 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1885 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1886 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1887 1888 /* rgb out0.g = op_sop, set up src0 reg */ 1889 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1890 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 1891 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 1892 OUT_RING_REG(R300_US_ALU_RGB_INST(8), 1893 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1894 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1895 /* alpha ex2 temp0, temp0.g */ 1896 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 1897 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1898 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1899 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1900 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1901 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1902 1903 /* rgb out0.b = op_sop, set up src0 reg */ 1904 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1905 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 1906 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 1907 OUT_RING_REG(R300_US_ALU_RGB_INST(9), 1908 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 
1909 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1910 /* alpha ex2 temp0, temp0.b */ 1911 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 1912 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1913 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1914 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1915 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1916 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1917 } 1918 } else { 1919 BEGIN_RING(2 * (needgamma ? (28 + 31) : 31)); 1920 /* 2 components */ 1921 OUT_RING_REG(R300_RS_COUNT, 1922 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1923 R300_RS_COUNT_HIRES_EN)); 1924 /* R300_INST_COUNT_RS - highest RS instruction used */ 1925 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1926 1927 OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 1928 1929 /* Indirection levels */ 1930 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1931 R300_FIRST_TEX)); 1932 1933 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1934 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1935 R300_TEX_CODE_OFFSET(0) | 1936 R300_TEX_CODE_SIZE(1))); 1937 1938 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1939 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 1940 R300_TEX_START(0) | 1941 R300_TEX_SIZE(0) | 1942 R300_RGBA_OUT)); 1943 1944 /* tex inst */ 1945 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1946 R300_TEX_DST_ADDR(0) | 1947 R300_TEX_ID(0) | 1948 R300_TEX_INST(R300_TEX_INST_LD))); 1949 1950 /* ALU inst */ 1951 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 1952 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1953 R300_ALU_RGB_ADDR1(0) | 1954 R300_ALU_RGB_ADDR2(0) | 1955 R300_ALU_RGB_ADDRD(1) | 1956 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1957 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1958 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1959 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 1960 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1961 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1962 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1963 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1964 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1965 /* alpha nop, but need to set up alpha source for rgb usage */ 1966 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1967 R300_ALU_ALPHA_ADDR1(0) | 1968 R300_ALU_ALPHA_ADDR2(0) | 1969 R300_ALU_ALPHA_ADDRD(0) | 1970 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1971 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1972 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1973 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1974 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1975 1976 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 1977 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1978 R300_ALU_RGB_ADDR1(0) | 1979 R300_ALU_RGB_ADDR2(1) | 1980 R300_ALU_RGB_ADDRD(1) | 1981 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1982 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1983 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1984 
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 1985 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1986 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1987 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1988 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1989 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1990 /* alpha nop */ 1991 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 1992 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1993 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1994 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1995 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1996 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1997 1998 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 1999 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2000 R300_ALU_RGB_ADDR1(0) | 2001 R300_ALU_RGB_ADDR2(1) | 2002 R300_ALU_RGB_ADDRD(0) | 2003 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2004 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2005 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2006 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2007 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2008 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2009 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2010 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2011 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2012 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2013 R300_ALU_RGB_CLAMP)); 2014 /* write alpha 1 */ 2015 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2016 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2017 R300_ALU_ALPHA_TARGET_A)); 2018 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2019 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2020 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2021 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2022 2023 if (needgamma) { 2024 /* rgb temp0.r = op_sop, set up src0 reg */ 2025 
OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2026 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2027 OUT_RING_REG(R300_US_ALU_RGB_INST(3), 2028 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2029 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2030 /* alpha lg2 temp0, temp0.r */ 2031 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2032 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2033 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2034 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2035 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2036 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2037 2038 /* rgb temp0.g = op_sop, set up src0 reg */ 2039 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2040 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2041 OUT_RING_REG(R300_US_ALU_RGB_INST(4), 2042 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2043 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2044 /* alpha lg2 temp0, temp0.g */ 2045 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2046 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2047 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2048 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2049 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2050 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2051 2052 /* rgb temp0.b = op_sop, set up src0 reg */ 2053 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2054 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2055 OUT_RING_REG(R300_US_ALU_RGB_INST(5), 2056 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2057 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2058 /* alpha lg2 temp0, temp0.b */ 2059 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2060 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2061 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2062 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2063 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2064 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2065 2066 /* MUL const1, temp1, temp0 */ 2067 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2068 R300_ALU_RGB_ADDR1(0) | 2069 R300_ALU_RGB_ADDR2(0) | 2070 R300_ALU_RGB_ADDRD(0) | 2071 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2072 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2073 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2074 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2075 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2076 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2077 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2078 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2079 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2080 /* alpha nop, but set up const1 */ 2081 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2082 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2083 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2084 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2085 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2086 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2087 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2088 2089 /* rgb out0.r = op_sop, set up src0 reg */ 2090 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2091 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2092 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2093 OUT_RING_REG(R300_US_ALU_RGB_INST(7), 2094 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2095 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2096 /* alpha ex2 temp0, temp0.r */ 2097 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2098 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2099 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2100 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2101 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2102 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2103 2104 /* rgb out0.g = op_sop, 
set up src0 reg */ 2105 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2106 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2107 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2108 OUT_RING_REG(R300_US_ALU_RGB_INST(8), 2109 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2110 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2111 /* alpha ex2 temp0, temp0.g */ 2112 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2113 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2114 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2115 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2116 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2117 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2118 2119 /* rgb out0.b = op_sop, set up src0 reg */ 2120 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2121 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2122 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2123 OUT_RING_REG(R300_US_ALU_RGB_INST(9), 2124 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2125 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2126 /* alpha ex2 temp0, temp0.b */ 2127 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2128 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2129 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2130 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2131 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2132 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2133 } 2134 } 2135 2136 /* Shader constants. 
*/ 2137 /* constant 0: off, yco */ 2138 OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 2139 OUT_RING_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 2140 OUT_RING_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 2141 OUT_RING_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2142 /* constant 1: uco */ 2143 OUT_RING_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 2144 OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 2145 OUT_RING_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 2146 OUT_RING_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2147 /* constant 2: vco */ 2148 OUT_RING_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 2149 OUT_RING_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 2150 OUT_RING_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 2151 OUT_RING_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2152 2153 ADVANCE_RING(); 2154 } 2155 2156 BEGIN_ACCEL_RELOC(6, 2); 2157 OUT_RING_REG(R300_TX_INVALTAGS, 0); 2158 OUT_RING_REG(R300_TX_ENABLE, txenable); 2159 2160 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2161 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2162 2163 /* no need to enable blending */ 2164 OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2165 2166 OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 2167 ADVANCE_RING(); 2168 2169 if (pPriv->vsync) { 2170 xf86CrtcPtr crtc; 2171 if (pPriv->desired_crtc) 2172 crtc = pPriv->desired_crtc; 2173 else 2174 crtc = radeon_pick_best_crtc(pScrn, FALSE, 2175 pPriv->drw_x, 2176 pPriv->drw_x + pPriv->dst_w, 2177 pPriv->drw_y, 2178 pPriv->drw_y + pPriv->dst_h); 2179 if (crtc) 2180 RADEONWaitForVLine(pScrn, pPixmap, 2181 crtc, 2182 pPriv->drw_y - crtc->y, 2183 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2184 } 2185 2186 return TRUE; 2187} 2188 2189static void 2190R300DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2191{ 2192 RADEONInfoPtr info = RADEONPTR(pScrn); 2193 PixmapPtr pPixmap = pPriv->pPixmap; 2194 int dstxoff, dstyoff; 2195 BoxPtr pBox = 
REGION_RECTS(&pPriv->clip); 2196 int nBox = REGION_NUM_RECTS(&pPriv->clip); 2197 2198#ifdef COMPOSITE 2199 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 2200 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 2201#else 2202 dstxoff = 0; 2203 dstyoff = 0; 2204#endif 2205 2206 if (!R300PrepareTexturedVideo(pScrn, pPriv)) 2207 return; 2208 2209 /* 2210 * Rendering of the actual polygon is done in two different 2211 * ways depending on chip generation: 2212 * 2213 * < R300: 2214 * 2215 * These chips can render a rectangle in one pass, so 2216 * handling is pretty straight-forward. 2217 * 2218 * >= R300: 2219 * 2220 * These chips can accept a quad, but will render it as 2221 * two triangles which results in a diagonal tear. Instead 2222 * We render a single, large triangle and use the scissor 2223 * functionality to restrict it to the desired rectangle. 2224 * Due to guardband limits on r3xx/r4xx, we can only use 2225 * the single triangle up to 2560/4021 pixels; above that we 2226 * render as a quad. 
2227 */ 2228 2229 while (nBox--) { 2230 float srcX, srcY, srcw, srch; 2231 int dstX, dstY, dstw, dsth; 2232 Bool use_quad = FALSE; 2233 int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3; 2234 2235 if (draw_size > radeon_cs_space_remaining(pScrn)) { 2236 radeon_cs_flush_indirect(pScrn); 2237 if (!R300PrepareTexturedVideo(pScrn, pPriv)) 2238 return; 2239 } 2240 2241 dstX = pBox->x1 + dstxoff; 2242 dstY = pBox->y1 + dstyoff; 2243 dstw = pBox->x2 - pBox->x1; 2244 dsth = pBox->y2 - pBox->y1; 2245 2246 srcX = pPriv->src_x; 2247 srcX += ((pBox->x1 - pPriv->drw_x) * 2248 pPriv->src_w) / (float)pPriv->dst_w; 2249 srcY = pPriv->src_y; 2250 srcY += ((pBox->y1 - pPriv->drw_y) * 2251 pPriv->src_h) / (float)pPriv->dst_h; 2252 2253 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 2254 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 2255 2256 if (IS_R400_3D) { 2257 if ((dstw+dsth) > 4021) 2258 use_quad = TRUE; 2259 } else { 2260 if ((dstw+dsth) > 2560) 2261 use_quad = TRUE; 2262 } 2263 /* 2264 * Set up the scissor area to that of the output size. 
2265 */ 2266 BEGIN_RING(2*2); 2267 /* R300 has an offset */ 2268 OUT_RING_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) | 2269 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT))); 2270 OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) | 2271 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); 2272 ADVANCE_RING(); 2273 2274 if (use_quad) { 2275 BEGIN_RING(4 * pPriv->vtx_count + 4); 2276 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2277 4 * pPriv->vtx_count)); 2278 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | 2279 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2280 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2281 } else { 2282 BEGIN_RING(3 * pPriv->vtx_count + 4); 2283 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2284 3 * pPriv->vtx_count)); 2285 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 2286 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2287 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2288 } 2289 2290 if (pPriv->bicubic_enabled) { 2291 /* 2292 * This code is only executed on >= R300, so we don't 2293 * have to deal with the legacy handling. 
2294 */ 2295 if (use_quad) { 2296 VTX_OUT_6((float)dstX, (float)dstY, 2297 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2298 (float)srcX + 0.5, (float)srcY + 0.5); 2299 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 2300 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 2301 (float)srcX + 0.5, (float)(srcY + srch) + 0.5); 2302 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 2303 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 2304 (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); 2305 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 2306 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 2307 (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); 2308 } else { 2309 VTX_OUT_6((float)dstX, (float)dstY, 2310 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2311 (float)srcX + 0.5, (float)srcY + 0.5); 2312 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 2313 (float)srcX / pPriv->w, 2314 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 2315 (float)srcX + 0.5, 2316 (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 2317 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 2318 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2319 (float)srcY / pPriv->h, 2320 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 2321 (float)srcY + 0.5); 2322 } 2323 } else { 2324 if (use_quad) { 2325 VTX_OUT_4((float)dstX, (float)dstY, 2326 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2327 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 2328 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 2329 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 2330 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 2331 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 2332 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 2333 } else { 2334 /* 2335 * Render a big, scissored triangle. 
This means 2336 * increasing the triangle size and adjusting 2337 * texture coordinates. 2338 */ 2339 VTX_OUT_4((float)dstX, (float)dstY, 2340 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2341 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 2342 (float)srcX / pPriv->w, 2343 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 2344 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 2345 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2346 (float)srcY / pPriv->h); 2347 } 2348 } 2349 2350 /* flushing is pipelined, free/finish is not */ 2351 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2352 2353 ADVANCE_RING(); 2354 2355 pBox++; 2356 } 2357 2358 BEGIN_RING(2*3); 2359 OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA); 2360 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 2361 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 2362 ADVANCE_RING(); 2363 2364 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 2365} 2366 2367static Bool 2368R500PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2369{ 2370 RADEONInfoPtr info = RADEONPTR(pScrn); 2371 PixmapPtr pPixmap = pPriv->pPixmap; 2372 struct radeon_exa_pixmap_priv *driver_priv; 2373 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 2374 uint32_t txfilter, txformat0, txformat1, txpitch, us_format = 0; 2375 uint32_t dst_pitch, dst_format; 2376 uint32_t txenable, colorpitch; 2377 uint32_t output_fmt; 2378 int pixel_shift, out_size = 6; 2379 int ret; 2380 2381 radeon_cs_space_reset_bos(info->cs); 2382 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2383 2384 if (pPriv->bicubic_enabled) 2385 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 2386 RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2387 2388 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 2389 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 2390 
RADEON_GEM_DOMAIN_VRAM); 2391 2392 ret = radeon_cs_space_check(info->cs); 2393 if (ret) { 2394 ErrorF("Not enough RAM to hw accel xv operation\n"); 2395 return FALSE; 2396 } 2397 2398 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2399 2400 dst_pitch = exaGetPixmapPitch(pPixmap); 2401 RADEON_SWITCH_TO_3D(); 2402 2403 if (pPriv->bicubic_enabled) 2404 pPriv->vtx_count = 6; 2405 else 2406 pPriv->vtx_count = 4; 2407 2408 switch (pPixmap->drawable.bitsPerPixel) { 2409 case 16: 2410 if (pPixmap->drawable.depth == 15) 2411 dst_format = R300_COLORFORMAT_ARGB1555; 2412 else 2413 dst_format = R300_COLORFORMAT_RGB565; 2414 break; 2415 case 32: 2416 dst_format = R300_COLORFORMAT_ARGB8888; 2417 break; 2418 default: 2419 return FALSE; 2420 } 2421 2422 output_fmt = (R300_OUT_FMT_C4_8 | 2423 R300_OUT_FMT_C0_SEL_BLUE | 2424 R300_OUT_FMT_C1_SEL_GREEN | 2425 R300_OUT_FMT_C2_SEL_RED | 2426 R300_OUT_FMT_C3_SEL_ALPHA); 2427 2428 colorpitch = dst_pitch >> pixel_shift; 2429 colorpitch |= dst_format; 2430 2431 if (RADEONTilingEnabled(pScrn, pPixmap)) 2432 colorpitch |= R300_COLORTILE; 2433 2434 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2435 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2436 pPriv->is_planar = TRUE; 2437 else 2438 pPriv->is_planar = FALSE; 2439 2440 if (pPriv->is_planar) { 2441 txformat1 = R300_TX_FORMAT_X8; 2442 txpitch = pPriv->src_pitch; 2443 } else { 2444 if (pPriv->id == FOURCC_UYVY) 2445 txformat1 = R300_TX_FORMAT_YVYU422; 2446 else 2447 txformat1 = R300_TX_FORMAT_VYUY422; 2448 2449 if (pPriv->bicubic_state != BICUBIC_OFF) 2450 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2451 2452 /* pitch is in pixels */ 2453 txpitch = pPriv->src_pitch / 2; 2454 } 2455 txpitch -= 1; 2456 2457 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2458 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2459 R300_TXPITCH_EN); 2460 2461 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2462 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2463 
R300_TX_MAG_FILTER_LINEAR | 2464 R300_TX_MIN_FILTER_LINEAR | 2465 (0 << R300_TX_ID_SHIFT)); 2466 2467 2468 if ((pPriv->w - 1) & 0x800) 2469 txpitch |= R500_TXWIDTH_11; 2470 2471 if ((pPriv->h - 1) & 0x800) 2472 txpitch |= R500_TXHEIGHT_11; 2473 2474 if (info->ChipFamily == CHIP_FAMILY_R520) { 2475 unsigned us_width = (pPriv->w - 1) & 0x7ff; 2476 unsigned us_height = (pPriv->h - 1) & 0x7ff; 2477 unsigned us_depth = 0; 2478 2479 if (pPriv->w > 2048) { 2480 us_width = (0x7ff + us_width) >> 1; 2481 us_depth |= 0x0d; 2482 } 2483 if (pPriv->h > 2048) { 2484 us_height = (0x7ff + us_height) >> 1; 2485 us_depth |= 0x0e; 2486 } 2487 us_format = (us_width << R300_TXWIDTH_SHIFT) | 2488 (us_height << R300_TXHEIGHT_SHIFT) | 2489 (us_depth << R300_TXDEPTH_SHIFT); 2490 out_size++; 2491 } 2492 2493 BEGIN_ACCEL_RELOC(out_size, 1); 2494 OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 2495 OUT_RING_REG(R300_TX_FILTER1_0, 0); 2496 OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 2497 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 2498 OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 2499 OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 2500 if (info->ChipFamily == CHIP_FAMILY_R520) 2501 OUT_RING_REG(R500_US_FORMAT0_0, us_format); 2502 ADVANCE_RING(); 2503 2504 txenable = R300_TEX_0_ENABLE; 2505 2506 if (pPriv->is_planar) { 2507 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2508 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2509 R300_TXPITCH_EN); 2510 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2511 txpitch -= 1; 2512 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2513 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2514 R300_TX_MIN_FILTER_LINEAR | 2515 R300_TX_MAG_FILTER_LINEAR); 2516 2517 BEGIN_ACCEL_RELOC(12, 2); 2518 OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 2519 OUT_RING_REG(R300_TX_FILTER1_1, 0); 2520 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 2521 OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 2522 
OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 2523 OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 2524 OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 2525 OUT_RING_REG(R300_TX_FILTER1_2, 0); 2526 OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 2527 OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 2528 OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 2529 OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 2530 ADVANCE_RING(); 2531 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2532 } 2533 2534 if (pPriv->bicubic_enabled) { 2535 /* Size is 128x1 */ 2536 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2537 (0x0 << R300_TXHEIGHT_SHIFT) | 2538 R300_TXPITCH_EN); 2539 /* Format is 32-bit floats, 4bpp */ 2540 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2541 /* Pitch is 127 (128-1) */ 2542 txpitch = 0x7f; 2543 /* Tex filter */ 2544 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2545 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2546 R300_TX_MIN_FILTER_NEAREST | 2547 R300_TX_MAG_FILTER_NEAREST | 2548 (1 << R300_TX_ID_SHIFT)); 2549 2550 BEGIN_ACCEL_RELOC(6, 1); 2551 OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 2552 OUT_RING_REG(R300_TX_FILTER1_1, 0); 2553 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 2554 OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 2555 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 2556 OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 2557 ADVANCE_RING(); 2558 2559 /* Enable tex 1 */ 2560 txenable |= R300_TEX_1_ENABLE; 2561 } 2562 2563 /* setup the VAP */ 2564 if (info->accel_state->has_tcl) { 2565 if (pPriv->bicubic_enabled) 2566 BEGIN_RING(2*7); 2567 else 2568 BEGIN_RING(2*6); 2569 } else { 2570 if (pPriv->bicubic_enabled) 2571 BEGIN_RING(2*5); 2572 else 2573 BEGIN_RING(2*4); 2574 } 2575 2576 /* These registers define the number, type, and location of data submitted 2577 * to the PVS unit of GA input (when PVS is disabled) 2578 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2579 
* enabled. This memory provides the imputs to the vertex shader program 2580 * and ordering is not important. When PVS/TCL is disabled, this field maps 2581 * directly to the GA input memory and the order is significant. In 2582 * PVS_BYPASS mode the order is as follows: 2583 * Position 2584 * Point Size 2585 * Color 0-3 2586 * Textures 0-7 2587 * Fog 2588 */ 2589 if (pPriv->bicubic_enabled) { 2590 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2591 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2592 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2593 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2594 R300_SIGNED_0 | 2595 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2596 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2597 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2598 R300_SIGNED_1)); 2599 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 2600 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2601 (0 << R300_SKIP_DWORDS_2_SHIFT) | 2602 (7 << R300_DST_VEC_LOC_2_SHIFT) | 2603 R300_LAST_VEC_2 | 2604 R300_SIGNED_2)); 2605 } else { 2606 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2607 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2608 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2609 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2610 R300_SIGNED_0 | 2611 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2612 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2613 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2614 R300_LAST_VEC_1 | 2615 R300_SIGNED_1)); 2616 } 2617 2618 /* load the vertex shader 2619 * We pre-load vertex programs in RADEONInit3DEngine(): 2620 * - exa 2621 * - Xv 2622 * - Xv bicubic 2623 * Here we select the offset of the vertex program we want to use 2624 */ 2625 if (info->accel_state->has_tcl) { 2626 if (pPriv->bicubic_enabled) { 2627 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2628 ((11 << R300_PVS_FIRST_INST_SHIFT) | 2629 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2630 (13 << R300_PVS_LAST_INST_SHIFT))); 2631 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2632 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2633 } else { 2634 
OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2635 ((9 << R300_PVS_FIRST_INST_SHIFT) | 2636 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2637 (10 << R300_PVS_LAST_INST_SHIFT))); 2638 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2639 (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2640 } 2641 } 2642 2643 /* Position and one set of 2 texture coordinates */ 2644 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2645 if (pPriv->bicubic_enabled) 2646 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2647 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2648 else 2649 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2650 2651 OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 2652 ADVANCE_RING(); 2653 2654 /* setup pixel shader */ 2655 if (pPriv->bicubic_state != BICUBIC_OFF) { 2656 if (pPriv->bicubic_enabled) { 2657 BEGIN_RING(2*7); 2658 2659 /* 4 components: 2 for tex0 and 2 for tex1 */ 2660 OUT_RING_REG(R300_RS_COUNT, 2661 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2662 R300_RS_COUNT_HIRES_EN)); 2663 2664 /* R300_INST_COUNT_RS - highest RS instruction used */ 2665 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 2666 2667 /* Pixel stack frame size. */ 2668 OUT_RING_REG(R300_US_PIXSIZE, 5); 2669 2670 /* FP length. */ 2671 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 2672 R500_US_CODE_END_ADDR(13))); 2673 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 2674 R500_US_CODE_RANGE_SIZE(13))); 2675 2676 /* Prepare for FP emission. */ 2677 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 2678 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 2679 ADVANCE_RING(); 2680 2681 BEGIN_RING(2*89); 2682 /* Pixel shader. 2683 * I've gone ahead and annotated each instruction, since this 2684 * thing is MASSIVE. :3 2685 * Note: In order to avoid buggies with temps and multiple 2686 * inputs, all temps are offset by 2. temp0 -> register2. 
*/ 2687 2688 /* TEX temp2, input1.xxxx, tex1, 1D */ 2689 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2690 R500_INST_RGB_WMASK_R | 2691 R500_INST_RGB_WMASK_G | 2692 R500_INST_RGB_WMASK_B)); 2693 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2694 R500_TEX_INST_LD | 2695 R500_TEX_IGNORE_UNCOVERED)); 2696 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2697 R500_TEX_SRC_S_SWIZ_R | 2698 R500_TEX_SRC_T_SWIZ_R | 2699 R500_TEX_SRC_R_SWIZ_R | 2700 R500_TEX_SRC_Q_SWIZ_R | 2701 R500_TEX_DST_ADDR(2) | 2702 R500_TEX_DST_R_SWIZ_R | 2703 R500_TEX_DST_G_SWIZ_G | 2704 R500_TEX_DST_B_SWIZ_B | 2705 R500_TEX_DST_A_SWIZ_A)); 2706 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2707 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2708 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2709 2710 /* TEX temp5, input1.yyyy, tex1, 1D */ 2711 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2712 R500_INST_TEX_SEM_WAIT | 2713 R500_INST_RGB_WMASK_R | 2714 R500_INST_RGB_WMASK_G | 2715 R500_INST_RGB_WMASK_B)); 2716 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2717 R500_TEX_INST_LD | 2718 R500_TEX_SEM_ACQUIRE | 2719 R500_TEX_IGNORE_UNCOVERED)); 2720 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2721 R500_TEX_SRC_S_SWIZ_G | 2722 R500_TEX_SRC_T_SWIZ_G | 2723 R500_TEX_SRC_R_SWIZ_G | 2724 R500_TEX_SRC_Q_SWIZ_G | 2725 R500_TEX_DST_ADDR(5) | 2726 R500_TEX_DST_R_SWIZ_R | 2727 R500_TEX_DST_G_SWIZ_G | 2728 R500_TEX_DST_B_SWIZ_B | 2729 R500_TEX_DST_A_SWIZ_A)); 2730 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2731 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2732 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2733 2734 /* MUL temp4, const0.x0x0, temp2.yyxx */ 2735 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2736 R500_INST_TEX_SEM_WAIT | 2737 R500_INST_RGB_WMASK_R | 2738 R500_INST_RGB_WMASK_G | 2739 R500_INST_RGB_WMASK_B | 2740 R500_INST_ALPHA_WMASK)); 2741 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 
(R500_RGB_ADDR0(0) | 2742 R500_RGB_ADDR0_CONST | 2743 R500_RGB_ADDR1(2))); 2744 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2745 R500_ALPHA_ADDR0_CONST | 2746 R500_ALPHA_ADDR1(2))); 2747 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2748 R500_ALU_RGB_R_SWIZ_A_R | 2749 R500_ALU_RGB_G_SWIZ_A_0 | 2750 R500_ALU_RGB_B_SWIZ_A_R | 2751 R500_ALU_RGB_SEL_B_SRC1 | 2752 R500_ALU_RGB_R_SWIZ_B_G | 2753 R500_ALU_RGB_G_SWIZ_B_G | 2754 R500_ALU_RGB_B_SWIZ_B_R)); 2755 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2756 R500_ALPHA_OP_MAD | 2757 R500_ALPHA_SEL_A_SRC0 | 2758 R500_ALPHA_SWIZ_A_0 | 2759 R500_ALPHA_SEL_B_SRC1 | 2760 R500_ALPHA_SWIZ_B_R)); 2761 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2762 R500_ALU_RGBA_OP_MAD | 2763 R500_ALU_RGBA_R_SWIZ_0 | 2764 R500_ALU_RGBA_G_SWIZ_0 | 2765 R500_ALU_RGBA_B_SWIZ_0 | 2766 R500_ALU_RGBA_A_SWIZ_0)); 2767 2768 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 2769 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2770 R500_INST_RGB_WMASK_R | 2771 R500_INST_RGB_WMASK_G | 2772 R500_INST_RGB_WMASK_B | 2773 R500_INST_ALPHA_WMASK)); 2774 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2775 R500_RGB_ADDR0_CONST | 2776 R500_RGB_ADDR1(5) | 2777 R500_RGB_ADDR2(4))); 2778 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2779 R500_ALPHA_ADDR0_CONST | 2780 R500_ALPHA_ADDR1(5) | 2781 R500_ALPHA_ADDR2(4))); 2782 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2783 R500_ALU_RGB_R_SWIZ_A_0 | 2784 R500_ALU_RGB_G_SWIZ_A_G | 2785 R500_ALU_RGB_B_SWIZ_A_0 | 2786 R500_ALU_RGB_SEL_B_SRC1 | 2787 R500_ALU_RGB_R_SWIZ_B_R | 2788 R500_ALU_RGB_G_SWIZ_B_R | 2789 R500_ALU_RGB_B_SWIZ_B_R)); 2790 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2791 R500_ALPHA_OP_MAD | 2792 R500_ALPHA_SEL_A_SRC0 | 2793 R500_ALPHA_SWIZ_A_G | 2794 R500_ALPHA_SEL_B_SRC1 | 2795 R500_ALPHA_SWIZ_B_R)); 2796 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2797 
R500_ALU_RGBA_OP_MAD | 2798 R500_ALU_RGBA_SEL_C_SRC2 | 2799 R500_ALU_RGBA_R_SWIZ_R | 2800 R500_ALU_RGBA_G_SWIZ_G | 2801 R500_ALU_RGBA_B_SWIZ_B | 2802 R500_ALU_RGBA_A_SWIZ_A)); 2803 2804 /* ADD temp3, temp3, input0.xyxy */ 2805 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2806 R500_INST_RGB_WMASK_R | 2807 R500_INST_RGB_WMASK_G | 2808 R500_INST_RGB_WMASK_B | 2809 R500_INST_ALPHA_WMASK)); 2810 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 2811 R500_RGB_ADDR2(0))); 2812 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 2813 R500_ALPHA_ADDR2(0))); 2814 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2815 R500_ALU_RGB_G_SWIZ_A_1 | 2816 R500_ALU_RGB_B_SWIZ_A_1 | 2817 R500_ALU_RGB_SEL_B_SRC1 | 2818 R500_ALU_RGB_R_SWIZ_B_R | 2819 R500_ALU_RGB_G_SWIZ_B_G | 2820 R500_ALU_RGB_B_SWIZ_B_B)); 2821 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2822 R500_ALPHA_OP_MAD | 2823 R500_ALPHA_SWIZ_A_1 | 2824 R500_ALPHA_SEL_B_SRC1 | 2825 R500_ALPHA_SWIZ_B_A)); 2826 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2827 R500_ALU_RGBA_OP_MAD | 2828 R500_ALU_RGBA_SEL_C_SRC2 | 2829 R500_ALU_RGBA_R_SWIZ_R | 2830 R500_ALU_RGBA_G_SWIZ_G | 2831 R500_ALU_RGBA_B_SWIZ_R | 2832 R500_ALU_RGBA_A_SWIZ_G)); 2833 2834 /* TEX temp1, temp3.zwxy, tex0, 2D */ 2835 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2836 R500_INST_RGB_WMASK_R | 2837 R500_INST_RGB_WMASK_G | 2838 R500_INST_RGB_WMASK_B | 2839 R500_INST_ALPHA_WMASK)); 2840 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2841 R500_TEX_INST_LD | 2842 R500_TEX_IGNORE_UNCOVERED)); 2843 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2844 R500_TEX_SRC_S_SWIZ_B | 2845 R500_TEX_SRC_T_SWIZ_A | 2846 R500_TEX_SRC_R_SWIZ_R | 2847 R500_TEX_SRC_Q_SWIZ_G | 2848 R500_TEX_DST_ADDR(1) | 2849 R500_TEX_DST_R_SWIZ_R | 2850 R500_TEX_DST_G_SWIZ_G | 2851 R500_TEX_DST_B_SWIZ_B | 2852 R500_TEX_DST_A_SWIZ_A)); 2853 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2854 
OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2855 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2856 2857 /* TEX temp3, temp3.xyzw, tex0, 2D */ 2858 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2859 R500_INST_TEX_SEM_WAIT | 2860 R500_INST_RGB_WMASK_R | 2861 R500_INST_RGB_WMASK_G | 2862 R500_INST_RGB_WMASK_B | 2863 R500_INST_ALPHA_WMASK)); 2864 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2865 R500_TEX_INST_LD | 2866 R500_TEX_SEM_ACQUIRE | 2867 R500_TEX_IGNORE_UNCOVERED)); 2868 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2869 R500_TEX_SRC_S_SWIZ_R | 2870 R500_TEX_SRC_T_SWIZ_G | 2871 R500_TEX_SRC_R_SWIZ_B | 2872 R500_TEX_SRC_Q_SWIZ_A | 2873 R500_TEX_DST_ADDR(3) | 2874 R500_TEX_DST_R_SWIZ_R | 2875 R500_TEX_DST_G_SWIZ_G | 2876 R500_TEX_DST_B_SWIZ_B | 2877 R500_TEX_DST_A_SWIZ_A)); 2878 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2879 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2880 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2881 2882 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 2883 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2884 R500_INST_RGB_WMASK_R | 2885 R500_INST_RGB_WMASK_G | 2886 R500_INST_RGB_WMASK_B | 2887 R500_INST_ALPHA_WMASK)); 2888 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2889 R500_RGB_ADDR0_CONST | 2890 R500_RGB_ADDR1(5) | 2891 R500_RGB_ADDR2(4))); 2892 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2893 R500_ALPHA_ADDR0_CONST | 2894 R500_ALPHA_ADDR1(5) | 2895 R500_ALPHA_ADDR2(4))); 2896 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2897 R500_ALU_RGB_R_SWIZ_A_0 | 2898 R500_ALU_RGB_G_SWIZ_A_G | 2899 R500_ALU_RGB_B_SWIZ_A_0 | 2900 R500_ALU_RGB_SEL_B_SRC1 | 2901 R500_ALU_RGB_R_SWIZ_B_G | 2902 R500_ALU_RGB_G_SWIZ_B_G | 2903 R500_ALU_RGB_B_SWIZ_B_G)); 2904 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2905 R500_ALPHA_OP_MAD | 2906 R500_ALPHA_SEL_A_SRC0 | 2907 R500_ALPHA_SWIZ_A_G | 2908 R500_ALPHA_SEL_B_SRC1 | 2909 
R500_ALPHA_SWIZ_B_G)); 2910 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2911 R500_ALU_RGBA_OP_MAD | 2912 R500_ALU_RGBA_SEL_C_SRC2 | 2913 R500_ALU_RGBA_R_SWIZ_R | 2914 R500_ALU_RGBA_G_SWIZ_G | 2915 R500_ALU_RGBA_B_SWIZ_B | 2916 R500_ALU_RGBA_A_SWIZ_A)); 2917 2918 /* ADD temp0, temp4, input0.xyxy */ 2919 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2920 R500_INST_RGB_WMASK_R | 2921 R500_INST_RGB_WMASK_G | 2922 R500_INST_RGB_WMASK_B | 2923 R500_INST_ALPHA_WMASK)); 2924 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 2925 R500_RGB_ADDR2(0))); 2926 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 2927 R500_ALPHA_ADDR2(0))); 2928 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2929 R500_ALU_RGB_G_SWIZ_A_1 | 2930 R500_ALU_RGB_B_SWIZ_A_1 | 2931 R500_ALU_RGB_SEL_B_SRC1 | 2932 R500_ALU_RGB_R_SWIZ_B_R | 2933 R500_ALU_RGB_G_SWIZ_B_G | 2934 R500_ALU_RGB_B_SWIZ_B_B)); 2935 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 2936 R500_ALPHA_OP_MAD | 2937 R500_ALPHA_SWIZ_A_1 | 2938 R500_ALPHA_SEL_B_SRC1 | 2939 R500_ALPHA_SWIZ_B_A)); 2940 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 2941 R500_ALU_RGBA_OP_MAD | 2942 R500_ALU_RGBA_SEL_C_SRC2 | 2943 R500_ALU_RGBA_R_SWIZ_R | 2944 R500_ALU_RGBA_G_SWIZ_G | 2945 R500_ALU_RGBA_B_SWIZ_R | 2946 R500_ALU_RGBA_A_SWIZ_G)); 2947 2948 /* TEX temp4, temp0.zwzw, tex0, 2D */ 2949 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2950 R500_INST_TEX_SEM_WAIT | 2951 R500_INST_RGB_WMASK_R | 2952 R500_INST_RGB_WMASK_G | 2953 R500_INST_RGB_WMASK_B | 2954 R500_INST_ALPHA_WMASK)); 2955 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2956 R500_TEX_INST_LD | 2957 R500_TEX_IGNORE_UNCOVERED)); 2958 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2959 R500_TEX_SRC_S_SWIZ_B | 2960 R500_TEX_SRC_T_SWIZ_A | 2961 R500_TEX_SRC_R_SWIZ_B | 2962 R500_TEX_SRC_Q_SWIZ_A | 2963 R500_TEX_DST_ADDR(4) | 2964 R500_TEX_DST_R_SWIZ_R | 2965 
R500_TEX_DST_G_SWIZ_G | 2966 R500_TEX_DST_B_SWIZ_B | 2967 R500_TEX_DST_A_SWIZ_A)); 2968 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2969 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2970 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2971 2972 /* TEX temp0, temp0.xyzw, tex0, 2D */ 2973 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2974 R500_INST_TEX_SEM_WAIT | 2975 R500_INST_RGB_WMASK_R | 2976 R500_INST_RGB_WMASK_G | 2977 R500_INST_RGB_WMASK_B | 2978 R500_INST_ALPHA_WMASK)); 2979 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2980 R500_TEX_INST_LD | 2981 R500_TEX_SEM_ACQUIRE | 2982 R500_TEX_IGNORE_UNCOVERED)); 2983 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2984 R500_TEX_SRC_S_SWIZ_R | 2985 R500_TEX_SRC_T_SWIZ_G | 2986 R500_TEX_SRC_R_SWIZ_B | 2987 R500_TEX_SRC_Q_SWIZ_A | 2988 R500_TEX_DST_ADDR(0) | 2989 R500_TEX_DST_R_SWIZ_R | 2990 R500_TEX_DST_G_SWIZ_G | 2991 R500_TEX_DST_B_SWIZ_B | 2992 R500_TEX_DST_A_SWIZ_A)); 2993 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2994 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2995 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2996 2997 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 2998 * - PRESUB temps, temp1 - temp3 2999 * - MAD temp2.zzzz, temps, temp3 */ 3000 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3001 R500_INST_RGB_WMASK_R | 3002 R500_INST_RGB_WMASK_G | 3003 R500_INST_RGB_WMASK_B | 3004 R500_INST_ALPHA_WMASK)); 3005 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3006 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3007 R500_RGB_ADDR1(1) | 3008 R500_RGB_ADDR2(2))); 3009 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3010 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3011 R500_ALPHA_ADDR1(1) | 3012 R500_ALPHA_ADDR2(2))); 3013 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3014 R500_ALU_RGB_R_SWIZ_A_B | 3015 R500_ALU_RGB_G_SWIZ_A_B | 3016 R500_ALU_RGB_B_SWIZ_A_B | 3017 R500_ALU_RGB_SEL_B_SRCP | 3018 R500_ALU_RGB_R_SWIZ_B_R | 3019 
R500_ALU_RGB_G_SWIZ_B_G | 3020 R500_ALU_RGB_B_SWIZ_B_B)); 3021 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3022 R500_ALPHA_OP_MAD | 3023 R500_ALPHA_SEL_A_SRC2 | 3024 R500_ALPHA_SWIZ_A_B | 3025 R500_ALPHA_SEL_B_SRCP | 3026 R500_ALPHA_SWIZ_B_A)); 3027 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3028 R500_ALU_RGBA_OP_MAD | 3029 R500_ALU_RGBA_SEL_C_SRC0 | 3030 R500_ALU_RGBA_R_SWIZ_R | 3031 R500_ALU_RGBA_G_SWIZ_G | 3032 R500_ALU_RGBA_B_SWIZ_B | 3033 R500_ALU_RGBA_A_SWIZ_A)); 3034 3035 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3036 * - PRESUB temps, temp4 - temp0 3037 * - MAD temp2.zzzz, temps, temp0 */ 3038 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3039 R500_INST_TEX_SEM_WAIT | 3040 R500_INST_RGB_WMASK_R | 3041 R500_INST_RGB_WMASK_G | 3042 R500_INST_RGB_WMASK_B | 3043 R500_INST_ALPHA_WMASK)); 3044 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3045 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3046 R500_RGB_ADDR1(4) | 3047 R500_RGB_ADDR2(2))); 3048 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3049 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3050 R500_ALPHA_ADDR1(4) | 3051 R500_ALPHA_ADDR2(2))); 3052 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3053 R500_ALU_RGB_R_SWIZ_A_B | 3054 R500_ALU_RGB_G_SWIZ_A_B | 3055 R500_ALU_RGB_B_SWIZ_A_B | 3056 R500_ALU_RGB_SEL_B_SRCP | 3057 R500_ALU_RGB_R_SWIZ_B_R | 3058 R500_ALU_RGB_G_SWIZ_B_G | 3059 R500_ALU_RGB_B_SWIZ_B_B)); 3060 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3061 R500_ALPHA_OP_MAD | 3062 R500_ALPHA_SEL_A_SRC2 | 3063 R500_ALPHA_SWIZ_A_B | 3064 R500_ALPHA_SEL_B_SRCP | 3065 R500_ALPHA_SWIZ_B_A)); 3066 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3067 R500_ALU_RGBA_OP_MAD | 3068 R500_ALU_RGBA_SEL_C_SRC0 | 3069 R500_ALU_RGBA_R_SWIZ_R | 3070 R500_ALU_RGBA_G_SWIZ_G | 3071 R500_ALU_RGBA_B_SWIZ_B | 3072 R500_ALU_RGBA_A_SWIZ_A)); 3073 3074 /* LRP output, temp5.zzzz, temp3, temp0 -> 3075 * - PRESUB temps, temp3 - temp0 3076 * 
- MAD temp5.zzzz, temps, temp0 */ 3077 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3078 R500_INST_LAST | 3079 R500_INST_TEX_SEM_WAIT | 3080 R500_INST_RGB_WMASK_R | 3081 R500_INST_RGB_WMASK_G | 3082 R500_INST_RGB_WMASK_B | 3083 R500_INST_ALPHA_WMASK | 3084 R500_INST_RGB_OMASK_R | 3085 R500_INST_RGB_OMASK_G | 3086 R500_INST_RGB_OMASK_B | 3087 R500_INST_ALPHA_OMASK)); 3088 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3089 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3090 R500_RGB_ADDR1(3) | 3091 R500_RGB_ADDR2(5))); 3092 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3093 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3094 R500_ALPHA_ADDR1(3) | 3095 R500_ALPHA_ADDR2(5))); 3096 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3097 R500_ALU_RGB_R_SWIZ_A_B | 3098 R500_ALU_RGB_G_SWIZ_A_B | 3099 R500_ALU_RGB_B_SWIZ_A_B | 3100 R500_ALU_RGB_SEL_B_SRCP | 3101 R500_ALU_RGB_R_SWIZ_B_R | 3102 R500_ALU_RGB_G_SWIZ_B_G | 3103 R500_ALU_RGB_B_SWIZ_B_B)); 3104 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3105 R500_ALPHA_OP_MAD | 3106 R500_ALPHA_SEL_A_SRC2 | 3107 R500_ALPHA_SWIZ_A_B | 3108 R500_ALPHA_SEL_B_SRCP | 3109 R500_ALPHA_SWIZ_B_A)); 3110 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3111 R500_ALU_RGBA_OP_MAD | 3112 R500_ALU_RGBA_SEL_C_SRC0 | 3113 R500_ALU_RGBA_R_SWIZ_R | 3114 R500_ALU_RGBA_G_SWIZ_G | 3115 R500_ALU_RGBA_B_SWIZ_B | 3116 R500_ALU_RGBA_A_SWIZ_A)); 3117 3118 /* Shader constants. 
*/ 3119 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3120 3121 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3122 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3123 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3124 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3125 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3126 3127 ADVANCE_RING(); 3128 } else { 3129 BEGIN_RING(2*19); 3130 /* 2 components: 2 for tex0 */ 3131 OUT_RING_REG(R300_RS_COUNT, 3132 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3133 R300_RS_COUNT_HIRES_EN)); 3134 3135 /* R300_INST_COUNT_RS - highest RS instruction used */ 3136 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3137 3138 /* Pixel stack frame size. */ 3139 OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3140 3141 /* FP length. */ 3142 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3143 R500_US_CODE_END_ADDR(1))); 3144 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3145 R500_US_CODE_RANGE_SIZE(1))); 3146 3147 /* Prepare for FP emission. 
*/ 3148 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3149 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3150 3151 /* tex inst */ 3152 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3153 R500_INST_TEX_SEM_WAIT | 3154 R500_INST_RGB_WMASK_R | 3155 R500_INST_RGB_WMASK_G | 3156 R500_INST_RGB_WMASK_B | 3157 R500_INST_ALPHA_WMASK | 3158 R500_INST_RGB_CLAMP | 3159 R500_INST_ALPHA_CLAMP)); 3160 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3161 R500_TEX_INST_LD | 3162 R500_TEX_SEM_ACQUIRE | 3163 R500_TEX_IGNORE_UNCOVERED)); 3164 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3165 R500_TEX_SRC_S_SWIZ_R | 3166 R500_TEX_SRC_T_SWIZ_G | 3167 R500_TEX_DST_ADDR(0) | 3168 R500_TEX_DST_R_SWIZ_R | 3169 R500_TEX_DST_G_SWIZ_G | 3170 R500_TEX_DST_B_SWIZ_B | 3171 R500_TEX_DST_A_SWIZ_A)); 3172 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3173 R500_DX_S_SWIZ_R | 3174 R500_DX_T_SWIZ_R | 3175 R500_DX_R_SWIZ_R | 3176 R500_DX_Q_SWIZ_R | 3177 R500_DY_ADDR(0) | 3178 R500_DY_S_SWIZ_R | 3179 R500_DY_T_SWIZ_R | 3180 R500_DY_R_SWIZ_R | 3181 R500_DY_Q_SWIZ_R)); 3182 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3183 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3184 3185 /* ALU inst */ 3186 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3187 R500_INST_TEX_SEM_WAIT | 3188 R500_INST_LAST | 3189 R500_INST_RGB_OMASK_R | 3190 R500_INST_RGB_OMASK_G | 3191 R500_INST_RGB_OMASK_B | 3192 R500_INST_ALPHA_OMASK | 3193 R500_INST_RGB_CLAMP | 3194 R500_INST_ALPHA_CLAMP)); 3195 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3196 R500_RGB_ADDR1(0) | 3197 R500_RGB_ADDR1_CONST | 3198 R500_RGB_ADDR2(0) | 3199 R500_RGB_ADDR2_CONST)); 3200 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3201 R500_ALPHA_ADDR1(0) | 3202 R500_ALPHA_ADDR1_CONST | 3203 R500_ALPHA_ADDR2(0) | 3204 R500_ALPHA_ADDR2_CONST)); 3205 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3206 R500_ALU_RGB_R_SWIZ_A_R | 3207 
R500_ALU_RGB_G_SWIZ_A_G | 3208 R500_ALU_RGB_B_SWIZ_A_B | 3209 R500_ALU_RGB_SEL_B_SRC0 | 3210 R500_ALU_RGB_R_SWIZ_B_1 | 3211 R500_ALU_RGB_B_SWIZ_B_1 | 3212 R500_ALU_RGB_G_SWIZ_B_1)); 3213 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3214 R500_ALPHA_SWIZ_A_A | 3215 R500_ALPHA_SWIZ_B_1)); 3216 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3217 R500_ALU_RGBA_R_SWIZ_0 | 3218 R500_ALU_RGBA_G_SWIZ_0 | 3219 R500_ALU_RGBA_B_SWIZ_0 | 3220 R500_ALU_RGBA_A_SWIZ_0)); 3221 ADVANCE_RING(); 3222 } 3223 } else { 3224 /* 3225 * y' = y - .0625 3226 * u' = u - .5 3227 * v' = v - .5; 3228 * 3229 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3230 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3231 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3232 * 3233 * DP3 might look like the straightforward solution 3234 * but we'd need to move the texture yuv values in 3235 * the same reg for this to work. Therefore use MADs. 3236 * Brightness just adds to the off constant. 3237 * Contrast is multiplication of luminance. 3238 * Saturation and hue change the u and v coeffs. 
3239 * Default values (before adjustments - depend on colorspace): 3240 * yco = 1.1643 3241 * uco = 0, -0.39173, 2.017 3242 * vco = 1.5958, -0.8129, 0 3243 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3244 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3245 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3246 * 3247 * temp = MAD(yco, yuv.yyyy, off) 3248 * temp = MAD(uco, yuv.uuuu, temp) 3249 * result = MAD(vco, yuv.vvvv, temp) 3250 */ 3251 /* TODO: don't recalc consts always */ 3252 const float Loff = -0.0627; 3253 const float Coff = -0.502; 3254 float uvcosf, uvsinf; 3255 float yco; 3256 float uco[3], vco[3], off[3]; 3257 float bright, cont, gamma; 3258 int ref = pPriv->transform_index; 3259 3260 cont = RTFContrast(pPriv->contrast); 3261 bright = RTFBrightness(pPriv->brightness); 3262 gamma = (float)pPriv->gamma / 1000.0; 3263 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3264 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3265 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 3266 3267 yco = trans[ref].RefLuma * cont; 3268 uco[0] = -trans[ref].RefRCr * uvsinf; 3269 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3270 uco[2] = trans[ref].RefBCb * uvcosf; 3271 vco[0] = trans[ref].RefRCr * uvcosf; 3272 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3273 vco[2] = trans[ref].RefBCb * uvsinf; 3274 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3275 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3276 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3277 3278 //XXX gamma 3279 3280 if (pPriv->is_planar) { 3281 BEGIN_RING(2*56); 3282 /* 2 components: 2 for tex0 */ 3283 OUT_RING_REG(R300_RS_COUNT, 3284 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3285 R300_RS_COUNT_HIRES_EN)); 3286 3287 /* R300_INST_COUNT_RS - highest RS instruction used */ 3288 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3289 3290 /* Pixel stack frame size. 
*/ 3291 OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3292 3293 /* FP length. */ 3294 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3295 R500_US_CODE_END_ADDR(5))); 3296 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3297 R500_US_CODE_RANGE_SIZE(5))); 3298 3299 /* Prepare for FP emission. */ 3300 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3301 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3302 3303 /* tex inst */ 3304 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3305 R500_INST_TEX_SEM_WAIT | 3306 R500_INST_RGB_WMASK_R | 3307 R500_INST_RGB_WMASK_G | 3308 R500_INST_RGB_WMASK_B | 3309 R500_INST_ALPHA_WMASK | 3310 R500_INST_RGB_CLAMP | 3311 R500_INST_ALPHA_CLAMP)); 3312 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3313 R500_TEX_INST_LD | 3314 R500_TEX_IGNORE_UNCOVERED)); 3315 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3316 R500_TEX_SRC_S_SWIZ_R | 3317 R500_TEX_SRC_T_SWIZ_G | 3318 R500_TEX_DST_ADDR(2) | 3319 R500_TEX_DST_R_SWIZ_R | 3320 R500_TEX_DST_G_SWIZ_G | 3321 R500_TEX_DST_B_SWIZ_B | 3322 R500_TEX_DST_A_SWIZ_A)); 3323 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3324 R500_DX_S_SWIZ_R | 3325 R500_DX_T_SWIZ_R | 3326 R500_DX_R_SWIZ_R | 3327 R500_DX_Q_SWIZ_R | 3328 R500_DY_ADDR(0) | 3329 R500_DY_S_SWIZ_R | 3330 R500_DY_T_SWIZ_R | 3331 R500_DY_R_SWIZ_R | 3332 R500_DY_Q_SWIZ_R)); 3333 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3334 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3335 3336 /* tex inst */ 3337 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3338 R500_INST_TEX_SEM_WAIT | 3339 R500_INST_RGB_WMASK_R | 3340 R500_INST_RGB_WMASK_G | 3341 R500_INST_RGB_WMASK_B | 3342 R500_INST_ALPHA_WMASK | 3343 R500_INST_RGB_CLAMP | 3344 R500_INST_ALPHA_CLAMP)); 3345 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3346 R500_TEX_INST_LD | 3347 R500_TEX_IGNORE_UNCOVERED)); 3348 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 
3349 R500_TEX_SRC_S_SWIZ_R | 3350 R500_TEX_SRC_T_SWIZ_G | 3351 R500_TEX_DST_ADDR(1) | 3352 R500_TEX_DST_R_SWIZ_R | 3353 R500_TEX_DST_G_SWIZ_G | 3354 R500_TEX_DST_B_SWIZ_B | 3355 R500_TEX_DST_A_SWIZ_A)); 3356 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3357 R500_DX_S_SWIZ_R | 3358 R500_DX_T_SWIZ_R | 3359 R500_DX_R_SWIZ_R | 3360 R500_DX_Q_SWIZ_R | 3361 R500_DY_ADDR(0) | 3362 R500_DY_S_SWIZ_R | 3363 R500_DY_T_SWIZ_R | 3364 R500_DY_R_SWIZ_R | 3365 R500_DY_Q_SWIZ_R)); 3366 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3367 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3368 3369 /* tex inst */ 3370 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3371 R500_INST_TEX_SEM_WAIT | 3372 R500_INST_RGB_WMASK_R | 3373 R500_INST_RGB_WMASK_G | 3374 R500_INST_RGB_WMASK_B | 3375 R500_INST_ALPHA_WMASK | 3376 R500_INST_RGB_CLAMP | 3377 R500_INST_ALPHA_CLAMP)); 3378 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3379 R500_TEX_INST_LD | 3380 R500_TEX_SEM_ACQUIRE | 3381 R500_TEX_IGNORE_UNCOVERED)); 3382 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3383 R500_TEX_SRC_S_SWIZ_R | 3384 R500_TEX_SRC_T_SWIZ_G | 3385 R500_TEX_DST_ADDR(0) | 3386 R500_TEX_DST_R_SWIZ_R | 3387 R500_TEX_DST_G_SWIZ_G | 3388 R500_TEX_DST_B_SWIZ_B | 3389 R500_TEX_DST_A_SWIZ_A)); 3390 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3391 R500_DX_S_SWIZ_R | 3392 R500_DX_T_SWIZ_R | 3393 R500_DX_R_SWIZ_R | 3394 R500_DX_Q_SWIZ_R | 3395 R500_DY_ADDR(0) | 3396 R500_DY_S_SWIZ_R | 3397 R500_DY_T_SWIZ_R | 3398 R500_DY_R_SWIZ_R | 3399 R500_DY_Q_SWIZ_R)); 3400 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3401 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3402 3403 /* ALU inst */ 3404 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 3405 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3406 R500_INST_TEX_SEM_WAIT | 3407 R500_INST_RGB_WMASK_R | 3408 R500_INST_RGB_WMASK_G | 3409 R500_INST_RGB_WMASK_B | 3410 R500_INST_ALPHA_WMASK)); 3411 
OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3412 R500_RGB_ADDR0_CONST | 3413 R500_RGB_ADDR1(2) | 3414 R500_RGB_ADDR2(0) | 3415 R500_RGB_ADDR2_CONST)); 3416 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3417 R500_ALPHA_ADDR0_CONST | 3418 R500_ALPHA_ADDR1(2) | 3419 R500_ALPHA_ADDR2(0) | 3420 R500_ALPHA_ADDR2_CONST)); 3421 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3422 R500_ALU_RGB_R_SWIZ_A_A | 3423 R500_ALU_RGB_G_SWIZ_A_A | 3424 R500_ALU_RGB_B_SWIZ_A_A | 3425 R500_ALU_RGB_SEL_B_SRC1 | 3426 R500_ALU_RGB_R_SWIZ_B_R | 3427 R500_ALU_RGB_B_SWIZ_B_G | 3428 R500_ALU_RGB_G_SWIZ_B_B)); 3429 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3430 R500_ALPHA_ADDRD(2) | 3431 R500_ALPHA_SWIZ_A_0 | 3432 R500_ALPHA_SWIZ_B_0)); 3433 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3434 R500_ALU_RGBA_ADDRD(2) | 3435 R500_ALU_RGBA_SEL_C_SRC0 | 3436 R500_ALU_RGBA_R_SWIZ_R | 3437 R500_ALU_RGBA_G_SWIZ_G | 3438 R500_ALU_RGBA_B_SWIZ_B | 3439 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3440 R500_ALU_RGBA_A_SWIZ_0)); 3441 3442 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 3443 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3444 R500_INST_TEX_SEM_WAIT | 3445 R500_INST_RGB_WMASK_R | 3446 R500_INST_RGB_WMASK_G | 3447 R500_INST_RGB_WMASK_B | 3448 R500_INST_ALPHA_WMASK)); 3449 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3450 R500_RGB_ADDR0_CONST | 3451 R500_RGB_ADDR1(1) | 3452 R500_RGB_ADDR2(2))); 3453 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3454 R500_ALPHA_ADDR0_CONST | 3455 R500_ALPHA_ADDR1(1) | 3456 R500_ALPHA_ADDR2(2))); 3457 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3458 R500_ALU_RGB_R_SWIZ_A_R | 3459 R500_ALU_RGB_G_SWIZ_A_G | 3460 R500_ALU_RGB_B_SWIZ_A_B | 3461 R500_ALU_RGB_SEL_B_SRC1 | 3462 R500_ALU_RGB_R_SWIZ_B_R | 3463 R500_ALU_RGB_B_SWIZ_B_G | 3464 R500_ALU_RGB_G_SWIZ_B_B)); 3465 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3466 
R500_ALPHA_ADDRD(2) | 3467 R500_ALPHA_SWIZ_A_0 | 3468 R500_ALPHA_SWIZ_B_0)); 3469 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3470 R500_ALU_RGBA_ADDRD(2) | 3471 R500_ALU_RGBA_SEL_C_SRC2 | 3472 R500_ALU_RGBA_R_SWIZ_R | 3473 R500_ALU_RGBA_G_SWIZ_G | 3474 R500_ALU_RGBA_B_SWIZ_B | 3475 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3476 R500_ALU_RGBA_A_SWIZ_0)); 3477 3478 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 3479 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3480 R500_INST_TEX_SEM_WAIT | 3481 R500_INST_LAST | 3482 R500_INST_RGB_OMASK_R | 3483 R500_INST_RGB_OMASK_G | 3484 R500_INST_RGB_OMASK_B | 3485 R500_INST_ALPHA_OMASK | 3486 R500_INST_RGB_CLAMP | 3487 R500_INST_ALPHA_CLAMP)); 3488 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3489 R500_RGB_ADDR0_CONST | 3490 R500_RGB_ADDR1(0) | 3491 R500_RGB_ADDR2(2))); 3492 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3493 R500_ALPHA_ADDR0_CONST | 3494 R500_ALPHA_ADDR1(0) | 3495 R500_ALPHA_ADDR2(2))); 3496 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3497 R500_ALU_RGB_R_SWIZ_A_R | 3498 R500_ALU_RGB_G_SWIZ_A_G | 3499 R500_ALU_RGB_B_SWIZ_A_B | 3500 R500_ALU_RGB_SEL_B_SRC1 | 3501 R500_ALU_RGB_R_SWIZ_B_R | 3502 R500_ALU_RGB_B_SWIZ_B_G | 3503 R500_ALU_RGB_G_SWIZ_B_B)); 3504 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3505 R500_ALPHA_ADDRD(0) | 3506 R500_ALPHA_SWIZ_A_0 | 3507 R500_ALPHA_SWIZ_B_0)); 3508 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3509 R500_ALU_RGBA_ADDRD(0) | 3510 R500_ALU_RGBA_SEL_C_SRC2 | 3511 R500_ALU_RGBA_R_SWIZ_R | 3512 R500_ALU_RGBA_G_SWIZ_G | 3513 R500_ALU_RGBA_B_SWIZ_B | 3514 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3515 R500_ALU_RGBA_A_SWIZ_1)); 3516 3517 } else { 3518 BEGIN_RING(2*44); 3519 /* 2 components: 2 for tex0/1/2 */ 3520 OUT_RING_REG(R300_RS_COUNT, 3521 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3522 R300_RS_COUNT_HIRES_EN)); 3523 3524 /* R300_INST_COUNT_RS - highest RS instruction used */ 3525 
OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3526 3527 /* Pixel stack frame size. */ 3528 OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3529 3530 /* FP length. */ 3531 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3532 R500_US_CODE_END_ADDR(3))); 3533 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3534 R500_US_CODE_RANGE_SIZE(3))); 3535 3536 /* Prepare for FP emission. */ 3537 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3538 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3539 3540 /* tex inst */ 3541 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3542 R500_INST_TEX_SEM_WAIT | 3543 R500_INST_RGB_WMASK_R | 3544 R500_INST_RGB_WMASK_G | 3545 R500_INST_RGB_WMASK_B | 3546 R500_INST_ALPHA_WMASK | 3547 R500_INST_RGB_CLAMP | 3548 R500_INST_ALPHA_CLAMP)); 3549 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3550 R500_TEX_INST_LD | 3551 R500_TEX_SEM_ACQUIRE | 3552 R500_TEX_IGNORE_UNCOVERED)); 3553 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3554 R500_TEX_SRC_S_SWIZ_R | 3555 R500_TEX_SRC_T_SWIZ_G | 3556 R500_TEX_DST_ADDR(0) | 3557 R500_TEX_DST_R_SWIZ_R | 3558 R500_TEX_DST_G_SWIZ_G | 3559 R500_TEX_DST_B_SWIZ_B | 3560 R500_TEX_DST_A_SWIZ_A)); 3561 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3562 R500_DX_S_SWIZ_R | 3563 R500_DX_T_SWIZ_R | 3564 R500_DX_R_SWIZ_R | 3565 R500_DX_Q_SWIZ_R | 3566 R500_DY_ADDR(0) | 3567 R500_DY_S_SWIZ_R | 3568 R500_DY_T_SWIZ_R | 3569 R500_DY_R_SWIZ_R | 3570 R500_DY_Q_SWIZ_R)); 3571 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3572 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3573 3574 /* ALU inst */ 3575 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 3576 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3577 R500_INST_TEX_SEM_WAIT | 3578 R500_INST_RGB_WMASK_R | 3579 R500_INST_RGB_WMASK_G | 3580 R500_INST_RGB_WMASK_B | 3581 R500_INST_ALPHA_WMASK)); 3582 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) 
| 3583 R500_RGB_ADDR0_CONST | 3584 R500_RGB_ADDR1(0) | 3585 R500_RGB_ADDR2(0) | 3586 R500_RGB_ADDR2_CONST)); 3587 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3588 R500_ALPHA_ADDR0_CONST | 3589 R500_ALPHA_ADDR1(0) | 3590 R500_ALPHA_ADDR2(0) | 3591 R500_ALPHA_ADDR2_CONST)); 3592 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3593 R500_ALU_RGB_R_SWIZ_A_A | 3594 R500_ALU_RGB_G_SWIZ_A_A | 3595 R500_ALU_RGB_B_SWIZ_A_A | 3596 R500_ALU_RGB_SEL_B_SRC1 | 3597 R500_ALU_RGB_R_SWIZ_B_G | 3598 R500_ALU_RGB_B_SWIZ_B_G | 3599 R500_ALU_RGB_G_SWIZ_B_G)); 3600 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3601 R500_ALPHA_ADDRD(1) | 3602 R500_ALPHA_SWIZ_A_0 | 3603 R500_ALPHA_SWIZ_B_0)); 3604 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3605 R500_ALU_RGBA_ADDRD(1) | 3606 R500_ALU_RGBA_SEL_C_SRC0 | 3607 R500_ALU_RGBA_R_SWIZ_R | 3608 R500_ALU_RGBA_G_SWIZ_G | 3609 R500_ALU_RGBA_B_SWIZ_B | 3610 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3611 R500_ALU_RGBA_A_SWIZ_0)); 3612 3613 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 3614 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3615 R500_INST_TEX_SEM_WAIT | 3616 R500_INST_RGB_WMASK_R | 3617 R500_INST_RGB_WMASK_G | 3618 R500_INST_RGB_WMASK_B | 3619 R500_INST_ALPHA_WMASK)); 3620 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3621 R500_RGB_ADDR0_CONST | 3622 R500_RGB_ADDR1(0) | 3623 R500_RGB_ADDR2(1))); 3624 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3625 R500_ALPHA_ADDR0_CONST | 3626 R500_ALPHA_ADDR1(0) | 3627 R500_ALPHA_ADDR2(1))); 3628 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3629 R500_ALU_RGB_R_SWIZ_A_R | 3630 R500_ALU_RGB_G_SWIZ_A_G | 3631 R500_ALU_RGB_B_SWIZ_A_B | 3632 R500_ALU_RGB_SEL_B_SRC1 | 3633 R500_ALU_RGB_R_SWIZ_B_B | 3634 R500_ALU_RGB_B_SWIZ_B_B | 3635 R500_ALU_RGB_G_SWIZ_B_B)); 3636 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3637 R500_ALPHA_ADDRD(1) | 3638 R500_ALPHA_SWIZ_A_0 | 3639 
R500_ALPHA_SWIZ_B_0)); 3640 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3641 R500_ALU_RGBA_ADDRD(1) | 3642 R500_ALU_RGBA_SEL_C_SRC2 | 3643 R500_ALU_RGBA_R_SWIZ_R | 3644 R500_ALU_RGBA_G_SWIZ_G | 3645 R500_ALU_RGBA_B_SWIZ_B | 3646 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3647 R500_ALU_RGBA_A_SWIZ_0)); 3648 3649 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 3650 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3651 R500_INST_TEX_SEM_WAIT | 3652 R500_INST_LAST | 3653 R500_INST_RGB_OMASK_R | 3654 R500_INST_RGB_OMASK_G | 3655 R500_INST_RGB_OMASK_B | 3656 R500_INST_ALPHA_OMASK | 3657 R500_INST_RGB_CLAMP | 3658 R500_INST_ALPHA_CLAMP)); 3659 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3660 R500_RGB_ADDR0_CONST | 3661 R500_RGB_ADDR1(0) | 3662 R500_RGB_ADDR2(1))); 3663 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3664 R500_ALPHA_ADDR0_CONST | 3665 R500_ALPHA_ADDR1(0) | 3666 R500_ALPHA_ADDR2(1))); 3667 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3668 R500_ALU_RGB_R_SWIZ_A_R | 3669 R500_ALU_RGB_G_SWIZ_A_G | 3670 R500_ALU_RGB_B_SWIZ_A_B | 3671 R500_ALU_RGB_SEL_B_SRC1 | 3672 R500_ALU_RGB_R_SWIZ_B_R | 3673 R500_ALU_RGB_B_SWIZ_B_R | 3674 R500_ALU_RGB_G_SWIZ_B_R)); 3675 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3676 R500_ALPHA_ADDRD(1) | 3677 R500_ALPHA_SWIZ_A_0 | 3678 R500_ALPHA_SWIZ_B_0)); 3679 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3680 R500_ALU_RGBA_ADDRD(1) | 3681 R500_ALU_RGBA_SEL_C_SRC2 | 3682 R500_ALU_RGBA_R_SWIZ_R | 3683 R500_ALU_RGBA_G_SWIZ_G | 3684 R500_ALU_RGBA_B_SWIZ_B | 3685 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3686 R500_ALU_RGBA_A_SWIZ_1)); 3687 } 3688 3689 /* Shader constants. 
*/ 3690 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3691 3692 /* constant 0: off, yco */ 3693 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 3694 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 3695 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 3696 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 3697 /* constant 1: uco */ 3698 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 3699 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 3700 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 3701 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 3702 /* constant 2: vco */ 3703 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 3704 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 3705 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 3706 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 3707 3708 ADVANCE_RING(); 3709 } 3710 3711 BEGIN_ACCEL_RELOC(6, 2); 3712 OUT_RING_REG(R300_TX_INVALTAGS, 0); 3713 OUT_RING_REG(R300_TX_ENABLE, txenable); 3714 3715 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 3716 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 3717 3718 /* no need to enable blending */ 3719 OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 3720 3721 OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 3722 ADVANCE_RING(); 3723 3724 if (pPriv->vsync) { 3725 xf86CrtcPtr crtc; 3726 if (pPriv->desired_crtc) 3727 crtc = pPriv->desired_crtc; 3728 else 3729 crtc = radeon_pick_best_crtc(pScrn, FALSE, 3730 pPriv->drw_x, 3731 pPriv->drw_x + pPriv->dst_w, 3732 pPriv->drw_y, 3733 pPriv->drw_y + pPriv->dst_h); 3734 if (crtc) 3735 RADEONWaitForVLine(pScrn, pPixmap, 3736 crtc, 3737 pPriv->drw_y - crtc->y, 3738 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 3739 } 3740 3741 return TRUE; 3742} 3743 3744static void 3745R500DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 3746{ 3747 RADEONInfoPtr info = RADEONPTR(pScrn); 3748 PixmapPtr pPixmap = pPriv->pPixmap; 3749 int dstxoff, dstyoff; 3750 BoxPtr pBox 
= REGION_RECTS(&pPriv->clip); 3751 int nBox = REGION_NUM_RECTS(&pPriv->clip); 3752 3753#ifdef COMPOSITE 3754 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 3755 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 3756#else 3757 dstxoff = 0; 3758 dstyoff = 0; 3759#endif 3760 3761 if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3762 return; 3763 3764 /* 3765 * Rendering of the actual polygon is done in two different 3766 * ways depending on chip generation: 3767 * 3768 * < R300: 3769 * 3770 * These chips can render a rectangle in one pass, so 3771 * handling is pretty straight-forward. 3772 * 3773 * >= R300: 3774 * 3775 * These chips can accept a quad, but will render it as 3776 * two triangles which results in a diagonal tear. Instead 3777 * We render a single, large triangle and use the scissor 3778 * functionality to restrict it to the desired rectangle. 3779 * Due to guardband limits on r3xx/r4xx, we can only use 3780 * the single triangle up to 2880 pixels; above that we 3781 * render as a quad. 
3782 */ 3783 3784 while (nBox--) { 3785 float srcX, srcY, srcw, srch; 3786 int dstX, dstY, dstw, dsth; 3787 int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 3788 3789 if (draw_size > radeon_cs_space_remaining(pScrn)) { 3790 radeon_cs_flush_indirect(pScrn); 3791 if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3792 return; 3793 } 3794 3795 dstX = pBox->x1 + dstxoff; 3796 dstY = pBox->y1 + dstyoff; 3797 dstw = pBox->x2 - pBox->x1; 3798 dsth = pBox->y2 - pBox->y1; 3799 3800 srcX = pPriv->src_x; 3801 srcX += ((pBox->x1 - pPriv->drw_x) * 3802 pPriv->src_w) / (float)pPriv->dst_w; 3803 srcY = pPriv->src_y; 3804 srcY += ((pBox->y1 - pPriv->drw_y) * 3805 pPriv->src_h) / (float)pPriv->dst_h; 3806 3807 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 3808 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 3809 3810 BEGIN_RING(2*2); 3811 OUT_RING_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 3812 ((dstY) << R300_SCISSOR_Y_SHIFT))); 3813 OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 3814 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 3815 ADVANCE_RING(); 3816 3817 BEGIN_RING(3 * pPriv->vtx_count + 4); 3818 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 3819 3 * pPriv->vtx_count)); 3820 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 3821 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 3822 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 3823 3824 if (pPriv->bicubic_enabled) { 3825 VTX_OUT_6((float)dstX, (float)dstY, 3826 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 3827 (float)srcX + 0.5, (float)srcY + 0.5); 3828 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 3829 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 3830 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 3831 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 3832 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3833 (float)srcY / pPriv->h, 3834 (float)srcX + 
(float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 3835 (float)srcY + 0.5); 3836 } else { 3837 /* 3838 * Render a big, scissored triangle. This means 3839 * increasing the triangle size and adjusting 3840 * texture coordinates. 3841 */ 3842 VTX_OUT_4((float)dstX, (float)dstY, 3843 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 3844 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 3845 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 3846 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 3847 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3848 (float)srcY / pPriv->h); 3849 } 3850 3851 /* flushing is pipelined, free/finish is not */ 3852 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 3853 3854 ADVANCE_RING(); 3855 3856 pBox++; 3857 } 3858 3859 BEGIN_RING(2*3); 3860 OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA); 3861 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 3862 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 3863 ADVANCE_RING(); 3864 3865 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 3866} 3867 3868#undef VTX_OUT_4 3869#undef VTX_OUT_6 3870