radeon_textured_videofuncs.c revision de2362d3
11.9Schristos/* 21.3Sperry * Copyright 2008 Alex Deucher 31.1Scjs * 41.7Sagc * Permission is hereby granted, free of charge, to any person obtaining a 51.1Scjs * copy of this software and associated documentation files (the "Software"), 61.1Scjs * to deal in the Software without restriction, including without limitation 71.1Scjs * the rights to use, copy, modify, merge, publish, distribute, sublicense, 81.1Scjs * and/or sell copies of the Software, and to permit persons to whom the 91.1Scjs * Software is furnished to do so, subject to the following conditions: 101.1Scjs * 111.1Scjs * The above copyright notice and this permission notice (including the next 121.1Scjs * paragraph) shall be included in all copies or substantial portions of the 131.1Scjs * Software. 141.1Scjs * 151.1Scjs * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 161.6Sagc * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 171.6Sagc * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 181.6Sagc * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 191.6Sagc * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 201.6Sagc * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 211.6Sagc * SOFTWARE. 221.6Sagc * 231.6Sagc * 241.6Sagc * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 251.6Sagc * 261.6Sagc */ 271.6Sagc 281.6Sagc#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 291.6Sagcdo { \ 301.6Sagc OUT_RING(F_TO_DW(_dstX)); \ 311.6Sagc OUT_RING(F_TO_DW(_dstY)); \ 321.6Sagc OUT_RING(F_TO_DW(_srcX)); \ 331.4Schristos OUT_RING(F_TO_DW(_srcY)); \ 341.1Scjs OUT_RING(F_TO_DW(_maskX)); \ 351.4Schristos OUT_RING(F_TO_DW(_maskY)); \ 361.1Scjs} while (0) 371.4Schristos 381.9Schristos#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 391.4Schristosdo { \ 401.1Scjs OUT_RING(F_TO_DW(_dstX)); \ 411.1Scjs OUT_RING(F_TO_DW(_dstY)); \ 421.1Scjs OUT_RING(F_TO_DW(_srcX)); \ 431.1Scjs OUT_RING(F_TO_DW(_srcY)); \ 441.1Scjs} while (0) 451.1Scjs 461.1Scjs 471.1Scjsstatic Bool 481.1ScjsRADEONPrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 491.1Scjs{ 501.1Scjs RADEONInfoPtr info = RADEONPTR(pScrn); 511.1Scjs PixmapPtr pPixmap = pPriv->pPixmap; 521.1Scjs struct radeon_exa_pixmap_priv *driver_priv; 531.1Scjs struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 541.1Scjs uint32_t txformat, txsize, txpitch; 551.1Scjs uint32_t dst_pitch, dst_format; 561.1Scjs uint32_t colorpitch; 571.1Scjs int pixel_shift; 581.1Scjs int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 591.1Scjs int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 601.2Scjs int ret; 611.4Schristos 621.4Schristos radeon_cs_space_reset_bos(info->cs); 631.4Schristos radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 641.4Schristos 651.4Schristos if (pPriv->bicubic_enabled) 661.4Schristos radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 671.1Scjs 681.1Scjs driver_priv = exaGetPixmapDriverPrivate(pPixmap); 691.1Scjs radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 701.1Scjs 711.1Scjs ret = radeon_cs_space_check(info->cs); 721.1Scjs if (ret) { 731.1Scjs ErrorF("Not enough RAM to hw accel xv operation\n"); 741.1Scjs return FALSE; 751.1Scjs } 761.1Scjs 771.4Schristos pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 781.1Scjs 791.1Scjs dst_pitch = exaGetPixmapPitch(pPixmap); 801.1Scjs RADEON_SWITCH_TO_3D(); 811.1Scjs 821.1Scjs /* Same for R100/R200 */ 831.4Schristos switch (pPixmap->drawable.bitsPerPixel) { 841.1Scjs case 16: 851.1Scjs if (pPixmap->drawable.depth == 15) 861.1Scjs dst_format = RADEON_COLOR_FORMAT_ARGB1555; 871.1Scjs else 881.1Scjs dst_format = RADEON_COLOR_FORMAT_RGB565; 891.1Scjs break; 901.1Scjs case 32: 911.1Scjs dst_format = RADEON_COLOR_FORMAT_ARGB8888; 921.1Scjs break; 931.1Scjs default: 941.1Scjs return FALSE; 951.1Scjs } 961.1Scjs 971.1Scjs if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 981.1Scjs pPriv->is_planar = TRUE; 991.1Scjs txformat = RADEON_TXFORMAT_Y8; 1001.1Scjs } else { 1011.1Scjs pPriv->is_planar = FALSE; 1021.1Scjs if (pPriv->id == FOURCC_UYVY) 1031.1Scjs txformat = RADEON_TXFORMAT_YVYU422; 1041.1Scjs else 1051.1Scjs txformat = RADEON_TXFORMAT_VYUY422; 1061.1Scjs } 1071.1Scjs 1081.1Scjs txformat |= RADEON_TXFORMAT_NON_POWER2; 1091.1Scjs 1101.1Scjs colorpitch = dst_pitch >> pixel_shift; 1111.1Scjs 1121.1Scjs if (RADEONTilingEnabled(pScrn, pPixmap)) 1131.1Scjs colorpitch |= RADEON_COLOR_TILE_ENABLE; 1141.1Scjs 1151.1Scjs BEGIN_ACCEL_RELOC(4,2); 1161.1Scjs 1171.1Scjs OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 1181.1Scjs EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 1191.1Scjs EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 1201.1Scjs OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 1211.1Scjs RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 1221.1Scjs 1231.1Scjs ADVANCE_RING(); 1241.1Scjs 1251.1Scjs if (pPriv->is_planar) { 1261.1Scjs /* need 2 texcoord sets (even though they are identical) due 1271.1Scjs to denormalization! hw apparently can't premultiply 1281.1Scjs same coord set by different texture size */ 1291.1Scjs pPriv->vtx_count = 6; 1301.1Scjs 1311.1Scjs txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 1321.1Scjs (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 1331.5Sitojun txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1341.5Sitojun txpitch -= 32; 1351.1Scjs 1361.1Scjs BEGIN_ACCEL_RELOC(23, 3); 1371.1Scjs 1381.1Scjs OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 1391.1Scjs RADEON_SE_VTX_FMT_ST0 | 1401.1Scjs RADEON_SE_VTX_FMT_ST1)); 1411.1Scjs 1421.1Scjs OUT_RING_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 1431.1Scjs RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 1441.1Scjs RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 1451.1Scjs RADEON_PLANAR_YUV_ENABLE)); 1461.1Scjs 1471.1Scjs /* Y */ 1481.1Scjs OUT_RING_REG(RADEON_PP_TXFILTER_0, 1491.1Scjs RADEON_MAG_FILTER_LINEAR | 1501.1Scjs RADEON_MIN_FILTER_LINEAR | 1511.1Scjs RADEON_CLAMP_S_CLAMP_LAST | 1521.1Scjs RADEON_CLAMP_T_CLAMP_LAST | 1531.1Scjs RADEON_YUV_TO_RGB); 1541.1Scjs OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 1551.1Scjs OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 1561.1Scjs OUT_RING_REG(RADEON_PP_TXCBLEND_0, 1571.1Scjs RADEON_COLOR_ARG_A_ZERO | 1581.1Scjs RADEON_COLOR_ARG_B_ZERO | 1591.4Schristos RADEON_COLOR_ARG_C_T0_COLOR | 1601.1Scjs RADEON_BLEND_CTL_ADD | 1611.1Scjs RADEON_CLAMP_TX); 1621.1Scjs OUT_RING_REG(RADEON_PP_TXABLEND_0, 1631.1Scjs RADEON_ALPHA_ARG_A_ZERO | 1641.1Scjs RADEON_ALPHA_ARG_B_ZERO | 1651.1Scjs RADEON_ALPHA_ARG_C_T0_ALPHA | 1661.1Scjs RADEON_BLEND_CTL_ADD | 1671.1Scjs RADEON_CLAMP_TX); 1681.4Schristos 1691.1Scjs OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 1701.1Scjs (pPriv->w - 1) | 1711.1Scjs ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 1721.1Scjs OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 1731.1Scjs pPriv->src_pitch - 32); 1741.4Schristos 1751.1Scjs /* U */ 1761.1Scjs OUT_RING_REG(RADEON_PP_TXFILTER_1, 1771.1Scjs RADEON_MAG_FILTER_LINEAR | 1781.1Scjs RADEON_MIN_FILTER_LINEAR | 1791.1Scjs RADEON_CLAMP_S_CLAMP_LAST | 1801.1Scjs RADEON_CLAMP_T_CLAMP_LAST); 1811.1Scjs OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 1821.1Scjs OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 1831.1Scjs OUT_RING_REG(RADEON_PP_TXCBLEND_1, 1841.1Scjs RADEON_COLOR_ARG_A_ZERO | 1851.1Scjs RADEON_COLOR_ARG_B_ZERO | 1861.1Scjs RADEON_COLOR_ARG_C_T0_COLOR | 1871.1Scjs RADEON_BLEND_CTL_ADD | 1881.1Scjs RADEON_CLAMP_TX); 1891.1Scjs OUT_RING_REG(RADEON_PP_TXABLEND_1, 1901.1Scjs RADEON_ALPHA_ARG_A_ZERO | 1911.4Schristos RADEON_ALPHA_ARG_B_ZERO | 1921.1Scjs RADEON_ALPHA_ARG_C_T0_ALPHA | 1931.1Scjs RADEON_BLEND_CTL_ADD | 1941.1Scjs RADEON_CLAMP_TX); 1951.1Scjs 1961.1Scjs OUT_RING_REG(RADEON_PP_TEX_SIZE_1, txsize); 1971.1Scjs OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch); 1981.1Scjs 1991.1Scjs /* V */ 2001.1Scjs OUT_RING_REG(RADEON_PP_TXFILTER_2, 2011.1Scjs RADEON_MAG_FILTER_LINEAR | 2021.1Scjs RADEON_MIN_FILTER_LINEAR | 2031.1Scjs RADEON_CLAMP_S_CLAMP_LAST | 2041.1Scjs RADEON_CLAMP_T_CLAMP_LAST); 2051.1Scjs OUT_RING_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 2061.1Scjs OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 2071.1Scjs OUT_RING_REG(RADEON_PP_TXCBLEND_2, 2081.1Scjs RADEON_COLOR_ARG_A_ZERO | 2091.1Scjs RADEON_COLOR_ARG_B_ZERO | 2101.4Schristos RADEON_COLOR_ARG_C_T0_COLOR | 2111.1Scjs RADEON_BLEND_CTL_ADD | 2121.1Scjs RADEON_CLAMP_TX); 2131.1Scjs OUT_RING_REG(RADEON_PP_TXABLEND_2, 2141.1Scjs RADEON_ALPHA_ARG_A_ZERO | 2151.1Scjs RADEON_ALPHA_ARG_B_ZERO | 2161.1Scjs RADEON_ALPHA_ARG_C_T0_ALPHA | 2171.1Scjs RADEON_BLEND_CTL_ADD | 2181.1Scjs RADEON_CLAMP_TX); 2191.1Scjs 2201.1Scjs OUT_RING_REG(RADEON_PP_TEX_SIZE_2, txsize); 2211.1Scjs OUT_RING_REG(RADEON_PP_TEX_PITCH_2, txpitch); 2221.1Scjs ADVANCE_RING(); 2231.1Scjs } else { 2241.1Scjs pPriv->vtx_count = 4; 2251.1Scjs BEGIN_ACCEL_RELOC(9, 1); 2261.5Sitojun 2271.1Scjs OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 2281.1Scjs RADEON_SE_VTX_FMT_ST0)); 2291.1Scjs 2301.1Scjs OUT_RING_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 2311.1Scjs 2321.1Scjs OUT_RING_REG(RADEON_PP_TXFILTER_0, 2331.1Scjs RADEON_MAG_FILTER_LINEAR | 2341.5Sitojun RADEON_MIN_FILTER_LINEAR | 2351.1Scjs RADEON_CLAMP_S_CLAMP_LAST | 2361.1Scjs RADEON_CLAMP_T_CLAMP_LAST | 2371.1Scjs RADEON_YUV_TO_RGB); 2381.1Scjs OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 2391.9Schristos OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 2401.9Schristos OUT_RING_REG(RADEON_PP_TXCBLEND_0, 2411.1Scjs RADEON_COLOR_ARG_A_ZERO | 2421.9Schristos RADEON_COLOR_ARG_B_ZERO | 2431.1Scjs RADEON_COLOR_ARG_C_T0_COLOR | 2441.1Scjs RADEON_BLEND_CTL_ADD | 2451.5Sitojun RADEON_CLAMP_TX); 2461.1Scjs OUT_RING_REG(RADEON_PP_TXABLEND_0, 2471.1Scjs RADEON_ALPHA_ARG_A_ZERO | 2481.1Scjs RADEON_ALPHA_ARG_B_ZERO | 2491.1Scjs RADEON_ALPHA_ARG_C_T0_ALPHA | 2501.1Scjs RADEON_BLEND_CTL_ADD | 2511.1Scjs RADEON_CLAMP_TX); 2521.1Scjs 2531.1Scjs OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 2541.1Scjs (pPriv->w - 1) | 2551.1Scjs ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 2561.1Scjs OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 2571.1Scjs pPriv->src_pitch - 32); 2581.1Scjs ADVANCE_RING(); 2591.1Scjs } 2601.1Scjs 2611.1Scjs BEGIN_RING(2*2); 2621.1Scjs OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 2631.1Scjs OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 2641.5Sitojun (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 2651.5Sitojun ADVANCE_RING(); 2661.1Scjs 2671.1Scjs if (pPriv->vsync) { 2681.1Scjs xf86CrtcPtr crtc; 2691.1Scjs if (pPriv->desired_crtc) 2701.1Scjs crtc = pPriv->desired_crtc; 2711.1Scjs else 2721.5Sitojun crtc = radeon_pick_best_crtc(pScrn, FALSE, 2731.1Scjs pPriv->drw_x, 2741.1Scjs pPriv->drw_x + pPriv->dst_w, 2751.1Scjs pPriv->drw_y, 2761.1Scjs pPriv->drw_y + pPriv->dst_h); 2771.1Scjs if (crtc) 2781.1Scjs RADEONWaitForVLine(pScrn, pPixmap, 2791.1Scjs crtc, 2801.1Scjs pPriv->drw_y - crtc->y, 2811.1Scjs (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2821.4Schristos } 2831.1Scjs 2841.1Scjs return TRUE; 2851.1Scjs} 2861.1Scjs 2871.1Scjsstatic void 2881.1ScjsRADEONDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2891.1Scjs{ 2901.1Scjs RADEONInfoPtr info = RADEONPTR(pScrn); 291 PixmapPtr pPixmap = pPriv->pPixmap; 292 int dstxoff, dstyoff; 293 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 294 int nBox = REGION_NUM_RECTS(&pPriv->clip); 295 296#ifdef COMPOSITE 297 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 298 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 299#else 300 dstxoff = 0; 301 dstyoff = 0; 302#endif 303 304 if (!RADEONPrepareTexturedVideo(pScrn, pPriv)) 305 return; 306 307 /* 308 * Rendering of the actual polygon is done in two different 309 * ways depending on chip generation: 310 * 311 * < R300: 312 * 313 * These chips can render a rectangle in one pass, so 314 * handling is pretty straight-forward. 315 * 316 * >= R300: 317 * 318 * These chips can accept a quad, but will render it as 319 * two triangles which results in a diagonal tear. Instead 320 * We render a single, large triangle and use the scissor 321 * functionality to restrict it to the desired rectangle. 322 * Due to guardband limits on r3xx/r4xx, we can only use 323 * the single triangle up to 2560/4021 pixels; above that we 324 * render as a quad. 325 */ 326 while (nBox) { 327 int draw_size = 3 * pPriv->vtx_count + 5; 328 int loop_boxes; 329 330 if (draw_size > radeon_cs_space_remaining(pScrn)) { 331 radeon_cs_flush_indirect(pScrn); 332 if (!RADEONPrepareTexturedVideo(pScrn, pPriv)) 333 return; 334 } 335 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 336 nBox -= loop_boxes; 337 338 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 339 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 340 loop_boxes * 3 * pPriv->vtx_count + 1)); 341 if (pPriv->is_planar) 342 OUT_RING(RADEON_CP_VC_FRMT_XY | 343 RADEON_CP_VC_FRMT_ST0 | 344 RADEON_CP_VC_FRMT_ST1); 345 else 346 OUT_RING(RADEON_CP_VC_FRMT_XY | 347 RADEON_CP_VC_FRMT_ST0); 348 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 349 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 350 RADEON_CP_VC_CNTL_MAOS_ENABLE | 351 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 352 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 353 354 while (loop_boxes--) { 355 float srcX, srcY, srcw, srch; 356 int dstX, dstY, dstw, dsth; 357 dstX = pBox->x1 + dstxoff; 358 dstY = pBox->y1 + dstyoff; 359 dstw = pBox->x2 - pBox->x1; 360 dsth = pBox->y2 - pBox->y1; 361 362 srcX = pPriv->src_x; 363 srcX += ((pBox->x1 - pPriv->drw_x) * 364 pPriv->src_w) / (float)pPriv->dst_w; 365 srcY = pPriv->src_y; 366 srcY += ((pBox->y1 - pPriv->drw_y) * 367 pPriv->src_h) / (float)pPriv->dst_h; 368 369 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 370 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 371 372 373 if (pPriv->is_planar) { 374 /* 375 * Just render a rect (using three coords). 376 */ 377 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 378 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 379 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 380 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 381 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 382 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 383 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 384 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 385 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 386 } else { 387 /* 388 * Just render a rect (using three coords). 389 */ 390 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 391 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 392 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 393 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 394 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 395 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 396 } 397 398 pBox++; 399 } 400 401 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 402 ADVANCE_RING(); 403 } 404 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 405} 406 407static Bool 408R200PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 409{ 410 RADEONInfoPtr info = RADEONPTR(pScrn); 411 PixmapPtr pPixmap = pPriv->pPixmap; 412 struct radeon_exa_pixmap_priv *driver_priv; 413 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 414 uint32_t txformat; 415 uint32_t txfilter, txsize, txpitch; 416 uint32_t dst_pitch, dst_format; 417 uint32_t colorpitch; 418 int pixel_shift; 419 int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 420 int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 421 /* note: in contrast to r300, use input biasing on uv components */ 422 const float Loff = -0.0627; 423 float uvcosf, uvsinf; 424 float yco, yoff; 425 float uco[3], vco[3]; 426 float bright, cont, sat; 427 int ref = pPriv->transform_index; 428 float ucscale = 0.25, vcscale = 0.25; 429 Bool needux8 = FALSE, needvx8 = FALSE; 430 int ret; 431 432 radeon_cs_space_reset_bos(info->cs); 433 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 434 435 if (pPriv->bicubic_enabled) 436 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 437 438 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 439 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 440 441 ret = radeon_cs_space_check(info->cs); 442 if (ret) { 443 ErrorF("Not enough RAM to hw accel xv operation\n"); 444 return FALSE; 445 } 446 447 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 448 449 dst_pitch = exaGetPixmapPitch(pPixmap); 450 451 RADEON_SWITCH_TO_3D(); 452 453 /* Same for R100/R200 */ 454 switch (pPixmap->drawable.bitsPerPixel) { 455 case 16: 456 if (pPixmap->drawable.depth == 15) 457 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 458 else 459 dst_format = RADEON_COLOR_FORMAT_RGB565; 460 break; 461 case 32: 462 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 463 break; 464 default: 465 return FALSE; 466 } 467 468 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 469 pPriv->is_planar = TRUE; 470 txformat = RADEON_TXFORMAT_I8; 471 } else { 472 pPriv->is_planar = FALSE; 473 if (pPriv->id == FOURCC_UYVY) 474 txformat = RADEON_TXFORMAT_YVYU422; 475 else 476 txformat = RADEON_TXFORMAT_VYUY422; 477 } 478 479 txformat |= RADEON_TXFORMAT_NON_POWER2; 480 481 colorpitch = dst_pitch >> pixel_shift; 482 483 if (RADEONTilingEnabled(pScrn, pPixmap)) 484 colorpitch |= RADEON_COLOR_TILE_ENABLE; 485 486 BEGIN_ACCEL_RELOC(4,2); 487 488 OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 489 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 490 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 491 492 OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 493 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 494 495 ADVANCE_RING(); 496 497 txfilter = R200_MAG_FILTER_LINEAR | 498 R200_MIN_FILTER_LINEAR | 499 R200_CLAMP_S_CLAMP_LAST | 500 R200_CLAMP_T_CLAMP_LAST; 501 502 /* contrast can cause constant overflow, clamp */ 503 cont = RTFContrast(pPriv->contrast); 504 if (cont * trans[ref].RefLuma > 2.0) 505 cont = 2.0 / trans[ref].RefLuma; 506 /* brightness is only from -0.5 to 0.5 should be safe */ 507 bright = RTFBrightness(pPriv->brightness); 508 /* saturation can also cause overflow, clamp */ 509 sat = RTFSaturation(pPriv->saturation); 510 if (sat * trans[ref].RefBCb > 4.0) 511 sat = 4.0 / trans[ref].RefBCb; 512 uvcosf = sat * cos(RTFHue(pPriv->hue)); 513 uvsinf = sat * sin(RTFHue(pPriv->hue)); 514 515 yco = trans[ref].RefLuma * cont; 516 uco[0] = -trans[ref].RefRCr * uvsinf; 517 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 518 uco[2] = trans[ref].RefBCb * uvcosf; 519 vco[0] = trans[ref].RefRCr * uvcosf; 520 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 521 vco[2] = trans[ref].RefBCb * uvsinf; 522 yoff = Loff * yco + bright; 523 524 if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 525 needux8 = TRUE; 526 ucscale = 0.125; 527 } 528 if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 529 needvx8 = TRUE; 530 vcscale = 0.125; 531 } 532 533 if (pPriv->is_planar) { 534 /* need 2 texcoord sets (even though they are identical) due 535 to denormalization! hw apparently can't premultiply 536 same coord set by different texture size */ 537 pPriv->vtx_count = 6; 538 539 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 540 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 541 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 542 txpitch -= 32; 543 544 BEGIN_ACCEL_RELOC(36, 3); 545 546 OUT_RING_REG(RADEON_PP_CNTL, 547 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 548 RADEON_TEX_BLEND_0_ENABLE | 549 RADEON_TEX_BLEND_1_ENABLE | 550 RADEON_TEX_BLEND_2_ENABLE); 551 552 OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 553 OUT_RING_REG(R200_SE_VTX_FMT_1, 554 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 555 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 556 557 OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 558 OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 559 OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 560 OUT_RING_REG(R200_PP_TXSIZE_0, 561 (pPriv->w - 1) | 562 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 563 OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 564 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 565 566 OUT_RING_REG(R200_PP_TXFILTER_1, txfilter); 567 OUT_RING_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 568 OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0); 569 OUT_RING_REG(R200_PP_TXSIZE_1, txsize); 570 OUT_RING_REG(R200_PP_TXPITCH_1, txpitch); 571 OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 572 573 OUT_RING_REG(R200_PP_TXFILTER_2, txfilter); 574 OUT_RING_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 575 OUT_RING_REG(R200_PP_TXFORMAT_X_2, 0); 576 OUT_RING_REG(R200_PP_TXSIZE_2, txsize); 577 OUT_RING_REG(R200_PP_TXPITCH_2, txpitch); 578 OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 579 580 /* similar to r300 code. Note the big problem is that hardware constants 581 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 582 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 583 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 584 * the constants not. To get larger range can use output scale, but for 585 * that 2.018 value we need a total scale by 8, which means the constants 586 * really have no accuracy whatsoever (5 fractional bits only). 587 * The only direct way to get high precision "constants" into the fragment 588 * pipe I know of is to use the texcoord interpolator (not color, this one 589 * is 8 bit only too), which seems a bit expensive. We're lucky though it 590 * seems the values we need seem to fit better than worst case (get about 591 * 6 fractional bits for this instead of 5, at least when not correcting for 592 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 593 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 594 * even with non-default saturation/hue/contrast/brightness adjustments, 595 * it gets a little crazy and ultimately precision might still be lacking. 596 * 597 * A higher precision (8 fractional bits) version might just put uco into 598 * a texcoord, and calculate a new vcoconst in the shader, like so: 599 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 600 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 601 * vcocalc = ADD temp, bias/scale(cohelper), vco 602 * would in total use 4 tex units, 4 instructions which seems fairly 603 * balanced for this architecture (instead of 3 + 3 for the solution here) 604 * 605 * temp = MAD(yco, yuv.yyyy, yoff) 606 * temp = MAD(uco, yuv.uuuu, temp) 607 * result = MAD(vco, yuv.vvvv, temp) 608 * 609 * note first mad produces actually scalar, hence we transform 610 * it into a dp2a to get 8 bit precision of yco instead of 7 - 611 * That's assuming hw correctly expands consts to internal precision. 612 * (y * 1 + y * (yco - 1) + yoff) 613 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 614 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 615 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 616 * 617 * vco, uco need bias (and hence scale too) 618 * 619 */ 620 621 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 622 OUT_RING_REG(R200_PP_TXCBLEND_0, 623 R200_TXC_ARG_A_TFACTOR_COLOR | 624 R200_TXC_ARG_B_R0_COLOR | 625 R200_TXC_ARG_C_TFACTOR_COLOR | 626 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 627 R200_TXC_OP_DOT2_ADD); 628 OUT_RING_REG(R200_PP_TXCBLEND2_0, 629 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 630 R200_TXC_SCALE_INV2 | 631 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 632 OUT_RING_REG(R200_PP_TXABLEND_0, 633 R200_TXA_ARG_A_ZERO | 634 R200_TXA_ARG_B_ZERO | 635 R200_TXA_ARG_C_ZERO | 636 R200_TXA_OP_MADD); 637 OUT_RING_REG(R200_PP_TXABLEND2_0, 638 R200_TXA_OUTPUT_REG_NONE); 639 640 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 641 OUT_RING_REG(R200_PP_TXCBLEND_1, 642 R200_TXC_ARG_A_TFACTOR_COLOR | 643 R200_TXC_BIAS_ARG_A | 644 R200_TXC_SCALE_ARG_A | 645 R200_TXC_ARG_B_R1_COLOR | 646 R200_TXC_BIAS_ARG_B | 647 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 648 R200_TXC_ARG_C_R0_COLOR | 649 R200_TXC_OP_MADD); 650 OUT_RING_REG(R200_PP_TXCBLEND2_1, 651 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 652 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 653 OUT_RING_REG(R200_PP_TXABLEND_1, 654 R200_TXA_ARG_A_ZERO | 655 R200_TXA_ARG_B_ZERO | 656 R200_TXA_ARG_C_ZERO | 657 R200_TXA_OP_MADD); 658 OUT_RING_REG(R200_PP_TXABLEND2_1, 659 R200_TXA_OUTPUT_REG_NONE); 660 661 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 662 OUT_RING_REG(R200_PP_TXCBLEND_2, 663 R200_TXC_ARG_A_TFACTOR_COLOR | 664 R200_TXC_BIAS_ARG_A | 665 R200_TXC_SCALE_ARG_A | 666 R200_TXC_ARG_B_R2_COLOR | 667 R200_TXC_BIAS_ARG_B | 668 (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 669 R200_TXC_ARG_C_R0_COLOR | 670 R200_TXC_OP_MADD); 671 OUT_RING_REG(R200_PP_TXCBLEND2_2, 672 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 673 R200_TXC_SCALE_2X | 674 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 675 OUT_RING_REG(R200_PP_TXABLEND_2, 676 R200_TXA_ARG_A_ZERO | 677 R200_TXA_ARG_B_ZERO | 678 R200_TXA_ARG_C_ZERO | 679 R200_TXA_COMP_ARG_C | 680 R200_TXA_OP_MADD); 681 OUT_RING_REG(R200_PP_TXABLEND2_2, 682 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 683 684 /* shader constants */ 685 OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 686 yco > 1.0 ? yco - 1.0: yco, 687 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 688 0.0)); 689 OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 690 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 691 uco[2] * ucscale + 0.5, 692 0.0)); 693 OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 694 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 695 vco[2] * vcscale + 0.5, 696 0.0)); 697 698 ADVANCE_RING(); 699 } else { 700 pPriv->vtx_count = 4; 701 702 BEGIN_ACCEL_RELOC(24, 1); 703 704 OUT_RING_REG(RADEON_PP_CNTL, 705 RADEON_TEX_0_ENABLE | 706 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 707 RADEON_TEX_BLEND_2_ENABLE); 708 709 OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 710 OUT_RING_REG(R200_SE_VTX_FMT_1, 711 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 712 713 OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 714 OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 715 OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 716 OUT_RING_REG(R200_PP_TXSIZE_0, 717 (pPriv->w - 1) | 718 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 719 OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 720 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 721 722 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 723 OUT_RING_REG(R200_PP_TXCBLEND_0, 724 R200_TXC_ARG_A_TFACTOR_COLOR | 725 R200_TXC_ARG_B_R0_COLOR | 726 R200_TXC_ARG_C_TFACTOR_COLOR | 727 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 728 R200_TXC_OP_DOT2_ADD); 729 OUT_RING_REG(R200_PP_TXCBLEND2_0, 730 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 731 R200_TXC_SCALE_INV2 | 732 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 733 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 734 OUT_RING_REG(R200_PP_TXABLEND_0, 735 R200_TXA_ARG_A_ZERO | 736 R200_TXA_ARG_B_ZERO | 737 R200_TXA_ARG_C_ZERO | 738 R200_TXA_OP_MADD); 739 OUT_RING_REG(R200_PP_TXABLEND2_0, 740 R200_TXA_OUTPUT_REG_NONE); 741 742 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 743 OUT_RING_REG(R200_PP_TXCBLEND_1, 744 R200_TXC_ARG_A_TFACTOR_COLOR | 745 R200_TXC_BIAS_ARG_A | 746 R200_TXC_SCALE_ARG_A | 747 R200_TXC_ARG_B_R0_COLOR | 748 R200_TXC_BIAS_ARG_B | 749 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 750 R200_TXC_ARG_C_R1_COLOR | 751 R200_TXC_OP_MADD); 752 OUT_RING_REG(R200_PP_TXCBLEND2_1, 753 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 754 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 755 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 756 OUT_RING_REG(R200_PP_TXABLEND_1, 757 R200_TXA_ARG_A_ZERO | 758 R200_TXA_ARG_B_ZERO | 759 R200_TXA_ARG_C_ZERO | 760 R200_TXA_OP_MADD); 761 OUT_RING_REG(R200_PP_TXABLEND2_1, 762 R200_TXA_OUTPUT_REG_NONE); 763 764 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 765 OUT_RING_REG(R200_PP_TXCBLEND_2, 766 R200_TXC_ARG_A_TFACTOR_COLOR | 767 R200_TXC_BIAS_ARG_A | 768 R200_TXC_SCALE_ARG_A | 769 R200_TXC_ARG_B_R0_COLOR | 770 R200_TXC_BIAS_ARG_B | 771 (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 772 R200_TXC_ARG_C_R1_COLOR | 773 R200_TXC_OP_MADD); 774 OUT_RING_REG(R200_PP_TXCBLEND2_2, 775 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 776 R200_TXC_SCALE_2X | 777 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 778 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 779 OUT_RING_REG(R200_PP_TXABLEND_2, 780 R200_TXA_ARG_A_ZERO | 781 R200_TXA_ARG_B_ZERO | 782 R200_TXA_ARG_C_ZERO | 783 R200_TXA_COMP_ARG_C | 784 R200_TXA_OP_MADD); 785 OUT_RING_REG(R200_PP_TXABLEND2_2, 786 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 787 788 /* shader constants */ 789 OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 790 yco > 1.0 ? yco - 1.0: yco, 791 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 792 0.0)); 793 OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 794 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 795 uco[2] * ucscale + 0.5, 796 0.0)); 797 OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 798 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 799 vco[2] * vcscale + 0.5, 800 0.0)); 801 802 ADVANCE_RING(); 803 } 804 805 BEGIN_RING(2*2); 806 OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 807 OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 808 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 809 ADVANCE_RING(); 810 811 if (pPriv->vsync) { 812 xf86CrtcPtr crtc; 813 if (pPriv->desired_crtc) 814 crtc = pPriv->desired_crtc; 815 else 816 crtc = radeon_pick_best_crtc(pScrn, FALSE, 817 pPriv->drw_x, 818 pPriv->drw_x + pPriv->dst_w, 819 pPriv->drw_y, 820 pPriv->drw_y + pPriv->dst_h); 821 if (crtc) 822 RADEONWaitForVLine(pScrn, pPixmap, 823 crtc, 824 pPriv->drw_y - crtc->y, 825 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 826 } 827 828 return TRUE; 829} 830 831static void 832R200DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 833{ 834 RADEONInfoPtr info = RADEONPTR(pScrn); 835 PixmapPtr pPixmap = pPriv->pPixmap; 836 int dstxoff, dstyoff; 837 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 838 int nBox = REGION_NUM_RECTS(&pPriv->clip); 839 840#ifdef COMPOSITE 841 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 842 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 843#else 844 dstxoff = 0; 845 dstyoff = 0; 846#endif 847 848 if (!R200PrepareTexturedVideo(pScrn, pPriv)) 849 return; 850 851 /* 852 * Rendering of the actual polygon is done in two different 853 * ways depending on chip generation: 854 * 855 * < R300: 856 * 857 * These chips can render a rectangle in one pass, so 858 * handling is pretty straight-forward. 859 * 860 * >= R300: 861 * 862 * These chips can accept a quad, but will render it as 863 * two triangles which results in a diagonal tear. Instead 864 * We render a single, large triangle and use the scissor 865 * functionality to restrict it to the desired rectangle. 866 * Due to guardband limits on r3xx/r4xx, we can only use 867 * the single triangle up to 2560/4021 pixels; above that we 868 * render as a quad. 869 */ 870 871 while (nBox) { 872 int draw_size = 3 * pPriv->vtx_count + 4; 873 int loop_boxes; 874 875 if (draw_size > radeon_cs_space_remaining(pScrn)) { 876 radeon_cs_flush_indirect(pScrn); 877 if (!R200PrepareTexturedVideo(pScrn, pPriv)) 878 return; 879 } 880 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 881 nBox -= loop_boxes; 882 883 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 4); 884 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 885 loop_boxes * 3 * pPriv->vtx_count)); 886 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 887 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 888 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 889 890 while (loop_boxes--) { 891 float srcX, srcY, srcw, srch; 892 int dstX, dstY, dstw, dsth; 893 dstX = pBox->x1 + dstxoff; 894 dstY = pBox->y1 + dstyoff; 895 dstw = pBox->x2 - pBox->x1; 896 dsth = pBox->y2 - pBox->y1; 897 898 srcX = pPriv->src_x; 899 srcX += ((pBox->x1 - pPriv->drw_x) * 900 pPriv->src_w) / (float)pPriv->dst_w; 901 srcY = pPriv->src_y; 902 srcY += ((pBox->y1 - pPriv->drw_y) * 903 pPriv->src_h) / (float)pPriv->dst_h; 904 905 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 906 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 907 908 if (pPriv->is_planar) { 909 /* 910 * Just render a rect (using three coords). 911 */ 912 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 913 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 914 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 915 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 916 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 917 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 918 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 919 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 920 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 921 } else { 922 /* 923 * Just render a rect (using three coords). 924 */ 925 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 926 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 927 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 928 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 929 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 930 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 931 } 932 933 pBox++; 934 } 935 936 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 937 ADVANCE_RING(); 938 } 939 940 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 941} 942 943static Bool 944R300PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 945{ 946 RADEONInfoPtr info = RADEONPTR(pScrn); 947 PixmapPtr pPixmap = pPriv->pPixmap; 948 struct radeon_exa_pixmap_priv *driver_priv; 949 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 950 uint32_t txfilter, txformat0, txformat1, txpitch; 951 uint32_t dst_pitch, dst_format; 952 uint32_t txenable, colorpitch; 953 uint32_t output_fmt; 954 int pixel_shift; 955 int ret; 956 957 radeon_cs_space_reset_bos(info->cs); 958 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 959 960 if (pPriv->bicubic_enabled) 961 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 962 963 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 964 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 965 966 ret = radeon_cs_space_check(info->cs); 967 if (ret) { 968 ErrorF("Not enough RAM to hw accel xv operation\n"); 969 return FALSE; 970 } 971 972 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 973 974 dst_pitch = exaGetPixmapPitch(pPixmap); 975 RADEON_SWITCH_TO_3D(); 976 977 if (pPriv->bicubic_enabled) 978 pPriv->vtx_count = 6; 979 else 980 pPriv->vtx_count = 4; 981 982 switch (pPixmap->drawable.bitsPerPixel) { 983 case 16: 984 if (pPixmap->drawable.depth == 15) 985 dst_format = R300_COLORFORMAT_ARGB1555; 986 else 987 dst_format = R300_COLORFORMAT_RGB565; 988 break; 989 case 32: 990 dst_format = R300_COLORFORMAT_ARGB8888; 991 break; 992 default: 993 return FALSE; 994 } 995 996 output_fmt = (R300_OUT_FMT_C4_8 | 997 R300_OUT_FMT_C0_SEL_BLUE | 998 R300_OUT_FMT_C1_SEL_GREEN | 999 R300_OUT_FMT_C2_SEL_RED | 1000 R300_OUT_FMT_C3_SEL_ALPHA); 1001 1002 colorpitch = dst_pitch >> pixel_shift; 1003 colorpitch |= dst_format; 1004 1005 if (RADEONTilingEnabled(pScrn, pPixmap)) 1006 colorpitch |= R300_COLORTILE; 1007 1008 1009 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1010 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1011 pPriv->is_planar = TRUE; 1012 else 1013 pPriv->is_planar = FALSE; 1014 1015 if (pPriv->is_planar) { 1016 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1017 txpitch = pPriv->src_pitch; 1018 } else { 1019 if (pPriv->id == FOURCC_UYVY) 1020 txformat1 = R300_TX_FORMAT_YVYU422; 1021 else 1022 txformat1 = R300_TX_FORMAT_VYUY422; 1023 1024 if (pPriv->bicubic_state != BICUBIC_OFF) 1025 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1026 1027 /* pitch is in pixels */ 1028 txpitch = pPriv->src_pitch / 2; 1029 } 1030 txpitch -= 1; 1031 1032 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1033 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1034 R300_TXPITCH_EN); 1035 1036 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1037 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1038 R300_TX_MAG_FILTER_LINEAR | 1039 R300_TX_MIN_FILTER_LINEAR | 1040 (0 << R300_TX_ID_SHIFT)); 1041 1042 BEGIN_ACCEL_RELOC(6, 1); 1043 OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 1044 OUT_RING_REG(R300_TX_FILTER1_0, 0); 1045 OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 1046 if (pPriv->is_planar) 1047 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1048 else 1049 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 1050 OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 1051 OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 1052 ADVANCE_RING(); 1053 1054 txenable = R300_TEX_0_ENABLE; 1055 1056 if (pPriv->is_planar) { 1057 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1058 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1059 R300_TXPITCH_EN); 1060 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1061 txpitch -= 1; 1062 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1063 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1064 R300_TX_MIN_FILTER_LINEAR | 1065 R300_TX_MAG_FILTER_LINEAR); 1066 1067 BEGIN_ACCEL_RELOC(12, 2); 1068 OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 1069 OUT_RING_REG(R300_TX_FILTER1_1, 0); 1070 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 1071 OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 1072 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 1073 OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 1074 OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 1075 OUT_RING_REG(R300_TX_FILTER1_2, 0); 1076 OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 1077 OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 1078 OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 1079 OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 1080 ADVANCE_RING(); 1081 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1082 } 1083 1084 if (pPriv->bicubic_enabled) { 1085 /* Size is 128x1 */ 1086 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 1087 (0x0 << R300_TXHEIGHT_SHIFT) | 1088 R300_TXPITCH_EN); 1089 /* Format is 32-bit floats, 4bpp */ 1090 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1091 /* Pitch is 127 (128-1) */ 1092 txpitch = 0x7f; 1093 /* Tex filter */ 1094 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1095 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1096 R300_TX_MIN_FILTER_NEAREST | 1097 R300_TX_MAG_FILTER_NEAREST | 1098 (1 << R300_TX_ID_SHIFT)); 1099 1100 BEGIN_ACCEL_RELOC(6, 1); 1101 OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 1102 OUT_RING_REG(R300_TX_FILTER1_1, 0); 1103 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 1104 OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 1105 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 1106 OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 1107 ADVANCE_RING(); 1108 1109 /* Enable tex 1 */ 1110 txenable |= R300_TEX_1_ENABLE; 1111 } 1112 1113 /* setup the VAP */ 1114 if (info->accel_state->has_tcl) { 1115 if (pPriv->bicubic_enabled) 1116 BEGIN_RING(2*7); 1117 else 1118 BEGIN_RING(2*6); 1119 } else { 1120 if (pPriv->bicubic_enabled) 1121 BEGIN_RING(2*5); 1122 else 1123 BEGIN_RING(2*4); 1124 } 1125 1126 /* These registers define the number, type, and location of data submitted 1127 * to the PVS unit of GA input (when PVS is disabled) 1128 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1129 * enabled. This memory provides the imputs to the vertex shader program 1130 * and ordering is not important. When PVS/TCL is disabled, this field maps 1131 * directly to the GA input memory and the order is signifigant. In 1132 * PVS_BYPASS mode the order is as follows: 1133 * Position 1134 * Point Size 1135 * Color 0-3 1136 * Textures 0-7 1137 * Fog 1138 */ 1139 if (pPriv->bicubic_enabled) { 1140 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1141 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1142 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1143 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1144 R300_SIGNED_0 | 1145 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1146 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1147 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1148 R300_SIGNED_1)); 1149 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 1150 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1151 (0 << R300_SKIP_DWORDS_2_SHIFT) | 1152 (7 << R300_DST_VEC_LOC_2_SHIFT) | 1153 R300_LAST_VEC_2 | 1154 R300_SIGNED_2)); 1155 } else { 1156 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1157 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1158 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1159 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1160 R300_SIGNED_0 | 1161 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1162 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1163 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1164 R300_LAST_VEC_1 | 1165 R300_SIGNED_1)); 1166 } 1167 1168 /* load the vertex shader 1169 * We pre-load vertex programs in RADEONInit3DEngine(): 1170 * - exa 1171 * - Xv 1172 * - Xv bicubic 1173 * Here we select the offset of the vertex program we want to use 1174 */ 1175 if (info->accel_state->has_tcl) { 1176 if (pPriv->bicubic_enabled) { 1177 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1178 ((11 << R300_PVS_FIRST_INST_SHIFT) | 1179 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1180 (13 << R300_PVS_LAST_INST_SHIFT))); 1181 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1182 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1183 } else { 1184 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1185 ((9 << R300_PVS_FIRST_INST_SHIFT) | 1186 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1187 (10 << R300_PVS_LAST_INST_SHIFT))); 1188 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1189 (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1190 } 1191 } 1192 1193 /* Position and one set of 2 texture coordinates */ 1194 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1195 if (pPriv->bicubic_enabled) 1196 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1197 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1198 else 1199 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1200 1201 OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 1202 ADVANCE_RING(); 1203 1204 /* setup pixel shader */ 1205 if (pPriv->bicubic_state != BICUBIC_OFF) { 1206 if (pPriv->bicubic_enabled) { 1207 BEGIN_RING(2*79); 1208 1209 /* 4 components: 2 for tex0 and 2 for tex1 */ 1210 OUT_RING_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1211 R300_RS_COUNT_HIRES_EN)); 1212 1213 /* R300_INST_COUNT_RS - highest RS instruction used */ 1214 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1215 1216 /* Pixel stack frame size. */ 1217 OUT_RING_REG(R300_US_PIXSIZE, 5); 1218 1219 /* Indirection levels */ 1220 OUT_RING_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1221 R300_FIRST_TEX)); 1222 1223 /* Set nodes. */ 1224 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1225 R300_ALU_CODE_SIZE(14) | 1226 R300_TEX_CODE_OFFSET(0) | 1227 R300_TEX_CODE_SIZE(6))); 1228 1229 /* Nodes are allocated highest first, but executed lowest first */ 1230 OUT_RING_REG(R300_US_CODE_ADDR_0, 0); 1231 OUT_RING_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1232 R300_ALU_SIZE(0) | 1233 R300_TEX_START(0) | 1234 R300_TEX_SIZE(0))); 1235 OUT_RING_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1236 R300_ALU_SIZE(9) | 1237 R300_TEX_START(1) | 1238 R300_TEX_SIZE(0))); 1239 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1240 R300_ALU_SIZE(2) | 1241 R300_TEX_START(2) | 1242 R300_TEX_SIZE(3) | 1243 R300_RGBA_OUT)); 1244 1245 /* ** BICUBIC FP ** */ 1246 1247 /* texcoord0 => temp0 1248 * texcoord1 => temp1 */ 1249 1250 // first node 1251 /* TEX temp2, temp1.rrr0, tex1, 1D */ 1252 OUT_RING_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1253 R300_TEX_ID(1) | 1254 R300_TEX_SRC_ADDR(1) | 1255 R300_TEX_DST_ADDR(2))); 1256 1257 /* MOV temp1.r, temp1.ggg0 */ 1258 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1259 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1260 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1261 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1262 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1263 R300_ALU_RGB_ADDRD(1) | 1264 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1265 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1266 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1267 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1268 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1269 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1270 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1271 1272 1273 // second node 1274 /* TEX temp1, temp1, tex1, 1D */ 1275 OUT_RING_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1276 R300_TEX_ID(1) | 1277 R300_TEX_SRC_ADDR(1) | 1278 R300_TEX_DST_ADDR(1))); 1279 1280 /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 1281 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1282 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1283 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1284 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1285 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1286 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1287 R300_ALU_RGB_ADDRD(3) | 1288 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1289 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1290 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1291 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1292 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1293 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1294 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1295 1296 1297 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 1298 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1299 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1300 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1301 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1302 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1303 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1304 R300_ALU_RGB_ADDRD(2) | 1305 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1306 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1307 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1308 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1309 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1310 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1311 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1312 1313 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 1314 OUT_RING_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1315 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1316 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1317 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1318 OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1319 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1320 R300_ALU_RGB_ADDR2(3) | 1321 R300_ALU_RGB_ADDRD(4) | 1322 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1323 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1324 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1325 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1326 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1327 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1328 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1329 1330 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 1331 OUT_RING_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1332 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1333 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1334 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1335 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1336 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1337 R300_ALU_RGB_ADDR2(2) | 1338 R300_ALU_RGB_ADDRD(5) | 1339 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1340 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1341 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1342 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1343 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1344 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1345 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1346 1347 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 1348 OUT_RING_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1349 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1350 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1351 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1352 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1353 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1354 R300_ALU_RGB_ADDR2(3) | 1355 R300_ALU_RGB_ADDRD(3) | 1356 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1357 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1358 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1359 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1360 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1361 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1362 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1363 1364 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 1365 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1366 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1367 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1368 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1369 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1370 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1371 R300_ALU_RGB_ADDR2(2) | 1372 R300_ALU_RGB_ADDRD(1) | 1373 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1374 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1375 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1376 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1377 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1378 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1379 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1380 1381 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 1382 OUT_RING_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1383 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1384 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1385 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1386 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1387 R300_ALU_RGB_ADDR2(1) | 1388 R300_ALU_RGB_ADDRD(1) | 1389 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1390 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1391 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1392 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1393 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1394 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1395 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1396 1397 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 1398 OUT_RING_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1399 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1400 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1401 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1402 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1403 R300_ALU_RGB_ADDR2(3) | 1404 R300_ALU_RGB_ADDRD(2) | 1405 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1406 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1407 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1408 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1409 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1410 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1411 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1412 1413 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 1414 OUT_RING_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1415 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1416 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1417 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1418 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1419 R300_ALU_RGB_ADDR2(5) | 1420 R300_ALU_RGB_ADDRD(3) | 1421 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1422 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1423 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1424 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1425 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1426 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1427 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1428 1429 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 1430 OUT_RING_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1431 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1432 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1433 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1434 OUT_RING_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1435 R300_ALU_RGB_ADDR2(4) | 1436 R300_ALU_RGB_ADDRD(0) | 1437 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1438 OUT_RING_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1439 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1440 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1441 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1442 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1443 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1444 1445 1446 // third node 1447 /* TEX temp4, temp1.rg--, tex0, 1D */ 1448 OUT_RING_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1449 R300_TEX_ID(0) | 1450 R300_TEX_SRC_ADDR(1) | 1451 R300_TEX_DST_ADDR(4))); 1452 1453 /* TEX temp3, temp3.rg--, tex0, 1D */ 1454 OUT_RING_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1455 R300_TEX_ID(0) | 1456 R300_TEX_SRC_ADDR(3) | 1457 R300_TEX_DST_ADDR(3))); 1458 1459 /* TEX temp5, temp2.rg--, tex0, 1D */ 1460 OUT_RING_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1461 R300_TEX_ID(0) | 1462 R300_TEX_SRC_ADDR(2) | 1463 R300_TEX_DST_ADDR(5))); 1464 1465 /* TEX temp0, temp0.rg--, tex0, 1D */ 1466 OUT_RING_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1467 R300_TEX_ID(0) | 1468 R300_TEX_SRC_ADDR(0) | 1469 R300_TEX_DST_ADDR(0))); 1470 1471 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1472 * - PRESUB temps, temp4 - temp3 1473 * - MAD temp3, temp1.bbbb, temps, temp3 */ 1474 OUT_RING_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1475 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1476 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1477 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1478 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1479 OUT_RING_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1480 R300_ALU_RGB_ADDR1(4) | 1481 R300_ALU_RGB_ADDR2(1) | 1482 R300_ALU_RGB_ADDRD(3) | 1483 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1484 OUT_RING_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1485 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1486 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1487 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1488 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1489 R300_ALU_ALPHA_ADDR1(4) | 1490 R300_ALU_ALPHA_ADDR2(1) | 1491 R300_ALU_ALPHA_ADDRD(3) | 1492 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1493 1494 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1495 * - PRESUB temps, temp5 - temp0 1496 * - MAD temp0, temp1.bbbb, temps, temp0 */ 1497 OUT_RING_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1498 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1499 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1500 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1501 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1502 R300_ALU_RGB_INSERT_NOP)); 1503 OUT_RING_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1504 R300_ALU_RGB_ADDR1(5) | 1505 R300_ALU_RGB_ADDR2(1) | 1506 R300_ALU_RGB_ADDRD(0) | 1507 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1508 OUT_RING_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1509 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1510 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1511 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1512 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1513 R300_ALU_ALPHA_ADDR1(5) | 1514 R300_ALU_ALPHA_ADDR2(1) | 1515 R300_ALU_ALPHA_ADDRD(0) | 1516 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1517 1518 /* LRP output, temp2.bbbb, temp3, temp0 -> 1519 * - PRESUB temps, temp3 - temp0 1520 * - MAD output, temp2.bbbb, temps, temp0 */ 1521 OUT_RING_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1522 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1523 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1524 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1525 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1526 OUT_RING_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1527 R300_ALU_RGB_ADDR1(3) | 1528 R300_ALU_RGB_ADDR2(2) | 1529 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 1530 OUT_RING_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1531 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1532 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1533 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1534 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1535 R300_ALU_ALPHA_ADDR1(3) | 1536 R300_ALU_ALPHA_ADDR2(2) | 1537 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1538 1539 /* Shader constants. */ 1540 OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 1541 OUT_RING_REG(R300_US_ALU_CONST_G(0), 0); 1542 OUT_RING_REG(R300_US_ALU_CONST_B(0), 0); 1543 OUT_RING_REG(R300_US_ALU_CONST_A(0), 0); 1544 1545 OUT_RING_REG(R300_US_ALU_CONST_R(1), 0); 1546 OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 1547 OUT_RING_REG(R300_US_ALU_CONST_B(1), 0); 1548 OUT_RING_REG(R300_US_ALU_CONST_A(1), 0); 1549 1550 ADVANCE_RING(); 1551 } else { 1552 BEGIN_RING(2*11); 1553 /* 2 components: 2 for tex0 */ 1554 OUT_RING_REG(R300_RS_COUNT, 1555 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1556 R300_RS_COUNT_HIRES_EN)); 1557 /* R300_INST_COUNT_RS - highest RS instruction used */ 1558 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1559 1560 OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1561 1562 /* Indirection levels */ 1563 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1564 R300_FIRST_TEX)); 1565 1566 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1567 R300_ALU_CODE_SIZE(1) | 1568 R300_TEX_CODE_OFFSET(0) | 1569 R300_TEX_CODE_SIZE(1))); 1570 1571 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1572 R300_ALU_SIZE(0) | 1573 R300_TEX_START(0) | 1574 R300_TEX_SIZE(0) | 1575 R300_RGBA_OUT)); 1576 1577 /* tex inst */ 1578 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1579 R300_TEX_DST_ADDR(0) | 1580 R300_TEX_ID(0) | 1581 R300_TEX_INST(R300_TEX_INST_LD))); 1582 1583 /* ALU inst */ 1584 /* RGB */ 1585 OUT_RING_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1586 R300_ALU_RGB_ADDR1(0) | 1587 R300_ALU_RGB_ADDR2(0) | 1588 R300_ALU_RGB_ADDRD(0) | 1589 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1590 R300_ALU_RGB_MASK_G | 1591 R300_ALU_RGB_MASK_B)) | 1592 R300_ALU_RGB_TARGET_A)); 1593 OUT_RING_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1594 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1595 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1596 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1597 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1598 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1599 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1600 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1601 R300_ALU_RGB_CLAMP)); 1602 /* Alpha */ 1603 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1604 R300_ALU_ALPHA_ADDR1(0) | 1605 R300_ALU_ALPHA_ADDR2(0) | 1606 R300_ALU_ALPHA_ADDRD(0) | 1607 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1608 R300_ALU_ALPHA_TARGET_A | 1609 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 1610 OUT_RING_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1611 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1612 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1613 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1614 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1615 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1616 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1617 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1618 R300_ALU_ALPHA_CLAMP)); 1619 ADVANCE_RING(); 1620 } 1621 } else { 1622 /* 1623 * y' = y - .0625 1624 * u' = u - .5 1625 * v' = v - .5; 1626 * 1627 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1628 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1629 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1630 * 1631 * DP3 might look like the straightforward solution 1632 * but we'd need to move the texture yuv values in 1633 * the same reg for this to work. Therefore use MADs. 1634 * Brightness just adds to the off constant. 1635 * Contrast is multiplication of luminance. 1636 * Saturation and hue change the u and v coeffs. 1637 * Default values (before adjustments - depend on colorspace): 1638 * yco = 1.1643 1639 * uco = 0, -0.39173, 2.017 1640 * vco = 1.5958, -0.8129, 0 1641 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1642 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1643 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1644 * 1645 * temp = MAD(yco, yuv.yyyy, off) 1646 * temp = MAD(uco, yuv.uuuu, temp) 1647 * result = MAD(vco, yuv.vvvv, temp) 1648 */ 1649 /* TODO: don't recalc consts always */ 1650 const float Loff = -0.0627; 1651 const float Coff = -0.502; 1652 float uvcosf, uvsinf; 1653 float yco; 1654 float uco[3], vco[3], off[3]; 1655 float bright, cont, gamma; 1656 int ref = pPriv->transform_index; 1657 Bool needgamma = FALSE; 1658 1659 cont = RTFContrast(pPriv->contrast); 1660 bright = RTFBrightness(pPriv->brightness); 1661 gamma = (float)pPriv->gamma / 1000.0; 1662 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1663 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1664 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 1665 1666 yco = trans[ref].RefLuma * cont; 1667 uco[0] = -trans[ref].RefRCr * uvsinf; 1668 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1669 uco[2] = trans[ref].RefBCb * uvcosf; 1670 vco[0] = trans[ref].RefRCr * uvcosf; 1671 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1672 vco[2] = trans[ref].RefBCb * uvsinf; 1673 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1674 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1675 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1676 1677 if (gamma != 1.0) { 1678 needgamma = TRUE; 1679 /* note: gamma correction is out = in ^ gamma; 1680 gpu can only do LG2/EX2 therefore we transform into 1681 in ^ gamma = 2 ^ (log2(in) * gamma). 1682 Lots of scalar ops, unfortunately (better solution?) - 1683 without gamma that's 3 inst, with gamma it's 10... 1684 could use different gamma factors per channel, 1685 if that's of any use. */ 1686 } 1687 1688 if (pPriv->is_planar) { 1689 BEGIN_RING(2 * (needgamma ? (28 + 33) : 33)); 1690 /* 2 components: same 2 for tex0/1/2 */ 1691 OUT_RING_REG(R300_RS_COUNT, 1692 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1693 R300_RS_COUNT_HIRES_EN)); 1694 /* R300_INST_COUNT_RS - highest RS instruction used */ 1695 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1696 1697 OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1698 1699 /* Indirection levels */ 1700 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1701 R300_FIRST_TEX)); 1702 1703 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1704 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1705 R300_TEX_CODE_OFFSET(0) | 1706 R300_TEX_CODE_SIZE(3))); 1707 1708 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1709 R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 1710 R300_TEX_START(0) | 1711 R300_TEX_SIZE(2) | 1712 R300_RGBA_OUT)); 1713 1714 /* tex inst */ 1715 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1716 R300_TEX_DST_ADDR(2) | 1717 R300_TEX_ID(0) | 1718 R300_TEX_INST(R300_TEX_INST_LD))); 1719 OUT_RING_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1720 R300_TEX_DST_ADDR(1) | 1721 R300_TEX_ID(1) | 1722 R300_TEX_INST(R300_TEX_INST_LD))); 1723 OUT_RING_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 1724 R300_TEX_DST_ADDR(0) | 1725 R300_TEX_ID(2) | 1726 R300_TEX_INST(R300_TEX_INST_LD))); 1727 1728 /* ALU inst */ 1729 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 1730 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1731 R300_ALU_RGB_ADDR1(2) | 1732 R300_ALU_RGB_ADDR2(0) | 1733 R300_ALU_RGB_ADDRD(2) | 1734 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1735 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1736 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1737 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1738 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1739 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1740 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1741 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1742 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1743 /* alpha nop, but need to set up alpha source for rgb usage */ 1744 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1745 R300_ALU_ALPHA_ADDR1(2) | 1746 R300_ALU_ALPHA_ADDR2(0) | 1747 R300_ALU_ALPHA_ADDRD(2) | 1748 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1749 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1750 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1751 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1752 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1753 1754 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 1755 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1756 R300_ALU_RGB_ADDR1(1) | 1757 R300_ALU_RGB_ADDR2(2) | 1758 R300_ALU_RGB_ADDRD(2) | 1759 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1760 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1761 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1762 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1763 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1764 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1765 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1766 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1767 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1768 /* alpha nop */ 1769 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 1770 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1771 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1772 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1773 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1774 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1775 1776 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 1777 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 1778 R300_ALU_RGB_ADDR1(0) | 1779 R300_ALU_RGB_ADDR2(2) | 1780 R300_ALU_RGB_ADDRD(0) | 1781 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 1782 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 1783 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1784 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1785 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1786 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1787 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1788 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1789 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1790 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1791 R300_ALU_RGB_CLAMP)); 1792 /* write alpha 1 */ 1793 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 1794 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1795 R300_ALU_ALPHA_TARGET_A)); 1796 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1797 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1798 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1799 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 1800 1801 if (needgamma) { 1802 /* rgb temp0.r = op_sop, set up src0 reg */ 1803 OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 1804 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1805 OUT_RING_REG(R300_US_ALU_RGB_INST(3), 1806 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1807 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1808 /* alpha lg2 temp0, temp0.r */ 1809 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 1810 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1811 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1812 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1813 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1814 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1815 1816 /* rgb temp0.g = op_sop, set up src0 reg */ 1817 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 1818 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 1819 OUT_RING_REG(R300_US_ALU_RGB_INST(4), 1820 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1821 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1822 /* alpha lg2 temp0, temp0.g */ 1823 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 1824 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1825 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1826 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1827 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1828 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1829 1830 /* rgb temp0.b = op_sop, set up src0 reg */ 1831 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 1832 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 1833 OUT_RING_REG(R300_US_ALU_RGB_INST(5), 1834 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1835 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1836 /* alpha lg2 temp0, temp0.b */ 1837 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 1838 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1839 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1840 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1841 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1842 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1843 1844 /* MUL const1, temp1, temp0 */ 1845 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 1846 R300_ALU_RGB_ADDR1(0) | 1847 R300_ALU_RGB_ADDR2(0) | 1848 R300_ALU_RGB_ADDRD(0) | 1849 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1850 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1851 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1852 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 1853 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1854 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1855 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1856 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1857 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1858 /* alpha nop, but set up const1 */ 1859 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 1860 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 1861 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1862 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1863 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1864 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1865 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1866 1867 /* rgb out0.r = op_sop, set up src0 reg */ 1868 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1869 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 1870 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 1871 OUT_RING_REG(R300_US_ALU_RGB_INST(7), 1872 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1873 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1874 /* alpha ex2 temp0, temp0.r */ 1875 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 1876 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1877 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1878 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1879 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1880 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1881 1882 /* rgb out0.g = op_sop, set up src0 reg */ 1883 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1884 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 1885 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 1886 OUT_RING_REG(R300_US_ALU_RGB_INST(8), 1887 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1888 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1889 /* alpha ex2 temp0, temp0.g */ 1890 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 1891 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1892 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1893 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1894 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1895 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1896 1897 /* rgb out0.b = op_sop, set up src0 reg */ 1898 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1899 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 1900 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 1901 OUT_RING_REG(R300_US_ALU_RGB_INST(9), 1902 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1903 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1904 /* alpha ex2 temp0, temp0.b */ 1905 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 1906 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1907 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1908 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1909 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1910 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1911 } 1912 } else { 1913 BEGIN_RING(2 * (needgamma ? (28 + 31) : 31)); 1914 /* 2 components */ 1915 OUT_RING_REG(R300_RS_COUNT, 1916 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1917 R300_RS_COUNT_HIRES_EN)); 1918 /* R300_INST_COUNT_RS - highest RS instruction used */ 1919 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1920 1921 OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 1922 1923 /* Indirection levels */ 1924 OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1925 R300_FIRST_TEX)); 1926 1927 OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1928 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1929 R300_TEX_CODE_OFFSET(0) | 1930 R300_TEX_CODE_SIZE(1))); 1931 1932 OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1933 R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 1934 R300_TEX_START(0) | 1935 R300_TEX_SIZE(0) | 1936 R300_RGBA_OUT)); 1937 1938 /* tex inst */ 1939 OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1940 R300_TEX_DST_ADDR(0) | 1941 R300_TEX_ID(0) | 1942 R300_TEX_INST(R300_TEX_INST_LD))); 1943 1944 /* ALU inst */ 1945 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 1946 OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1947 R300_ALU_RGB_ADDR1(0) | 1948 R300_ALU_RGB_ADDR2(0) | 1949 R300_ALU_RGB_ADDRD(1) | 1950 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1951 OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1952 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1953 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 1954 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1955 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1956 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1957 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1958 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1959 /* alpha nop, but need to set up alpha source for rgb usage */ 1960 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1961 R300_ALU_ALPHA_ADDR1(0) | 1962 R300_ALU_ALPHA_ADDR2(0) | 1963 R300_ALU_ALPHA_ADDRD(0) | 1964 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1965 OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1966 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1967 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1968 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1969 1970 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 1971 OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1972 R300_ALU_RGB_ADDR1(0) | 1973 R300_ALU_RGB_ADDR2(1) | 1974 R300_ALU_RGB_ADDRD(1) | 1975 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1976 OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1977 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1978 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 1979 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1980 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1981 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1982 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1983 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1984 /* alpha nop */ 1985 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 1986 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1987 OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1988 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1989 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1990 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1991 1992 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 1993 OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 1994 R300_ALU_RGB_ADDR1(0) | 1995 R300_ALU_RGB_ADDR2(1) | 1996 R300_ALU_RGB_ADDRD(0) | 1997 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 1998 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 1999 OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2000 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2001 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2002 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2003 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2004 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2005 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2006 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2007 R300_ALU_RGB_CLAMP)); 2008 /* write alpha 1 */ 2009 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2010 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2011 R300_ALU_ALPHA_TARGET_A)); 2012 OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2013 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2014 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2015 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2016 2017 if (needgamma) { 2018 /* rgb temp0.r = op_sop, set up src0 reg */ 2019 OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2020 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2021 OUT_RING_REG(R300_US_ALU_RGB_INST(3), 2022 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2023 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2024 /* alpha lg2 temp0, temp0.r */ 2025 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2026 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2027 OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2028 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2029 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2030 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2031 2032 /* rgb temp0.g = op_sop, set up src0 reg */ 2033 OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2034 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2035 OUT_RING_REG(R300_US_ALU_RGB_INST(4), 2036 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2037 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2038 /* alpha lg2 temp0, temp0.g */ 2039 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2040 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2041 OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2042 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2043 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2044 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2045 2046 /* rgb temp0.b = op_sop, set up src0 reg */ 2047 OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2048 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2049 OUT_RING_REG(R300_US_ALU_RGB_INST(5), 2050 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2051 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2052 /* alpha lg2 temp0, temp0.b */ 2053 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2054 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2055 OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2056 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2057 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2058 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2059 2060 /* MUL const1, temp1, temp0 */ 2061 OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2062 R300_ALU_RGB_ADDR1(0) | 2063 R300_ALU_RGB_ADDR2(0) | 2064 R300_ALU_RGB_ADDRD(0) | 2065 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2066 OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2067 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2068 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2069 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2070 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2071 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2072 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2073 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2074 /* alpha nop, but set up const1 */ 2075 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2076 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2077 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2078 OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2079 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2080 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2081 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2082 2083 /* rgb out0.r = op_sop, set up src0 reg */ 2084 OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2085 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2086 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2087 OUT_RING_REG(R300_US_ALU_RGB_INST(7), 2088 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2089 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2090 /* alpha ex2 temp0, temp0.r */ 2091 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2092 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2093 OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2094 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2095 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2096 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2097 2098 /* rgb out0.g = op_sop, set up src0 reg */ 2099 OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2100 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2101 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2102 OUT_RING_REG(R300_US_ALU_RGB_INST(8), 2103 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2104 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2105 /* alpha ex2 temp0, temp0.g */ 2106 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2107 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2108 OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2109 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2110 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2111 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2112 2113 /* rgb out0.b = op_sop, set up src0 reg */ 2114 OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2115 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2116 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2117 OUT_RING_REG(R300_US_ALU_RGB_INST(9), 2118 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2119 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2120 /* alpha ex2 temp0, temp0.b */ 2121 OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2122 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2123 OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2124 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2125 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2126 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2127 } 2128 } 2129 2130 /* Shader constants. */ 2131 /* constant 0: off, yco */ 2132 OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 2133 OUT_RING_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 2134 OUT_RING_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 2135 OUT_RING_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2136 /* constant 1: uco */ 2137 OUT_RING_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 2138 OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 2139 OUT_RING_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 2140 OUT_RING_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2141 /* constant 2: vco */ 2142 OUT_RING_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 2143 OUT_RING_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 2144 OUT_RING_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 2145 OUT_RING_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2146 2147 ADVANCE_RING(); 2148 } 2149 2150 BEGIN_ACCEL_RELOC(6, 2); 2151 OUT_RING_REG(R300_TX_INVALTAGS, 0); 2152 OUT_RING_REG(R300_TX_ENABLE, txenable); 2153 2154 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2155 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2156 2157 /* no need to enable blending */ 2158 OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2159 2160 OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 2161 ADVANCE_RING(); 2162 2163 if (pPriv->vsync) { 2164 xf86CrtcPtr crtc; 2165 if (pPriv->desired_crtc) 2166 crtc = pPriv->desired_crtc; 2167 else 2168 crtc = radeon_pick_best_crtc(pScrn, FALSE, 2169 pPriv->drw_x, 2170 pPriv->drw_x + pPriv->dst_w, 2171 pPriv->drw_y, 2172 pPriv->drw_y + pPriv->dst_h); 2173 if (crtc) 2174 RADEONWaitForVLine(pScrn, pPixmap, 2175 crtc, 2176 pPriv->drw_y - crtc->y, 2177 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2178 } 2179 2180 return TRUE; 2181} 2182 2183static void 2184R300DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2185{ 2186 RADEONInfoPtr info = RADEONPTR(pScrn); 2187 PixmapPtr pPixmap = pPriv->pPixmap; 2188 int dstxoff, dstyoff; 2189 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 2190 int nBox = REGION_NUM_RECTS(&pPriv->clip); 2191 2192#ifdef COMPOSITE 2193 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 2194 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 2195#else 2196 dstxoff = 0; 2197 dstyoff = 0; 2198#endif 2199 2200 if (!R300PrepareTexturedVideo(pScrn, pPriv)) 2201 return; 2202 2203 /* 2204 * Rendering of the actual polygon is done in two different 2205 * ways depending on chip generation: 2206 * 2207 * < R300: 2208 * 2209 * These chips can render a rectangle in one pass, so 2210 * handling is pretty straight-forward. 2211 * 2212 * >= R300: 2213 * 2214 * These chips can accept a quad, but will render it as 2215 * two triangles which results in a diagonal tear. Instead 2216 * We render a single, large triangle and use the scissor 2217 * functionality to restrict it to the desired rectangle. 2218 * Due to guardband limits on r3xx/r4xx, we can only use 2219 * the single triangle up to 2560/4021 pixels; above that we 2220 * render as a quad. 2221 */ 2222 2223 while (nBox--) { 2224 float srcX, srcY, srcw, srch; 2225 int dstX, dstY, dstw, dsth; 2226 Bool use_quad = FALSE; 2227 int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3; 2228 2229 if (draw_size > radeon_cs_space_remaining(pScrn)) { 2230 radeon_cs_flush_indirect(pScrn); 2231 if (!R300PrepareTexturedVideo(pScrn, pPriv)) 2232 return; 2233 } 2234 2235 dstX = pBox->x1 + dstxoff; 2236 dstY = pBox->y1 + dstyoff; 2237 dstw = pBox->x2 - pBox->x1; 2238 dsth = pBox->y2 - pBox->y1; 2239 2240 srcX = pPriv->src_x; 2241 srcX += ((pBox->x1 - pPriv->drw_x) * 2242 pPriv->src_w) / (float)pPriv->dst_w; 2243 srcY = pPriv->src_y; 2244 srcY += ((pBox->y1 - pPriv->drw_y) * 2245 pPriv->src_h) / (float)pPriv->dst_h; 2246 2247 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 2248 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 2249 2250 if (IS_R400_3D) { 2251 if ((dstw+dsth) > 4021) 2252 use_quad = TRUE; 2253 } else { 2254 if ((dstw+dsth) > 2560) 2255 use_quad = TRUE; 2256 } 2257 /* 2258 * Set up the scissor area to that of the output size. 2259 */ 2260 BEGIN_RING(2*2); 2261 /* R300 has an offset */ 2262 OUT_RING_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) | 2263 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT))); 2264 OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) | 2265 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); 2266 ADVANCE_RING(); 2267 2268 if (use_quad) { 2269 BEGIN_RING(4 * pPriv->vtx_count + 4); 2270 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2271 4 * pPriv->vtx_count)); 2272 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | 2273 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2274 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2275 } else { 2276 BEGIN_RING(3 * pPriv->vtx_count + 4); 2277 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2278 3 * pPriv->vtx_count)); 2279 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 2280 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2281 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2282 } 2283 2284 if (pPriv->bicubic_enabled) { 2285 /* 2286 * This code is only executed on >= R300, so we don't 2287 * have to deal with the legacy handling. 2288 */ 2289 if (use_quad) { 2290 VTX_OUT_6((float)dstX, (float)dstY, 2291 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2292 (float)srcX + 0.5, (float)srcY + 0.5); 2293 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 2294 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 2295 (float)srcX + 0.5, (float)(srcY + srch) + 0.5); 2296 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 2297 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 2298 (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); 2299 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 2300 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 2301 (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); 2302 } else { 2303 VTX_OUT_6((float)dstX, (float)dstY, 2304 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2305 (float)srcX + 0.5, (float)srcY + 0.5); 2306 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 2307 (float)srcX / pPriv->w, 2308 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 2309 (float)srcX + 0.5, 2310 (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 2311 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 2312 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2313 (float)srcY / pPriv->h, 2314 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 2315 (float)srcY + 0.5); 2316 } 2317 } else { 2318 if (use_quad) { 2319 VTX_OUT_4((float)dstX, (float)dstY, 2320 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2321 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 2322 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 2323 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 2324 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 2325 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 2326 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 2327 } else { 2328 /* 2329 * Render a big, scissored triangle. This means 2330 * increasing the triangle size and adjusting 2331 * texture coordinates. 2332 */ 2333 VTX_OUT_4((float)dstX, (float)dstY, 2334 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2335 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 2336 (float)srcX / pPriv->w, 2337 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 2338 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 2339 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2340 (float)srcY / pPriv->h); 2341 } 2342 } 2343 2344 /* flushing is pipelined, free/finish is not */ 2345 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2346 2347 ADVANCE_RING(); 2348 2349 pBox++; 2350 } 2351 2352 BEGIN_RING(2*3); 2353 OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA); 2354 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 2355 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 2356 ADVANCE_RING(); 2357 2358 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 2359} 2360 2361static Bool 2362R500PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2363{ 2364 RADEONInfoPtr info = RADEONPTR(pScrn); 2365 PixmapPtr pPixmap = pPriv->pPixmap; 2366 struct radeon_exa_pixmap_priv *driver_priv; 2367 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 2368 uint32_t txfilter, txformat0, txformat1, txpitch, us_format = 0; 2369 uint32_t dst_pitch, dst_format; 2370 uint32_t txenable, colorpitch; 2371 uint32_t output_fmt; 2372 int pixel_shift, out_size = 6; 2373 int ret; 2374 2375 radeon_cs_space_reset_bos(info->cs); 2376 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2377 2378 if (pPriv->bicubic_enabled) 2379 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2380 2381 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 2382 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 2383 2384 ret = radeon_cs_space_check(info->cs); 2385 if (ret) { 2386 ErrorF("Not enough RAM to hw accel xv operation\n"); 2387 return FALSE; 2388 } 2389 2390 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2391 2392 dst_pitch = exaGetPixmapPitch(pPixmap); 2393 RADEON_SWITCH_TO_3D(); 2394 2395 if (pPriv->bicubic_enabled) 2396 pPriv->vtx_count = 6; 2397 else 2398 pPriv->vtx_count = 4; 2399 2400 switch (pPixmap->drawable.bitsPerPixel) { 2401 case 16: 2402 if (pPixmap->drawable.depth == 15) 2403 dst_format = R300_COLORFORMAT_ARGB1555; 2404 else 2405 dst_format = R300_COLORFORMAT_RGB565; 2406 break; 2407 case 32: 2408 dst_format = R300_COLORFORMAT_ARGB8888; 2409 break; 2410 default: 2411 return FALSE; 2412 } 2413 2414 output_fmt = (R300_OUT_FMT_C4_8 | 2415 R300_OUT_FMT_C0_SEL_BLUE | 2416 R300_OUT_FMT_C1_SEL_GREEN | 2417 R300_OUT_FMT_C2_SEL_RED | 2418 R300_OUT_FMT_C3_SEL_ALPHA); 2419 2420 colorpitch = dst_pitch >> pixel_shift; 2421 colorpitch |= dst_format; 2422 2423 if (RADEONTilingEnabled(pScrn, pPixmap)) 2424 colorpitch |= R300_COLORTILE; 2425 2426 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2427 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2428 pPriv->is_planar = TRUE; 2429 else 2430 pPriv->is_planar = FALSE; 2431 2432 if (pPriv->is_planar) { 2433 txformat1 = R300_TX_FORMAT_X8; 2434 txpitch = pPriv->src_pitch; 2435 } else { 2436 if (pPriv->id == FOURCC_UYVY) 2437 txformat1 = R300_TX_FORMAT_YVYU422; 2438 else 2439 txformat1 = R300_TX_FORMAT_VYUY422; 2440 2441 if (pPriv->bicubic_state != BICUBIC_OFF) 2442 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2443 2444 /* pitch is in pixels */ 2445 txpitch = pPriv->src_pitch / 2; 2446 } 2447 txpitch -= 1; 2448 2449 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2450 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2451 R300_TXPITCH_EN); 2452 2453 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2454 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2455 R300_TX_MAG_FILTER_LINEAR | 2456 R300_TX_MIN_FILTER_LINEAR | 2457 (0 << R300_TX_ID_SHIFT)); 2458 2459 2460 if ((pPriv->w - 1) & 0x800) 2461 txpitch |= R500_TXWIDTH_11; 2462 2463 if ((pPriv->h - 1) & 0x800) 2464 txpitch |= R500_TXHEIGHT_11; 2465 2466 if (info->ChipFamily == CHIP_FAMILY_R520) { 2467 unsigned us_width = (pPriv->w - 1) & 0x7ff; 2468 unsigned us_height = (pPriv->h - 1) & 0x7ff; 2469 unsigned us_depth = 0; 2470 2471 if (pPriv->w > 2048) { 2472 us_width = (0x7ff + us_width) >> 1; 2473 us_depth |= 0x0d; 2474 } 2475 if (pPriv->h > 2048) { 2476 us_height = (0x7ff + us_height) >> 1; 2477 us_depth |= 0x0e; 2478 } 2479 us_format = (us_width << R300_TXWIDTH_SHIFT) | 2480 (us_height << R300_TXHEIGHT_SHIFT) | 2481 (us_depth << R300_TXDEPTH_SHIFT); 2482 out_size++; 2483 } 2484 2485 BEGIN_ACCEL_RELOC(out_size, 1); 2486 OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 2487 OUT_RING_REG(R300_TX_FILTER1_0, 0); 2488 OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 2489 OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 2490 OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 2491 OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 2492 if (info->ChipFamily == CHIP_FAMILY_R520) 2493 OUT_RING_REG(R500_US_FORMAT0_0, us_format); 2494 ADVANCE_RING(); 2495 2496 txenable = R300_TEX_0_ENABLE; 2497 2498 if (pPriv->is_planar) { 2499 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2500 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2501 R300_TXPITCH_EN); 2502 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2503 txpitch -= 1; 2504 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2505 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2506 R300_TX_MIN_FILTER_LINEAR | 2507 R300_TX_MAG_FILTER_LINEAR); 2508 2509 BEGIN_ACCEL_RELOC(12, 2); 2510 OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 2511 OUT_RING_REG(R300_TX_FILTER1_1, 0); 2512 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 2513 OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 2514 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 2515 OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 2516 OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 2517 OUT_RING_REG(R300_TX_FILTER1_2, 0); 2518 OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 2519 OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 2520 OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 2521 OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 2522 ADVANCE_RING(); 2523 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2524 } 2525 2526 if (pPriv->bicubic_enabled) { 2527 /* Size is 128x1 */ 2528 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2529 (0x0 << R300_TXHEIGHT_SHIFT) | 2530 R300_TXPITCH_EN); 2531 /* Format is 32-bit floats, 4bpp */ 2532 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2533 /* Pitch is 127 (128-1) */ 2534 txpitch = 0x7f; 2535 /* Tex filter */ 2536 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2537 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2538 R300_TX_MIN_FILTER_NEAREST | 2539 R300_TX_MAG_FILTER_NEAREST | 2540 (1 << R300_TX_ID_SHIFT)); 2541 2542 BEGIN_ACCEL_RELOC(6, 1); 2543 OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 2544 OUT_RING_REG(R300_TX_FILTER1_1, 0); 2545 OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 2546 OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 2547 OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 2548 OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 2549 ADVANCE_RING(); 2550 2551 /* Enable tex 1 */ 2552 txenable |= R300_TEX_1_ENABLE; 2553 } 2554 2555 /* setup the VAP */ 2556 if (info->accel_state->has_tcl) { 2557 if (pPriv->bicubic_enabled) 2558 BEGIN_RING(2*7); 2559 else 2560 BEGIN_RING(2*6); 2561 } else { 2562 if (pPriv->bicubic_enabled) 2563 BEGIN_RING(2*5); 2564 else 2565 BEGIN_RING(2*4); 2566 } 2567 2568 /* These registers define the number, type, and location of data submitted 2569 * to the PVS unit of GA input (when PVS is disabled) 2570 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2571 * enabled. This memory provides the imputs to the vertex shader program 2572 * and ordering is not important. When PVS/TCL is disabled, this field maps 2573 * directly to the GA input memory and the order is signifigant. In 2574 * PVS_BYPASS mode the order is as follows: 2575 * Position 2576 * Point Size 2577 * Color 0-3 2578 * Textures 0-7 2579 * Fog 2580 */ 2581 if (pPriv->bicubic_enabled) { 2582 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2583 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2584 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2585 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2586 R300_SIGNED_0 | 2587 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2588 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2589 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2590 R300_SIGNED_1)); 2591 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 2592 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2593 (0 << R300_SKIP_DWORDS_2_SHIFT) | 2594 (7 << R300_DST_VEC_LOC_2_SHIFT) | 2595 R300_LAST_VEC_2 | 2596 R300_SIGNED_2)); 2597 } else { 2598 OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2599 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2600 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2601 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2602 R300_SIGNED_0 | 2603 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2604 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2605 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2606 R300_LAST_VEC_1 | 2607 R300_SIGNED_1)); 2608 } 2609 2610 /* load the vertex shader 2611 * We pre-load vertex programs in RADEONInit3DEngine(): 2612 * - exa 2613 * - Xv 2614 * - Xv bicubic 2615 * Here we select the offset of the vertex program we want to use 2616 */ 2617 if (info->accel_state->has_tcl) { 2618 if (pPriv->bicubic_enabled) { 2619 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2620 ((11 << R300_PVS_FIRST_INST_SHIFT) | 2621 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2622 (13 << R300_PVS_LAST_INST_SHIFT))); 2623 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2624 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2625 } else { 2626 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2627 ((9 << R300_PVS_FIRST_INST_SHIFT) | 2628 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2629 (10 << R300_PVS_LAST_INST_SHIFT))); 2630 OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2631 (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2632 } 2633 } 2634 2635 /* Position and one set of 2 texture coordinates */ 2636 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2637 if (pPriv->bicubic_enabled) 2638 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2639 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2640 else 2641 OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2642 2643 OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 2644 ADVANCE_RING(); 2645 2646 /* setup pixel shader */ 2647 if (pPriv->bicubic_state != BICUBIC_OFF) { 2648 if (pPriv->bicubic_enabled) { 2649 BEGIN_RING(2*7); 2650 2651 /* 4 components: 2 for tex0 and 2 for tex1 */ 2652 OUT_RING_REG(R300_RS_COUNT, 2653 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2654 R300_RS_COUNT_HIRES_EN)); 2655 2656 /* R300_INST_COUNT_RS - highest RS instruction used */ 2657 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 2658 2659 /* Pixel stack frame size. */ 2660 OUT_RING_REG(R300_US_PIXSIZE, 5); 2661 2662 /* FP length. */ 2663 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 2664 R500_US_CODE_END_ADDR(13))); 2665 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 2666 R500_US_CODE_RANGE_SIZE(13))); 2667 2668 /* Prepare for FP emission. */ 2669 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 2670 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 2671 ADVANCE_RING(); 2672 2673 BEGIN_RING(2*89); 2674 /* Pixel shader. 2675 * I've gone ahead and annotated each instruction, since this 2676 * thing is MASSIVE. :3 2677 * Note: In order to avoid buggies with temps and multiple 2678 * inputs, all temps are offset by 2. temp0 -> register2. */ 2679 2680 /* TEX temp2, input1.xxxx, tex1, 1D */ 2681 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2682 R500_INST_RGB_WMASK_R | 2683 R500_INST_RGB_WMASK_G | 2684 R500_INST_RGB_WMASK_B)); 2685 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2686 R500_TEX_INST_LD | 2687 R500_TEX_IGNORE_UNCOVERED)); 2688 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2689 R500_TEX_SRC_S_SWIZ_R | 2690 R500_TEX_SRC_T_SWIZ_R | 2691 R500_TEX_SRC_R_SWIZ_R | 2692 R500_TEX_SRC_Q_SWIZ_R | 2693 R500_TEX_DST_ADDR(2) | 2694 R500_TEX_DST_R_SWIZ_R | 2695 R500_TEX_DST_G_SWIZ_G | 2696 R500_TEX_DST_B_SWIZ_B | 2697 R500_TEX_DST_A_SWIZ_A)); 2698 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2699 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2700 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2701 2702 /* TEX temp5, input1.yyyy, tex1, 1D */ 2703 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2704 R500_INST_TEX_SEM_WAIT | 2705 R500_INST_RGB_WMASK_R | 2706 R500_INST_RGB_WMASK_G | 2707 R500_INST_RGB_WMASK_B)); 2708 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2709 R500_TEX_INST_LD | 2710 R500_TEX_SEM_ACQUIRE | 2711 R500_TEX_IGNORE_UNCOVERED)); 2712 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2713 R500_TEX_SRC_S_SWIZ_G | 2714 R500_TEX_SRC_T_SWIZ_G | 2715 R500_TEX_SRC_R_SWIZ_G | 2716 R500_TEX_SRC_Q_SWIZ_G | 2717 R500_TEX_DST_ADDR(5) | 2718 R500_TEX_DST_R_SWIZ_R | 2719 R500_TEX_DST_G_SWIZ_G | 2720 R500_TEX_DST_B_SWIZ_B | 2721 R500_TEX_DST_A_SWIZ_A)); 2722 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2723 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2724 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2725 2726 /* MUL temp4, const0.x0x0, temp2.yyxx */ 2727 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2728 R500_INST_TEX_SEM_WAIT | 2729 R500_INST_RGB_WMASK_R | 2730 R500_INST_RGB_WMASK_G | 2731 R500_INST_RGB_WMASK_B | 2732 R500_INST_ALPHA_WMASK)); 2733 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2734 R500_RGB_ADDR0_CONST | 2735 R500_RGB_ADDR1(2))); 2736 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2737 R500_ALPHA_ADDR0_CONST | 2738 R500_ALPHA_ADDR1(2))); 2739 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2740 R500_ALU_RGB_R_SWIZ_A_R | 2741 R500_ALU_RGB_G_SWIZ_A_0 | 2742 R500_ALU_RGB_B_SWIZ_A_R | 2743 R500_ALU_RGB_SEL_B_SRC1 | 2744 R500_ALU_RGB_R_SWIZ_B_G | 2745 R500_ALU_RGB_G_SWIZ_B_G | 2746 R500_ALU_RGB_B_SWIZ_B_R)); 2747 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2748 R500_ALPHA_OP_MAD | 2749 R500_ALPHA_SEL_A_SRC0 | 2750 R500_ALPHA_SWIZ_A_0 | 2751 R500_ALPHA_SEL_B_SRC1 | 2752 R500_ALPHA_SWIZ_B_R)); 2753 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2754 R500_ALU_RGBA_OP_MAD | 2755 R500_ALU_RGBA_R_SWIZ_0 | 2756 R500_ALU_RGBA_G_SWIZ_0 | 2757 R500_ALU_RGBA_B_SWIZ_0 | 2758 R500_ALU_RGBA_A_SWIZ_0)); 2759 2760 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 2761 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2762 R500_INST_RGB_WMASK_R | 2763 R500_INST_RGB_WMASK_G | 2764 R500_INST_RGB_WMASK_B | 2765 R500_INST_ALPHA_WMASK)); 2766 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2767 R500_RGB_ADDR0_CONST | 2768 R500_RGB_ADDR1(5) | 2769 R500_RGB_ADDR2(4))); 2770 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2771 R500_ALPHA_ADDR0_CONST | 2772 R500_ALPHA_ADDR1(5) | 2773 R500_ALPHA_ADDR2(4))); 2774 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2775 R500_ALU_RGB_R_SWIZ_A_0 | 2776 R500_ALU_RGB_G_SWIZ_A_G | 2777 R500_ALU_RGB_B_SWIZ_A_0 | 2778 R500_ALU_RGB_SEL_B_SRC1 | 2779 R500_ALU_RGB_R_SWIZ_B_R | 2780 R500_ALU_RGB_G_SWIZ_B_R | 2781 R500_ALU_RGB_B_SWIZ_B_R)); 2782 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2783 R500_ALPHA_OP_MAD | 2784 R500_ALPHA_SEL_A_SRC0 | 2785 R500_ALPHA_SWIZ_A_G | 2786 R500_ALPHA_SEL_B_SRC1 | 2787 R500_ALPHA_SWIZ_B_R)); 2788 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2789 R500_ALU_RGBA_OP_MAD | 2790 R500_ALU_RGBA_SEL_C_SRC2 | 2791 R500_ALU_RGBA_R_SWIZ_R | 2792 R500_ALU_RGBA_G_SWIZ_G | 2793 R500_ALU_RGBA_B_SWIZ_B | 2794 R500_ALU_RGBA_A_SWIZ_A)); 2795 2796 /* ADD temp3, temp3, input0.xyxy */ 2797 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2798 R500_INST_RGB_WMASK_R | 2799 R500_INST_RGB_WMASK_G | 2800 R500_INST_RGB_WMASK_B | 2801 R500_INST_ALPHA_WMASK)); 2802 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 2803 R500_RGB_ADDR2(0))); 2804 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 2805 R500_ALPHA_ADDR2(0))); 2806 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2807 R500_ALU_RGB_G_SWIZ_A_1 | 2808 R500_ALU_RGB_B_SWIZ_A_1 | 2809 R500_ALU_RGB_SEL_B_SRC1 | 2810 R500_ALU_RGB_R_SWIZ_B_R | 2811 R500_ALU_RGB_G_SWIZ_B_G | 2812 R500_ALU_RGB_B_SWIZ_B_B)); 2813 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2814 R500_ALPHA_OP_MAD | 2815 R500_ALPHA_SWIZ_A_1 | 2816 R500_ALPHA_SEL_B_SRC1 | 2817 R500_ALPHA_SWIZ_B_A)); 2818 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2819 R500_ALU_RGBA_OP_MAD | 2820 R500_ALU_RGBA_SEL_C_SRC2 | 2821 R500_ALU_RGBA_R_SWIZ_R | 2822 R500_ALU_RGBA_G_SWIZ_G | 2823 R500_ALU_RGBA_B_SWIZ_R | 2824 R500_ALU_RGBA_A_SWIZ_G)); 2825 2826 /* TEX temp1, temp3.zwxy, tex0, 2D */ 2827 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2828 R500_INST_RGB_WMASK_R | 2829 R500_INST_RGB_WMASK_G | 2830 R500_INST_RGB_WMASK_B | 2831 R500_INST_ALPHA_WMASK)); 2832 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2833 R500_TEX_INST_LD | 2834 R500_TEX_IGNORE_UNCOVERED)); 2835 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2836 R500_TEX_SRC_S_SWIZ_B | 2837 R500_TEX_SRC_T_SWIZ_A | 2838 R500_TEX_SRC_R_SWIZ_R | 2839 R500_TEX_SRC_Q_SWIZ_G | 2840 R500_TEX_DST_ADDR(1) | 2841 R500_TEX_DST_R_SWIZ_R | 2842 R500_TEX_DST_G_SWIZ_G | 2843 R500_TEX_DST_B_SWIZ_B | 2844 R500_TEX_DST_A_SWIZ_A)); 2845 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2846 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2847 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2848 2849 /* TEX temp3, temp3.xyzw, tex0, 2D */ 2850 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2851 R500_INST_TEX_SEM_WAIT | 2852 R500_INST_RGB_WMASK_R | 2853 R500_INST_RGB_WMASK_G | 2854 R500_INST_RGB_WMASK_B | 2855 R500_INST_ALPHA_WMASK)); 2856 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2857 R500_TEX_INST_LD | 2858 R500_TEX_SEM_ACQUIRE | 2859 R500_TEX_IGNORE_UNCOVERED)); 2860 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2861 R500_TEX_SRC_S_SWIZ_R | 2862 R500_TEX_SRC_T_SWIZ_G | 2863 R500_TEX_SRC_R_SWIZ_B | 2864 R500_TEX_SRC_Q_SWIZ_A | 2865 R500_TEX_DST_ADDR(3) | 2866 R500_TEX_DST_R_SWIZ_R | 2867 R500_TEX_DST_G_SWIZ_G | 2868 R500_TEX_DST_B_SWIZ_B | 2869 R500_TEX_DST_A_SWIZ_A)); 2870 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2871 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2872 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2873 2874 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 2875 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2876 R500_INST_RGB_WMASK_R | 2877 R500_INST_RGB_WMASK_G | 2878 R500_INST_RGB_WMASK_B | 2879 R500_INST_ALPHA_WMASK)); 2880 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2881 R500_RGB_ADDR0_CONST | 2882 R500_RGB_ADDR1(5) | 2883 R500_RGB_ADDR2(4))); 2884 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2885 R500_ALPHA_ADDR0_CONST | 2886 R500_ALPHA_ADDR1(5) | 2887 R500_ALPHA_ADDR2(4))); 2888 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2889 R500_ALU_RGB_R_SWIZ_A_0 | 2890 R500_ALU_RGB_G_SWIZ_A_G | 2891 R500_ALU_RGB_B_SWIZ_A_0 | 2892 R500_ALU_RGB_SEL_B_SRC1 | 2893 R500_ALU_RGB_R_SWIZ_B_G | 2894 R500_ALU_RGB_G_SWIZ_B_G | 2895 R500_ALU_RGB_B_SWIZ_B_G)); 2896 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2897 R500_ALPHA_OP_MAD | 2898 R500_ALPHA_SEL_A_SRC0 | 2899 R500_ALPHA_SWIZ_A_G | 2900 R500_ALPHA_SEL_B_SRC1 | 2901 R500_ALPHA_SWIZ_B_G)); 2902 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2903 R500_ALU_RGBA_OP_MAD | 2904 R500_ALU_RGBA_SEL_C_SRC2 | 2905 R500_ALU_RGBA_R_SWIZ_R | 2906 R500_ALU_RGBA_G_SWIZ_G | 2907 R500_ALU_RGBA_B_SWIZ_B | 2908 R500_ALU_RGBA_A_SWIZ_A)); 2909 2910 /* ADD temp0, temp4, input0.xyxy */ 2911 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2912 R500_INST_RGB_WMASK_R | 2913 R500_INST_RGB_WMASK_G | 2914 R500_INST_RGB_WMASK_B | 2915 R500_INST_ALPHA_WMASK)); 2916 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 2917 R500_RGB_ADDR2(0))); 2918 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 2919 R500_ALPHA_ADDR2(0))); 2920 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2921 R500_ALU_RGB_G_SWIZ_A_1 | 2922 R500_ALU_RGB_B_SWIZ_A_1 | 2923 R500_ALU_RGB_SEL_B_SRC1 | 2924 R500_ALU_RGB_R_SWIZ_B_R | 2925 R500_ALU_RGB_G_SWIZ_B_G | 2926 R500_ALU_RGB_B_SWIZ_B_B)); 2927 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 2928 R500_ALPHA_OP_MAD | 2929 R500_ALPHA_SWIZ_A_1 | 2930 R500_ALPHA_SEL_B_SRC1 | 2931 R500_ALPHA_SWIZ_B_A)); 2932 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 2933 R500_ALU_RGBA_OP_MAD | 2934 R500_ALU_RGBA_SEL_C_SRC2 | 2935 R500_ALU_RGBA_R_SWIZ_R | 2936 R500_ALU_RGBA_G_SWIZ_G | 2937 R500_ALU_RGBA_B_SWIZ_R | 2938 R500_ALU_RGBA_A_SWIZ_G)); 2939 2940 /* TEX temp4, temp0.zwzw, tex0, 2D */ 2941 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2942 R500_INST_TEX_SEM_WAIT | 2943 R500_INST_RGB_WMASK_R | 2944 R500_INST_RGB_WMASK_G | 2945 R500_INST_RGB_WMASK_B | 2946 R500_INST_ALPHA_WMASK)); 2947 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2948 R500_TEX_INST_LD | 2949 R500_TEX_IGNORE_UNCOVERED)); 2950 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2951 R500_TEX_SRC_S_SWIZ_B | 2952 R500_TEX_SRC_T_SWIZ_A | 2953 R500_TEX_SRC_R_SWIZ_B | 2954 R500_TEX_SRC_Q_SWIZ_A | 2955 R500_TEX_DST_ADDR(4) | 2956 R500_TEX_DST_R_SWIZ_R | 2957 R500_TEX_DST_G_SWIZ_G | 2958 R500_TEX_DST_B_SWIZ_B | 2959 R500_TEX_DST_A_SWIZ_A)); 2960 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2961 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2962 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2963 2964 /* TEX temp0, temp0.xyzw, tex0, 2D */ 2965 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2966 R500_INST_TEX_SEM_WAIT | 2967 R500_INST_RGB_WMASK_R | 2968 R500_INST_RGB_WMASK_G | 2969 R500_INST_RGB_WMASK_B | 2970 R500_INST_ALPHA_WMASK)); 2971 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2972 R500_TEX_INST_LD | 2973 R500_TEX_SEM_ACQUIRE | 2974 R500_TEX_IGNORE_UNCOVERED)); 2975 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2976 R500_TEX_SRC_S_SWIZ_R | 2977 R500_TEX_SRC_T_SWIZ_G | 2978 R500_TEX_SRC_R_SWIZ_B | 2979 R500_TEX_SRC_Q_SWIZ_A | 2980 R500_TEX_DST_ADDR(0) | 2981 R500_TEX_DST_R_SWIZ_R | 2982 R500_TEX_DST_G_SWIZ_G | 2983 R500_TEX_DST_B_SWIZ_B | 2984 R500_TEX_DST_A_SWIZ_A)); 2985 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2986 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2987 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2988 2989 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 2990 * - PRESUB temps, temp1 - temp3 2991 * - MAD temp2.zzzz, temps, temp3 */ 2992 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2993 R500_INST_RGB_WMASK_R | 2994 R500_INST_RGB_WMASK_G | 2995 R500_INST_RGB_WMASK_B | 2996 R500_INST_ALPHA_WMASK)); 2997 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 2998 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 2999 R500_RGB_ADDR1(1) | 3000 R500_RGB_ADDR2(2))); 3001 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3002 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3003 R500_ALPHA_ADDR1(1) | 3004 R500_ALPHA_ADDR2(2))); 3005 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3006 R500_ALU_RGB_R_SWIZ_A_B | 3007 R500_ALU_RGB_G_SWIZ_A_B | 3008 R500_ALU_RGB_B_SWIZ_A_B | 3009 R500_ALU_RGB_SEL_B_SRCP | 3010 R500_ALU_RGB_R_SWIZ_B_R | 3011 R500_ALU_RGB_G_SWIZ_B_G | 3012 R500_ALU_RGB_B_SWIZ_B_B)); 3013 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3014 R500_ALPHA_OP_MAD | 3015 R500_ALPHA_SEL_A_SRC2 | 3016 R500_ALPHA_SWIZ_A_B | 3017 R500_ALPHA_SEL_B_SRCP | 3018 R500_ALPHA_SWIZ_B_A)); 3019 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3020 R500_ALU_RGBA_OP_MAD | 3021 R500_ALU_RGBA_SEL_C_SRC0 | 3022 R500_ALU_RGBA_R_SWIZ_R | 3023 R500_ALU_RGBA_G_SWIZ_G | 3024 R500_ALU_RGBA_B_SWIZ_B | 3025 R500_ALU_RGBA_A_SWIZ_A)); 3026 3027 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3028 * - PRESUB temps, temp4 - temp1 3029 * - MAD temp2.zzzz, temps, temp0 */ 3030 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3031 R500_INST_TEX_SEM_WAIT | 3032 R500_INST_RGB_WMASK_R | 3033 R500_INST_RGB_WMASK_G | 3034 R500_INST_RGB_WMASK_B | 3035 R500_INST_ALPHA_WMASK)); 3036 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3037 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3038 R500_RGB_ADDR1(4) | 3039 R500_RGB_ADDR2(2))); 3040 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3041 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3042 R500_ALPHA_ADDR1(4) | 3043 R500_ALPHA_ADDR2(2))); 3044 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3045 R500_ALU_RGB_R_SWIZ_A_B | 3046 R500_ALU_RGB_G_SWIZ_A_B | 3047 R500_ALU_RGB_B_SWIZ_A_B | 3048 R500_ALU_RGB_SEL_B_SRCP | 3049 R500_ALU_RGB_R_SWIZ_B_R | 3050 R500_ALU_RGB_G_SWIZ_B_G | 3051 R500_ALU_RGB_B_SWIZ_B_B)); 3052 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3053 R500_ALPHA_OP_MAD | 3054 R500_ALPHA_SEL_A_SRC2 | 3055 R500_ALPHA_SWIZ_A_B | 3056 R500_ALPHA_SEL_B_SRCP | 3057 R500_ALPHA_SWIZ_B_A)); 3058 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3059 R500_ALU_RGBA_OP_MAD | 3060 R500_ALU_RGBA_SEL_C_SRC0 | 3061 R500_ALU_RGBA_R_SWIZ_R | 3062 R500_ALU_RGBA_G_SWIZ_G | 3063 R500_ALU_RGBA_B_SWIZ_B | 3064 R500_ALU_RGBA_A_SWIZ_A)); 3065 3066 /* LRP output, temp5.zzzz, temp3, temp0 -> 3067 * - PRESUB temps, temp3 - temp0 3068 * - MAD temp5.zzzz, temps, temp0 */ 3069 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3070 R500_INST_LAST | 3071 R500_INST_TEX_SEM_WAIT | 3072 R500_INST_RGB_WMASK_R | 3073 R500_INST_RGB_WMASK_G | 3074 R500_INST_RGB_WMASK_B | 3075 R500_INST_ALPHA_WMASK | 3076 R500_INST_RGB_OMASK_R | 3077 R500_INST_RGB_OMASK_G | 3078 R500_INST_RGB_OMASK_B | 3079 R500_INST_ALPHA_OMASK)); 3080 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3081 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3082 R500_RGB_ADDR1(3) | 3083 R500_RGB_ADDR2(5))); 3084 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3085 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3086 R500_ALPHA_ADDR1(3) | 3087 R500_ALPHA_ADDR2(5))); 3088 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3089 R500_ALU_RGB_R_SWIZ_A_B | 3090 R500_ALU_RGB_G_SWIZ_A_B | 3091 R500_ALU_RGB_B_SWIZ_A_B | 3092 R500_ALU_RGB_SEL_B_SRCP | 3093 R500_ALU_RGB_R_SWIZ_B_R | 3094 R500_ALU_RGB_G_SWIZ_B_G | 3095 R500_ALU_RGB_B_SWIZ_B_B)); 3096 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3097 R500_ALPHA_OP_MAD | 3098 R500_ALPHA_SEL_A_SRC2 | 3099 R500_ALPHA_SWIZ_A_B | 3100 R500_ALPHA_SEL_B_SRCP | 3101 R500_ALPHA_SWIZ_B_A)); 3102 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3103 R500_ALU_RGBA_OP_MAD | 3104 R500_ALU_RGBA_SEL_C_SRC0 | 3105 R500_ALU_RGBA_R_SWIZ_R | 3106 R500_ALU_RGBA_G_SWIZ_G | 3107 R500_ALU_RGBA_B_SWIZ_B | 3108 R500_ALU_RGBA_A_SWIZ_A)); 3109 3110 /* Shader constants. */ 3111 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3112 3113 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3114 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3115 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3116 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3117 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3118 3119 ADVANCE_RING(); 3120 } else { 3121 BEGIN_RING(2*19); 3122 /* 2 components: 2 for tex0 */ 3123 OUT_RING_REG(R300_RS_COUNT, 3124 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3125 R300_RS_COUNT_HIRES_EN)); 3126 3127 /* R300_INST_COUNT_RS - highest RS instruction used */ 3128 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3129 3130 /* Pixel stack frame size. */ 3131 OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3132 3133 /* FP length. */ 3134 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3135 R500_US_CODE_END_ADDR(1))); 3136 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3137 R500_US_CODE_RANGE_SIZE(1))); 3138 3139 /* Prepare for FP emission. */ 3140 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3141 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3142 3143 /* tex inst */ 3144 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3145 R500_INST_TEX_SEM_WAIT | 3146 R500_INST_RGB_WMASK_R | 3147 R500_INST_RGB_WMASK_G | 3148 R500_INST_RGB_WMASK_B | 3149 R500_INST_ALPHA_WMASK | 3150 R500_INST_RGB_CLAMP | 3151 R500_INST_ALPHA_CLAMP)); 3152 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3153 R500_TEX_INST_LD | 3154 R500_TEX_SEM_ACQUIRE | 3155 R500_TEX_IGNORE_UNCOVERED)); 3156 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3157 R500_TEX_SRC_S_SWIZ_R | 3158 R500_TEX_SRC_T_SWIZ_G | 3159 R500_TEX_DST_ADDR(0) | 3160 R500_TEX_DST_R_SWIZ_R | 3161 R500_TEX_DST_G_SWIZ_G | 3162 R500_TEX_DST_B_SWIZ_B | 3163 R500_TEX_DST_A_SWIZ_A)); 3164 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3165 R500_DX_S_SWIZ_R | 3166 R500_DX_T_SWIZ_R | 3167 R500_DX_R_SWIZ_R | 3168 R500_DX_Q_SWIZ_R | 3169 R500_DY_ADDR(0) | 3170 R500_DY_S_SWIZ_R | 3171 R500_DY_T_SWIZ_R | 3172 R500_DY_R_SWIZ_R | 3173 R500_DY_Q_SWIZ_R)); 3174 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3175 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3176 3177 /* ALU inst */ 3178 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3179 R500_INST_TEX_SEM_WAIT | 3180 R500_INST_LAST | 3181 R500_INST_RGB_OMASK_R | 3182 R500_INST_RGB_OMASK_G | 3183 R500_INST_RGB_OMASK_B | 3184 R500_INST_ALPHA_OMASK | 3185 R500_INST_RGB_CLAMP | 3186 R500_INST_ALPHA_CLAMP)); 3187 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3188 R500_RGB_ADDR1(0) | 3189 R500_RGB_ADDR1_CONST | 3190 R500_RGB_ADDR2(0) | 3191 R500_RGB_ADDR2_CONST)); 3192 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3193 R500_ALPHA_ADDR1(0) | 3194 R500_ALPHA_ADDR1_CONST | 3195 R500_ALPHA_ADDR2(0) | 3196 R500_ALPHA_ADDR2_CONST)); 3197 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3198 R500_ALU_RGB_R_SWIZ_A_R | 3199 R500_ALU_RGB_G_SWIZ_A_G | 3200 R500_ALU_RGB_B_SWIZ_A_B | 3201 R500_ALU_RGB_SEL_B_SRC0 | 3202 R500_ALU_RGB_R_SWIZ_B_1 | 3203 R500_ALU_RGB_B_SWIZ_B_1 | 3204 R500_ALU_RGB_G_SWIZ_B_1)); 3205 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3206 R500_ALPHA_SWIZ_A_A | 3207 R500_ALPHA_SWIZ_B_1)); 3208 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3209 R500_ALU_RGBA_R_SWIZ_0 | 3210 R500_ALU_RGBA_G_SWIZ_0 | 3211 R500_ALU_RGBA_B_SWIZ_0 | 3212 R500_ALU_RGBA_A_SWIZ_0)); 3213 ADVANCE_RING(); 3214 } 3215 } else { 3216 /* 3217 * y' = y - .0625 3218 * u' = u - .5 3219 * v' = v - .5; 3220 * 3221 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3222 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3223 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3224 * 3225 * DP3 might look like the straightforward solution 3226 * but we'd need to move the texture yuv values in 3227 * the same reg for this to work. Therefore use MADs. 3228 * Brightness just adds to the off constant. 3229 * Contrast is multiplication of luminance. 3230 * Saturation and hue change the u and v coeffs. 3231 * Default values (before adjustments - depend on colorspace): 3232 * yco = 1.1643 3233 * uco = 0, -0.39173, 2.017 3234 * vco = 1.5958, -0.8129, 0 3235 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3236 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3237 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3238 * 3239 * temp = MAD(yco, yuv.yyyy, off) 3240 * temp = MAD(uco, yuv.uuuu, temp) 3241 * result = MAD(vco, yuv.vvvv, temp) 3242 */ 3243 /* TODO: don't recalc consts always */ 3244 const float Loff = -0.0627; 3245 const float Coff = -0.502; 3246 float uvcosf, uvsinf; 3247 float yco; 3248 float uco[3], vco[3], off[3]; 3249 float bright, cont, gamma; 3250 int ref = pPriv->transform_index; 3251 3252 cont = RTFContrast(pPriv->contrast); 3253 bright = RTFBrightness(pPriv->brightness); 3254 gamma = (float)pPriv->gamma / 1000.0; 3255 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3256 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3257 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 3258 3259 yco = trans[ref].RefLuma * cont; 3260 uco[0] = -trans[ref].RefRCr * uvsinf; 3261 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3262 uco[2] = trans[ref].RefBCb * uvcosf; 3263 vco[0] = trans[ref].RefRCr * uvcosf; 3264 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3265 vco[2] = trans[ref].RefBCb * uvsinf; 3266 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3267 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3268 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3269 3270 //XXX gamma 3271 3272 if (pPriv->is_planar) { 3273 BEGIN_RING(2*56); 3274 /* 2 components: 2 for tex0 */ 3275 OUT_RING_REG(R300_RS_COUNT, 3276 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3277 R300_RS_COUNT_HIRES_EN)); 3278 3279 /* R300_INST_COUNT_RS - highest RS instruction used */ 3280 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3281 3282 /* Pixel stack frame size. */ 3283 OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3284 3285 /* FP length. */ 3286 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3287 R500_US_CODE_END_ADDR(5))); 3288 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3289 R500_US_CODE_RANGE_SIZE(5))); 3290 3291 /* Prepare for FP emission. */ 3292 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3293 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3294 3295 /* tex inst */ 3296 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3297 R500_INST_TEX_SEM_WAIT | 3298 R500_INST_RGB_WMASK_R | 3299 R500_INST_RGB_WMASK_G | 3300 R500_INST_RGB_WMASK_B | 3301 R500_INST_ALPHA_WMASK | 3302 R500_INST_RGB_CLAMP | 3303 R500_INST_ALPHA_CLAMP)); 3304 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3305 R500_TEX_INST_LD | 3306 R500_TEX_IGNORE_UNCOVERED)); 3307 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3308 R500_TEX_SRC_S_SWIZ_R | 3309 R500_TEX_SRC_T_SWIZ_G | 3310 R500_TEX_DST_ADDR(2) | 3311 R500_TEX_DST_R_SWIZ_R | 3312 R500_TEX_DST_G_SWIZ_G | 3313 R500_TEX_DST_B_SWIZ_B | 3314 R500_TEX_DST_A_SWIZ_A)); 3315 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3316 R500_DX_S_SWIZ_R | 3317 R500_DX_T_SWIZ_R | 3318 R500_DX_R_SWIZ_R | 3319 R500_DX_Q_SWIZ_R | 3320 R500_DY_ADDR(0) | 3321 R500_DY_S_SWIZ_R | 3322 R500_DY_T_SWIZ_R | 3323 R500_DY_R_SWIZ_R | 3324 R500_DY_Q_SWIZ_R)); 3325 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3326 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3327 3328 /* tex inst */ 3329 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3330 R500_INST_TEX_SEM_WAIT | 3331 R500_INST_RGB_WMASK_R | 3332 R500_INST_RGB_WMASK_G | 3333 R500_INST_RGB_WMASK_B | 3334 R500_INST_ALPHA_WMASK | 3335 R500_INST_RGB_CLAMP | 3336 R500_INST_ALPHA_CLAMP)); 3337 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3338 R500_TEX_INST_LD | 3339 R500_TEX_IGNORE_UNCOVERED)); 3340 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3341 R500_TEX_SRC_S_SWIZ_R | 3342 R500_TEX_SRC_T_SWIZ_G | 3343 R500_TEX_DST_ADDR(1) | 3344 R500_TEX_DST_R_SWIZ_R | 3345 R500_TEX_DST_G_SWIZ_G | 3346 R500_TEX_DST_B_SWIZ_B | 3347 R500_TEX_DST_A_SWIZ_A)); 3348 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3349 R500_DX_S_SWIZ_R | 3350 R500_DX_T_SWIZ_R | 3351 R500_DX_R_SWIZ_R | 3352 R500_DX_Q_SWIZ_R | 3353 R500_DY_ADDR(0) | 3354 R500_DY_S_SWIZ_R | 3355 R500_DY_T_SWIZ_R | 3356 R500_DY_R_SWIZ_R | 3357 R500_DY_Q_SWIZ_R)); 3358 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3359 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3360 3361 /* tex inst */ 3362 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3363 R500_INST_TEX_SEM_WAIT | 3364 R500_INST_RGB_WMASK_R | 3365 R500_INST_RGB_WMASK_G | 3366 R500_INST_RGB_WMASK_B | 3367 R500_INST_ALPHA_WMASK | 3368 R500_INST_RGB_CLAMP | 3369 R500_INST_ALPHA_CLAMP)); 3370 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3371 R500_TEX_INST_LD | 3372 R500_TEX_SEM_ACQUIRE | 3373 R500_TEX_IGNORE_UNCOVERED)); 3374 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3375 R500_TEX_SRC_S_SWIZ_R | 3376 R500_TEX_SRC_T_SWIZ_G | 3377 R500_TEX_DST_ADDR(0) | 3378 R500_TEX_DST_R_SWIZ_R | 3379 R500_TEX_DST_G_SWIZ_G | 3380 R500_TEX_DST_B_SWIZ_B | 3381 R500_TEX_DST_A_SWIZ_A)); 3382 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3383 R500_DX_S_SWIZ_R | 3384 R500_DX_T_SWIZ_R | 3385 R500_DX_R_SWIZ_R | 3386 R500_DX_Q_SWIZ_R | 3387 R500_DY_ADDR(0) | 3388 R500_DY_S_SWIZ_R | 3389 R500_DY_T_SWIZ_R | 3390 R500_DY_R_SWIZ_R | 3391 R500_DY_Q_SWIZ_R)); 3392 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3393 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3394 3395 /* ALU inst */ 3396 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 3397 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3398 R500_INST_TEX_SEM_WAIT | 3399 R500_INST_RGB_WMASK_R | 3400 R500_INST_RGB_WMASK_G | 3401 R500_INST_RGB_WMASK_B | 3402 R500_INST_ALPHA_WMASK)); 3403 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3404 R500_RGB_ADDR0_CONST | 3405 R500_RGB_ADDR1(2) | 3406 R500_RGB_ADDR2(0) | 3407 R500_RGB_ADDR2_CONST)); 3408 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3409 R500_ALPHA_ADDR0_CONST | 3410 R500_ALPHA_ADDR1(2) | 3411 R500_ALPHA_ADDR2(0) | 3412 R500_ALPHA_ADDR2_CONST)); 3413 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3414 R500_ALU_RGB_R_SWIZ_A_A | 3415 R500_ALU_RGB_G_SWIZ_A_A | 3416 R500_ALU_RGB_B_SWIZ_A_A | 3417 R500_ALU_RGB_SEL_B_SRC1 | 3418 R500_ALU_RGB_R_SWIZ_B_R | 3419 R500_ALU_RGB_B_SWIZ_B_G | 3420 R500_ALU_RGB_G_SWIZ_B_B)); 3421 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3422 R500_ALPHA_ADDRD(2) | 3423 R500_ALPHA_SWIZ_A_0 | 3424 R500_ALPHA_SWIZ_B_0)); 3425 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3426 R500_ALU_RGBA_ADDRD(2) | 3427 R500_ALU_RGBA_SEL_C_SRC0 | 3428 R500_ALU_RGBA_R_SWIZ_R | 3429 R500_ALU_RGBA_G_SWIZ_G | 3430 R500_ALU_RGBA_B_SWIZ_B | 3431 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3432 R500_ALU_RGBA_A_SWIZ_0)); 3433 3434 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 3435 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3436 R500_INST_TEX_SEM_WAIT | 3437 R500_INST_RGB_WMASK_R | 3438 R500_INST_RGB_WMASK_G | 3439 R500_INST_RGB_WMASK_B | 3440 R500_INST_ALPHA_WMASK)); 3441 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3442 R500_RGB_ADDR0_CONST | 3443 R500_RGB_ADDR1(1) | 3444 R500_RGB_ADDR2(2))); 3445 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3446 R500_ALPHA_ADDR0_CONST | 3447 R500_ALPHA_ADDR1(1) | 3448 R500_ALPHA_ADDR2(2))); 3449 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3450 R500_ALU_RGB_R_SWIZ_A_R | 3451 R500_ALU_RGB_G_SWIZ_A_G | 3452 R500_ALU_RGB_B_SWIZ_A_B | 3453 R500_ALU_RGB_SEL_B_SRC1 | 3454 R500_ALU_RGB_R_SWIZ_B_R | 3455 R500_ALU_RGB_B_SWIZ_B_G | 3456 R500_ALU_RGB_G_SWIZ_B_B)); 3457 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3458 R500_ALPHA_ADDRD(2) | 3459 R500_ALPHA_SWIZ_A_0 | 3460 R500_ALPHA_SWIZ_B_0)); 3461 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3462 R500_ALU_RGBA_ADDRD(2) | 3463 R500_ALU_RGBA_SEL_C_SRC2 | 3464 R500_ALU_RGBA_R_SWIZ_R | 3465 R500_ALU_RGBA_G_SWIZ_G | 3466 R500_ALU_RGBA_B_SWIZ_B | 3467 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3468 R500_ALU_RGBA_A_SWIZ_0)); 3469 3470 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 3471 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3472 R500_INST_TEX_SEM_WAIT | 3473 R500_INST_LAST | 3474 R500_INST_RGB_OMASK_R | 3475 R500_INST_RGB_OMASK_G | 3476 R500_INST_RGB_OMASK_B | 3477 R500_INST_ALPHA_OMASK | 3478 R500_INST_RGB_CLAMP | 3479 R500_INST_ALPHA_CLAMP)); 3480 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3481 R500_RGB_ADDR0_CONST | 3482 R500_RGB_ADDR1(0) | 3483 R500_RGB_ADDR2(2))); 3484 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3485 R500_ALPHA_ADDR0_CONST | 3486 R500_ALPHA_ADDR1(0) | 3487 R500_ALPHA_ADDR2(2))); 3488 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3489 R500_ALU_RGB_R_SWIZ_A_R | 3490 R500_ALU_RGB_G_SWIZ_A_G | 3491 R500_ALU_RGB_B_SWIZ_A_B | 3492 R500_ALU_RGB_SEL_B_SRC1 | 3493 R500_ALU_RGB_R_SWIZ_B_R | 3494 R500_ALU_RGB_B_SWIZ_B_G | 3495 R500_ALU_RGB_G_SWIZ_B_B)); 3496 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3497 R500_ALPHA_ADDRD(0) | 3498 R500_ALPHA_SWIZ_A_0 | 3499 R500_ALPHA_SWIZ_B_0)); 3500 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3501 R500_ALU_RGBA_ADDRD(0) | 3502 R500_ALU_RGBA_SEL_C_SRC2 | 3503 R500_ALU_RGBA_R_SWIZ_R | 3504 R500_ALU_RGBA_G_SWIZ_G | 3505 R500_ALU_RGBA_B_SWIZ_B | 3506 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3507 R500_ALU_RGBA_A_SWIZ_1)); 3508 3509 } else { 3510 BEGIN_RING(2*44); 3511 /* 2 components: 2 for tex0/1/2 */ 3512 OUT_RING_REG(R300_RS_COUNT, 3513 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3514 R300_RS_COUNT_HIRES_EN)); 3515 3516 /* R300_INST_COUNT_RS - highest RS instruction used */ 3517 OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3518 3519 /* Pixel stack frame size. */ 3520 OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3521 3522 /* FP length. */ 3523 OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3524 R500_US_CODE_END_ADDR(3))); 3525 OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3526 R500_US_CODE_RANGE_SIZE(3))); 3527 3528 /* Prepare for FP emission. */ 3529 OUT_RING_REG(R500_US_CODE_OFFSET, 0); 3530 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3531 3532 /* tex inst */ 3533 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3534 R500_INST_TEX_SEM_WAIT | 3535 R500_INST_RGB_WMASK_R | 3536 R500_INST_RGB_WMASK_G | 3537 R500_INST_RGB_WMASK_B | 3538 R500_INST_ALPHA_WMASK | 3539 R500_INST_RGB_CLAMP | 3540 R500_INST_ALPHA_CLAMP)); 3541 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3542 R500_TEX_INST_LD | 3543 R500_TEX_SEM_ACQUIRE | 3544 R500_TEX_IGNORE_UNCOVERED)); 3545 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3546 R500_TEX_SRC_S_SWIZ_R | 3547 R500_TEX_SRC_T_SWIZ_G | 3548 R500_TEX_DST_ADDR(0) | 3549 R500_TEX_DST_R_SWIZ_R | 3550 R500_TEX_DST_G_SWIZ_G | 3551 R500_TEX_DST_B_SWIZ_B | 3552 R500_TEX_DST_A_SWIZ_A)); 3553 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3554 R500_DX_S_SWIZ_R | 3555 R500_DX_T_SWIZ_R | 3556 R500_DX_R_SWIZ_R | 3557 R500_DX_Q_SWIZ_R | 3558 R500_DY_ADDR(0) | 3559 R500_DY_S_SWIZ_R | 3560 R500_DY_T_SWIZ_R | 3561 R500_DY_R_SWIZ_R | 3562 R500_DY_Q_SWIZ_R)); 3563 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3564 OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3565 3566 /* ALU inst */ 3567 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 3568 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3569 R500_INST_TEX_SEM_WAIT | 3570 R500_INST_RGB_WMASK_R | 3571 R500_INST_RGB_WMASK_G | 3572 R500_INST_RGB_WMASK_B | 3573 R500_INST_ALPHA_WMASK)); 3574 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3575 R500_RGB_ADDR0_CONST | 3576 R500_RGB_ADDR1(0) | 3577 R500_RGB_ADDR2(0) | 3578 R500_RGB_ADDR2_CONST)); 3579 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3580 R500_ALPHA_ADDR0_CONST | 3581 R500_ALPHA_ADDR1(0) | 3582 R500_ALPHA_ADDR2(0) | 3583 R500_ALPHA_ADDR2_CONST)); 3584 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3585 R500_ALU_RGB_R_SWIZ_A_A | 3586 R500_ALU_RGB_G_SWIZ_A_A | 3587 R500_ALU_RGB_B_SWIZ_A_A | 3588 R500_ALU_RGB_SEL_B_SRC1 | 3589 R500_ALU_RGB_R_SWIZ_B_G | 3590 R500_ALU_RGB_B_SWIZ_B_G | 3591 R500_ALU_RGB_G_SWIZ_B_G)); 3592 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3593 R500_ALPHA_ADDRD(1) | 3594 R500_ALPHA_SWIZ_A_0 | 3595 R500_ALPHA_SWIZ_B_0)); 3596 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3597 R500_ALU_RGBA_ADDRD(1) | 3598 R500_ALU_RGBA_SEL_C_SRC0 | 3599 R500_ALU_RGBA_R_SWIZ_R | 3600 R500_ALU_RGBA_G_SWIZ_G | 3601 R500_ALU_RGBA_B_SWIZ_B | 3602 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3603 R500_ALU_RGBA_A_SWIZ_0)); 3604 3605 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 3606 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3607 R500_INST_TEX_SEM_WAIT | 3608 R500_INST_RGB_WMASK_R | 3609 R500_INST_RGB_WMASK_G | 3610 R500_INST_RGB_WMASK_B | 3611 R500_INST_ALPHA_WMASK)); 3612 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3613 R500_RGB_ADDR0_CONST | 3614 R500_RGB_ADDR1(0) | 3615 R500_RGB_ADDR2(1))); 3616 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3617 R500_ALPHA_ADDR0_CONST | 3618 R500_ALPHA_ADDR1(0) | 3619 R500_ALPHA_ADDR2(1))); 3620 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3621 R500_ALU_RGB_R_SWIZ_A_R | 3622 R500_ALU_RGB_G_SWIZ_A_G | 3623 R500_ALU_RGB_B_SWIZ_A_B | 3624 R500_ALU_RGB_SEL_B_SRC1 | 3625 R500_ALU_RGB_R_SWIZ_B_B | 3626 R500_ALU_RGB_B_SWIZ_B_B | 3627 R500_ALU_RGB_G_SWIZ_B_B)); 3628 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3629 R500_ALPHA_ADDRD(1) | 3630 R500_ALPHA_SWIZ_A_0 | 3631 R500_ALPHA_SWIZ_B_0)); 3632 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3633 R500_ALU_RGBA_ADDRD(1) | 3634 R500_ALU_RGBA_SEL_C_SRC2 | 3635 R500_ALU_RGBA_R_SWIZ_R | 3636 R500_ALU_RGBA_G_SWIZ_G | 3637 R500_ALU_RGBA_B_SWIZ_B | 3638 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3639 R500_ALU_RGBA_A_SWIZ_0)); 3640 3641 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 3642 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3643 R500_INST_TEX_SEM_WAIT | 3644 R500_INST_LAST | 3645 R500_INST_RGB_OMASK_R | 3646 R500_INST_RGB_OMASK_G | 3647 R500_INST_RGB_OMASK_B | 3648 R500_INST_ALPHA_OMASK | 3649 R500_INST_RGB_CLAMP | 3650 R500_INST_ALPHA_CLAMP)); 3651 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3652 R500_RGB_ADDR0_CONST | 3653 R500_RGB_ADDR1(0) | 3654 R500_RGB_ADDR2(1))); 3655 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3656 R500_ALPHA_ADDR0_CONST | 3657 R500_ALPHA_ADDR1(0) | 3658 R500_ALPHA_ADDR2(1))); 3659 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3660 R500_ALU_RGB_R_SWIZ_A_R | 3661 R500_ALU_RGB_G_SWIZ_A_G | 3662 R500_ALU_RGB_B_SWIZ_A_B | 3663 R500_ALU_RGB_SEL_B_SRC1 | 3664 R500_ALU_RGB_R_SWIZ_B_R | 3665 R500_ALU_RGB_B_SWIZ_B_R | 3666 R500_ALU_RGB_G_SWIZ_B_R)); 3667 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3668 R500_ALPHA_ADDRD(1) | 3669 R500_ALPHA_SWIZ_A_0 | 3670 R500_ALPHA_SWIZ_B_0)); 3671 OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3672 R500_ALU_RGBA_ADDRD(1) | 3673 R500_ALU_RGBA_SEL_C_SRC2 | 3674 R500_ALU_RGBA_R_SWIZ_R | 3675 R500_ALU_RGBA_G_SWIZ_G | 3676 R500_ALU_RGBA_B_SWIZ_B | 3677 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3678 R500_ALU_RGBA_A_SWIZ_1)); 3679 } 3680 3681 /* Shader constants. */ 3682 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3683 3684 /* constant 0: off, yco */ 3685 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 3686 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 3687 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 3688 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 3689 /* constant 1: uco */ 3690 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 3691 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 3692 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 3693 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 3694 /* constant 2: vco */ 3695 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 3696 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 3697 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 3698 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 3699 3700 ADVANCE_RING(); 3701 } 3702 3703 BEGIN_ACCEL_RELOC(6, 2); 3704 OUT_RING_REG(R300_TX_INVALTAGS, 0); 3705 OUT_RING_REG(R300_TX_ENABLE, txenable); 3706 3707 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 3708 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 3709 3710 /* no need to enable blending */ 3711 OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 3712 3713 OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 3714 ADVANCE_RING(); 3715 3716 if (pPriv->vsync) { 3717 xf86CrtcPtr crtc; 3718 if (pPriv->desired_crtc) 3719 crtc = pPriv->desired_crtc; 3720 else 3721 crtc = radeon_pick_best_crtc(pScrn, FALSE, 3722 pPriv->drw_x, 3723 pPriv->drw_x + pPriv->dst_w, 3724 pPriv->drw_y, 3725 pPriv->drw_y + pPriv->dst_h); 3726 if (crtc) 3727 RADEONWaitForVLine(pScrn, pPixmap, 3728 crtc, 3729 pPriv->drw_y - crtc->y, 3730 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 3731 } 3732 3733 return TRUE; 3734} 3735 3736static void 3737R500DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 3738{ 3739 RADEONInfoPtr info = RADEONPTR(pScrn); 3740 PixmapPtr pPixmap = pPriv->pPixmap; 3741 int dstxoff, dstyoff; 3742 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 3743 int nBox = REGION_NUM_RECTS(&pPriv->clip); 3744 3745#ifdef COMPOSITE 3746 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 3747 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 3748#else 3749 dstxoff = 0; 3750 dstyoff = 0; 3751#endif 3752 3753 if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3754 return; 3755 3756 /* 3757 * Rendering of the actual polygon is done in two different 3758 * ways depending on chip generation: 3759 * 3760 * < R300: 3761 * 3762 * These chips can render a rectangle in one pass, so 3763 * handling is pretty straight-forward. 3764 * 3765 * >= R300: 3766 * 3767 * These chips can accept a quad, but will render it as 3768 * two triangles which results in a diagonal tear. Instead 3769 * We render a single, large triangle and use the scissor 3770 * functionality to restrict it to the desired rectangle. 3771 * Due to guardband limits on r3xx/r4xx, we can only use 3772 * the single triangle up to 2880 pixels; above that we 3773 * render as a quad. 3774 */ 3775 3776 while (nBox--) { 3777 float srcX, srcY, srcw, srch; 3778 int dstX, dstY, dstw, dsth; 3779 int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 3780 3781 if (draw_size > radeon_cs_space_remaining(pScrn)) { 3782 radeon_cs_flush_indirect(pScrn); 3783 if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3784 return; 3785 } 3786 3787 dstX = pBox->x1 + dstxoff; 3788 dstY = pBox->y1 + dstyoff; 3789 dstw = pBox->x2 - pBox->x1; 3790 dsth = pBox->y2 - pBox->y1; 3791 3792 srcX = pPriv->src_x; 3793 srcX += ((pBox->x1 - pPriv->drw_x) * 3794 pPriv->src_w) / (float)pPriv->dst_w; 3795 srcY = pPriv->src_y; 3796 srcY += ((pBox->y1 - pPriv->drw_y) * 3797 pPriv->src_h) / (float)pPriv->dst_h; 3798 3799 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 3800 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 3801 3802 BEGIN_RING(2*2); 3803 OUT_RING_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 3804 ((dstY) << R300_SCISSOR_Y_SHIFT))); 3805 OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 3806 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 3807 ADVANCE_RING(); 3808 3809 BEGIN_RING(3 * pPriv->vtx_count + 4); 3810 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 3811 3 * pPriv->vtx_count)); 3812 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 3813 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 3814 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 3815 3816 if (pPriv->bicubic_enabled) { 3817 VTX_OUT_6((float)dstX, (float)dstY, 3818 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 3819 (float)srcX + 0.5, (float)srcY + 0.5); 3820 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 3821 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 3822 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 3823 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 3824 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3825 (float)srcY / pPriv->h, 3826 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 3827 (float)srcY + 0.5); 3828 } else { 3829 /* 3830 * Render a big, scissored triangle. This means 3831 * increasing the triangle size and adjusting 3832 * texture coordinates. 3833 */ 3834 VTX_OUT_4((float)dstX, (float)dstY, 3835 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 3836 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 3837 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 3838 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 3839 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3840 (float)srcY / pPriv->h); 3841 } 3842 3843 /* flushing is pipelined, free/finish is not */ 3844 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 3845 3846 ADVANCE_RING(); 3847 3848 pBox++; 3849 } 3850 3851 BEGIN_RING(2*3); 3852 OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA); 3853 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 3854 OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 3855 ADVANCE_RING(); 3856 3857 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 3858} 3859 3860#undef VTX_OUT_4 3861#undef VTX_OUT_6 3862