radeon_textured_videofuncs.c revision 7821949a
1de2362d3Smrg/* 2de2362d3Smrg * Copyright 2008 Alex Deucher 3de2362d3Smrg * 4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5de2362d3Smrg * copy of this software and associated documentation files (the "Software"), 6de2362d3Smrg * to deal in the Software without restriction, including without limitation 7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the 9de2362d3Smrg * Software is furnished to do so, subject to the following conditions: 10de2362d3Smrg * 11de2362d3Smrg * The above copyright notice and this permission notice (including the next 12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the 13de2362d3Smrg * Software. 14de2362d3Smrg * 15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21de2362d3Smrg * SOFTWARE. 22de2362d3Smrg * 23de2362d3Smrg * 24de2362d3Smrg * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 25de2362d3Smrg * 26de2362d3Smrg */ 27de2362d3Smrg 287821949aSmrg#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 297821949aSmrg#error Cannot define both MMIO and CP acceleration! 
307821949aSmrg#endif 317821949aSmrg 327821949aSmrg#if !defined(UNIXCPP) || defined(ANSICPP) 337821949aSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 347821949aSmrg#else 357821949aSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 367821949aSmrg#endif 377821949aSmrg 387821949aSmrg#ifdef ACCEL_MMIO 397821949aSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 407821949aSmrg#else 417821949aSmrg#ifdef ACCEL_CP 427821949aSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 437821949aSmrg#else 447821949aSmrg#error No accel type defined! 457821949aSmrg#endif 467821949aSmrg#endif 477821949aSmrg 487821949aSmrg#ifdef ACCEL_CP 497821949aSmrg 50de2362d3Smrg#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 51de2362d3Smrgdo { \ 527821949aSmrg OUT_RING_F(_dstX); \ 537821949aSmrg OUT_RING_F(_dstY); \ 547821949aSmrg OUT_RING_F(_srcX); \ 557821949aSmrg OUT_RING_F(_srcY); \ 567821949aSmrg OUT_RING_F(_maskX); \ 577821949aSmrg OUT_RING_F(_maskY); \ 58de2362d3Smrg} while (0) 59de2362d3Smrg 60de2362d3Smrg#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 61de2362d3Smrgdo { \ 627821949aSmrg OUT_RING_F(_dstX); \ 637821949aSmrg OUT_RING_F(_dstY); \ 647821949aSmrg OUT_RING_F(_srcX); \ 657821949aSmrg OUT_RING_F(_srcY); \ 66de2362d3Smrg} while (0) 67de2362d3Smrg 687821949aSmrg#else /* ACCEL_CP */ 697821949aSmrg 707821949aSmrg#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 717821949aSmrgdo { \ 727821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 737821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 747821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 757821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 767821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX); \ 777821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ 787821949aSmrg} while (0) 797821949aSmrg 807821949aSmrg#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 817821949aSmrgdo { \ 827821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, 
_dstX); \ 837821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 847821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 857821949aSmrg OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 867821949aSmrg} while (0) 877821949aSmrg 887821949aSmrg#endif /* !ACCEL_CP */ 89de2362d3Smrg 90de2362d3Smrgstatic Bool 917821949aSmrgFUNC_NAME(RADEONPrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 92de2362d3Smrg{ 93de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 94de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 95de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 96de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 977821949aSmrg uint32_t txformat, txsize, txpitch, txoffset; 98de2362d3Smrg uint32_t dst_pitch, dst_format; 99de2362d3Smrg uint32_t colorpitch; 100de2362d3Smrg int pixel_shift; 1017821949aSmrg int scissor_w = MIN(pPixmap->drawable.width, 2047); 1027821949aSmrg int scissor_h = MIN(pPixmap->drawable.height, 2047); 1037821949aSmrg ACCEL_PREAMBLE(); 104de2362d3Smrg 1057821949aSmrg#ifdef XF86DRM_MODE 1067821949aSmrg if (info->cs) { 1077821949aSmrg int ret; 108de2362d3Smrg 1097821949aSmrg radeon_cs_space_reset_bos(info->cs); 1107821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 111de2362d3Smrg 1127821949aSmrg if (pPriv->bicubic_enabled) 1137821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 114de2362d3Smrg 1157821949aSmrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 1167821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 1177821949aSmrg 1187821949aSmrg ret = radeon_cs_space_check(info->cs); 1197821949aSmrg if (ret) { 1207821949aSmrg ErrorF("Not enough RAM to hw accel xv operation\n"); 1217821949aSmrg return FALSE; 1227821949aSmrg } 123de2362d3Smrg } 1247821949aSmrg#else 1257821949aSmrg (void)src_bo; 
1267821949aSmrg#endif 127de2362d3Smrg 128de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 129de2362d3Smrg 1307821949aSmrg 1317821949aSmrg#ifdef USE_EXA 1327821949aSmrg if (info->useEXA) { 1337821949aSmrg dst_pitch = exaGetPixmapPitch(pPixmap); 1347821949aSmrg } else 1357821949aSmrg#endif 1367821949aSmrg { 1377821949aSmrg dst_pitch = pPixmap->devKind; 1387821949aSmrg } 1397821949aSmrg 1407821949aSmrg#ifdef USE_EXA 1417821949aSmrg if (info->useEXA) { 1427821949aSmrg RADEON_SWITCH_TO_3D(); 1437821949aSmrg } else 1447821949aSmrg#endif 1457821949aSmrg { 1467821949aSmrg BEGIN_ACCEL(2); 1477821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 1487821949aSmrg /* We must wait for 3d to idle, in case source was just written as a dest. */ 1497821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 1507821949aSmrg RADEON_WAIT_HOST_IDLECLEAN | 1517821949aSmrg RADEON_WAIT_2D_IDLECLEAN | 1527821949aSmrg RADEON_WAIT_3D_IDLECLEAN | 1537821949aSmrg RADEON_WAIT_DMA_GUI_IDLE); 1547821949aSmrg FINISH_ACCEL(); 1557821949aSmrg 1567821949aSmrg if (!info->accel_state->XInited3D) 1577821949aSmrg RADEONInit3DEngine(pScrn); 1587821949aSmrg } 159de2362d3Smrg 160de2362d3Smrg /* Same for R100/R200 */ 161de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 162de2362d3Smrg case 16: 163de2362d3Smrg if (pPixmap->drawable.depth == 15) 164de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB1555; 165de2362d3Smrg else 166de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_RGB565; 167de2362d3Smrg break; 168de2362d3Smrg case 32: 169de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB8888; 170de2362d3Smrg break; 171de2362d3Smrg default: 172de2362d3Smrg return FALSE; 173de2362d3Smrg } 174de2362d3Smrg 175de2362d3Smrg if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 176de2362d3Smrg pPriv->is_planar = TRUE; 177de2362d3Smrg txformat = RADEON_TXFORMAT_Y8; 178de2362d3Smrg } else { 179de2362d3Smrg pPriv->is_planar = FALSE; 180de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 
181de2362d3Smrg txformat = RADEON_TXFORMAT_YVYU422; 182de2362d3Smrg else 183de2362d3Smrg txformat = RADEON_TXFORMAT_VYUY422; 184de2362d3Smrg } 185de2362d3Smrg 186de2362d3Smrg txformat |= RADEON_TXFORMAT_NON_POWER2; 187de2362d3Smrg 188de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 189de2362d3Smrg 190de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 191de2362d3Smrg colorpitch |= RADEON_COLOR_TILE_ENABLE; 192de2362d3Smrg 1937821949aSmrg txoffset = info->cs ? 0 : pPriv->src_offset; 1947821949aSmrg 195de2362d3Smrg BEGIN_ACCEL_RELOC(4,2); 196de2362d3Smrg 1977821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 198de2362d3Smrg EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 199de2362d3Smrg EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 2007821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 201de2362d3Smrg RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 202de2362d3Smrg 2037821949aSmrg FINISH_ACCEL(); 204de2362d3Smrg 205de2362d3Smrg if (pPriv->is_planar) { 206de2362d3Smrg /* need 2 texcoord sets (even though they are identical) due 207de2362d3Smrg to denormalization! 
hw apparently can't premultiply 208de2362d3Smrg same coord set by different texture size */ 209de2362d3Smrg pPriv->vtx_count = 6; 210de2362d3Smrg 211de2362d3Smrg txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 212de2362d3Smrg (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 213de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 214de2362d3Smrg txpitch -= 32; 215de2362d3Smrg 216de2362d3Smrg BEGIN_ACCEL_RELOC(23, 3); 217de2362d3Smrg 2187821949aSmrg OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 219de2362d3Smrg RADEON_SE_VTX_FMT_ST0 | 220de2362d3Smrg RADEON_SE_VTX_FMT_ST1)); 221de2362d3Smrg 2227821949aSmrg OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 223de2362d3Smrg RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 224de2362d3Smrg RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 225de2362d3Smrg RADEON_PLANAR_YUV_ENABLE)); 226de2362d3Smrg 227de2362d3Smrg /* Y */ 2287821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 229de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 230de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 231de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 232de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST | 233de2362d3Smrg RADEON_YUV_TO_RGB); 2347821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 2357821949aSmrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 2367821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 237de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 238de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 239de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 240de2362d3Smrg RADEON_BLEND_CTL_ADD | 241de2362d3Smrg RADEON_CLAMP_TX); 2427821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 243de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 244de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 245de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 246de2362d3Smrg RADEON_BLEND_CTL_ADD | 247de2362d3Smrg RADEON_CLAMP_TX); 248de2362d3Smrg 2497821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 250de2362d3Smrg (pPriv->w - 1) | 
251de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 2527821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 253de2362d3Smrg pPriv->src_pitch - 32); 254de2362d3Smrg 255de2362d3Smrg /* U */ 2567821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, 257de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 258de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 259de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 260de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST); 2617821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 2627821949aSmrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 2637821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1, 264de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 265de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 266de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 267de2362d3Smrg RADEON_BLEND_CTL_ADD | 268de2362d3Smrg RADEON_CLAMP_TX); 2697821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXABLEND_1, 270de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 271de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 272de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 273de2362d3Smrg RADEON_BLEND_CTL_ADD | 274de2362d3Smrg RADEON_CLAMP_TX); 275de2362d3Smrg 2767821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize); 2777821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch); 278de2362d3Smrg 279de2362d3Smrg /* V */ 2807821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFILTER_2, 281de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 282de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 283de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 284de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST); 2857821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 2867821949aSmrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 2877821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2, 288de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 289de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 290de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 291de2362d3Smrg RADEON_BLEND_CTL_ADD | 292de2362d3Smrg RADEON_CLAMP_TX); 2937821949aSmrg 
OUT_ACCEL_REG(RADEON_PP_TXABLEND_2, 294de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 295de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 296de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 297de2362d3Smrg RADEON_BLEND_CTL_ADD | 298de2362d3Smrg RADEON_CLAMP_TX); 299de2362d3Smrg 3007821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize); 3017821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch); 3027821949aSmrg FINISH_ACCEL(); 303de2362d3Smrg } else { 304de2362d3Smrg pPriv->vtx_count = 4; 305de2362d3Smrg BEGIN_ACCEL_RELOC(9, 1); 306de2362d3Smrg 3077821949aSmrg OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 308de2362d3Smrg RADEON_SE_VTX_FMT_ST0)); 309de2362d3Smrg 3107821949aSmrg OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 311de2362d3Smrg 3127821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 313de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 314de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 315de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 316de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST | 317de2362d3Smrg RADEON_YUV_TO_RGB); 3187821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 3197821949aSmrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 3207821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 321de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 322de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 323de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 324de2362d3Smrg RADEON_BLEND_CTL_ADD | 325de2362d3Smrg RADEON_CLAMP_TX); 3267821949aSmrg OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 327de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 328de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 329de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 330de2362d3Smrg RADEON_BLEND_CTL_ADD | 331de2362d3Smrg RADEON_CLAMP_TX); 332de2362d3Smrg 3337821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 334de2362d3Smrg (pPriv->w - 1) | 335de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 3367821949aSmrg OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 337de2362d3Smrg pPriv->src_pitch - 32); 3387821949aSmrg FINISH_ACCEL(); 
339de2362d3Smrg } 340de2362d3Smrg 3417821949aSmrg BEGIN_ACCEL(2); 3427821949aSmrg OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 3437821949aSmrg OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 344de2362d3Smrg (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 3457821949aSmrg FINISH_ACCEL(); 346de2362d3Smrg 347de2362d3Smrg if (pPriv->vsync) { 348de2362d3Smrg xf86CrtcPtr crtc; 349de2362d3Smrg if (pPriv->desired_crtc) 350de2362d3Smrg crtc = pPriv->desired_crtc; 351de2362d3Smrg else 3527821949aSmrg crtc = radeon_pick_best_crtc(pScrn, 353de2362d3Smrg pPriv->drw_x, 354de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 355de2362d3Smrg pPriv->drw_y, 356de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 357de2362d3Smrg if (crtc) 3587821949aSmrg FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 3597821949aSmrg crtc, 3607821949aSmrg pPriv->drw_y - crtc->y, 3617821949aSmrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 362de2362d3Smrg } 363de2362d3Smrg 364de2362d3Smrg return TRUE; 365de2362d3Smrg} 366de2362d3Smrg 367de2362d3Smrgstatic void 3687821949aSmrgFUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 369de2362d3Smrg{ 370de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 371de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 372de2362d3Smrg int dstxoff, dstyoff; 373de2362d3Smrg BoxPtr pBox = REGION_RECTS(&pPriv->clip); 374de2362d3Smrg int nBox = REGION_NUM_RECTS(&pPriv->clip); 3757821949aSmrg ACCEL_PREAMBLE(); 376de2362d3Smrg 377de2362d3Smrg#ifdef COMPOSITE 378de2362d3Smrg dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 379de2362d3Smrg dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 380de2362d3Smrg#else 381de2362d3Smrg dstxoff = 0; 382de2362d3Smrg dstyoff = 0; 383de2362d3Smrg#endif 384de2362d3Smrg 3857821949aSmrg if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 386de2362d3Smrg return; 387de2362d3Smrg 388de2362d3Smrg /* 389de2362d3Smrg * Rendering of the actual polygon is done in two different 390de2362d3Smrg * ways depending on chip 
generation: 391de2362d3Smrg * 392de2362d3Smrg * < R300: 393de2362d3Smrg * 394de2362d3Smrg * These chips can render a rectangle in one pass, so 395de2362d3Smrg * handling is pretty straight-forward. 396de2362d3Smrg * 397de2362d3Smrg * >= R300: 398de2362d3Smrg * 399de2362d3Smrg * These chips can accept a quad, but will render it as 400de2362d3Smrg * two triangles which results in a diagonal tear. Instead 401de2362d3Smrg * We render a single, large triangle and use the scissor 402de2362d3Smrg * functionality to restrict it to the desired rectangle. 403de2362d3Smrg * Due to guardband limits on r3xx/r4xx, we can only use 404de2362d3Smrg * the single triangle up to 2560/4021 pixels; above that we 405de2362d3Smrg * render as a quad. 406de2362d3Smrg */ 4077821949aSmrg#ifdef ACCEL_CP 408de2362d3Smrg while (nBox) { 409de2362d3Smrg int draw_size = 3 * pPriv->vtx_count + 5; 410de2362d3Smrg int loop_boxes; 411de2362d3Smrg 412de2362d3Smrg if (draw_size > radeon_cs_space_remaining(pScrn)) { 4137821949aSmrg if (info->cs) 4147821949aSmrg radeon_cs_flush_indirect(pScrn); 4157821949aSmrg else 4167821949aSmrg RADEONCPFlushIndirect(pScrn, 1); 4177821949aSmrg if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 418de2362d3Smrg return; 419de2362d3Smrg } 420de2362d3Smrg loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 421de2362d3Smrg nBox -= loop_boxes; 422de2362d3Smrg 423de2362d3Smrg BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 424de2362d3Smrg OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 425de2362d3Smrg loop_boxes * 3 * pPriv->vtx_count + 1)); 426de2362d3Smrg if (pPriv->is_planar) 427de2362d3Smrg OUT_RING(RADEON_CP_VC_FRMT_XY | 428de2362d3Smrg RADEON_CP_VC_FRMT_ST0 | 429de2362d3Smrg RADEON_CP_VC_FRMT_ST1); 430de2362d3Smrg else 431de2362d3Smrg OUT_RING(RADEON_CP_VC_FRMT_XY | 432de2362d3Smrg RADEON_CP_VC_FRMT_ST0); 433de2362d3Smrg OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 434de2362d3Smrg RADEON_CP_VC_CNTL_PRIM_WALK_RING | 435de2362d3Smrg 
RADEON_CP_VC_CNTL_MAOS_ENABLE | 436de2362d3Smrg RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 437de2362d3Smrg ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 438de2362d3Smrg 439de2362d3Smrg while (loop_boxes--) { 440de2362d3Smrg float srcX, srcY, srcw, srch; 441de2362d3Smrg int dstX, dstY, dstw, dsth; 442de2362d3Smrg dstX = pBox->x1 + dstxoff; 443de2362d3Smrg dstY = pBox->y1 + dstyoff; 444de2362d3Smrg dstw = pBox->x2 - pBox->x1; 445de2362d3Smrg dsth = pBox->y2 - pBox->y1; 446de2362d3Smrg 447de2362d3Smrg srcX = pPriv->src_x; 448de2362d3Smrg srcX += ((pBox->x1 - pPriv->drw_x) * 449de2362d3Smrg pPriv->src_w) / (float)pPriv->dst_w; 450de2362d3Smrg srcY = pPriv->src_y; 451de2362d3Smrg srcY += ((pBox->y1 - pPriv->drw_y) * 452de2362d3Smrg pPriv->src_h) / (float)pPriv->dst_h; 453de2362d3Smrg 454de2362d3Smrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 455de2362d3Smrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 456de2362d3Smrg 457de2362d3Smrg 458de2362d3Smrg if (pPriv->is_planar) { 459de2362d3Smrg /* 460de2362d3Smrg * Just render a rect (using three coords). 461de2362d3Smrg */ 462de2362d3Smrg VTX_OUT_6((float)dstX, (float)(dstY + dsth), 463de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 464de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 465de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 466de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 467de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 468de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)dstY, 469de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 470de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 471de2362d3Smrg } else { 472de2362d3Smrg /* 473de2362d3Smrg * Just render a rect (using three coords). 
474de2362d3Smrg */ 475de2362d3Smrg VTX_OUT_4((float)dstX, (float)(dstY + dsth), 476de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 477de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 478de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 479de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)dstY, 480de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 481de2362d3Smrg } 482de2362d3Smrg 483de2362d3Smrg pBox++; 484de2362d3Smrg } 485de2362d3Smrg 4867821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 487de2362d3Smrg ADVANCE_RING(); 488de2362d3Smrg } 4897821949aSmrg#else /* ACCEL_CP */ 4907821949aSmrg BEGIN_ACCEL(nBox * pPriv->vtx_count * 3 + 2); 4917821949aSmrg OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 4927821949aSmrg RADEON_VF_PRIM_WALK_DATA | 4937821949aSmrg RADEON_VF_RADEON_MODE | 4947821949aSmrg ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); 4957821949aSmrg while (nBox--) { 4967821949aSmrg float srcX, srcY, srcw, srch; 4977821949aSmrg int dstX, dstY, dstw, dsth; 4987821949aSmrg dstX = pBox->x1 + dstxoff; 4997821949aSmrg dstY = pBox->y1 + dstyoff; 5007821949aSmrg dstw = pBox->x2 - pBox->x1; 5017821949aSmrg dsth = pBox->y2 - pBox->y1; 5027821949aSmrg 5037821949aSmrg srcX = pPriv->src_x; 5047821949aSmrg srcX += ((pBox->x1 - pPriv->drw_x) * 5057821949aSmrg pPriv->src_w) / (float)pPriv->dst_w; 5067821949aSmrg srcY = pPriv->src_y; 5077821949aSmrg srcY += ((pBox->y1 - pPriv->drw_y) * 5087821949aSmrg pPriv->src_h) / (float)pPriv->dst_h; 5097821949aSmrg 5107821949aSmrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 5117821949aSmrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 5127821949aSmrg 5137821949aSmrg 5147821949aSmrg if (pPriv->is_planar) { 5157821949aSmrg /* 5167821949aSmrg * Just render a rect (using three coords). 
5177821949aSmrg */ 5187821949aSmrg VTX_OUT_6((float)dstX, (float)(dstY + dsth), 5197821949aSmrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 5207821949aSmrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 5217821949aSmrg VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 5227821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 5237821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 5247821949aSmrg VTX_OUT_6((float)(dstX + dstw), (float)dstY, 5257821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 5267821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 5277821949aSmrg } else { 5287821949aSmrg /* 5297821949aSmrg * Just render a rect (using three coords). 5307821949aSmrg */ 5317821949aSmrg VTX_OUT_4((float)dstX, (float)(dstY + dsth), 5327821949aSmrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 5337821949aSmrg VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 5347821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 5357821949aSmrg VTX_OUT_4((float)(dstX + dstw), (float)dstY, 5367821949aSmrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 5377821949aSmrg } 5387821949aSmrg 5397821949aSmrg pBox++; 5407821949aSmrg } 5417821949aSmrg 5427821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 5437821949aSmrg FINISH_ACCEL(); 5447821949aSmrg#endif /* !ACCEL_CP */ 5457821949aSmrg 546de2362d3Smrg DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 547de2362d3Smrg} 548de2362d3Smrg 549de2362d3Smrgstatic Bool 5507821949aSmrgFUNC_NAME(R200PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 551de2362d3Smrg{ 552de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 553de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 554de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 555de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 556de2362d3Smrg uint32_t txformat; 
5577821949aSmrg uint32_t txfilter, txsize, txpitch, txoffset; 558de2362d3Smrg uint32_t dst_pitch, dst_format; 559de2362d3Smrg uint32_t colorpitch; 560de2362d3Smrg int pixel_shift; 5617821949aSmrg int scissor_w = MIN(pPixmap->drawable.width, 2047); 5627821949aSmrg int scissor_h = MIN(pPixmap->drawable.height, 2047); 563de2362d3Smrg /* note: in contrast to r300, use input biasing on uv components */ 564de2362d3Smrg const float Loff = -0.0627; 565de2362d3Smrg float uvcosf, uvsinf; 566de2362d3Smrg float yco, yoff; 567de2362d3Smrg float uco[3], vco[3]; 568de2362d3Smrg float bright, cont, sat; 569de2362d3Smrg int ref = pPriv->transform_index; 570de2362d3Smrg float ucscale = 0.25, vcscale = 0.25; 571de2362d3Smrg Bool needux8 = FALSE, needvx8 = FALSE; 5727821949aSmrg ACCEL_PREAMBLE(); 573de2362d3Smrg 5747821949aSmrg#ifdef XF86DRM_MODE 5757821949aSmrg if (info->cs) { 5767821949aSmrg int ret; 577de2362d3Smrg 5787821949aSmrg radeon_cs_space_reset_bos(info->cs); 5797821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 5807821949aSmrg 5817821949aSmrg if (pPriv->bicubic_enabled) 5827821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 583de2362d3Smrg 5847821949aSmrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 5857821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 586de2362d3Smrg 5877821949aSmrg ret = radeon_cs_space_check(info->cs); 5887821949aSmrg if (ret) { 5897821949aSmrg ErrorF("Not enough RAM to hw accel xv operation\n"); 5907821949aSmrg return FALSE; 5917821949aSmrg } 592de2362d3Smrg } 5937821949aSmrg#else 5947821949aSmrg (void)src_bo; 5957821949aSmrg#endif 596de2362d3Smrg 597de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 598de2362d3Smrg 5997821949aSmrg#ifdef USE_EXA 6007821949aSmrg if (info->useEXA) { 6017821949aSmrg dst_pitch = exaGetPixmapPitch(pPixmap); 
6027821949aSmrg } else 6037821949aSmrg#endif 6047821949aSmrg { 6057821949aSmrg dst_pitch = pPixmap->devKind; 6067821949aSmrg } 607de2362d3Smrg 6087821949aSmrg#ifdef USE_EXA 6097821949aSmrg if (info->useEXA) { 6107821949aSmrg RADEON_SWITCH_TO_3D(); 6117821949aSmrg } else 6127821949aSmrg#endif 6137821949aSmrg { 6147821949aSmrg BEGIN_ACCEL(2); 6157821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 6167821949aSmrg /* We must wait for 3d to idle, in case source was just written as a dest. */ 6177821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 6187821949aSmrg RADEON_WAIT_HOST_IDLECLEAN | 6197821949aSmrg RADEON_WAIT_2D_IDLECLEAN | 6207821949aSmrg RADEON_WAIT_3D_IDLECLEAN | 6217821949aSmrg RADEON_WAIT_DMA_GUI_IDLE); 6227821949aSmrg FINISH_ACCEL(); 6237821949aSmrg 6247821949aSmrg if (!info->accel_state->XInited3D) 6257821949aSmrg RADEONInit3DEngine(pScrn); 6267821949aSmrg } 627de2362d3Smrg 628de2362d3Smrg /* Same for R100/R200 */ 629de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 630de2362d3Smrg case 16: 631de2362d3Smrg if (pPixmap->drawable.depth == 15) 632de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB1555; 633de2362d3Smrg else 634de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_RGB565; 635de2362d3Smrg break; 636de2362d3Smrg case 32: 637de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB8888; 638de2362d3Smrg break; 639de2362d3Smrg default: 640de2362d3Smrg return FALSE; 641de2362d3Smrg } 642de2362d3Smrg 643de2362d3Smrg if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 644de2362d3Smrg pPriv->is_planar = TRUE; 645de2362d3Smrg txformat = RADEON_TXFORMAT_I8; 646de2362d3Smrg } else { 647de2362d3Smrg pPriv->is_planar = FALSE; 648de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 649de2362d3Smrg txformat = RADEON_TXFORMAT_YVYU422; 650de2362d3Smrg else 651de2362d3Smrg txformat = RADEON_TXFORMAT_VYUY422; 652de2362d3Smrg } 653de2362d3Smrg 654de2362d3Smrg txformat |= RADEON_TXFORMAT_NON_POWER2; 655de2362d3Smrg 656de2362d3Smrg colorpitch = dst_pitch >> 
pixel_shift; 657de2362d3Smrg 658de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 659de2362d3Smrg colorpitch |= RADEON_COLOR_TILE_ENABLE; 660de2362d3Smrg 661de2362d3Smrg BEGIN_ACCEL_RELOC(4,2); 662de2362d3Smrg 6637821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 664de2362d3Smrg EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 665de2362d3Smrg EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 666de2362d3Smrg 6677821949aSmrg OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 668de2362d3Smrg RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 669de2362d3Smrg 6707821949aSmrg FINISH_ACCEL(); 671de2362d3Smrg 672de2362d3Smrg txfilter = R200_MAG_FILTER_LINEAR | 673de2362d3Smrg R200_MIN_FILTER_LINEAR | 674de2362d3Smrg R200_CLAMP_S_CLAMP_LAST | 675de2362d3Smrg R200_CLAMP_T_CLAMP_LAST; 676de2362d3Smrg 677de2362d3Smrg /* contrast can cause constant overflow, clamp */ 678de2362d3Smrg cont = RTFContrast(pPriv->contrast); 679de2362d3Smrg if (cont * trans[ref].RefLuma > 2.0) 680de2362d3Smrg cont = 2.0 / trans[ref].RefLuma; 681de2362d3Smrg /* brightness is only from -0.5 to 0.5 should be safe */ 682de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 683de2362d3Smrg /* saturation can also cause overflow, clamp */ 684de2362d3Smrg sat = RTFSaturation(pPriv->saturation); 685de2362d3Smrg if (sat * trans[ref].RefBCb > 4.0) 686de2362d3Smrg sat = 4.0 / trans[ref].RefBCb; 687de2362d3Smrg uvcosf = sat * cos(RTFHue(pPriv->hue)); 688de2362d3Smrg uvsinf = sat * sin(RTFHue(pPriv->hue)); 689de2362d3Smrg 690de2362d3Smrg yco = trans[ref].RefLuma * cont; 691de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 692de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 693de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 694de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 695de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 696de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 697de2362d3Smrg yoff = Loff * yco + bright; 
698de2362d3Smrg 699de2362d3Smrg if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 700de2362d3Smrg needux8 = TRUE; 701de2362d3Smrg ucscale = 0.125; 702de2362d3Smrg } 703de2362d3Smrg if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 704de2362d3Smrg needvx8 = TRUE; 705de2362d3Smrg vcscale = 0.125; 706de2362d3Smrg } 707de2362d3Smrg 7087821949aSmrg txoffset = info->cs ? 0 : pPriv->src_offset; 7097821949aSmrg 710de2362d3Smrg if (pPriv->is_planar) { 711de2362d3Smrg /* need 2 texcoord sets (even though they are identical) due 712de2362d3Smrg to denormalization! hw apparently can't premultiply 713de2362d3Smrg same coord set by different texture size */ 714de2362d3Smrg pPriv->vtx_count = 6; 715de2362d3Smrg 716de2362d3Smrg txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 717de2362d3Smrg (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 718de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 719de2362d3Smrg txpitch -= 32; 720de2362d3Smrg 721de2362d3Smrg BEGIN_ACCEL_RELOC(36, 3); 722de2362d3Smrg 7237821949aSmrg OUT_ACCEL_REG(RADEON_PP_CNTL, 724de2362d3Smrg RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 725de2362d3Smrg RADEON_TEX_BLEND_0_ENABLE | 726de2362d3Smrg RADEON_TEX_BLEND_1_ENABLE | 727de2362d3Smrg RADEON_TEX_BLEND_2_ENABLE); 728de2362d3Smrg 7297821949aSmrg OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 7307821949aSmrg OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 731de2362d3Smrg (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 732de2362d3Smrg (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 733de2362d3Smrg 7347821949aSmrg OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 7357821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 7367821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 7377821949aSmrg OUT_ACCEL_REG(R200_PP_TXSIZE_0, 738de2362d3Smrg (pPriv->w - 1) | 739de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 7407821949aSmrg OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 7417821949aSmrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 
7427821949aSmrg 7437821949aSmrg OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); 7447821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 7457821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); 7467821949aSmrg OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize); 7477821949aSmrg OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); 7487821949aSmrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 7497821949aSmrg 7507821949aSmrg OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); 7517821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 7527821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); 7537821949aSmrg OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize); 7547821949aSmrg OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); 7557821949aSmrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 756de2362d3Smrg 757de2362d3Smrg /* similar to r300 code. Note the big problem is that hardware constants 758de2362d3Smrg * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 759de2362d3Smrg * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 760de2362d3Smrg * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 761de2362d3Smrg * the constants not. To get larger range can use output scale, but for 762de2362d3Smrg * that 2.018 value we need a total scale by 8, which means the constants 763de2362d3Smrg * really have no accuracy whatsoever (5 fractional bits only). 764de2362d3Smrg * The only direct way to get high precision "constants" into the fragment 765de2362d3Smrg * pipe I know of is to use the texcoord interpolator (not color, this one 766de2362d3Smrg * is 8 bit only too), which seems a bit expensive. 
We're lucky though it 767de2362d3Smrg * seems the values we need seem to fit better than worst case (get about 768de2362d3Smrg * 6 fractional bits for this instead of 5, at least when not correcting for 769de2362d3Smrg * hue/saturation/contrast/brightness, which is the same as for vco - yco and 770de2362d3Smrg * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 771de2362d3Smrg * even with non-default saturation/hue/contrast/brightness adjustments, 772de2362d3Smrg * it gets a little crazy and ultimately precision might still be lacking. 773de2362d3Smrg * 774de2362d3Smrg * A higher precision (8 fractional bits) version might just put uco into 775de2362d3Smrg * a texcoord, and calculate a new vcoconst in the shader, like so: 776de2362d3Smrg * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 777de2362d3Smrg * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 778de2362d3Smrg * vcocalc = ADD temp, bias/scale(cohelper), vco 779de2362d3Smrg * would in total use 4 tex units, 4 instructions which seems fairly 780de2362d3Smrg * balanced for this architecture (instead of 3 + 3 for the solution here) 781de2362d3Smrg * 782de2362d3Smrg * temp = MAD(yco, yuv.yyyy, yoff) 783de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 784de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 785de2362d3Smrg * 786de2362d3Smrg * note first mad produces actually scalar, hence we transform 787de2362d3Smrg * it into a dp2a to get 8 bit precision of yco instead of 7 - 788de2362d3Smrg * That's assuming hw correctly expands consts to internal precision. 
789de2362d3Smrg * (y * 1 + y * (yco - 1) + yoff) 790de2362d3Smrg * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 791de2362d3Smrg * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 792de2362d3Smrg * result = MAD x2 (vco / 2, yuv.vvvv, temp) 793de2362d3Smrg * 794de2362d3Smrg * vco, uco need bias (and hence scale too) 795de2362d3Smrg * 796de2362d3Smrg */ 797de2362d3Smrg 798de2362d3Smrg /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 7997821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 800de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 801de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 802de2362d3Smrg R200_TXC_ARG_C_TFACTOR_COLOR | 803de2362d3Smrg (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 804de2362d3Smrg R200_TXC_OP_DOT2_ADD); 8057821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 806de2362d3Smrg (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 807de2362d3Smrg R200_TXC_SCALE_INV2 | 808de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 8097821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_0, 810de2362d3Smrg R200_TXA_ARG_A_ZERO | 811de2362d3Smrg R200_TXA_ARG_B_ZERO | 812de2362d3Smrg R200_TXA_ARG_C_ZERO | 813de2362d3Smrg R200_TXA_OP_MADD); 8147821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 815de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 816de2362d3Smrg 817de2362d3Smrg /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 8187821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 819de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 820de2362d3Smrg R200_TXC_BIAS_ARG_A | 821de2362d3Smrg R200_TXC_SCALE_ARG_A | 822de2362d3Smrg R200_TXC_ARG_B_R1_COLOR | 823de2362d3Smrg R200_TXC_BIAS_ARG_B | 824de2362d3Smrg (needux8 ? 
R200_TXC_SCALE_ARG_B : 0) | 825de2362d3Smrg R200_TXC_ARG_C_R0_COLOR | 826de2362d3Smrg R200_TXC_OP_MADD); 8277821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 828de2362d3Smrg (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 829de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 8307821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_1, 831de2362d3Smrg R200_TXA_ARG_A_ZERO | 832de2362d3Smrg R200_TXA_ARG_B_ZERO | 833de2362d3Smrg R200_TXA_ARG_C_ZERO | 834de2362d3Smrg R200_TXA_OP_MADD); 8357821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 836de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 837de2362d3Smrg 838de2362d3Smrg /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 8397821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 840de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 841de2362d3Smrg R200_TXC_BIAS_ARG_A | 842de2362d3Smrg R200_TXC_SCALE_ARG_A | 843de2362d3Smrg R200_TXC_ARG_B_R2_COLOR | 844de2362d3Smrg R200_TXC_BIAS_ARG_B | 845de2362d3Smrg (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 846de2362d3Smrg R200_TXC_ARG_C_R0_COLOR | 847de2362d3Smrg R200_TXC_OP_MADD); 8487821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 849de2362d3Smrg (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 850de2362d3Smrg R200_TXC_SCALE_2X | 851de2362d3Smrg R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 8527821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_2, 853de2362d3Smrg R200_TXA_ARG_A_ZERO | 854de2362d3Smrg R200_TXA_ARG_B_ZERO | 855de2362d3Smrg R200_TXA_ARG_C_ZERO | 856de2362d3Smrg R200_TXA_COMP_ARG_C | 857de2362d3Smrg R200_TXA_OP_MADD); 8587821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 859de2362d3Smrg R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 860de2362d3Smrg 861de2362d3Smrg /* shader constants */ 8627821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 863de2362d3Smrg yco > 1.0 ? yco - 1.0: yco, 864de2362d3Smrg yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 865de2362d3Smrg 0.0)); 8667821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 867de2362d3Smrg uco[1] * ucscale + 0.5, /* or [-2, 2] */ 868de2362d3Smrg uco[2] * ucscale + 0.5, 869de2362d3Smrg 0.0)); 8707821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 871de2362d3Smrg vco[1] * vcscale + 0.5, /* or [-4, 4] */ 872de2362d3Smrg vco[2] * vcscale + 0.5, 873de2362d3Smrg 0.0)); 874de2362d3Smrg 8757821949aSmrg FINISH_ACCEL(); 876de2362d3Smrg } else { 877de2362d3Smrg pPriv->vtx_count = 4; 878de2362d3Smrg 879de2362d3Smrg BEGIN_ACCEL_RELOC(24, 1); 880de2362d3Smrg 8817821949aSmrg OUT_ACCEL_REG(RADEON_PP_CNTL, 882de2362d3Smrg RADEON_TEX_0_ENABLE | 883de2362d3Smrg RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 884de2362d3Smrg RADEON_TEX_BLEND_2_ENABLE); 885de2362d3Smrg 8867821949aSmrg OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 8877821949aSmrg OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 888de2362d3Smrg (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 889de2362d3Smrg 8907821949aSmrg OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 8917821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 8927821949aSmrg OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 8937821949aSmrg OUT_ACCEL_REG(R200_PP_TXSIZE_0, 894de2362d3Smrg (pPriv->w - 1) | 895de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 8967821949aSmrg OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 8977821949aSmrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 898de2362d3Smrg 899de2362d3Smrg /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 9007821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 901de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 902de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 903de2362d3Smrg R200_TXC_ARG_C_TFACTOR_COLOR | 904de2362d3Smrg (yoff < 0 ? 
R200_TXC_NEG_ARG_C : 0) | 905de2362d3Smrg R200_TXC_OP_DOT2_ADD); 9067821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 907de2362d3Smrg (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 908de2362d3Smrg R200_TXC_SCALE_INV2 | 909de2362d3Smrg (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 910de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 9117821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_0, 912de2362d3Smrg R200_TXA_ARG_A_ZERO | 913de2362d3Smrg R200_TXA_ARG_B_ZERO | 914de2362d3Smrg R200_TXA_ARG_C_ZERO | 915de2362d3Smrg R200_TXA_OP_MADD); 9167821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 917de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 918de2362d3Smrg 919de2362d3Smrg /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 9207821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 921de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 922de2362d3Smrg R200_TXC_BIAS_ARG_A | 923de2362d3Smrg R200_TXC_SCALE_ARG_A | 924de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 925de2362d3Smrg R200_TXC_BIAS_ARG_B | 926de2362d3Smrg (needux8 ? 
R200_TXC_SCALE_ARG_B : 0) | 927de2362d3Smrg R200_TXC_ARG_C_R1_COLOR | 928de2362d3Smrg R200_TXC_OP_MADD); 9297821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 930de2362d3Smrg (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 931de2362d3Smrg (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 932de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 9337821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_1, 934de2362d3Smrg R200_TXA_ARG_A_ZERO | 935de2362d3Smrg R200_TXA_ARG_B_ZERO | 936de2362d3Smrg R200_TXA_ARG_C_ZERO | 937de2362d3Smrg R200_TXA_OP_MADD); 9387821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 939de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 940de2362d3Smrg 941de2362d3Smrg /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 9427821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 943de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 944de2362d3Smrg R200_TXC_BIAS_ARG_A | 945de2362d3Smrg R200_TXC_SCALE_ARG_A | 946de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 947de2362d3Smrg R200_TXC_BIAS_ARG_B | 948de2362d3Smrg (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 949de2362d3Smrg R200_TXC_ARG_C_R1_COLOR | 950de2362d3Smrg R200_TXC_OP_MADD); 9517821949aSmrg OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 952de2362d3Smrg (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 953de2362d3Smrg R200_TXC_SCALE_2X | 954de2362d3Smrg (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 955de2362d3Smrg R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 9567821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND_2, 957de2362d3Smrg R200_TXA_ARG_A_ZERO | 958de2362d3Smrg R200_TXA_ARG_B_ZERO | 959de2362d3Smrg R200_TXA_ARG_C_ZERO | 960de2362d3Smrg R200_TXA_COMP_ARG_C | 961de2362d3Smrg R200_TXA_OP_MADD); 9627821949aSmrg OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 963de2362d3Smrg R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 964de2362d3Smrg 965de2362d3Smrg /* shader constants */ 9667821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 967de2362d3Smrg yco > 1.0 ? yco - 1.0: yco, 968de2362d3Smrg yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 969de2362d3Smrg 0.0)); 9707821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 971de2362d3Smrg uco[1] * ucscale + 0.5, /* or [-2, 2] */ 972de2362d3Smrg uco[2] * ucscale + 0.5, 973de2362d3Smrg 0.0)); 9747821949aSmrg OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 975de2362d3Smrg vco[1] * vcscale + 0.5, /* or [-4, 4] */ 976de2362d3Smrg vco[2] * vcscale + 0.5, 977de2362d3Smrg 0.0)); 978de2362d3Smrg 9797821949aSmrg FINISH_ACCEL(); 980de2362d3Smrg } 981de2362d3Smrg 9827821949aSmrg BEGIN_ACCEL(2); 9837821949aSmrg OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 9847821949aSmrg OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 985de2362d3Smrg (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 9867821949aSmrg FINISH_ACCEL(); 987de2362d3Smrg 988de2362d3Smrg if (pPriv->vsync) { 989de2362d3Smrg xf86CrtcPtr crtc; 990de2362d3Smrg if (pPriv->desired_crtc) 991de2362d3Smrg crtc = pPriv->desired_crtc; 992de2362d3Smrg else 9937821949aSmrg crtc = radeon_pick_best_crtc(pScrn, 994de2362d3Smrg pPriv->drw_x, 995de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 996de2362d3Smrg pPriv->drw_y, 997de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 998de2362d3Smrg if (crtc) 9997821949aSmrg FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 10007821949aSmrg crtc, 10017821949aSmrg pPriv->drw_y - crtc->y, 10027821949aSmrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 1003de2362d3Smrg } 1004de2362d3Smrg 1005de2362d3Smrg return TRUE; 1006de2362d3Smrg} 1007de2362d3Smrg

/*
 * R200DisplayTexturedVideo (MMIO or CP flavour, picked by FUNC_NAME).
 *
 * Draws the current video frame as one textured rectangle per box of
 * pPriv->clip, then reports that clip region as damaged on pPriv->pDraw.
 *
 *   pScrn - screen whose accelerator is programmed.
 *   pPriv - Xv port state; supplies the clip region, the source and
 *           destination geometry and the vertex layout (vtx_count,
 *           is_planar) used below.
 *
 * Returns nothing.  When R200PrepareTexturedVideo() reports failure the
 * function returns immediately: nothing further is drawn and no damage
 * is reported.
 *
 * Each rectangle is submitted as three vertices of a rectangle-list
 * primitive (bottom-left, bottom-right, top-right), every vertex
 * carrying pPriv->vtx_count floats.
 */
static void
FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr box = REGION_RECTS(&pPriv->clip);
    int remaining = REGION_NUM_RECTS(&pPriv->clip);
    int origin_x, origin_y;
    ACCEL_PREAMBLE();

    /* Offset that turns clip-box coordinates into pixmap coordinates. */
#ifdef COMPOSITE
    origin_x = pPixmap->drawable.x - pPixmap->screen_x;
    origin_y = pPixmap->drawable.y - pPixmap->screen_y;
#else
    origin_x = 0;
    origin_y = 0;
#endif

    if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
        return;

    /*
     * Chips before R300 can render a rectangle in a single pass, so each
     * clip box is simply sent as one rectangle-list entry.  (R300 and
     * later draw quads as two triangles and need the large-triangle plus
     * scissor approach instead; that is not handled in this function.)
     */

#ifdef ACCEL_CP
    while (remaining) {
        /*
         * Ring words needed for a one-box batch: three vertices plus four
         * words of framing (the two OUT_RING header words below and,
         * presumably, two for the trailing WAIT_UNTIL register write).
         */
        int box_cost = 3 * pPriv->vtx_count + 4;
        int batch, vtx_dwords;

        /* Not even one box fits: flush, then re-emit the texture state. */
        if (radeon_cs_space_remaining(pScrn) < box_cost) {
            if (info->cs) {
                radeon_cs_flush_indirect(pScrn);
            } else {
                RADEONCPFlushIndirect(pScrn, 1);
            }
            if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
                return;
        }

        /* Take as many boxes as the remaining space allows. */
        batch = MIN(radeon_cs_space_remaining(pScrn) / box_cost, remaining);
        remaining -= batch;
        vtx_dwords = batch * 3 * pPriv->vtx_count;

        BEGIN_RING(vtx_dwords + 4);
        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, vtx_dwords));
        OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
                 ((batch * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));

        for (; batch != 0; batch--, box++) {
            /* Box size and destination origin, in pixels. */
            const int box_w = box->x2 - box->x1;
            const int box_h = box->y2 - box->y1;
            const int dst_left = box->x1 + origin_x;
            const int dst_top = box->y1 + origin_y;
            /* Matching source rectangle: the box scaled by src/dst size. */
            const float src_left = (float)pPriv->src_x +
                ((box->x1 - pPriv->drw_x) * pPriv->src_w) / (float)pPriv->dst_w;
            const float src_top = (float)pPriv->src_y +
                ((box->y1 - pPriv->drw_y) * pPriv->src_h) / (float)pPriv->dst_h;
            const float src_wide = (pPriv->src_w * box_w) / (float)pPriv->dst_w;
            const float src_high = (pPriv->src_h * box_h) / (float)pPriv->dst_h;
            /* Texture coordinates: source edges divided by pPriv->w / h. */
            const float tex_left = src_left / pPriv->w;
            const float tex_right = (float)(src_left + src_wide) / pPriv->w;
            const float tex_top = src_top / pPriv->h;
            const float tex_bottom = (float)(src_top + src_high) / pPriv->h;
            /* Destination edges as floats. */
            const float vx_left = (float)dst_left;
            const float vx_right = (float)(dst_left + box_w);
            const float vy_top = (float)dst_top;
            const float vy_bottom = (float)(dst_top + box_h);

            if (pPriv->is_planar) {
                /* Planar layout: the texcoord pair is sent twice per vertex. */
                VTX_OUT_6(vx_left, vy_bottom,
                          tex_left, tex_bottom, tex_left, tex_bottom);
                VTX_OUT_6(vx_right, vy_bottom,
                          tex_right, tex_bottom, tex_right, tex_bottom);
                VTX_OUT_6(vx_right, vy_top,
                          tex_right, tex_top, tex_right, tex_top);
            } else {
                /* Packed layout: a single texcoord pair per vertex. */
                VTX_OUT_4(vx_left, vy_bottom, tex_left, tex_bottom);
                VTX_OUT_4(vx_right, vy_bottom, tex_right, tex_bottom);
                VTX_OUT_4(vx_right, vy_top, tex_right, tex_top);
            }
        }

        OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
    }
#else /* ACCEL_CP */
    /* One register write per vertex float, plus SE_VF_CNTL and WAIT_UNTIL. */
    BEGIN_ACCEL(remaining * 3 * pPriv->vtx_count + 2);
    OUT_ACCEL_REG(RADEON_SE_VF_CNTL,
                  (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
                   RADEON_VF_PRIM_WALK_DATA |
                   ((remaining * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));

    for (; remaining != 0; remaining--, box++) {
        /* Box size and destination origin, in pixels. */
        const int box_w = box->x2 - box->x1;
        const int box_h = box->y2 - box->y1;
        const int dst_left = box->x1 + origin_x;
        const int dst_top = box->y1 + origin_y;
        /* Matching source rectangle: the box scaled by src/dst size. */
        const float src_left = (float)pPriv->src_x +
            ((box->x1 - pPriv->drw_x) * pPriv->src_w) / (float)pPriv->dst_w;
        const float src_top = (float)pPriv->src_y +
            ((box->y1 - pPriv->drw_y) * pPriv->src_h) / (float)pPriv->dst_h;
        const float src_wide = (pPriv->src_w * box_w) / (float)pPriv->dst_w;
        const float src_high = (pPriv->src_h * box_h) / (float)pPriv->dst_h;
        /* Texture coordinates: source edges divided by pPriv->w / h. */
        const float tex_left = src_left / pPriv->w;
        const float tex_right = (float)(src_left + src_wide) / pPriv->w;
        const float tex_top = src_top / pPriv->h;
        const float tex_bottom = (float)(src_top + src_high) / pPriv->h;
        /* Destination edges as floats. */
        const float vx_left = (float)dst_left;
        const float vx_right = (float)(dst_left + box_w);
        const float vy_top = (float)dst_top;
        const float vy_bottom = (float)(dst_top + box_h);

        if (pPriv->is_planar) {
            /* Planar layout: the texcoord pair is sent twice per vertex. */
            VTX_OUT_6(vx_left, vy_bottom,
                      tex_left, tex_bottom, tex_left, tex_bottom);
            VTX_OUT_6(vx_right, vy_bottom,
                      tex_right, tex_bottom, tex_right, tex_bottom);
            VTX_OUT_6(vx_right, vy_top,
                      tex_right, tex_top, tex_right, tex_top);
        } else {
            /* Packed layout: a single texcoord pair per vertex. */
            VTX_OUT_4(vx_left, vy_bottom, tex_left, tex_bottom);
            VTX_OUT_4(vx_right, vy_bottom, tex_right, tex_bottom);
            VTX_OUT_4(vx_right, vy_top, tex_right, tex_top);
        }
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();
#endif /* !ACCEL_CP */

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

1178de2362d3Smrg 1179de2362d3Smrgstatic Bool 11807821949aSmrgFUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 1181de2362d3Smrg{ 1182de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1183de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 1184de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 1185de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 11867821949aSmrg uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 1187de2362d3Smrg uint32_t dst_pitch, dst_format; 11887821949aSmrg uint32_t txenable, colorpitch, bicubic_offset; 1189de2362d3Smrg uint32_t output_fmt; 1190de2362d3Smrg int pixel_shift; 11917821949aSmrg ACCEL_PREAMBLE(); 1192de2362d3Smrg 11937821949aSmrg#ifdef XF86DRM_MODE 11947821949aSmrg if (info->cs) { 11957821949aSmrg int ret; 1196de2362d3Smrg 11977821949aSmrg radeon_cs_space_reset_bos(info->cs); 11987821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1199de2362d3Smrg 12007821949aSmrg if (pPriv->bicubic_enabled) 12017821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
1202de2362d3Smrg 12037821949aSmrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 12047821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 12057821949aSmrg 12067821949aSmrg ret = radeon_cs_space_check(info->cs); 12077821949aSmrg if (ret) { 12087821949aSmrg ErrorF("Not enough RAM to hw accel xv operation\n"); 12097821949aSmrg return FALSE; 12107821949aSmrg } 1211de2362d3Smrg } 12127821949aSmrg#else 12137821949aSmrg (void)src_bo; 12147821949aSmrg#endif 1215de2362d3Smrg 1216de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 1217de2362d3Smrg 12187821949aSmrg#ifdef USE_EXA 12197821949aSmrg if (info->useEXA) { 12207821949aSmrg dst_pitch = exaGetPixmapPitch(pPixmap); 12217821949aSmrg } else 12227821949aSmrg#endif 12237821949aSmrg { 12247821949aSmrg dst_pitch = pPixmap->devKind; 12257821949aSmrg } 12267821949aSmrg 12277821949aSmrg#ifdef USE_EXA 12287821949aSmrg if (info->useEXA) { 12297821949aSmrg RADEON_SWITCH_TO_3D(); 12307821949aSmrg } else 12317821949aSmrg#endif 12327821949aSmrg { 12337821949aSmrg BEGIN_ACCEL(2); 12347821949aSmrg OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 12357821949aSmrg /* We must wait for 3d to idle, in case source was just written as a dest. 
*/ 12367821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 12377821949aSmrg RADEON_WAIT_HOST_IDLECLEAN | 12387821949aSmrg RADEON_WAIT_2D_IDLECLEAN | 12397821949aSmrg RADEON_WAIT_3D_IDLECLEAN | 12407821949aSmrg RADEON_WAIT_DMA_GUI_IDLE); 12417821949aSmrg FINISH_ACCEL(); 12427821949aSmrg 12437821949aSmrg if (!info->accel_state->XInited3D) 12447821949aSmrg RADEONInit3DEngine(pScrn); 12457821949aSmrg } 1246de2362d3Smrg 1247de2362d3Smrg if (pPriv->bicubic_enabled) 1248de2362d3Smrg pPriv->vtx_count = 6; 1249de2362d3Smrg else 1250de2362d3Smrg pPriv->vtx_count = 4; 1251de2362d3Smrg 1252de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 1253de2362d3Smrg case 16: 1254de2362d3Smrg if (pPixmap->drawable.depth == 15) 1255de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB1555; 1256de2362d3Smrg else 1257de2362d3Smrg dst_format = R300_COLORFORMAT_RGB565; 1258de2362d3Smrg break; 1259de2362d3Smrg case 32: 1260de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB8888; 1261de2362d3Smrg break; 1262de2362d3Smrg default: 1263de2362d3Smrg return FALSE; 1264de2362d3Smrg } 1265de2362d3Smrg 1266de2362d3Smrg output_fmt = (R300_OUT_FMT_C4_8 | 1267de2362d3Smrg R300_OUT_FMT_C0_SEL_BLUE | 1268de2362d3Smrg R300_OUT_FMT_C1_SEL_GREEN | 1269de2362d3Smrg R300_OUT_FMT_C2_SEL_RED | 1270de2362d3Smrg R300_OUT_FMT_C3_SEL_ALPHA); 1271de2362d3Smrg 1272de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 1273de2362d3Smrg colorpitch |= dst_format; 1274de2362d3Smrg 1275de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 1276de2362d3Smrg colorpitch |= R300_COLORTILE; 1277de2362d3Smrg 1278de2362d3Smrg 1279de2362d3Smrg if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1280de2362d3Smrg (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1281de2362d3Smrg pPriv->is_planar = TRUE; 1282de2362d3Smrg else 1283de2362d3Smrg pPriv->is_planar = FALSE; 1284de2362d3Smrg 1285de2362d3Smrg if (pPriv->is_planar) { 1286de2362d3Smrg txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1287de2362d3Smrg txpitch = 
pPriv->src_pitch; 1288de2362d3Smrg } else { 1289de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 1290de2362d3Smrg txformat1 = R300_TX_FORMAT_YVYU422; 1291de2362d3Smrg else 1292de2362d3Smrg txformat1 = R300_TX_FORMAT_VYUY422; 1293de2362d3Smrg 1294de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) 1295de2362d3Smrg txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1296de2362d3Smrg 1297de2362d3Smrg /* pitch is in pixels */ 1298de2362d3Smrg txpitch = pPriv->src_pitch / 2; 1299de2362d3Smrg } 1300de2362d3Smrg txpitch -= 1; 1301de2362d3Smrg 1302de2362d3Smrg txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1303de2362d3Smrg (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1304de2362d3Smrg R300_TXPITCH_EN); 1305de2362d3Smrg 1306de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1307de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1308de2362d3Smrg R300_TX_MAG_FILTER_LINEAR | 1309de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 1310de2362d3Smrg (0 << R300_TX_ID_SHIFT)); 1311de2362d3Smrg 13127821949aSmrg txoffset = info->cs ? 
0 : pPriv->src_offset; 13137821949aSmrg 1314de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 13157821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 13167821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 13177821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 1318de2362d3Smrg if (pPriv->is_planar) 13197821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1320de2362d3Smrg else 13217821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 13227821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 13237821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 13247821949aSmrg FINISH_ACCEL(); 1325de2362d3Smrg 1326de2362d3Smrg txenable = R300_TEX_0_ENABLE; 1327de2362d3Smrg 1328de2362d3Smrg if (pPriv->is_planar) { 1329de2362d3Smrg txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1330de2362d3Smrg (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1331de2362d3Smrg R300_TXPITCH_EN); 1332de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1333de2362d3Smrg txpitch -= 1; 1334de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1335de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1336de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 1337de2362d3Smrg R300_TX_MAG_FILTER_LINEAR); 1338de2362d3Smrg 1339de2362d3Smrg BEGIN_ACCEL_RELOC(12, 2); 13407821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 13417821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 13427821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 13437821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 13447821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 13457821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 13467821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 13477821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 13487821949aSmrg 
OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 13497821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 13507821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 13517821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 13527821949aSmrg FINISH_ACCEL(); 1353de2362d3Smrg txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1354de2362d3Smrg } 1355de2362d3Smrg 1356de2362d3Smrg if (pPriv->bicubic_enabled) { 1357de2362d3Smrg /* Size is 128x1 */ 1358de2362d3Smrg txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 1359de2362d3Smrg (0x0 << R300_TXHEIGHT_SHIFT) | 1360de2362d3Smrg R300_TXPITCH_EN); 1361de2362d3Smrg /* Format is 32-bit floats, 4bpp */ 1362de2362d3Smrg txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1363de2362d3Smrg /* Pitch is 127 (128-1) */ 1364de2362d3Smrg txpitch = 0x7f; 1365de2362d3Smrg /* Tex filter */ 1366de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1367de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1368de2362d3Smrg R300_TX_MIN_FILTER_NEAREST | 1369de2362d3Smrg R300_TX_MAG_FILTER_NEAREST | 1370de2362d3Smrg (1 << R300_TX_ID_SHIFT)); 1371de2362d3Smrg 13727821949aSmrg if (info->cs) 13737821949aSmrg bicubic_offset = 0; 13747821949aSmrg else 13757821949aSmrg bicubic_offset = pPriv->bicubic_src_offset; 13767821949aSmrg 1377de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 13787821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 13797821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 13807821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 13817821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 13827821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 13837821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 13847821949aSmrg FINISH_ACCEL(); 1385de2362d3Smrg 1386de2362d3Smrg /* Enable tex 1 */ 1387de2362d3Smrg txenable |= R300_TEX_1_ENABLE; 1388de2362d3Smrg } 1389de2362d3Smrg 1390de2362d3Smrg /* setup the VAP */ 
1391de2362d3Smrg if (info->accel_state->has_tcl) { 1392de2362d3Smrg if (pPriv->bicubic_enabled) 13937821949aSmrg BEGIN_ACCEL(7); 1394de2362d3Smrg else 13957821949aSmrg BEGIN_ACCEL(6); 1396de2362d3Smrg } else { 1397de2362d3Smrg if (pPriv->bicubic_enabled) 13987821949aSmrg BEGIN_ACCEL(5); 1399de2362d3Smrg else 14007821949aSmrg BEGIN_ACCEL(4); 1401de2362d3Smrg } 1402de2362d3Smrg 1403de2362d3Smrg /* These registers define the number, type, and location of data submitted 1404de2362d3Smrg * to the PVS unit of GA input (when PVS is disabled) 1405de2362d3Smrg * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1406de2362d3Smrg * enabled. This memory provides the imputs to the vertex shader program 1407de2362d3Smrg * and ordering is not important. When PVS/TCL is disabled, this field maps 1408de2362d3Smrg * directly to the GA input memory and the order is signifigant. In 1409de2362d3Smrg * PVS_BYPASS mode the order is as follows: 1410de2362d3Smrg * Position 1411de2362d3Smrg * Point Size 1412de2362d3Smrg * Color 0-3 1413de2362d3Smrg * Textures 0-7 1414de2362d3Smrg * Fog 1415de2362d3Smrg */ 1416de2362d3Smrg if (pPriv->bicubic_enabled) { 14177821949aSmrg OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1418de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1419de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 1420de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 1421de2362d3Smrg R300_SIGNED_0 | 1422de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1423de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 1424de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 1425de2362d3Smrg R300_SIGNED_1)); 14267821949aSmrg OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 1427de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1428de2362d3Smrg (0 << R300_SKIP_DWORDS_2_SHIFT) | 1429de2362d3Smrg (7 << R300_DST_VEC_LOC_2_SHIFT) | 1430de2362d3Smrg R300_LAST_VEC_2 | 1431de2362d3Smrg R300_SIGNED_2)); 1432de2362d3Smrg } else { 14337821949aSmrg 
OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1434de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1435de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 1436de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 1437de2362d3Smrg R300_SIGNED_0 | 1438de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1439de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 1440de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 1441de2362d3Smrg R300_LAST_VEC_1 | 1442de2362d3Smrg R300_SIGNED_1)); 1443de2362d3Smrg } 1444de2362d3Smrg 1445de2362d3Smrg /* load the vertex shader 1446de2362d3Smrg * We pre-load vertex programs in RADEONInit3DEngine(): 1447de2362d3Smrg * - exa 1448de2362d3Smrg * - Xv 1449de2362d3Smrg * - Xv bicubic 1450de2362d3Smrg * Here we select the offset of the vertex program we want to use 1451de2362d3Smrg */ 1452de2362d3Smrg if (info->accel_state->has_tcl) { 1453de2362d3Smrg if (pPriv->bicubic_enabled) { 14547821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1455de2362d3Smrg ((11 << R300_PVS_FIRST_INST_SHIFT) | 1456de2362d3Smrg (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1457de2362d3Smrg (13 << R300_PVS_LAST_INST_SHIFT))); 14587821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1459de2362d3Smrg (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1460de2362d3Smrg } else { 14617821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1462de2362d3Smrg ((9 << R300_PVS_FIRST_INST_SHIFT) | 1463de2362d3Smrg (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1464de2362d3Smrg (10 << R300_PVS_LAST_INST_SHIFT))); 14657821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1466de2362d3Smrg (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1467de2362d3Smrg } 1468de2362d3Smrg } 1469de2362d3Smrg 1470de2362d3Smrg /* Position and one set of 2 texture coordinates */ 14717821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1472de2362d3Smrg if (pPriv->bicubic_enabled) 14737821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1474de2362d3Smrg (2 << 
R300_TEX_1_COMP_CNT_SHIFT))); 1475de2362d3Smrg else 14767821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1477de2362d3Smrg 14787821949aSmrg OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 14797821949aSmrg FINISH_ACCEL(); 1480de2362d3Smrg 1481de2362d3Smrg /* setup pixel shader */ 1482de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) { 1483de2362d3Smrg if (pPriv->bicubic_enabled) { 14847821949aSmrg BEGIN_ACCEL(79); 1485de2362d3Smrg 1486de2362d3Smrg /* 4 components: 2 for tex0 and 2 for tex1 */ 14877821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1488de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1489de2362d3Smrg 1490de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 14917821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1492de2362d3Smrg 1493de2362d3Smrg /* Pixel stack frame size. */ 14947821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 1495de2362d3Smrg 1496de2362d3Smrg /* Indirection levels */ 14977821949aSmrg OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1498de2362d3Smrg R300_FIRST_TEX)); 1499de2362d3Smrg 1500de2362d3Smrg /* Set nodes. 
*/ 15017821949aSmrg OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1502de2362d3Smrg R300_ALU_CODE_SIZE(14) | 1503de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1504de2362d3Smrg R300_TEX_CODE_SIZE(6))); 1505de2362d3Smrg 1506de2362d3Smrg /* Nodes are allocated highest first, but executed lowest first */ 15077821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); 15087821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1509de2362d3Smrg R300_ALU_SIZE(0) | 1510de2362d3Smrg R300_TEX_START(0) | 1511de2362d3Smrg R300_TEX_SIZE(0))); 15127821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1513de2362d3Smrg R300_ALU_SIZE(9) | 1514de2362d3Smrg R300_TEX_START(1) | 1515de2362d3Smrg R300_TEX_SIZE(0))); 15167821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1517de2362d3Smrg R300_ALU_SIZE(2) | 1518de2362d3Smrg R300_TEX_START(2) | 1519de2362d3Smrg R300_TEX_SIZE(3) | 1520de2362d3Smrg R300_RGBA_OUT)); 1521de2362d3Smrg 1522de2362d3Smrg /* ** BICUBIC FP ** */ 1523de2362d3Smrg 1524de2362d3Smrg /* texcoord0 => temp0 1525de2362d3Smrg * texcoord1 => temp1 */ 1526de2362d3Smrg 1527de2362d3Smrg // first node 1528de2362d3Smrg /* TEX temp2, temp1.rrr0, tex1, 1D */ 15297821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1530de2362d3Smrg R300_TEX_ID(1) | 1531de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1532de2362d3Smrg R300_TEX_DST_ADDR(2))); 1533de2362d3Smrg 1534de2362d3Smrg /* MOV temp1.r, temp1.ggg0 */ 15357821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1536de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1537de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1538de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 15397821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1540de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1541de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 15427821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), 
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1543de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1544de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1545de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 15467821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1547de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1548de2362d3Smrg 1549de2362d3Smrg 1550de2362d3Smrg // second node 1551de2362d3Smrg /* TEX temp1, temp1, tex1, 1D */ 15527821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1553de2362d3Smrg R300_TEX_ID(1) | 1554de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1555de2362d3Smrg R300_TEX_DST_ADDR(1))); 1556de2362d3Smrg 1557de2362d3Smrg /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 15587821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1559de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1560de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1561de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 15627821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1563de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1564de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1565de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 15667821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1567de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1568de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1569de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 15707821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1571de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1572de2362d3Smrg 1573de2362d3Smrg 1574de2362d3Smrg /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 15757821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1576de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 
1577de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1578de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 15797821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1580de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1581de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1582de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 15837821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1584de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1585de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1586de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 15877821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1588de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1589de2362d3Smrg 1590de2362d3Smrg /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 15917821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1592de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1593de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1594de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 15957821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1596de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1597de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1598de2362d3Smrg R300_ALU_RGB_ADDRD(4) | 1599de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16007821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1601de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1602de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1603de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16047821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1605de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1606de2362d3Smrg 1607de2362d3Smrg /* MAD temp5.rg, temp1.ggg0, 
const1.rgb, temp2.rgb0 */ 16087821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1609de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1610de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1611de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16127821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1613de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1614de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1615de2362d3Smrg R300_ALU_RGB_ADDRD(5) | 1616de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16177821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1618de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1619de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1620de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16217821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1622de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1623de2362d3Smrg 1624de2362d3Smrg /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 16257821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1626de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1627de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1628de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16297821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1630de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1631de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1632de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1633de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16347821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1635de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1636de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1637de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16387821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1639de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1640de2362d3Smrg 1641de2362d3Smrg /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 16427821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1643de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1644de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1645de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16467821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1647de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1648de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1649de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1650de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16517821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1652de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1653de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1654de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16557821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1656de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1657de2362d3Smrg 1658de2362d3Smrg /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 16597821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1660de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1661de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1662de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16637821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1664de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1665de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1666de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16677821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), 
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1668de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1669de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1670de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16717821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1672de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1673de2362d3Smrg 1674de2362d3Smrg /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 16757821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1676de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1677de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1678de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16797821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1680de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1681de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1682de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 16837821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1684de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1685de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1686de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 16877821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1688de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1689de2362d3Smrg 1690de2362d3Smrg /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 16917821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1692de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1693de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1694de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 16957821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1696de2362d3Smrg R300_ALU_RGB_ADDR2(5) | 1697de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1698de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | 
R300_ALU_RGB_MASK_G))); 16997821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1700de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1701de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1702de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 17037821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1704de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1705de2362d3Smrg 1706de2362d3Smrg /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 17077821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1708de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1709de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1710de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 17117821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1712de2362d3Smrg R300_ALU_RGB_ADDR2(4) | 1713de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1714de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 17157821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1716de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1717de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1718de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 17197821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1720de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1721de2362d3Smrg 1722de2362d3Smrg 1723de2362d3Smrg // third node 1724de2362d3Smrg /* TEX temp4, temp1.rg--, tex0, 1D */ 17257821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1726de2362d3Smrg R300_TEX_ID(0) | 1727de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1728de2362d3Smrg R300_TEX_DST_ADDR(4))); 1729de2362d3Smrg 1730de2362d3Smrg /* TEX temp3, temp3.rg--, tex0, 1D */ 17317821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1732de2362d3Smrg 
R300_TEX_ID(0) | 1733de2362d3Smrg R300_TEX_SRC_ADDR(3) | 1734de2362d3Smrg R300_TEX_DST_ADDR(3))); 1735de2362d3Smrg 1736de2362d3Smrg /* TEX temp5, temp2.rg--, tex0, 1D */ 17377821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1738de2362d3Smrg R300_TEX_ID(0) | 1739de2362d3Smrg R300_TEX_SRC_ADDR(2) | 1740de2362d3Smrg R300_TEX_DST_ADDR(5))); 1741de2362d3Smrg 1742de2362d3Smrg /* TEX temp0, temp0.rg--, tex0, 1D */ 17437821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1744de2362d3Smrg R300_TEX_ID(0) | 1745de2362d3Smrg R300_TEX_SRC_ADDR(0) | 1746de2362d3Smrg R300_TEX_DST_ADDR(0))); 1747de2362d3Smrg 1748de2362d3Smrg /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1749de2362d3Smrg * - PRESUB temps, temp4 - temp3 1750de2362d3Smrg * - MAD temp3, temp1.bbbb, temps, temp3 */ 17517821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1752de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1753de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1754de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1755de2362d3Smrg R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 17567821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1757de2362d3Smrg R300_ALU_RGB_ADDR1(4) | 1758de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1759de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1760de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 17617821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1762de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1763de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1764de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 17657821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1766de2362d3Smrg R300_ALU_ALPHA_ADDR1(4) | 1767de2362d3Smrg R300_ALU_ALPHA_ADDR2(1) | 1768de2362d3Smrg R300_ALU_ALPHA_ADDRD(3) | 1769de2362d3Smrg 
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1770de2362d3Smrg 1771de2362d3Smrg /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1772de2362d3Smrg * - PRESUB temps, temp5 - temp0 1773de2362d3Smrg * - MAD temp0, temp1.bbbb, temps, temp0 */ 17747821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1775de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1776de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1777de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1778de2362d3Smrg R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1779de2362d3Smrg R300_ALU_RGB_INSERT_NOP)); 17807821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1781de2362d3Smrg R300_ALU_RGB_ADDR1(5) | 1782de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1783de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1784de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 17857821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1786de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1787de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1788de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 17897821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1790de2362d3Smrg R300_ALU_ALPHA_ADDR1(5) | 1791de2362d3Smrg R300_ALU_ALPHA_ADDR2(1) | 1792de2362d3Smrg R300_ALU_ALPHA_ADDRD(0) | 1793de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1794de2362d3Smrg 1795de2362d3Smrg /* LRP output, temp2.bbbb, temp3, temp0 -> 1796de2362d3Smrg * - PRESUB temps, temp3 - temp0 1797de2362d3Smrg * - MAD output, temp2.bbbb, temps, temp0 */ 17987821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1799de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1800de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1801de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1802de2362d3Smrg 
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 18037821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1804de2362d3Smrg R300_ALU_RGB_ADDR1(3) | 1805de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1806de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 18077821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1808de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1809de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1810de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 18117821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1812de2362d3Smrg R300_ALU_ALPHA_ADDR1(3) | 1813de2362d3Smrg R300_ALU_ALPHA_ADDR2(2) | 1814de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1815de2362d3Smrg 1816de2362d3Smrg /* Shader constants. */ 18177821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 18187821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); 18197821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); 18207821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); 1821de2362d3Smrg 18227821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); 18237821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 18247821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); 18257821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); 1826de2362d3Smrg 18277821949aSmrg FINISH_ACCEL(); 1828de2362d3Smrg } else { 18297821949aSmrg BEGIN_ACCEL(11); 1830de2362d3Smrg /* 2 components: 2 for tex0 */ 18317821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 1832de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1833de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1834de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 18357821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1836de2362d3Smrg 18377821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1838de2362d3Smrg 1839de2362d3Smrg /* 
Indirection levels */ 18407821949aSmrg OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1841de2362d3Smrg R300_FIRST_TEX)); 1842de2362d3Smrg 18437821949aSmrg OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1844de2362d3Smrg R300_ALU_CODE_SIZE(1) | 1845de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1846de2362d3Smrg R300_TEX_CODE_SIZE(1))); 1847de2362d3Smrg 18487821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1849de2362d3Smrg R300_ALU_SIZE(0) | 1850de2362d3Smrg R300_TEX_START(0) | 1851de2362d3Smrg R300_TEX_SIZE(0) | 1852de2362d3Smrg R300_RGBA_OUT)); 1853de2362d3Smrg 1854de2362d3Smrg /* tex inst */ 18557821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1856de2362d3Smrg R300_TEX_DST_ADDR(0) | 1857de2362d3Smrg R300_TEX_ID(0) | 1858de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 1859de2362d3Smrg 1860de2362d3Smrg /* ALU inst */ 1861de2362d3Smrg /* RGB */ 18627821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1863de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1864de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 1865de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1866de2362d3Smrg R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1867de2362d3Smrg R300_ALU_RGB_MASK_G | 1868de2362d3Smrg R300_ALU_RGB_MASK_B)) | 1869de2362d3Smrg R300_ALU_RGB_TARGET_A)); 18707821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1871de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1872de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1873de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1874de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1875de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1876de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1877de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1878de2362d3Smrg R300_ALU_RGB_CLAMP)); 1879de2362d3Smrg /* Alpha */ 18807821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1881de2362d3Smrg R300_ALU_ALPHA_ADDR1(0) 
| 1882de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 1883de2362d3Smrg R300_ALU_ALPHA_ADDRD(0) | 1884de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1885de2362d3Smrg R300_ALU_ALPHA_TARGET_A | 1886de2362d3Smrg R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 18877821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1888de2362d3Smrg R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1889de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1890de2362d3Smrg R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1891de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1892de2362d3Smrg R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1893de2362d3Smrg R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1894de2362d3Smrg R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1895de2362d3Smrg R300_ALU_ALPHA_CLAMP)); 18967821949aSmrg FINISH_ACCEL(); 1897de2362d3Smrg } 1898de2362d3Smrg } else { 1899de2362d3Smrg /* 1900de2362d3Smrg * y' = y - .0625 1901de2362d3Smrg * u' = u - .5 1902de2362d3Smrg * v' = v - .5; 1903de2362d3Smrg * 1904de2362d3Smrg * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1905de2362d3Smrg * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1906de2362d3Smrg * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1907de2362d3Smrg * 1908de2362d3Smrg * DP3 might look like the straightforward solution 1909de2362d3Smrg * but we'd need to move the texture yuv values in 1910de2362d3Smrg * the same reg for this to work. Therefore use MADs. 1911de2362d3Smrg * Brightness just adds to the off constant. 1912de2362d3Smrg * Contrast is multiplication of luminance. 1913de2362d3Smrg * Saturation and hue change the u and v coeffs. 
1914de2362d3Smrg * Default values (before adjustments - depend on colorspace): 1915de2362d3Smrg * yco = 1.1643 1916de2362d3Smrg * uco = 0, -0.39173, 2.017 1917de2362d3Smrg * vco = 1.5958, -0.8129, 0 1918de2362d3Smrg * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1919de2362d3Smrg * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1920de2362d3Smrg * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1921de2362d3Smrg * 1922de2362d3Smrg * temp = MAD(yco, yuv.yyyy, off) 1923de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 1924de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 1925de2362d3Smrg */ 1926de2362d3Smrg /* TODO: don't recalc consts always */ 1927de2362d3Smrg const float Loff = -0.0627; 1928de2362d3Smrg const float Coff = -0.502; 1929de2362d3Smrg float uvcosf, uvsinf; 1930de2362d3Smrg float yco; 1931de2362d3Smrg float uco[3], vco[3], off[3]; 1932de2362d3Smrg float bright, cont, gamma; 1933de2362d3Smrg int ref = pPriv->transform_index; 1934de2362d3Smrg Bool needgamma = FALSE; 1935de2362d3Smrg 1936de2362d3Smrg cont = RTFContrast(pPriv->contrast); 1937de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 1938de2362d3Smrg gamma = (float)pPriv->gamma / 1000.0; 1939de2362d3Smrg uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1940de2362d3Smrg uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1941de2362d3Smrg /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 1942de2362d3Smrg 1943de2362d3Smrg yco = trans[ref].RefLuma * cont; 1944de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 1945de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1946de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 1947de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 1948de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1949de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 1950de2362d3Smrg off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1951de2362d3Smrg off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1952de2362d3Smrg off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1953de2362d3Smrg 1954de2362d3Smrg if (gamma != 1.0) { 1955de2362d3Smrg needgamma = TRUE; 1956de2362d3Smrg /* note: gamma correction is out = in ^ gamma; 1957de2362d3Smrg gpu can only do LG2/EX2 therefore we transform into 1958de2362d3Smrg in ^ gamma = 2 ^ (log2(in) * gamma). 1959de2362d3Smrg Lots of scalar ops, unfortunately (better solution?) - 1960de2362d3Smrg without gamma that's 3 inst, with gamma it's 10... 1961de2362d3Smrg could use different gamma factors per channel, 1962de2362d3Smrg if that's of any use. */ 1963de2362d3Smrg } 1964de2362d3Smrg 1965de2362d3Smrg if (pPriv->is_planar) { 19667821949aSmrg BEGIN_ACCEL(needgamma ? 
28 + 33 : 33); 1967de2362d3Smrg /* 2 components: same 2 for tex0/1/2 */ 19687821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 1969de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1970de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1971de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 19727821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1973de2362d3Smrg 19747821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1975de2362d3Smrg 1976de2362d3Smrg /* Indirection levels */ 19777821949aSmrg OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1978de2362d3Smrg R300_FIRST_TEX)); 1979de2362d3Smrg 19807821949aSmrg OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1981de2362d3Smrg R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1982de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1983de2362d3Smrg R300_TEX_CODE_SIZE(3))); 1984de2362d3Smrg 19857821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1986de2362d3Smrg R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 1987de2362d3Smrg R300_TEX_START(0) | 1988de2362d3Smrg R300_TEX_SIZE(2) | 1989de2362d3Smrg R300_RGBA_OUT)); 1990de2362d3Smrg 1991de2362d3Smrg /* tex inst */ 19927821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1993de2362d3Smrg R300_TEX_DST_ADDR(2) | 1994de2362d3Smrg R300_TEX_ID(0) | 1995de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 19967821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1997de2362d3Smrg R300_TEX_DST_ADDR(1) | 1998de2362d3Smrg R300_TEX_ID(1) | 1999de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 20007821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 2001de2362d3Smrg R300_TEX_DST_ADDR(0) | 2002de2362d3Smrg R300_TEX_ID(2) | 2003de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 2004de2362d3Smrg 2005de2362d3Smrg /* ALU inst */ 2006de2362d3Smrg /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 20077821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 
2008de2362d3Smrg R300_ALU_RGB_ADDR1(2) | 2009de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 2010de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 2011de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 20127821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2013de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2014de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2015de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2016de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2017de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2018de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2019de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2020de2362d3Smrg /* alpha nop, but need to set up alpha source for rgb usage */ 20217821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2022de2362d3Smrg R300_ALU_ALPHA_ADDR1(2) | 2023de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 2024de2362d3Smrg R300_ALU_ALPHA_ADDRD(2) | 2025de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 20267821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2027de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2028de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2029de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2030de2362d3Smrg 2031de2362d3Smrg /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 20327821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2033de2362d3Smrg R300_ALU_RGB_ADDR1(1) | 2034de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 2035de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 2036de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 20377821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2038de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2039de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2040de2362d3Smrg 
R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2041de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2042de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2043de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2044de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2045de2362d3Smrg /* alpha nop */ 20467821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 2047de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 20487821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2049de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2050de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2051de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2052de2362d3Smrg 2053de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 20547821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2055de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2056de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 2057de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2058de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2059de2362d3Smrg (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 20607821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2061de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2062de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2063de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2064de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2065de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2066de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2067de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2068de2362d3Smrg R300_ALU_RGB_CLAMP)); 2069de2362d3Smrg /* write alpha 1 */ 20707821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2071de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2072de2362d3Smrg R300_ALU_ALPHA_TARGET_A)); 20737821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2074de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2075de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2076de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2077de2362d3Smrg 2078de2362d3Smrg if (needgamma) { 2079de2362d3Smrg /* rgb temp0.r = op_sop, set up src0 reg */ 20807821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2081de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 20827821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2083de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2084de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2085de2362d3Smrg /* alpha lg2 temp0, temp0.r */ 20867821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2087de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 20887821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2089de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2090de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2091de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2092de2362d3Smrg 2093de2362d3Smrg /* rgb temp0.g = op_sop, set up src0 reg */ 20947821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2095de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 20967821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2097de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2098de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2099de2362d3Smrg /* alpha lg2 temp0, temp0.g */ 21007821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2101de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21027821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2103de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2104de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2105de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2106de2362d3Smrg 2107de2362d3Smrg /* rgb temp0.b = op_sop, set up src0 reg */ 21087821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2109de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 21107821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2111de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2112de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2113de2362d3Smrg /* alpha lg2 temp0, temp0.b */ 21147821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2115de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21167821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2117de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2118de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2119de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2120de2362d3Smrg 2121de2362d3Smrg /* MUL const1, temp1, temp0 */ 21227821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2123de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 
2124de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 2125de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2126de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 21277821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2128de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2129de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2130de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2131de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2132de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2133de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2134de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2135de2362d3Smrg /* alpha nop, but set up const1 */ 21367821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2137de2362d3Smrg R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2138de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21397821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2140de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2141de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2142de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2143de2362d3Smrg 2144de2362d3Smrg /* rgb out0.r = op_sop, set up src0 reg */ 21457821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2146de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2147de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 21487821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2149de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2150de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2151de2362d3Smrg /* alpha ex2 temp0, temp0.r */ 21527821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2153de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21547821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2155de2362d3Smrg 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2156de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2157de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2158de2362d3Smrg 2159de2362d3Smrg /* rgb out0.g = op_sop, set up src0 reg */ 21607821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2161de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2162de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 21637821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2164de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2165de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2166de2362d3Smrg /* alpha ex2 temp0, temp0.g */ 21677821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2168de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21697821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2170de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2171de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2172de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2173de2362d3Smrg 2174de2362d3Smrg /* rgb out0.b = op_sop, set up src0 reg */ 21757821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2176de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2177de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 21787821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2179de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2180de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2181de2362d3Smrg /* alpha ex2 temp0, temp0.b */ 21827821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2183de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 21847821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2185de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2186de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2187de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2188de2362d3Smrg } 2189de2362d3Smrg } else { 21907821949aSmrg BEGIN_ACCEL(needgamma ? 28 + 31 : 31); 2191de2362d3Smrg /* 2 components */ 21927821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 2193de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2194de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 2195de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 21967821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 2197de2362d3Smrg 21987821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 2199de2362d3Smrg 2200de2362d3Smrg /* Indirection levels */ 22017821949aSmrg OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 2202de2362d3Smrg R300_FIRST_TEX)); 2203de2362d3Smrg 22047821949aSmrg OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 2205de2362d3Smrg R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 2206de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 2207de2362d3Smrg R300_TEX_CODE_SIZE(1))); 2208de2362d3Smrg 22097821949aSmrg OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 2210de2362d3Smrg R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 2211de2362d3Smrg R300_TEX_START(0) | 2212de2362d3Smrg R300_TEX_SIZE(0) | 2213de2362d3Smrg R300_RGBA_OUT)); 2214de2362d3Smrg 2215de2362d3Smrg /* tex inst */ 22167821949aSmrg OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 2217de2362d3Smrg R300_TEX_DST_ADDR(0) | 2218de2362d3Smrg R300_TEX_ID(0) | 2219de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 2220de2362d3Smrg 2221de2362d3Smrg /* ALU inst */ 2222de2362d3Smrg /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 22237821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2224de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2225de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 2226de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 2227de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 22287821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2229de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2230de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 2231de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2232de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2233de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2234de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2235de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2236de2362d3Smrg /* alpha nop, but need to set up alpha source for rgb usage */ 22377821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2238de2362d3Smrg R300_ALU_ALPHA_ADDR1(0) | 2239de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 2240de2362d3Smrg R300_ALU_ALPHA_ADDRD(0) | 2241de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 22427821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2243de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2244de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2245de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2246de2362d3Smrg 2247de2362d3Smrg 
/* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 22487821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2249de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2250de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 2251de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 2252de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 22537821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2254de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2255de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 2256de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2257de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2258de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2259de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2260de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2261de2362d3Smrg /* alpha nop */ 22627821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 2263de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 22647821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2265de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2266de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2267de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2268de2362d3Smrg 2269de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 22707821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2271de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2272de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 2273de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2274de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2275de2362d3Smrg (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 22767821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2277de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2278de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2279de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2280de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2281de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2282de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2283de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2284de2362d3Smrg R300_ALU_RGB_CLAMP)); 2285de2362d3Smrg /* write alpha 1 */ 22867821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2287de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2288de2362d3Smrg R300_ALU_ALPHA_TARGET_A)); 22897821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2290de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2291de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2292de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2293de2362d3Smrg 2294de2362d3Smrg if (needgamma) { 2295de2362d3Smrg /* rgb temp0.r = op_sop, set up src0 reg */ 22967821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2297de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 22987821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2299de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2300de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2301de2362d3Smrg /* alpha lg2 temp0, temp0.r */ 23027821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2303de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23047821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2305de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2306de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2307de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2308de2362d3Smrg 2309de2362d3Smrg /* rgb temp0.g = op_sop, set up src0 reg */ 23107821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2311de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 23127821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2313de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2314de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2315de2362d3Smrg /* alpha lg2 temp0, temp0.g */ 23167821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2317de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23187821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2319de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2320de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2321de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2322de2362d3Smrg 2323de2362d3Smrg /* rgb temp0.b = op_sop, set up src0 reg */ 23247821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2325de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 23267821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2327de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2328de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2329de2362d3Smrg /* alpha lg2 temp0, temp0.b */ 23307821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2331de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23327821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2333de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2334de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2335de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2336de2362d3Smrg 2337de2362d3Smrg /* MUL const1, temp1, temp0 */ 23387821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2339de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 
2340de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 2341de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2342de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 23437821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2344de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2345de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2346de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2347de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2348de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2349de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2350de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2351de2362d3Smrg /* alpha nop, but set up const1 */ 23527821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2353de2362d3Smrg R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2354de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23557821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2356de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2357de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2358de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2359de2362d3Smrg 2360de2362d3Smrg /* rgb out0.r = op_sop, set up src0 reg */ 23617821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2362de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2363de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 23647821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2365de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2366de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2367de2362d3Smrg /* alpha ex2 temp0, temp0.r */ 23687821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2369de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23707821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2371de2362d3Smrg 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2372de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2373de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2374de2362d3Smrg 2375de2362d3Smrg /* rgb out0.g = op_sop, set up src0 reg */ 23767821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2377de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2378de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 23797821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2380de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2381de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2382de2362d3Smrg /* alpha ex2 temp0, temp0.g */ 23837821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2384de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 23857821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2386de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2387de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2388de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2389de2362d3Smrg 2390de2362d3Smrg /* rgb out0.b = op_sop, set up src0 reg */ 23917821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2392de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2393de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 23947821949aSmrg OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2395de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2396de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2397de2362d3Smrg /* alpha ex2 temp0, temp0.b */ 23987821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2399de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 24007821949aSmrg OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2401de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2402de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2403de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2404de2362d3Smrg } 2405de2362d3Smrg } 2406de2362d3Smrg 2407de2362d3Smrg /* Shader constants. */ 2408de2362d3Smrg /* constant 0: off, yco */ 24097821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 24107821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 24117821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 24127821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2413de2362d3Smrg /* constant 1: uco */ 24147821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 24157821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 24167821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 24177821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2418de2362d3Smrg /* constant 2: vco */ 24197821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 24207821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 24217821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 24227821949aSmrg OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2423de2362d3Smrg 24247821949aSmrg FINISH_ACCEL(); 2425de2362d3Smrg } 2426de2362d3Smrg 2427de2362d3Smrg BEGIN_ACCEL_RELOC(6, 2); 24287821949aSmrg OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 24297821949aSmrg OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 2430de2362d3Smrg 2431de2362d3Smrg EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2432de2362d3Smrg EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2433de2362d3Smrg 2434de2362d3Smrg /* no need to enable blending */ 24357821949aSmrg OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2436de2362d3Smrg 24377821949aSmrg OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 24387821949aSmrg FINISH_ACCEL(); 2439de2362d3Smrg 2440de2362d3Smrg if (pPriv->vsync) { 2441de2362d3Smrg xf86CrtcPtr crtc; 2442de2362d3Smrg if (pPriv->desired_crtc) 2443de2362d3Smrg crtc = 
pPriv->desired_crtc; 2444de2362d3Smrg else 24457821949aSmrg crtc = radeon_pick_best_crtc(pScrn, 2446de2362d3Smrg pPriv->drw_x, 2447de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 2448de2362d3Smrg pPriv->drw_y, 2449de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 2450de2362d3Smrg if (crtc) 24517821949aSmrg FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 24527821949aSmrg crtc, 24537821949aSmrg pPriv->drw_y - crtc->y, 24547821949aSmrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2455de2362d3Smrg } 2456de2362d3Smrg 2457de2362d3Smrg return TRUE; 2458de2362d3Smrg} 2459de2362d3Smrg

/*
 * Draw the prepared textured video into the destination pixmap, one
 * primitive per rectangle of pPriv->clip, then flush the 3D destination
 * cache, wait for the 3D engine and report the clip region as damaged.
 *
 * pScrn: screen whose accelerator receives the commands.
 * pPriv: port state; supplies the clip region, the source and drawable
 *        geometry (src_*, dst_*, drw_*, w, h), bicubic_enabled and the
 *        vtx_count used to size each draw.
 *
 * Returns nothing.  If R300PrepareTexturedVideo() fails, either up front
 * or after a mid-loop flush, the function returns immediately without
 * issuing the final flush or the damage report.
 */
static void
FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr pPixmap = pPriv->pPixmap;
    int dstxoff, dstyoff;
    BoxPtr rect = REGION_RECTS(&pPriv->clip);
    int rects_left = REGION_NUM_RECTS(&pPriv->clip);
    ACCEL_PREAMBLE();

#ifdef COMPOSITE
    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
#else
    dstxoff = 0;
    dstyoff = 0;
#endif

    if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
        return;

    /*
     * How the polygon is drawn depends on the chip generation:
     *
     * < R300:
     *
     * A rectangle can be rendered in one pass, so handling is
     * straightforward.
     *
     * >= R300:
     *
     * A quad is accepted but rendered as two triangles, which shows up
     * as a tear along the diagonal.  Instead a single, oversized
     * triangle is drawn and the scissor restricts it to the wanted
     * rectangle.  Guardband limits on r3xx/r4xx only allow that
     * triangle up to 2560/4021 pixels; anything larger is drawn as a
     * quad after all.
     */
    for (; rects_left--; rect++) {
        float tex_x, tex_y, tex_w, tex_h;   /* source area, in texels */
        int out_x, out_y, out_w, out_h;     /* target area, in pixels */
        int prim_verts;                     /* 4 for a quad, 3 for a triangle */
        Bool use_quad;
#ifdef ACCEL_CP
        int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3;

        /*
         * Not enough room left for this box: flush what is queued and
         * run the state setup again before emitting anything else.
         */
        if (draw_size > radeon_cs_space_remaining(pScrn)) {
            if (info->cs)
                radeon_cs_flush_indirect(pScrn);
            else
                RADEONCPFlushIndirect(pScrn, 1);
            if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
                return;
        }
#endif

        out_x = rect->x1 + dstxoff;
        out_y = rect->y1 + dstyoff;
        out_w = rect->x2 - rect->x1;
        out_h = rect->y2 - rect->y1;

        /* Map the box back into the source through the src/dst ratio. */
        tex_x = pPriv->src_x;
        tex_x += ((rect->x1 - pPriv->drw_x) *
                  pPriv->src_w) / (float)pPriv->dst_w;
        tex_y = pPriv->src_y;
        tex_y += ((rect->y1 - pPriv->drw_y) *
                  pPriv->src_h) / (float)pPriv->dst_h;

        tex_w = (pPriv->src_w * out_w) / (float)pPriv->dst_w;
        tex_h = (pPriv->src_h * out_h) / (float)pPriv->dst_h;

        /* Past the guardband limit the single triangle cannot be used. */
        use_quad = (out_w + out_h) > (IS_R400_3D ? 4021 : 2560);
        prim_verts = use_quad ? 4 : 3;

        /*
         * Scissor to exactly the output rectangle.
         */
        BEGIN_ACCEL(2);
        /* R300 has an offset */
        OUT_ACCEL_REG(R300_SC_SCISSOR0, (((out_x + 1440) << R300_SCISSOR_X_SHIFT) |
                                         ((out_y + 1440) << R300_SCISSOR_Y_SHIFT)));
        OUT_ACCEL_REG(R300_SC_SCISSOR1, (((out_x + out_w + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
                                         ((out_y + out_h + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
        FINISH_ACCEL();

        /* Draw header: primitive type and vertex count follow use_quad. */
#ifdef ACCEL_CP
        BEGIN_RING(prim_verts * pPriv->vtx_count + 4);
        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
                            prim_verts * pPriv->vtx_count));
        OUT_RING((use_quad ? RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST
                           : RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST) |
                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
                 (prim_verts << RADEON_CP_VC_CNTL_NUM_SHIFT));
#else /* ACCEL_CP */
        BEGIN_ACCEL(2 + pPriv->vtx_count * prim_verts);
        OUT_ACCEL_REG(RADEON_SE_VF_CNTL,
                      ((use_quad ? RADEON_VF_PRIM_TYPE_QUAD_LIST
                                 : RADEON_VF_PRIM_TYPE_TRIANGLE_LIST) |
                       RADEON_VF_PRIM_WALK_DATA |
                       (prim_verts << RADEON_VF_NUM_VERTICES_SHIFT)));
#endif

        /*
         * Vertices.  The bicubic variants carry a third coordinate pair
         * (the unnormalized source position plus 0.5); this path only
         * runs on >= R300, so no legacy handling is needed for it.
         */
        if (use_quad) {
            if (pPriv->bicubic_enabled) {
                VTX_OUT_6((float)out_x, (float)out_y,
                          (float)tex_x / pPriv->w, (float)tex_y / pPriv->h,
                          (float)tex_x + 0.5, (float)tex_y + 0.5);
                VTX_OUT_6((float)out_x, (float)(out_y + out_h),
                          (float)tex_x / pPriv->w, (float)(tex_y + tex_h) / pPriv->h,
                          (float)tex_x + 0.5, (float)(tex_y + tex_h) + 0.5);
                VTX_OUT_6((float)(out_x + out_w), (float)(out_y + out_h),
                          (float)(tex_x + tex_w) / pPriv->w, (float)(tex_y + tex_h) / pPriv->h,
                          (float)(tex_x + tex_w) + 0.5, (float)(tex_y + tex_h) + 0.5);
                VTX_OUT_6((float)(out_x + out_w), (float)out_y,
                          (float)(tex_x + tex_w) / pPriv->w, (float)tex_y / pPriv->h,
                          (float)(tex_x + tex_w) + 0.5, (float)tex_y + 0.5);
            } else {
                VTX_OUT_4((float)out_x, (float)out_y,
                          (float)tex_x / pPriv->w, (float)tex_y / pPriv->h);
                VTX_OUT_4((float)out_x, (float)(out_y + out_h),
                          (float)tex_x / pPriv->w, (float)(tex_y + tex_h) / pPriv->h);
                VTX_OUT_4((float)(out_x + out_w), (float)(out_y + out_h),
                          (float)(tex_x + tex_w) / pPriv->w, (float)(tex_y + tex_h) / pPriv->h);
                VTX_OUT_4((float)(out_x + out_w), (float)out_y,
                          (float)(tex_x + tex_w) / pPriv->w, (float)tex_y / pPriv->h);
            }
        } else {
            /*
             * One big, scissored triangle: both legs are stretched to
             * out_w + out_h and the texture coordinates are scaled by
             * the same factor.
             */
            if (pPriv->bicubic_enabled) {
                VTX_OUT_6((float)out_x, (float)out_y,
                          (float)tex_x / pPriv->w, (float)tex_y / pPriv->h,
                          (float)tex_x + 0.5, (float)tex_y + 0.5);
                VTX_OUT_6((float)out_x, (float)(out_y + out_w + out_h),
                          (float)tex_x / pPriv->w,
                          ((float)tex_y + (float)tex_h * (((float)out_w / (float)out_h) + 1.0)) / pPriv->h,
                          (float)tex_x + 0.5,
                          (float)tex_y + (float)tex_h * (((float)out_w / (float)out_h) + 1.0) + 0.5);
                VTX_OUT_6((float)(out_x + out_w + out_h), (float)out_y,
                          ((float)tex_x + (float)tex_w * (((float)out_h / (float)out_w) + 1.0)) / pPriv->w,
                          (float)tex_y / pPriv->h,
                          (float)tex_x + (float)tex_w * (((float)out_h / (float)out_w) + 1.0) + 0.5,
                          (float)tex_y + 0.5);
            } else {
                VTX_OUT_4((float)out_x, (float)out_y,
                          (float)tex_x / pPriv->w, (float)tex_y / pPriv->h);
                VTX_OUT_4((float)out_x, (float)(out_y + out_h + out_w),
                          (float)tex_x / pPriv->w,
                          ((float)tex_y + (float)tex_h * (((float)out_w / (float)out_h) + 1.0)) / pPriv->h);
                VTX_OUT_4((float)(out_x + out_w + out_h), (float)out_y,
                          ((float)tex_x + (float)tex_w * (((float)out_h / (float)out_w) + 1.0)) / pPriv->w,
                          (float)tex_y / pPriv->h);
            }
        }

        /* flushing is pipelined, free/finish is not */
        OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);

#ifdef ACCEL_CP
        ADVANCE_RING();
#else
        FINISH_ACCEL();
#endif /* !ACCEL_CP */
    }

    /* All boxes queued: reset the clip rule, flush and wait for 3D idle. */
    BEGIN_ACCEL(3);
    OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
    OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

2662de2362d3Smrg 2663de2362d3Smrgstatic Bool 26647821949aSmrgFUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2665de2362d3Smrg{ 2666de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 2667de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 2668de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 2669de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 26707821949aSmrg uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0; 2671de2362d3Smrg uint32_t dst_pitch, dst_format; 26727821949aSmrg uint32_t txenable, colorpitch, bicubic_offset; 2673de2362d3Smrg uint32_t output_fmt; 2674de2362d3Smrg int pixel_shift, out_size = 6; 26757821949aSmrg ACCEL_PREAMBLE(); 2676de2362d3Smrg 26777821949aSmrg#ifdef XF86DRM_MODE 26787821949aSmrg if (info->cs) { 26797821949aSmrg int ret; 2680de2362d3Smrg 26817821949aSmrg radeon_cs_space_reset_bos(info->cs); 26827821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 26837821949aSmrg 26847821949aSmrg if (pPriv->bicubic_enabled) 26857821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 26867821949aSmrg 26877821949aSmrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 26887821949aSmrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 26897821949aSmrg 26907821949aSmrg ret = radeon_cs_space_check(info->cs); 26917821949aSmrg if (ret) { 26927821949aSmrg ErrorF("Not enough RAM to hw accel xv operation\n"); 26937821949aSmrg return FALSE; 26947821949aSmrg } 2695de2362d3Smrg } 26967821949aSmrg#else 26977821949aSmrg (void)src_bo; 26987821949aSmrg#endif 2699de2362d3Smrg 2700de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2701de2362d3Smrg 27027821949aSmrg#ifdef USE_EXA 27037821949aSmrg if
(info->useEXA) { 27047821949aSmrg dst_pitch = exaGetPixmapPitch(pPixmap); 27057821949aSmrg } else 27067821949aSmrg#endif 27077821949aSmrg { 27087821949aSmrg dst_pitch = pPixmap->devKind; 27097821949aSmrg } 27107821949aSmrg 27117821949aSmrg#ifdef USE_EXA 27127821949aSmrg if (info->useEXA) { 27137821949aSmrg RADEON_SWITCH_TO_3D(); 27147821949aSmrg } else 27157821949aSmrg#endif 27167821949aSmrg { 27177821949aSmrg BEGIN_ACCEL(2); 27187821949aSmrg OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 27197821949aSmrg /* We must wait for 3d to idle, in case source was just written as a dest. */ 27207821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 27217821949aSmrg RADEON_WAIT_HOST_IDLECLEAN | 27227821949aSmrg RADEON_WAIT_2D_IDLECLEAN | 27237821949aSmrg RADEON_WAIT_3D_IDLECLEAN | 27247821949aSmrg RADEON_WAIT_DMA_GUI_IDLE); 27257821949aSmrg FINISH_ACCEL(); 27267821949aSmrg 27277821949aSmrg if (!info->accel_state->XInited3D) 27287821949aSmrg RADEONInit3DEngine(pScrn); 27297821949aSmrg } 2730de2362d3Smrg 2731de2362d3Smrg if (pPriv->bicubic_enabled) 2732de2362d3Smrg pPriv->vtx_count = 6; 2733de2362d3Smrg else 2734de2362d3Smrg pPriv->vtx_count = 4; 2735de2362d3Smrg 2736de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 2737de2362d3Smrg case 16: 2738de2362d3Smrg if (pPixmap->drawable.depth == 15) 2739de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB1555; 2740de2362d3Smrg else 2741de2362d3Smrg dst_format = R300_COLORFORMAT_RGB565; 2742de2362d3Smrg break; 2743de2362d3Smrg case 32: 2744de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB8888; 2745de2362d3Smrg break; 2746de2362d3Smrg default: 2747de2362d3Smrg return FALSE; 2748de2362d3Smrg } 2749de2362d3Smrg 2750de2362d3Smrg output_fmt = (R300_OUT_FMT_C4_8 | 2751de2362d3Smrg R300_OUT_FMT_C0_SEL_BLUE | 2752de2362d3Smrg R300_OUT_FMT_C1_SEL_GREEN | 2753de2362d3Smrg R300_OUT_FMT_C2_SEL_RED | 2754de2362d3Smrg R300_OUT_FMT_C3_SEL_ALPHA); 2755de2362d3Smrg 2756de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 2757de2362d3Smrg colorpitch 
|= dst_format; 2758de2362d3Smrg 2759de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 2760de2362d3Smrg colorpitch |= R300_COLORTILE; 2761de2362d3Smrg 2762de2362d3Smrg if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2763de2362d3Smrg (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2764de2362d3Smrg pPriv->is_planar = TRUE; 2765de2362d3Smrg else 2766de2362d3Smrg pPriv->is_planar = FALSE; 2767de2362d3Smrg 2768de2362d3Smrg if (pPriv->is_planar) { 2769de2362d3Smrg txformat1 = R300_TX_FORMAT_X8; 2770de2362d3Smrg txpitch = pPriv->src_pitch; 2771de2362d3Smrg } else { 2772de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 2773de2362d3Smrg txformat1 = R300_TX_FORMAT_YVYU422; 2774de2362d3Smrg else 2775de2362d3Smrg txformat1 = R300_TX_FORMAT_VYUY422; 2776de2362d3Smrg 2777de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) 2778de2362d3Smrg txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2779de2362d3Smrg 2780de2362d3Smrg /* pitch is in pixels */ 2781de2362d3Smrg txpitch = pPriv->src_pitch / 2; 2782de2362d3Smrg } 2783de2362d3Smrg txpitch -= 1; 2784de2362d3Smrg 2785de2362d3Smrg txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2786de2362d3Smrg (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2787de2362d3Smrg R300_TXPITCH_EN); 2788de2362d3Smrg 2789de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2790de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2791de2362d3Smrg R300_TX_MAG_FILTER_LINEAR | 2792de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 2793de2362d3Smrg (0 << R300_TX_ID_SHIFT)); 2794de2362d3Smrg 2795de2362d3Smrg 2796de2362d3Smrg if ((pPriv->w - 1) & 0x800) 2797de2362d3Smrg txpitch |= R500_TXWIDTH_11; 2798de2362d3Smrg 2799de2362d3Smrg if ((pPriv->h - 1) & 0x800) 2800de2362d3Smrg txpitch |= R500_TXHEIGHT_11; 2801de2362d3Smrg 2802de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R520) { 2803de2362d3Smrg unsigned us_width = (pPriv->w - 1) & 0x7ff; 2804de2362d3Smrg unsigned us_height = (pPriv->h - 1) & 0x7ff; 2805de2362d3Smrg unsigned us_depth 
= 0; 2806de2362d3Smrg 2807de2362d3Smrg if (pPriv->w > 2048) { 2808de2362d3Smrg us_width = (0x7ff + us_width) >> 1; 2809de2362d3Smrg us_depth |= 0x0d; 2810de2362d3Smrg } 2811de2362d3Smrg if (pPriv->h > 2048) { 2812de2362d3Smrg us_height = (0x7ff + us_height) >> 1; 2813de2362d3Smrg us_depth |= 0x0e; 2814de2362d3Smrg } 2815de2362d3Smrg us_format = (us_width << R300_TXWIDTH_SHIFT) | 2816de2362d3Smrg (us_height << R300_TXHEIGHT_SHIFT) | 2817de2362d3Smrg (us_depth << R300_TXDEPTH_SHIFT); 2818de2362d3Smrg out_size++; 2819de2362d3Smrg } 2820de2362d3Smrg 28217821949aSmrg txoffset = info->cs ? 0 : pPriv->src_offset; 28227821949aSmrg 2823de2362d3Smrg BEGIN_ACCEL_RELOC(out_size, 1); 28247821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 28257821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 28267821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 28277821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 28287821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 28297821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 2830de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R520) 28317821949aSmrg OUT_ACCEL_REG(R500_US_FORMAT0_0, us_format); 28327821949aSmrg FINISH_ACCEL(); 2833de2362d3Smrg 2834de2362d3Smrg txenable = R300_TEX_0_ENABLE; 2835de2362d3Smrg 2836de2362d3Smrg if (pPriv->is_planar) { 2837de2362d3Smrg txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2838de2362d3Smrg (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2839de2362d3Smrg R300_TXPITCH_EN); 2840de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2841de2362d3Smrg txpitch -= 1; 2842de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2843de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2844de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 2845de2362d3Smrg R300_TX_MAG_FILTER_LINEAR); 2846de2362d3Smrg 2847de2362d3Smrg BEGIN_ACCEL_RELOC(12, 2); 28487821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << 
R300_TX_ID_SHIFT)); 28497821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 28507821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 28517821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 28527821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 28537821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 28547821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 28557821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 28567821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 28577821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 28587821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 28597821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 28607821949aSmrg FINISH_ACCEL(); 2861de2362d3Smrg txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2862de2362d3Smrg } 2863de2362d3Smrg 2864de2362d3Smrg if (pPriv->bicubic_enabled) { 2865de2362d3Smrg /* Size is 128x1 */ 2866de2362d3Smrg txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2867de2362d3Smrg (0x0 << R300_TXHEIGHT_SHIFT) | 2868de2362d3Smrg R300_TXPITCH_EN); 2869de2362d3Smrg /* Format is 32-bit floats, 4bpp */ 2870de2362d3Smrg txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2871de2362d3Smrg /* Pitch is 127 (128-1) */ 2872de2362d3Smrg txpitch = 0x7f; 2873de2362d3Smrg /* Tex filter */ 2874de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2875de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2876de2362d3Smrg R300_TX_MIN_FILTER_NEAREST | 2877de2362d3Smrg R300_TX_MAG_FILTER_NEAREST | 2878de2362d3Smrg (1 << R300_TX_ID_SHIFT)); 2879de2362d3Smrg 28807821949aSmrg if (info->cs) 28817821949aSmrg bicubic_offset = 0; 28827821949aSmrg else 28837821949aSmrg bicubic_offset = pPriv->bicubic_src_offset; 28847821949aSmrg 2885de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 28867821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 28877821949aSmrg OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 
28887821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 28897821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 28907821949aSmrg OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 28917821949aSmrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 28927821949aSmrg FINISH_ACCEL(); 2893de2362d3Smrg 2894de2362d3Smrg /* Enable tex 1 */ 2895de2362d3Smrg txenable |= R300_TEX_1_ENABLE; 2896de2362d3Smrg } 2897de2362d3Smrg 2898de2362d3Smrg /* setup the VAP */ 2899de2362d3Smrg if (info->accel_state->has_tcl) { 2900de2362d3Smrg if (pPriv->bicubic_enabled) 29017821949aSmrg BEGIN_ACCEL(7); 2902de2362d3Smrg else 29037821949aSmrg BEGIN_ACCEL(6); 2904de2362d3Smrg } else { 2905de2362d3Smrg if (pPriv->bicubic_enabled) 29067821949aSmrg BEGIN_ACCEL(5); 2907de2362d3Smrg else 29087821949aSmrg BEGIN_ACCEL(4); 2909de2362d3Smrg } 2910de2362d3Smrg 2911de2362d3Smrg /* These registers define the number, type, and location of data submitted 2912de2362d3Smrg * to the PVS unit of GA input (when PVS is disabled) 2913de2362d3Smrg * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2914de2362d3Smrg * enabled. This memory provides the imputs to the vertex shader program 2915de2362d3Smrg * and ordering is not important. When PVS/TCL is disabled, this field maps 2916de2362d3Smrg * directly to the GA input memory and the order is signifigant. 
In 2917de2362d3Smrg * PVS_BYPASS mode the order is as follows: 2918de2362d3Smrg * Position 2919de2362d3Smrg * Point Size 2920de2362d3Smrg * Color 0-3 2921de2362d3Smrg * Textures 0-7 2922de2362d3Smrg * Fog 2923de2362d3Smrg */ 2924de2362d3Smrg if (pPriv->bicubic_enabled) { 29257821949aSmrg OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2926de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2927de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 2928de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 2929de2362d3Smrg R300_SIGNED_0 | 2930de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2931de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 2932de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 2933de2362d3Smrg R300_SIGNED_1)); 29347821949aSmrg OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 2935de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2936de2362d3Smrg (0 << R300_SKIP_DWORDS_2_SHIFT) | 2937de2362d3Smrg (7 << R300_DST_VEC_LOC_2_SHIFT) | 2938de2362d3Smrg R300_LAST_VEC_2 | 2939de2362d3Smrg R300_SIGNED_2)); 2940de2362d3Smrg } else { 29417821949aSmrg OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2942de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2943de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 2944de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 2945de2362d3Smrg R300_SIGNED_0 | 2946de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2947de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 2948de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 2949de2362d3Smrg R300_LAST_VEC_1 | 2950de2362d3Smrg R300_SIGNED_1)); 2951de2362d3Smrg } 2952de2362d3Smrg 2953de2362d3Smrg /* load the vertex shader 2954de2362d3Smrg * We pre-load vertex programs in RADEONInit3DEngine(): 2955de2362d3Smrg * - exa 2956de2362d3Smrg * - Xv 2957de2362d3Smrg * - Xv bicubic 2958de2362d3Smrg * Here we select the offset of the vertex program we want to use 2959de2362d3Smrg */ 2960de2362d3Smrg if (info->accel_state->has_tcl) { 2961de2362d3Smrg if 
(pPriv->bicubic_enabled) { 29627821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2963de2362d3Smrg ((11 << R300_PVS_FIRST_INST_SHIFT) | 2964de2362d3Smrg (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2965de2362d3Smrg (13 << R300_PVS_LAST_INST_SHIFT))); 29667821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2967de2362d3Smrg (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2968de2362d3Smrg } else { 29697821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2970de2362d3Smrg ((9 << R300_PVS_FIRST_INST_SHIFT) | 2971de2362d3Smrg (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2972de2362d3Smrg (10 << R300_PVS_LAST_INST_SHIFT))); 29737821949aSmrg OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2974de2362d3Smrg (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2975de2362d3Smrg } 2976de2362d3Smrg } 2977de2362d3Smrg 2978de2362d3Smrg /* Position and one set of 2 texture coordinates */ 29797821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2980de2362d3Smrg if (pPriv->bicubic_enabled) 29817821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2982de2362d3Smrg (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2983de2362d3Smrg else 29847821949aSmrg OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2985de2362d3Smrg 29867821949aSmrg OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 29877821949aSmrg FINISH_ACCEL(); 2988de2362d3Smrg 2989de2362d3Smrg /* setup pixel shader */ 2990de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) { 2991de2362d3Smrg if (pPriv->bicubic_enabled) { 29927821949aSmrg BEGIN_ACCEL(7); 2993de2362d3Smrg 2994de2362d3Smrg /* 4 components: 2 for tex0 and 2 for tex1 */ 29957821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 2996de2362d3Smrg ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2997de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 2998de2362d3Smrg 2999de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 30007821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 3001de2362d3Smrg 3002de2362d3Smrg /* Pixel stack frame size. 
*/ 30037821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 3004de2362d3Smrg 3005de2362d3Smrg /* FP length. */ 30067821949aSmrg OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3007de2362d3Smrg R500_US_CODE_END_ADDR(13))); 30087821949aSmrg OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3009de2362d3Smrg R500_US_CODE_RANGE_SIZE(13))); 3010de2362d3Smrg 3011de2362d3Smrg /* Prepare for FP emission. */ 30127821949aSmrg OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 30137821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 30147821949aSmrg FINISH_ACCEL(); 3015de2362d3Smrg 30167821949aSmrg BEGIN_ACCEL(89); 3017de2362d3Smrg /* Pixel shader. 3018de2362d3Smrg * I've gone ahead and annotated each instruction, since this 3019de2362d3Smrg * thing is MASSIVE. :3 3020de2362d3Smrg * Note: In order to avoid buggies with temps and multiple 3021de2362d3Smrg * inputs, all temps are offset by 2. temp0 -> register2. */ 3022de2362d3Smrg 3023de2362d3Smrg /* TEX temp2, input1.xxxx, tex1, 1D */ 30247821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3025de2362d3Smrg R500_INST_RGB_WMASK_R | 3026de2362d3Smrg R500_INST_RGB_WMASK_G | 3027de2362d3Smrg R500_INST_RGB_WMASK_B)); 30287821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3029de2362d3Smrg R500_TEX_INST_LD | 3030de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 30317821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3032de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3033de2362d3Smrg R500_TEX_SRC_T_SWIZ_R | 3034de2362d3Smrg R500_TEX_SRC_R_SWIZ_R | 3035de2362d3Smrg R500_TEX_SRC_Q_SWIZ_R | 3036de2362d3Smrg R500_TEX_DST_ADDR(2) | 3037de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3038de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3039de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3040de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 30417821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 30427821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 30437821949aSmrg 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3044de2362d3Smrg 3045de2362d3Smrg /* TEX temp5, input1.yyyy, tex1, 1D */ 30467821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3047de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3048de2362d3Smrg R500_INST_RGB_WMASK_R | 3049de2362d3Smrg R500_INST_RGB_WMASK_G | 3050de2362d3Smrg R500_INST_RGB_WMASK_B)); 30517821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3052de2362d3Smrg R500_TEX_INST_LD | 3053de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3054de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 30557821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3056de2362d3Smrg R500_TEX_SRC_S_SWIZ_G | 3057de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3058de2362d3Smrg R500_TEX_SRC_R_SWIZ_G | 3059de2362d3Smrg R500_TEX_SRC_Q_SWIZ_G | 3060de2362d3Smrg R500_TEX_DST_ADDR(5) | 3061de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3062de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3063de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3064de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 30657821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 30667821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 30677821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3068de2362d3Smrg 3069de2362d3Smrg /* MUL temp4, const0.x0x0, temp2.yyxx */ 30707821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3071de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3072de2362d3Smrg R500_INST_RGB_WMASK_R | 3073de2362d3Smrg R500_INST_RGB_WMASK_G | 3074de2362d3Smrg R500_INST_RGB_WMASK_B | 3075de2362d3Smrg R500_INST_ALPHA_WMASK)); 30767821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3077de2362d3Smrg R500_RGB_ADDR0_CONST | 3078de2362d3Smrg R500_RGB_ADDR1(2))); 30797821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3080de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3081de2362d3Smrg R500_ALPHA_ADDR1(2))); 30827821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3083de2362d3Smrg 
R500_ALU_RGB_R_SWIZ_A_R | 3084de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_0 | 3085de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_R | 3086de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3087de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 3088de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3089de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R)); 30907821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3091de2362d3Smrg R500_ALPHA_OP_MAD | 3092de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 3093de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3094de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 3095de2362d3Smrg R500_ALPHA_SWIZ_B_R)); 30967821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3097de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3098de2362d3Smrg R500_ALU_RGBA_R_SWIZ_0 | 3099de2362d3Smrg R500_ALU_RGBA_G_SWIZ_0 | 3100de2362d3Smrg R500_ALU_RGBA_B_SWIZ_0 | 3101de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3102de2362d3Smrg 3103de2362d3Smrg /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 31047821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3105de2362d3Smrg R500_INST_RGB_WMASK_R | 3106de2362d3Smrg R500_INST_RGB_WMASK_G | 3107de2362d3Smrg R500_INST_RGB_WMASK_B | 3108de2362d3Smrg R500_INST_ALPHA_WMASK)); 31097821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3110de2362d3Smrg R500_RGB_ADDR0_CONST | 3111de2362d3Smrg R500_RGB_ADDR1(5) | 3112de2362d3Smrg R500_RGB_ADDR2(4))); 31137821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3114de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3115de2362d3Smrg R500_ALPHA_ADDR1(5) | 3116de2362d3Smrg R500_ALPHA_ADDR2(4))); 31177821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3118de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_0 | 3119de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3120de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_0 | 3121de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3122de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3123de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_R | 3124de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R)); 31257821949aSmrg 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3126de2362d3Smrg R500_ALPHA_OP_MAD | 3127de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 3128de2362d3Smrg R500_ALPHA_SWIZ_A_G | 3129de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 3130de2362d3Smrg R500_ALPHA_SWIZ_B_R)); 31317821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3132de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3133de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3134de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3135de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3136de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3137de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3138de2362d3Smrg 3139de2362d3Smrg /* ADD temp3, temp3, input0.xyxy */ 31407821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3141de2362d3Smrg R500_INST_RGB_WMASK_R | 3142de2362d3Smrg R500_INST_RGB_WMASK_G | 3143de2362d3Smrg R500_INST_RGB_WMASK_B | 3144de2362d3Smrg R500_INST_ALPHA_WMASK)); 31457821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 3146de2362d3Smrg R500_RGB_ADDR2(0))); 31477821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 3148de2362d3Smrg R500_ALPHA_ADDR2(0))); 31497821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3150de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_1 | 3151de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_1 | 3152de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3153de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3154de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3155de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 31567821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3157de2362d3Smrg R500_ALPHA_OP_MAD | 3158de2362d3Smrg R500_ALPHA_SWIZ_A_1 | 3159de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 3160de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 31617821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3162de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3163de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3164de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3165de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3166de2362d3Smrg 
R500_ALU_RGBA_B_SWIZ_R | 3167de2362d3Smrg R500_ALU_RGBA_A_SWIZ_G)); 3168de2362d3Smrg 3169de2362d3Smrg /* TEX temp1, temp3.zwxy, tex0, 2D */ 31707821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3171de2362d3Smrg R500_INST_RGB_WMASK_R | 3172de2362d3Smrg R500_INST_RGB_WMASK_G | 3173de2362d3Smrg R500_INST_RGB_WMASK_B | 3174de2362d3Smrg R500_INST_ALPHA_WMASK)); 31757821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3176de2362d3Smrg R500_TEX_INST_LD | 3177de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 31787821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3179de2362d3Smrg R500_TEX_SRC_S_SWIZ_B | 3180de2362d3Smrg R500_TEX_SRC_T_SWIZ_A | 3181de2362d3Smrg R500_TEX_SRC_R_SWIZ_R | 3182de2362d3Smrg R500_TEX_SRC_Q_SWIZ_G | 3183de2362d3Smrg R500_TEX_DST_ADDR(1) | 3184de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3185de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3186de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3187de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 31887821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 31897821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 31907821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3191de2362d3Smrg 3192de2362d3Smrg /* TEX temp3, temp3.xyzw, tex0, 2D */ 31937821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3194de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3195de2362d3Smrg R500_INST_RGB_WMASK_R | 3196de2362d3Smrg R500_INST_RGB_WMASK_G | 3197de2362d3Smrg R500_INST_RGB_WMASK_B | 3198de2362d3Smrg R500_INST_ALPHA_WMASK)); 31997821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3200de2362d3Smrg R500_TEX_INST_LD | 3201de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3202de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 32037821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3204de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3205de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3206de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 3207de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 3208de2362d3Smrg 
R500_TEX_DST_ADDR(3) | 3209de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3210de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3211de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3212de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 32137821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 32147821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 32157821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3216de2362d3Smrg 3217de2362d3Smrg /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 32187821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3219de2362d3Smrg R500_INST_RGB_WMASK_R | 3220de2362d3Smrg R500_INST_RGB_WMASK_G | 3221de2362d3Smrg R500_INST_RGB_WMASK_B | 3222de2362d3Smrg R500_INST_ALPHA_WMASK)); 32237821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3224de2362d3Smrg R500_RGB_ADDR0_CONST | 3225de2362d3Smrg R500_RGB_ADDR1(5) | 3226de2362d3Smrg R500_RGB_ADDR2(4))); 32277821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3228de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3229de2362d3Smrg R500_ALPHA_ADDR1(5) | 3230de2362d3Smrg R500_ALPHA_ADDR2(4))); 32317821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3232de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_0 | 3233de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3234de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_0 | 3235de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3236de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 3237de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3238de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G)); 32397821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3240de2362d3Smrg R500_ALPHA_OP_MAD | 3241de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 3242de2362d3Smrg R500_ALPHA_SWIZ_A_G | 3243de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 3244de2362d3Smrg R500_ALPHA_SWIZ_B_G)); 32457821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3246de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3247de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3248de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3249de2362d3Smrg 
R500_ALU_RGBA_G_SWIZ_G | 3250de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3251de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3252de2362d3Smrg 3253de2362d3Smrg /* ADD temp0, temp4, input0.xyxy */ 32547821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3255de2362d3Smrg R500_INST_RGB_WMASK_R | 3256de2362d3Smrg R500_INST_RGB_WMASK_G | 3257de2362d3Smrg R500_INST_RGB_WMASK_B | 3258de2362d3Smrg R500_INST_ALPHA_WMASK)); 32597821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 3260de2362d3Smrg R500_RGB_ADDR2(0))); 32617821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 3262de2362d3Smrg R500_ALPHA_ADDR2(0))); 32637821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3264de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_1 | 3265de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_1 | 3266de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3267de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3268de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3269de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 32707821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3271de2362d3Smrg R500_ALPHA_OP_MAD | 3272de2362d3Smrg R500_ALPHA_SWIZ_A_1 | 3273de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 3274de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 32757821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3276de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3277de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3278de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3279de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3280de2362d3Smrg R500_ALU_RGBA_B_SWIZ_R | 3281de2362d3Smrg R500_ALU_RGBA_A_SWIZ_G)); 3282de2362d3Smrg 3283de2362d3Smrg /* TEX temp4, temp0.zwzw, tex0, 2D */ 32847821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3285de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3286de2362d3Smrg R500_INST_RGB_WMASK_R | 3287de2362d3Smrg R500_INST_RGB_WMASK_G | 3288de2362d3Smrg R500_INST_RGB_WMASK_B | 3289de2362d3Smrg R500_INST_ALPHA_WMASK)); 32907821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 
3291de2362d3Smrg R500_TEX_INST_LD | 3292de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 32937821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3294de2362d3Smrg R500_TEX_SRC_S_SWIZ_B | 3295de2362d3Smrg R500_TEX_SRC_T_SWIZ_A | 3296de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 3297de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 3298de2362d3Smrg R500_TEX_DST_ADDR(4) | 3299de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3300de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3301de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3302de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 33037821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 33047821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 33057821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3306de2362d3Smrg 3307de2362d3Smrg /* TEX temp0, temp0.xyzw, tex0, 2D */ 33087821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3309de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3310de2362d3Smrg R500_INST_RGB_WMASK_R | 3311de2362d3Smrg R500_INST_RGB_WMASK_G | 3312de2362d3Smrg R500_INST_RGB_WMASK_B | 3313de2362d3Smrg R500_INST_ALPHA_WMASK)); 33147821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3315de2362d3Smrg R500_TEX_INST_LD | 3316de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3317de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 33187821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3319de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3320de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3321de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 3322de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 3323de2362d3Smrg R500_TEX_DST_ADDR(0) | 3324de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3325de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3326de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3327de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 33287821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 33297821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 33307821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3331de2362d3Smrg 3332de2362d3Smrg /* LRP temp3, temp2.zzzz, 
temp1, temp3 -> 3333de2362d3Smrg * - PRESUB temps, temp1 - temp3 3334de2362d3Smrg * - MAD temp2.zzzz, temps, temp3 */ 33357821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3336de2362d3Smrg R500_INST_RGB_WMASK_R | 3337de2362d3Smrg R500_INST_RGB_WMASK_G | 3338de2362d3Smrg R500_INST_RGB_WMASK_B | 3339de2362d3Smrg R500_INST_ALPHA_WMASK)); 33407821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3341de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3342de2362d3Smrg R500_RGB_ADDR1(1) | 3343de2362d3Smrg R500_RGB_ADDR2(2))); 33447821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3345de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3346de2362d3Smrg R500_ALPHA_ADDR1(1) | 3347de2362d3Smrg R500_ALPHA_ADDR2(2))); 33487821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3349de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3350de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3351de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3352de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3353de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3354de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3355de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 33567821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3357de2362d3Smrg R500_ALPHA_OP_MAD | 3358de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3359de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3360de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3361de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 33627821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3363de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3364de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3365de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3366de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3367de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3368de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3369de2362d3Smrg 3370de2362d3Smrg /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3371de2362d3Smrg * - PRESUB temps, temp4 - temp1 3372de2362d3Smrg * - MAD temp2.zzzz, temps, temp0 */ 33737821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 
(R500_INST_TYPE_ALU | 3374de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3375de2362d3Smrg R500_INST_RGB_WMASK_R | 3376de2362d3Smrg R500_INST_RGB_WMASK_G | 3377de2362d3Smrg R500_INST_RGB_WMASK_B | 3378de2362d3Smrg R500_INST_ALPHA_WMASK)); 33797821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3380de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3381de2362d3Smrg R500_RGB_ADDR1(4) | 3382de2362d3Smrg R500_RGB_ADDR2(2))); 33837821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3384de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3385de2362d3Smrg R500_ALPHA_ADDR1(4) | 3386de2362d3Smrg R500_ALPHA_ADDR2(2))); 33877821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3388de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3389de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3390de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3391de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3392de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3393de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3394de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 33957821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3396de2362d3Smrg R500_ALPHA_OP_MAD | 3397de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3398de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3399de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3400de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 34017821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3402de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3403de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3404de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3405de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3406de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3407de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3408de2362d3Smrg 3409de2362d3Smrg /* LRP output, temp5.zzzz, temp3, temp0 -> 3410de2362d3Smrg * - PRESUB temps, temp3 - temp0 3411de2362d3Smrg * - MAD temp5.zzzz, temps, temp0 */ 34127821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3413de2362d3Smrg R500_INST_LAST | 3414de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3415de2362d3Smrg 
R500_INST_RGB_WMASK_R | 3416de2362d3Smrg R500_INST_RGB_WMASK_G | 3417de2362d3Smrg R500_INST_RGB_WMASK_B | 3418de2362d3Smrg R500_INST_ALPHA_WMASK | 3419de2362d3Smrg R500_INST_RGB_OMASK_R | 3420de2362d3Smrg R500_INST_RGB_OMASK_G | 3421de2362d3Smrg R500_INST_RGB_OMASK_B | 3422de2362d3Smrg R500_INST_ALPHA_OMASK)); 34237821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3424de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3425de2362d3Smrg R500_RGB_ADDR1(3) | 3426de2362d3Smrg R500_RGB_ADDR2(5))); 34277821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3428de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3429de2362d3Smrg R500_ALPHA_ADDR1(3) | 3430de2362d3Smrg R500_ALPHA_ADDR2(5))); 34317821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3432de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3433de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3434de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3435de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3436de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3437de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3438de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 34397821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3440de2362d3Smrg R500_ALPHA_OP_MAD | 3441de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3442de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3443de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3444de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 34457821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3446de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3447de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3448de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3449de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3450de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3451de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3452de2362d3Smrg 3453de2362d3Smrg /* Shader constants. 
*/ 34547821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3455de2362d3Smrg 3456de2362d3Smrg /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3457de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3458de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3459de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3460de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3461de2362d3Smrg 34627821949aSmrg FINISH_ACCEL(); 3463de2362d3Smrg } else { 34647821949aSmrg BEGIN_ACCEL(19); 3465de2362d3Smrg /* 2 components: 2 for tex0 */ 34667821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 3467de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3468de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3469de2362d3Smrg 3470de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 34717821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3472de2362d3Smrg 3473de2362d3Smrg /* Pixel stack frame size. */ 34747821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3475de2362d3Smrg 3476de2362d3Smrg /* FP length. */ 34777821949aSmrg OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3478de2362d3Smrg R500_US_CODE_END_ADDR(1))); 34797821949aSmrg OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3480de2362d3Smrg R500_US_CODE_RANGE_SIZE(1))); 3481de2362d3Smrg 3482de2362d3Smrg /* Prepare for FP emission. 
*/ 34837821949aSmrg OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 34847821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3485de2362d3Smrg 3486de2362d3Smrg /* tex inst */ 34877821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3488de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3489de2362d3Smrg R500_INST_RGB_WMASK_R | 3490de2362d3Smrg R500_INST_RGB_WMASK_G | 3491de2362d3Smrg R500_INST_RGB_WMASK_B | 3492de2362d3Smrg R500_INST_ALPHA_WMASK | 3493de2362d3Smrg R500_INST_RGB_CLAMP | 3494de2362d3Smrg R500_INST_ALPHA_CLAMP)); 34957821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3496de2362d3Smrg R500_TEX_INST_LD | 3497de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3498de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 34997821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3500de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3501de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3502de2362d3Smrg R500_TEX_DST_ADDR(0) | 3503de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3504de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3505de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3506de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 35077821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3508de2362d3Smrg R500_DX_S_SWIZ_R | 3509de2362d3Smrg R500_DX_T_SWIZ_R | 3510de2362d3Smrg R500_DX_R_SWIZ_R | 3511de2362d3Smrg R500_DX_Q_SWIZ_R | 3512de2362d3Smrg R500_DY_ADDR(0) | 3513de2362d3Smrg R500_DY_S_SWIZ_R | 3514de2362d3Smrg R500_DY_T_SWIZ_R | 3515de2362d3Smrg R500_DY_R_SWIZ_R | 3516de2362d3Smrg R500_DY_Q_SWIZ_R)); 35177821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 35187821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3519de2362d3Smrg 3520de2362d3Smrg /* ALU inst */ 35217821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3522de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3523de2362d3Smrg R500_INST_LAST | 3524de2362d3Smrg R500_INST_RGB_OMASK_R | 3525de2362d3Smrg R500_INST_RGB_OMASK_G | 3526de2362d3Smrg R500_INST_RGB_OMASK_B | 3527de2362d3Smrg 
R500_INST_ALPHA_OMASK | 3528de2362d3Smrg R500_INST_RGB_CLAMP | 3529de2362d3Smrg R500_INST_ALPHA_CLAMP)); 35307821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3531de2362d3Smrg R500_RGB_ADDR1(0) | 3532de2362d3Smrg R500_RGB_ADDR1_CONST | 3533de2362d3Smrg R500_RGB_ADDR2(0) | 3534de2362d3Smrg R500_RGB_ADDR2_CONST)); 35357821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3536de2362d3Smrg R500_ALPHA_ADDR1(0) | 3537de2362d3Smrg R500_ALPHA_ADDR1_CONST | 3538de2362d3Smrg R500_ALPHA_ADDR2(0) | 3539de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 35407821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3541de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3542de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3543de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3544de2362d3Smrg R500_ALU_RGB_SEL_B_SRC0 | 3545de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_1 | 3546de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_1 | 3547de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_1)); 35487821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3549de2362d3Smrg R500_ALPHA_SWIZ_A_A | 3550de2362d3Smrg R500_ALPHA_SWIZ_B_1)); 35517821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3552de2362d3Smrg R500_ALU_RGBA_R_SWIZ_0 | 3553de2362d3Smrg R500_ALU_RGBA_G_SWIZ_0 | 3554de2362d3Smrg R500_ALU_RGBA_B_SWIZ_0 | 3555de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 35567821949aSmrg FINISH_ACCEL(); 3557de2362d3Smrg } 3558de2362d3Smrg } else { 3559de2362d3Smrg /* 3560de2362d3Smrg * y' = y - .0625 3561de2362d3Smrg * u' = u - .5 3562de2362d3Smrg * v' = v - .5; 3563de2362d3Smrg * 3564de2362d3Smrg * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3565de2362d3Smrg * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3566de2362d3Smrg * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3567de2362d3Smrg * 3568de2362d3Smrg * DP3 might look like the straightforward solution 3569de2362d3Smrg * but we'd need to move the texture yuv values in 3570de2362d3Smrg * the same reg for this to work. Therefore use MADs. 
3571de2362d3Smrg * Brightness just adds to the off constant. 3572de2362d3Smrg * Contrast is multiplication of luminance. 3573de2362d3Smrg * Saturation and hue change the u and v coeffs. 3574de2362d3Smrg * Default values (before adjustments - depend on colorspace): 3575de2362d3Smrg * yco = 1.1643 3576de2362d3Smrg * uco = 0, -0.39173, 2.017 3577de2362d3Smrg * vco = 1.5958, -0.8129, 0 3578de2362d3Smrg * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3579de2362d3Smrg * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3580de2362d3Smrg * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3581de2362d3Smrg * 3582de2362d3Smrg * temp = MAD(yco, yuv.yyyy, off) 3583de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 3584de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 3585de2362d3Smrg */ 3586de2362d3Smrg /* TODO: don't recalc consts always */ 3587de2362d3Smrg const float Loff = -0.0627; 3588de2362d3Smrg const float Coff = -0.502; 3589de2362d3Smrg float uvcosf, uvsinf; 3590de2362d3Smrg float yco; 3591de2362d3Smrg float uco[3], vco[3], off[3]; 3592de2362d3Smrg float bright, cont, gamma; 3593de2362d3Smrg int ref = pPriv->transform_index; 35947821949aSmrg Bool needgamma = FALSE; 3595de2362d3Smrg 3596de2362d3Smrg cont = RTFContrast(pPriv->contrast); 3597de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 3598de2362d3Smrg gamma = (float)pPriv->gamma / 1000.0; 3599de2362d3Smrg uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3600de2362d3Smrg uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3601de2362d3Smrg /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 3602de2362d3Smrg 3603de2362d3Smrg yco = trans[ref].RefLuma * cont; 3604de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 3605de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3606de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 3607de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 3608de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3609de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 3610de2362d3Smrg off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3611de2362d3Smrg off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3612de2362d3Smrg off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3613de2362d3Smrg 3614de2362d3Smrg //XXX gamma 3615de2362d3Smrg 36167821949aSmrg if (gamma != 1.0) { 36177821949aSmrg needgamma = TRUE; 36187821949aSmrg /* note: gamma correction is out = in ^ gamma; 36197821949aSmrg gpu can only do LG2/EX2 therefore we transform into 36207821949aSmrg in ^ gamma = 2 ^ (log2(in) * gamma). 36217821949aSmrg Lots of scalar ops, unfortunately (better solution?) - 36227821949aSmrg without gamma that's 3 inst, with gamma it's 10... 36237821949aSmrg could use different gamma factors per channel, 36247821949aSmrg if that's of any use. */ 36257821949aSmrg } 36267821949aSmrg 3627de2362d3Smrg if (pPriv->is_planar) { 36287821949aSmrg BEGIN_ACCEL(56); 3629de2362d3Smrg /* 2 components: 2 for tex0 */ 36307821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 3631de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3632de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3633de2362d3Smrg 3634de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 36357821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3636de2362d3Smrg 3637de2362d3Smrg /* Pixel stack frame size. */ 36387821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3639de2362d3Smrg 3640de2362d3Smrg /* FP length. 
*/ 36417821949aSmrg OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3642de2362d3Smrg R500_US_CODE_END_ADDR(5))); 36437821949aSmrg OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3644de2362d3Smrg R500_US_CODE_RANGE_SIZE(5))); 3645de2362d3Smrg 3646de2362d3Smrg /* Prepare for FP emission. */ 36477821949aSmrg OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 36487821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3649de2362d3Smrg 3650de2362d3Smrg /* tex inst */ 36517821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3652de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3653de2362d3Smrg R500_INST_RGB_WMASK_R | 3654de2362d3Smrg R500_INST_RGB_WMASK_G | 3655de2362d3Smrg R500_INST_RGB_WMASK_B | 3656de2362d3Smrg R500_INST_ALPHA_WMASK | 3657de2362d3Smrg R500_INST_RGB_CLAMP | 3658de2362d3Smrg R500_INST_ALPHA_CLAMP)); 36597821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3660de2362d3Smrg R500_TEX_INST_LD | 3661de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 36627821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3663de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3664de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3665de2362d3Smrg R500_TEX_DST_ADDR(2) | 3666de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3667de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3668de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3669de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 36707821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3671de2362d3Smrg R500_DX_S_SWIZ_R | 3672de2362d3Smrg R500_DX_T_SWIZ_R | 3673de2362d3Smrg R500_DX_R_SWIZ_R | 3674de2362d3Smrg R500_DX_Q_SWIZ_R | 3675de2362d3Smrg R500_DY_ADDR(0) | 3676de2362d3Smrg R500_DY_S_SWIZ_R | 3677de2362d3Smrg R500_DY_T_SWIZ_R | 3678de2362d3Smrg R500_DY_R_SWIZ_R | 3679de2362d3Smrg R500_DY_Q_SWIZ_R)); 36807821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 36817821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3682de2362d3Smrg 3683de2362d3Smrg /* tex inst */ 36847821949aSmrg 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3685de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3686de2362d3Smrg R500_INST_RGB_WMASK_R | 3687de2362d3Smrg R500_INST_RGB_WMASK_G | 3688de2362d3Smrg R500_INST_RGB_WMASK_B | 3689de2362d3Smrg R500_INST_ALPHA_WMASK | 3690de2362d3Smrg R500_INST_RGB_CLAMP | 3691de2362d3Smrg R500_INST_ALPHA_CLAMP)); 36927821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3693de2362d3Smrg R500_TEX_INST_LD | 3694de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 36957821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3696de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3697de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3698de2362d3Smrg R500_TEX_DST_ADDR(1) | 3699de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3700de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3701de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3702de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 37037821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3704de2362d3Smrg R500_DX_S_SWIZ_R | 3705de2362d3Smrg R500_DX_T_SWIZ_R | 3706de2362d3Smrg R500_DX_R_SWIZ_R | 3707de2362d3Smrg R500_DX_Q_SWIZ_R | 3708de2362d3Smrg R500_DY_ADDR(0) | 3709de2362d3Smrg R500_DY_S_SWIZ_R | 3710de2362d3Smrg R500_DY_T_SWIZ_R | 3711de2362d3Smrg R500_DY_R_SWIZ_R | 3712de2362d3Smrg R500_DY_Q_SWIZ_R)); 37137821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 37147821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3715de2362d3Smrg 3716de2362d3Smrg /* tex inst */ 37177821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3718de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3719de2362d3Smrg R500_INST_RGB_WMASK_R | 3720de2362d3Smrg R500_INST_RGB_WMASK_G | 3721de2362d3Smrg R500_INST_RGB_WMASK_B | 3722de2362d3Smrg R500_INST_ALPHA_WMASK | 3723de2362d3Smrg R500_INST_RGB_CLAMP | 3724de2362d3Smrg R500_INST_ALPHA_CLAMP)); 37257821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3726de2362d3Smrg R500_TEX_INST_LD | 3727de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3728de2362d3Smrg 
R500_TEX_IGNORE_UNCOVERED)); 37297821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3730de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3731de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3732de2362d3Smrg R500_TEX_DST_ADDR(0) | 3733de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3734de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3735de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3736de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 37377821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3738de2362d3Smrg R500_DX_S_SWIZ_R | 3739de2362d3Smrg R500_DX_T_SWIZ_R | 3740de2362d3Smrg R500_DX_R_SWIZ_R | 3741de2362d3Smrg R500_DX_Q_SWIZ_R | 3742de2362d3Smrg R500_DY_ADDR(0) | 3743de2362d3Smrg R500_DY_S_SWIZ_R | 3744de2362d3Smrg R500_DY_T_SWIZ_R | 3745de2362d3Smrg R500_DY_R_SWIZ_R | 3746de2362d3Smrg R500_DY_Q_SWIZ_R)); 37477821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 37487821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3749de2362d3Smrg 3750de2362d3Smrg /* ALU inst */ 3751de2362d3Smrg /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 37527821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3753de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3754de2362d3Smrg R500_INST_RGB_WMASK_R | 3755de2362d3Smrg R500_INST_RGB_WMASK_G | 3756de2362d3Smrg R500_INST_RGB_WMASK_B | 3757de2362d3Smrg R500_INST_ALPHA_WMASK)); 37587821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3759de2362d3Smrg R500_RGB_ADDR0_CONST | 3760de2362d3Smrg R500_RGB_ADDR1(2) | 3761de2362d3Smrg R500_RGB_ADDR2(0) | 3762de2362d3Smrg R500_RGB_ADDR2_CONST)); 37637821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3764de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3765de2362d3Smrg R500_ALPHA_ADDR1(2) | 3766de2362d3Smrg R500_ALPHA_ADDR2(0) | 3767de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 37687821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3769de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_A | 3770de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_A | 3771de2362d3Smrg 
R500_ALU_RGB_B_SWIZ_A_A | 3772de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3773de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3774de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3775de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 37767821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3777de2362d3Smrg R500_ALPHA_ADDRD(2) | 3778de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3779de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 37807821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3781de2362d3Smrg R500_ALU_RGBA_ADDRD(2) | 3782de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3783de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3784de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3785de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3786de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3787de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3788de2362d3Smrg 3789de2362d3Smrg /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 37907821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3791de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3792de2362d3Smrg R500_INST_RGB_WMASK_R | 3793de2362d3Smrg R500_INST_RGB_WMASK_G | 3794de2362d3Smrg R500_INST_RGB_WMASK_B | 3795de2362d3Smrg R500_INST_ALPHA_WMASK)); 37967821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3797de2362d3Smrg R500_RGB_ADDR0_CONST | 3798de2362d3Smrg R500_RGB_ADDR1(1) | 3799de2362d3Smrg R500_RGB_ADDR2(2))); 38007821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3801de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3802de2362d3Smrg R500_ALPHA_ADDR1(1) | 3803de2362d3Smrg R500_ALPHA_ADDR2(2))); 38047821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3805de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3806de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3807de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3808de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3809de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3810de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3811de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 38127821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 
3813de2362d3Smrg R500_ALPHA_ADDRD(2) | 3814de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3815de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 38167821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3817de2362d3Smrg R500_ALU_RGBA_ADDRD(2) | 3818de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3819de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3820de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3821de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3822de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3823de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3824de2362d3Smrg 3825de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 38267821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3827de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3828de2362d3Smrg R500_INST_LAST | 3829de2362d3Smrg R500_INST_RGB_OMASK_R | 3830de2362d3Smrg R500_INST_RGB_OMASK_G | 3831de2362d3Smrg R500_INST_RGB_OMASK_B | 3832de2362d3Smrg R500_INST_ALPHA_OMASK | 3833de2362d3Smrg R500_INST_RGB_CLAMP | 3834de2362d3Smrg R500_INST_ALPHA_CLAMP)); 38357821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3836de2362d3Smrg R500_RGB_ADDR0_CONST | 3837de2362d3Smrg R500_RGB_ADDR1(0) | 3838de2362d3Smrg R500_RGB_ADDR2(2))); 38397821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3840de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3841de2362d3Smrg R500_ALPHA_ADDR1(0) | 3842de2362d3Smrg R500_ALPHA_ADDR2(2))); 38437821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3844de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3845de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3846de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3847de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3848de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3849de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3850de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 38517821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3852de2362d3Smrg R500_ALPHA_ADDRD(0) | 3853de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3854de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 38557821949aSmrg 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3856de2362d3Smrg R500_ALU_RGBA_ADDRD(0) | 3857de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3858de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3859de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3860de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3861de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3862de2362d3Smrg R500_ALU_RGBA_A_SWIZ_1)); 3863de2362d3Smrg 3864de2362d3Smrg } else { 38657821949aSmrg BEGIN_ACCEL(44); 3866de2362d3Smrg /* 2 components: 2 for tex0/1/2 */ 38677821949aSmrg OUT_ACCEL_REG(R300_RS_COUNT, 3868de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3869de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3870de2362d3Smrg 3871de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 38727821949aSmrg OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3873de2362d3Smrg 3874de2362d3Smrg /* Pixel stack frame size. */ 38757821949aSmrg OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3876de2362d3Smrg 3877de2362d3Smrg /* FP length. */ 38787821949aSmrg OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3879de2362d3Smrg R500_US_CODE_END_ADDR(3))); 38807821949aSmrg OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3881de2362d3Smrg R500_US_CODE_RANGE_SIZE(3))); 3882de2362d3Smrg 3883de2362d3Smrg /* Prepare for FP emission. 
*/ 38847821949aSmrg OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 38857821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3886de2362d3Smrg 3887de2362d3Smrg /* tex inst */ 38887821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3889de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3890de2362d3Smrg R500_INST_RGB_WMASK_R | 3891de2362d3Smrg R500_INST_RGB_WMASK_G | 3892de2362d3Smrg R500_INST_RGB_WMASK_B | 3893de2362d3Smrg R500_INST_ALPHA_WMASK | 3894de2362d3Smrg R500_INST_RGB_CLAMP | 3895de2362d3Smrg R500_INST_ALPHA_CLAMP)); 38967821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3897de2362d3Smrg R500_TEX_INST_LD | 3898de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3899de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 39007821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3901de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3902de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3903de2362d3Smrg R500_TEX_DST_ADDR(0) | 3904de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3905de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3906de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3907de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 39087821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3909de2362d3Smrg R500_DX_S_SWIZ_R | 3910de2362d3Smrg R500_DX_T_SWIZ_R | 3911de2362d3Smrg R500_DX_R_SWIZ_R | 3912de2362d3Smrg R500_DX_Q_SWIZ_R | 3913de2362d3Smrg R500_DY_ADDR(0) | 3914de2362d3Smrg R500_DY_S_SWIZ_R | 3915de2362d3Smrg R500_DY_T_SWIZ_R | 3916de2362d3Smrg R500_DY_R_SWIZ_R | 3917de2362d3Smrg R500_DY_Q_SWIZ_R)); 39187821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 39197821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3920de2362d3Smrg 3921de2362d3Smrg /* ALU inst */ 3922de2362d3Smrg /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 39237821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3924de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3925de2362d3Smrg R500_INST_RGB_WMASK_R | 3926de2362d3Smrg R500_INST_RGB_WMASK_G | 3927de2362d3Smrg 
R500_INST_RGB_WMASK_B | 3928de2362d3Smrg R500_INST_ALPHA_WMASK)); 39297821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3930de2362d3Smrg R500_RGB_ADDR0_CONST | 3931de2362d3Smrg R500_RGB_ADDR1(0) | 3932de2362d3Smrg R500_RGB_ADDR2(0) | 3933de2362d3Smrg R500_RGB_ADDR2_CONST)); 39347821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3935de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3936de2362d3Smrg R500_ALPHA_ADDR1(0) | 3937de2362d3Smrg R500_ALPHA_ADDR2(0) | 3938de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 39397821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3940de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_A | 3941de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_A | 3942de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_A | 3943de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3944de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 3945de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3946de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G)); 39477821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3948de2362d3Smrg R500_ALPHA_ADDRD(1) | 3949de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3950de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 39517821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3952de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 3953de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3954de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3955de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3956de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3957de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3958de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3959de2362d3Smrg 3960de2362d3Smrg /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 39617821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3962de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3963de2362d3Smrg R500_INST_RGB_WMASK_R | 3964de2362d3Smrg R500_INST_RGB_WMASK_G | 3965de2362d3Smrg R500_INST_RGB_WMASK_B | 3966de2362d3Smrg R500_INST_ALPHA_WMASK)); 39677821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3968de2362d3Smrg R500_RGB_ADDR0_CONST | 
3969de2362d3Smrg R500_RGB_ADDR1(0) | 3970de2362d3Smrg R500_RGB_ADDR2(1))); 39717821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3972de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3973de2362d3Smrg R500_ALPHA_ADDR1(0) | 3974de2362d3Smrg R500_ALPHA_ADDR2(1))); 39757821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3976de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3977de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3978de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3979de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3980de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_B | 3981de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B | 3982de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 39837821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3984de2362d3Smrg R500_ALPHA_ADDRD(1) | 3985de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3986de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 39877821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3988de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 3989de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3990de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3991de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3992de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3993de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3994de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3995de2362d3Smrg 3996de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 39977821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3998de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3999de2362d3Smrg R500_INST_LAST | 4000de2362d3Smrg R500_INST_RGB_OMASK_R | 4001de2362d3Smrg R500_INST_RGB_OMASK_G | 4002de2362d3Smrg R500_INST_RGB_OMASK_B | 4003de2362d3Smrg R500_INST_ALPHA_OMASK | 4004de2362d3Smrg R500_INST_RGB_CLAMP | 4005de2362d3Smrg R500_INST_ALPHA_CLAMP)); 40067821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 4007de2362d3Smrg R500_RGB_ADDR0_CONST | 4008de2362d3Smrg R500_RGB_ADDR1(0) | 4009de2362d3Smrg R500_RGB_ADDR2(1))); 40107821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 
4011de2362d3Smrg R500_ALPHA_ADDR0_CONST | 4012de2362d3Smrg R500_ALPHA_ADDR1(0) | 4013de2362d3Smrg R500_ALPHA_ADDR2(1))); 40147821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 4015de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 4016de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 4017de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 4018de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 4019de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 4020de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R | 4021de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_R)); 40227821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 4023de2362d3Smrg R500_ALPHA_ADDRD(1) | 4024de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 4025de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 40267821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 4027de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 4028de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 4029de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 4030de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 4031de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 4032de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 4033de2362d3Smrg R500_ALU_RGBA_A_SWIZ_1)); 4034de2362d3Smrg } 4035de2362d3Smrg 4036de2362d3Smrg /* Shader constants. 
*/ 40377821949aSmrg OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 4038de2362d3Smrg 4039de2362d3Smrg /* constant 0: off, yco */ 4040de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 4041de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 4042de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 4043de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 4044de2362d3Smrg /* constant 1: uco */ 4045de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 4046de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 4047de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 4048de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 4049de2362d3Smrg /* constant 2: vco */ 4050de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 4051de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 4052de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 4053de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 4054de2362d3Smrg 40557821949aSmrg FINISH_ACCEL(); 4056de2362d3Smrg } 4057de2362d3Smrg 4058de2362d3Smrg BEGIN_ACCEL_RELOC(6, 2); 40597821949aSmrg OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 40607821949aSmrg OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 4061de2362d3Smrg 4062de2362d3Smrg EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 4063de2362d3Smrg EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 4064de2362d3Smrg 4065de2362d3Smrg /* no need to enable blending */ 40667821949aSmrg OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 4067de2362d3Smrg 40687821949aSmrg OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 40697821949aSmrg FINISH_ACCEL(); 4070de2362d3Smrg 4071de2362d3Smrg if (pPriv->vsync) { 4072de2362d3Smrg xf86CrtcPtr crtc; 4073de2362d3Smrg if (pPriv->desired_crtc) 4074de2362d3Smrg crtc = pPriv->desired_crtc; 4075de2362d3Smrg else 40767821949aSmrg crtc = radeon_pick_best_crtc(pScrn, 4077de2362d3Smrg pPriv->drw_x, 
4078de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 4079de2362d3Smrg pPriv->drw_y, 4080de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 4081de2362d3Smrg if (crtc) 40827821949aSmrg FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 40837821949aSmrg crtc, 40847821949aSmrg pPriv->drw_y - crtc->y, 40857821949aSmrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 4086de2362d3Smrg } 4087de2362d3Smrg 4088de2362d3Smrg return TRUE; 4089de2362d3Smrg} 4090de2362d3Smrg 4091de2362d3Smrgstatic void 40927821949aSmrgFUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 4093de2362d3Smrg{ 4094de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 4095de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 4096de2362d3Smrg int dstxoff, dstyoff; 4097de2362d3Smrg BoxPtr pBox = REGION_RECTS(&pPriv->clip); 4098de2362d3Smrg int nBox = REGION_NUM_RECTS(&pPriv->clip); 40997821949aSmrg ACCEL_PREAMBLE(); 4100de2362d3Smrg 4101de2362d3Smrg#ifdef COMPOSITE 4102de2362d3Smrg dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 4103de2362d3Smrg dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 4104de2362d3Smrg#else 4105de2362d3Smrg dstxoff = 0; 4106de2362d3Smrg dstyoff = 0; 4107de2362d3Smrg#endif 4108de2362d3Smrg 41097821949aSmrg if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4110de2362d3Smrg return; 4111de2362d3Smrg 4112de2362d3Smrg /* 4113de2362d3Smrg * Rendering of the actual polygon is done in two different 4114de2362d3Smrg * ways depending on chip generation: 4115de2362d3Smrg * 4116de2362d3Smrg * < R300: 4117de2362d3Smrg * 4118de2362d3Smrg * These chips can render a rectangle in one pass, so 4119de2362d3Smrg * handling is pretty straight-forward. 4120de2362d3Smrg * 4121de2362d3Smrg * >= R300: 4122de2362d3Smrg * 4123de2362d3Smrg * These chips can accept a quad, but will render it as 4124de2362d3Smrg * two triangles which results in a diagonal tear. 
Instead 4125de2362d3Smrg * We render a single, large triangle and use the scissor 4126de2362d3Smrg * functionality to restrict it to the desired rectangle. 4127de2362d3Smrg * Due to guardband limits on r3xx/r4xx, we can only use 4128de2362d3Smrg * the single triangle up to 2880 pixels; above that we 4129de2362d3Smrg * render as a quad. 4130de2362d3Smrg */ 4131de2362d3Smrg 4132de2362d3Smrg while (nBox--) { 4133de2362d3Smrg float srcX, srcY, srcw, srch; 4134de2362d3Smrg int dstX, dstY, dstw, dsth; 41357821949aSmrg#ifdef ACCEL_CP 4136de2362d3Smrg int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 4137de2362d3Smrg 4138de2362d3Smrg if (draw_size > radeon_cs_space_remaining(pScrn)) { 41397821949aSmrg if (info->cs) 41407821949aSmrg radeon_cs_flush_indirect(pScrn); 41417821949aSmrg else 41427821949aSmrg RADEONCPFlushIndirect(pScrn, 1); 41437821949aSmrg if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4144de2362d3Smrg return; 4145de2362d3Smrg } 41467821949aSmrg#endif 4147de2362d3Smrg 4148de2362d3Smrg dstX = pBox->x1 + dstxoff; 4149de2362d3Smrg dstY = pBox->y1 + dstyoff; 4150de2362d3Smrg dstw = pBox->x2 - pBox->x1; 4151de2362d3Smrg dsth = pBox->y2 - pBox->y1; 4152de2362d3Smrg 4153de2362d3Smrg srcX = pPriv->src_x; 4154de2362d3Smrg srcX += ((pBox->x1 - pPriv->drw_x) * 4155de2362d3Smrg pPriv->src_w) / (float)pPriv->dst_w; 4156de2362d3Smrg srcY = pPriv->src_y; 4157de2362d3Smrg srcY += ((pBox->y1 - pPriv->drw_y) * 4158de2362d3Smrg pPriv->src_h) / (float)pPriv->dst_h; 4159de2362d3Smrg 4160de2362d3Smrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 4161de2362d3Smrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 4162de2362d3Smrg 41637821949aSmrg BEGIN_ACCEL(2); 41647821949aSmrg OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 4165de2362d3Smrg ((dstY) << R300_SCISSOR_Y_SHIFT))); 41667821949aSmrg OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 4167de2362d3Smrg ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 
41687821949aSmrg FINISH_ACCEL(); 4169de2362d3Smrg 41707821949aSmrg#ifdef ACCEL_CP 4171de2362d3Smrg BEGIN_RING(3 * pPriv->vtx_count + 4); 4172de2362d3Smrg OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 4173de2362d3Smrg 3 * pPriv->vtx_count)); 4174de2362d3Smrg OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 4175de2362d3Smrg RADEON_CP_VC_CNTL_PRIM_WALK_RING | 4176de2362d3Smrg (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 41777821949aSmrg#else /* ACCEL_CP */ 41787821949aSmrg BEGIN_ACCEL(2 + pPriv->vtx_count * 3); 41797821949aSmrg OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 41807821949aSmrg RADEON_VF_PRIM_WALK_DATA | 41817821949aSmrg (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 41827821949aSmrg#endif 4183de2362d3Smrg if (pPriv->bicubic_enabled) { 4184de2362d3Smrg VTX_OUT_6((float)dstX, (float)dstY, 4185de2362d3Smrg (float)srcX / pPriv->w, (float)srcY / pPriv->h, 4186de2362d3Smrg (float)srcX + 0.5, (float)srcY + 0.5); 4187de2362d3Smrg VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 4188de2362d3Smrg (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 4189de2362d3Smrg (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 4190de2362d3Smrg VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 4191de2362d3Smrg ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4192de2362d3Smrg (float)srcY / pPriv->h, 4193de2362d3Smrg (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 4194de2362d3Smrg (float)srcY + 0.5); 4195de2362d3Smrg } else { 4196de2362d3Smrg /* 4197de2362d3Smrg * Render a big, scissored triangle. This means 4198de2362d3Smrg * increasing the triangle size and adjusting 4199de2362d3Smrg * texture coordinates. 
4200de2362d3Smrg */ 4201de2362d3Smrg VTX_OUT_4((float)dstX, (float)dstY, 4202de2362d3Smrg (float)srcX / pPriv->w, (float)srcY / pPriv->h); 4203de2362d3Smrg VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 4204de2362d3Smrg (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 4205de2362d3Smrg VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 4206de2362d3Smrg ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4207de2362d3Smrg (float)srcY / pPriv->h); 4208de2362d3Smrg } 4209de2362d3Smrg 4210de2362d3Smrg /* flushing is pipelined, free/finish is not */ 42117821949aSmrg OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 4212de2362d3Smrg 42137821949aSmrg#ifdef ACCEL_CP 4214de2362d3Smrg ADVANCE_RING(); 42157821949aSmrg#else 42167821949aSmrg FINISH_ACCEL(); 42177821949aSmrg#endif /* !ACCEL_CP */ 4218de2362d3Smrg 4219de2362d3Smrg pBox++; 4220de2362d3Smrg } 4221de2362d3Smrg 42227821949aSmrg BEGIN_ACCEL(3); 42237821949aSmrg OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 42247821949aSmrg OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 42257821949aSmrg OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 42267821949aSmrg FINISH_ACCEL(); 4227de2362d3Smrg 4228de2362d3Smrg DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 4229de2362d3Smrg} 4230de2362d3Smrg 4231de2362d3Smrg#undef VTX_OUT_4 4232de2362d3Smrg#undef VTX_OUT_6 42337821949aSmrg#undef FUNC_NAME 4234