radeon_textured_videofuncs.c revision ad43ddac
1/* 2 * Copyright 2008 Alex Deucher 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * 24 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 25 * 26 */ 27 28#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 29#error Cannot define both MMIO and CP acceleration! 30#endif 31 32#if !defined(UNIXCPP) || defined(ANSICPP) 33#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 34#else 35#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 36#endif 37 38#ifdef ACCEL_MMIO 39#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 40#else 41#ifdef ACCEL_CP 42#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 43#else 44#error No accel type defined! 45#endif 46#endif 47 48#ifdef ACCEL_CP 49 50#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 51do { \ 52 OUT_RING_F(_dstX); \ 53 OUT_RING_F(_dstY); \ 54 OUT_RING_F(_srcX); \ 55 OUT_RING_F(_srcY); \ 56 OUT_RING_F(_maskX); \ 57 OUT_RING_F(_maskY); \ 58} while (0) 59 60#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 61do { \ 62 OUT_RING_F(_dstX); \ 63 OUT_RING_F(_dstY); \ 64 OUT_RING_F(_srcX); \ 65 OUT_RING_F(_srcY); \ 66} while (0) 67 68#else /* ACCEL_CP */ 69 70#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 71do { \ 72 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 73 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 74 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 75 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 76 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX); \ 77 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ 78} while (0) 79 80#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 81do { \ 82 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 83 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 84 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 85 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 86} while (0) 87 88#endif /* !ACCEL_CP */ 89 90static void 91FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 92{ 93 RADEONInfoPtr info = RADEONPTR(pScrn); 94 PixmapPtr pPixmap = pPriv->pPixmap; 95 struct radeon_exa_pixmap_priv *driver_priv; 96 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 97 uint32_t txformat, txsize, txpitch, txoffset; 98 uint32_t dst_pitch, dst_format; 99 uint32_t colorpitch; 100 Bool isplanar = FALSE; 101 int dstxoff, dstyoff, pixel_shift, vtx_count; 102 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 103 int nBox = REGION_NUM_RECTS(&pPriv->clip); 104 ACCEL_PREAMBLE(); 105 106#ifdef XF86DRM_MODE 107 if (info->cs) { 108 int ret; 109 110 radeon_cs_space_reset_bos(info->cs); 111 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 112 113 if (pPriv->bicubic_enabled) 114 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 115 116 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 117 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 118 119 ret = radeon_cs_space_check(info->cs); 120 if (ret) { 121 ErrorF("Not enough RAM to hw accel xv operation\n"); 122 return; 123 } 124 } 125#endif 126 127 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 128 129 130#ifdef USE_EXA 131 if (info->useEXA) { 132 dst_pitch = exaGetPixmapPitch(pPixmap); 133 } else 134#endif 135 { 136 dst_pitch = pPixmap->devKind; 137 } 138 139#ifdef COMPOSITE 140 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 141 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 142#else 143 dstxoff = 0; 144 dstyoff = 0; 145#endif 146 147#ifdef USE_EXA 148 if (info->useEXA) { 149 RADEON_SWITCH_TO_3D(); 150 } else 151#endif 152 { 153 BEGIN_ACCEL(2); 154 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 155 /* We must wait for 3d to idle, in case source was just written as a dest. */ 156 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 157 RADEON_WAIT_HOST_IDLECLEAN | 158 RADEON_WAIT_2D_IDLECLEAN | 159 RADEON_WAIT_3D_IDLECLEAN | 160 RADEON_WAIT_DMA_GUI_IDLE); 161 FINISH_ACCEL(); 162 163 if (!info->accel_state->XInited3D) 164 RADEONInit3DEngine(pScrn); 165 } 166 167 /* Same for R100/R200 */ 168 switch (pPixmap->drawable.bitsPerPixel) { 169 case 16: 170 if (pPixmap->drawable.depth == 15) 171 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 172 else 173 dst_format = RADEON_COLOR_FORMAT_RGB565; 174 break; 175 case 32: 176 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 177 break; 178 default: 179 return; 180 } 181 182 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 183 isplanar = TRUE; 184 txformat = RADEON_TXFORMAT_Y8; 185 } else { 186 if (pPriv->id == FOURCC_UYVY) 187 txformat = RADEON_TXFORMAT_YVYU422; 188 else 189 txformat = RADEON_TXFORMAT_VYUY422; 190 } 191 192 txformat |= RADEON_TXFORMAT_NON_POWER2; 193 194 colorpitch = dst_pitch >> pixel_shift; 195 196 if (RADEONTilingEnabled(pScrn, pPixmap)) 197 colorpitch |= RADEON_COLOR_TILE_ENABLE; 198 199 txoffset = info->cs ? 0 : pPriv->src_offset; 200 201 BEGIN_ACCEL_RELOC(4,2); 202 203 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 204 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 205 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 206 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 207 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 208 209 FINISH_ACCEL(); 210 211 if (isplanar) { 212 /* need 2 texcoord sets (even though they are identical) due 213 to denormalization! hw apparently can't premultiply 214 same coord set by different texture size */ 215 vtx_count = 6; 216 217 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 218 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 219 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 220 txpitch -= 32; 221 222 BEGIN_ACCEL_RELOC(23, 3); 223 224 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 225 RADEON_SE_VTX_FMT_ST0 | 226 RADEON_SE_VTX_FMT_ST1)); 227 228 OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 229 RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 230 RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 231 RADEON_PLANAR_YUV_ENABLE)); 232 233 /* Y */ 234 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 235 RADEON_MAG_FILTER_LINEAR | 236 RADEON_MIN_FILTER_LINEAR | 237 RADEON_CLAMP_S_CLAMP_LAST | 238 RADEON_CLAMP_T_CLAMP_LAST | 239 RADEON_YUV_TO_RGB); 240 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 241 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 242 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 243 RADEON_COLOR_ARG_A_ZERO | 244 RADEON_COLOR_ARG_B_ZERO | 245 RADEON_COLOR_ARG_C_T0_COLOR | 246 RADEON_BLEND_CTL_ADD | 247 RADEON_CLAMP_TX); 248 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 249 RADEON_ALPHA_ARG_A_ZERO | 250 RADEON_ALPHA_ARG_B_ZERO | 251 RADEON_ALPHA_ARG_C_T0_ALPHA | 252 RADEON_BLEND_CTL_ADD | 253 RADEON_CLAMP_TX); 254 255 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 256 (pPriv->w - 1) | 257 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 258 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 259 pPriv->src_pitch - 32); 260 261 /* U */ 262 OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, 263 RADEON_MAG_FILTER_LINEAR | 264 RADEON_MIN_FILTER_LINEAR | 265 RADEON_CLAMP_S_CLAMP_LAST | 266 RADEON_CLAMP_T_CLAMP_LAST); 267 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 268 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 269 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1, 270 RADEON_COLOR_ARG_A_ZERO | 271 RADEON_COLOR_ARG_B_ZERO | 272 RADEON_COLOR_ARG_C_T0_COLOR | 273 RADEON_BLEND_CTL_ADD | 274 RADEON_CLAMP_TX); 275 OUT_ACCEL_REG(RADEON_PP_TXABLEND_1, 276 RADEON_ALPHA_ARG_A_ZERO | 277 RADEON_ALPHA_ARG_B_ZERO | 278 RADEON_ALPHA_ARG_C_T0_ALPHA | 279 RADEON_BLEND_CTL_ADD | 280 RADEON_CLAMP_TX); 281 282 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize); 283 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch); 284 285 /* V */ 286 OUT_ACCEL_REG(RADEON_PP_TXFILTER_2, 287 RADEON_MAG_FILTER_LINEAR | 288 RADEON_MIN_FILTER_LINEAR | 289 RADEON_CLAMP_S_CLAMP_LAST | 290 RADEON_CLAMP_T_CLAMP_LAST); 291 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 292 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 293 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2, 294 RADEON_COLOR_ARG_A_ZERO | 295 RADEON_COLOR_ARG_B_ZERO | 296 RADEON_COLOR_ARG_C_T0_COLOR | 297 RADEON_BLEND_CTL_ADD | 298 RADEON_CLAMP_TX); 299 OUT_ACCEL_REG(RADEON_PP_TXABLEND_2, 300 RADEON_ALPHA_ARG_A_ZERO | 301 RADEON_ALPHA_ARG_B_ZERO | 302 RADEON_ALPHA_ARG_C_T0_ALPHA | 303 RADEON_BLEND_CTL_ADD | 304 RADEON_CLAMP_TX); 305 306 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize); 307 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch); 308 FINISH_ACCEL(); 309 } else { 310 vtx_count = 4; 311 BEGIN_ACCEL_RELOC(9, 1); 312 313 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 314 RADEON_SE_VTX_FMT_ST0)); 315 316 OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 317 318 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 319 RADEON_MAG_FILTER_LINEAR | 320 RADEON_MIN_FILTER_LINEAR | 321 RADEON_CLAMP_S_CLAMP_LAST | 322 RADEON_CLAMP_T_CLAMP_LAST | 323 RADEON_YUV_TO_RGB); 324 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 325 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 326 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 327 RADEON_COLOR_ARG_A_ZERO | 328 RADEON_COLOR_ARG_B_ZERO | 329 RADEON_COLOR_ARG_C_T0_COLOR | 330 RADEON_BLEND_CTL_ADD | 331 RADEON_CLAMP_TX); 332 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 333 RADEON_ALPHA_ARG_A_ZERO | 334 RADEON_ALPHA_ARG_B_ZERO | 335 RADEON_ALPHA_ARG_C_T0_ALPHA | 336 RADEON_BLEND_CTL_ADD | 337 RADEON_CLAMP_TX); 338 339 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 340 (pPriv->w - 1) | 341 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 342 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 343 pPriv->src_pitch - 32); 344 FINISH_ACCEL(); 345 } 346 347 { 348 int scissor_w, scissor_h; 349 scissor_w = MIN(pPixmap->drawable.width, 2047); 350 scissor_h = MIN(pPixmap->drawable.height, 2047); 351 352 BEGIN_ACCEL(2); 353 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 354 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 355 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 356 FINISH_ACCEL(); 357 } 358 if (pPriv->vsync) { 359 xf86CrtcPtr crtc; 360 if (pPriv->desired_crtc) 361 crtc = pPriv->desired_crtc; 362 else 363 crtc = radeon_pick_best_crtc(pScrn, 364 pPriv->drw_x, 365 pPriv->drw_x + pPriv->dst_w, 366 pPriv->drw_y, 367 pPriv->drw_y + pPriv->dst_h); 368 if (crtc) 369 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 370 crtc, 371 pPriv->drw_y - crtc->y, 372 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 373 } 374 /* 375 * Rendering of the actual polygon is done in two different 376 * ways depending on chip generation: 377 * 378 * < R300: 379 * 380 * These chips can render a rectangle in one pass, so 381 * handling is pretty straight-forward. 382 * 383 * >= R300: 384 * 385 * These chips can accept a quad, but will render it as 386 * two triangles which results in a diagonal tear. Instead 387 * We render a single, large triangle and use the scissor 388 * functionality to restrict it to the desired rectangle. 389 * Due to guardband limits on r3xx/r4xx, we can only use 390 * the single triangle up to 2560/4021 pixels; above that we 391 * render as a quad. 392 */ 393 394#ifdef ACCEL_CP 395 BEGIN_RING(nBox * 3 * vtx_count + 5); 396 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 397 nBox * 3 * vtx_count + 1)); 398 if (isplanar) 399 OUT_RING(RADEON_CP_VC_FRMT_XY | 400 RADEON_CP_VC_FRMT_ST0 | 401 RADEON_CP_VC_FRMT_ST1); 402 else 403 OUT_RING(RADEON_CP_VC_FRMT_XY | 404 RADEON_CP_VC_FRMT_ST0); 405 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 406 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 407 RADEON_CP_VC_CNTL_MAOS_ENABLE | 408 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 409 ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 410#else /* ACCEL_CP */ 411 BEGIN_ACCEL(nBox * vtx_count * 3 + 2); 412 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 413 RADEON_VF_PRIM_WALK_DATA | 414 RADEON_VF_RADEON_MODE | 415 ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); 416#endif 417 418 while (nBox--) { 419 int srcX, srcY, srcw, srch; 420 int dstX, dstY, dstw, dsth; 421 dstX = pBox->x1 + dstxoff; 422 dstY = pBox->y1 + dstyoff; 423 dstw = pBox->x2 - pBox->x1; 424 dsth = pBox->y2 - pBox->y1; 425 426 srcX = pPriv->src_x; 427 srcX += ((pBox->x1 - pPriv->drw_x) * 428 pPriv->src_w) / pPriv->dst_w; 429 srcY = pPriv->src_y; 430 srcY += ((pBox->y1 - pPriv->drw_y) * 431 pPriv->src_h) / pPriv->dst_h; 432 433 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 434 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 435 436 437 if (isplanar) { 438 /* 439 * Just render a rect (using three coords). 440 */ 441 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 442 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 443 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 444 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 445 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 446 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 447 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 448 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 449 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 450 } else { 451 /* 452 * Just render a rect (using three coords). 453 */ 454 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 455 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 456 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 457 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 458 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 459 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 460 } 461 462 pBox++; 463 } 464 465 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 466#ifdef ACCEL_CP 467 ADVANCE_RING(); 468#else 469 FINISH_ACCEL(); 470#endif /* !ACCEL_CP */ 471 472 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 473} 474 475static void 476FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 477{ 478 RADEONInfoPtr info = RADEONPTR(pScrn); 479 PixmapPtr pPixmap = pPriv->pPixmap; 480 struct radeon_exa_pixmap_priv *driver_priv; 481 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 482 uint32_t txformat; 483 uint32_t txfilter, txsize, txpitch, txoffset; 484 uint32_t dst_pitch, dst_format; 485 uint32_t colorpitch; 486 Bool isplanar = FALSE; 487 int dstxoff, dstyoff, pixel_shift, vtx_count; 488 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 489 int nBox = REGION_NUM_RECTS(&pPriv->clip); 490 491 /* note: in contrast to r300, use input biasing on uv components */ 492 const float Loff = -0.0627; 493 float uvcosf, uvsinf; 494 float yco, yoff; 495 float uco[3], vco[3]; 496 float bright, cont, sat; 497 int ref = pPriv->transform_index; 498 float ucscale = 0.25, vcscale = 0.25; 499 Bool needux8 = FALSE, needvx8 = FALSE; 500 ACCEL_PREAMBLE(); 501 502#ifdef XF86DRM_MODE 503 if (info->cs) { 504 int ret; 505 506 radeon_cs_space_reset_bos(info->cs); 507 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 508 509 if (pPriv->bicubic_enabled) 510 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 511 512 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 513 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 514 515 ret = radeon_cs_space_check(info->cs); 516 if (ret) { 517 ErrorF("Not enough RAM to hw accel xv operation\n"); 518 return; 519 } 520 } 521#endif 522 523 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 524 525#ifdef USE_EXA 526 if (info->useEXA) { 527 dst_pitch = exaGetPixmapPitch(pPixmap); 528 } else 529#endif 530 { 531 dst_pitch = pPixmap->devKind; 532 } 533 534#ifdef COMPOSITE 535 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 536 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 537#else 538 dstxoff = 0; 539 dstyoff = 0; 540#endif 541 542#ifdef USE_EXA 543 if (info->useEXA) { 544 RADEON_SWITCH_TO_3D(); 545 } else 546#endif 547 { 548 BEGIN_ACCEL(2); 549 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 550 /* We must wait for 3d to idle, in case source was just written as a dest. */ 551 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 552 RADEON_WAIT_HOST_IDLECLEAN | 553 RADEON_WAIT_2D_IDLECLEAN | 554 RADEON_WAIT_3D_IDLECLEAN | 555 RADEON_WAIT_DMA_GUI_IDLE); 556 FINISH_ACCEL(); 557 558 if (!info->accel_state->XInited3D) 559 RADEONInit3DEngine(pScrn); 560 } 561 562 /* Same for R100/R200 */ 563 switch (pPixmap->drawable.bitsPerPixel) { 564 case 16: 565 if (pPixmap->drawable.depth == 15) 566 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 567 else 568 dst_format = RADEON_COLOR_FORMAT_RGB565; 569 break; 570 case 32: 571 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 572 break; 573 default: 574 return; 575 } 576 577 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 578 isplanar = TRUE; 579 txformat = RADEON_TXFORMAT_I8; 580 } else { 581 if (pPriv->id == FOURCC_UYVY) 582 txformat = RADEON_TXFORMAT_YVYU422; 583 else 584 txformat = RADEON_TXFORMAT_VYUY422; 585 } 586 587 txformat |= RADEON_TXFORMAT_NON_POWER2; 588 589 colorpitch = dst_pitch >> pixel_shift; 590 591 if (RADEONTilingEnabled(pScrn, pPixmap)) 592 colorpitch |= RADEON_COLOR_TILE_ENABLE; 593 594 BEGIN_ACCEL_RELOC(4,2); 595 596 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 597 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 598 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 599 600 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 601 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 602 603 FINISH_ACCEL(); 604 605 txfilter = R200_MAG_FILTER_LINEAR | 606 R200_MIN_FILTER_LINEAR | 607 R200_CLAMP_S_CLAMP_LAST | 608 R200_CLAMP_T_CLAMP_LAST; 609 610 /* contrast can cause constant overflow, clamp */ 611 cont = RTFContrast(pPriv->contrast); 612 if (cont * trans[ref].RefLuma > 2.0) 613 cont = 2.0 / trans[ref].RefLuma; 614 /* brightness is only from -0.5 to 0.5 should be safe */ 615 bright = RTFBrightness(pPriv->brightness); 616 /* saturation can also cause overflow, clamp */ 617 sat = RTFSaturation(pPriv->saturation); 618 if (sat * trans[ref].RefBCb > 4.0) 619 sat = 4.0 / trans[ref].RefBCb; 620 uvcosf = sat * cos(RTFHue(pPriv->hue)); 621 uvsinf = sat * sin(RTFHue(pPriv->hue)); 622 623 yco = trans[ref].RefLuma * cont; 624 uco[0] = -trans[ref].RefRCr * uvsinf; 625 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 626 uco[2] = trans[ref].RefBCb * uvcosf; 627 vco[0] = trans[ref].RefRCr * uvcosf; 628 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 629 vco[2] = trans[ref].RefBCb * uvsinf; 630 yoff = Loff * yco + bright; 631 632 if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 633 needux8 = TRUE; 634 ucscale = 0.125; 635 } 636 if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 637 needvx8 = TRUE; 638 vcscale = 0.125; 639 } 640 641 txoffset = info->cs ? 0 : pPriv->src_offset; 642 643 if (isplanar) { 644 /* need 2 texcoord sets (even though they are identical) due 645 to denormalization! hw apparently can't premultiply 646 same coord set by different texture size */ 647 vtx_count = 6; 648 649 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 650 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 651 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 652 txpitch -= 32; 653 654 BEGIN_ACCEL_RELOC(36, 3); 655 656 OUT_ACCEL_REG(RADEON_PP_CNTL, 657 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 658 RADEON_TEX_BLEND_0_ENABLE | 659 RADEON_TEX_BLEND_1_ENABLE | 660 RADEON_TEX_BLEND_2_ENABLE); 661 662 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 663 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 664 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 665 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 666 667 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 668 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 669 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 670 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 671 (pPriv->w - 1) | 672 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 673 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 674 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 675 676 OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); 677 OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 678 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); 679 OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize); 680 OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); 681 OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 682 683 OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); 684 OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 685 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); 686 OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize); 687 OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); 688 OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 689 690 /* similar to r300 code. Note the big problem is that hardware constants 691 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 692 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 693 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 694 * the constants not. To get larger range can use output scale, but for 695 * that 2.018 value we need a total scale by 8, which means the constants 696 * really have no accuracy whatsoever (5 fractional bits only). 697 * The only direct way to get high precision "constants" into the fragment 698 * pipe I know of is to use the texcoord interpolator (not color, this one 699 * is 8 bit only too), which seems a bit expensive. We're lucky though it 700 * seems the values we need seem to fit better than worst case (get about 701 * 6 fractional bits for this instead of 5, at least when not correcting for 702 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 703 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 704 * even with non-default saturation/hue/contrast/brightness adjustments, 705 * it gets a little crazy and ultimately precision might still be lacking. 706 * 707 * A higher precision (8 fractional bits) version might just put uco into 708 * a texcoord, and calculate a new vcoconst in the shader, like so: 709 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 710 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 711 * vcocalc = ADD temp, bias/scale(cohelper), vco 712 * would in total use 4 tex units, 4 instructions which seems fairly 713 * balanced for this architecture (instead of 3 + 3 for the solution here) 714 * 715 * temp = MAD(yco, yuv.yyyy, yoff) 716 * temp = MAD(uco, yuv.uuuu, temp) 717 * result = MAD(vco, yuv.vvvv, temp) 718 * 719 * note first mad produces actually scalar, hence we transform 720 * it into a dp2a to get 8 bit precision of yco instead of 7 - 721 * That's assuming hw correctly expands consts to internal precision. 722 * (y * 1 + y * (yco - 1) + yoff) 723 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 724 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 725 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 726 * 727 * vco, uco need bias (and hence scale too) 728 * 729 */ 730 731 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 732 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 733 R200_TXC_ARG_A_TFACTOR_COLOR | 734 R200_TXC_ARG_B_R0_COLOR | 735 R200_TXC_ARG_C_TFACTOR_COLOR | 736 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 737 R200_TXC_OP_DOT2_ADD); 738 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 739 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 740 R200_TXC_SCALE_INV2 | 741 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 742 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 743 R200_TXA_ARG_A_ZERO | 744 R200_TXA_ARG_B_ZERO | 745 R200_TXA_ARG_C_ZERO | 746 R200_TXA_OP_MADD); 747 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 748 R200_TXA_OUTPUT_REG_NONE); 749 750 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 751 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 752 R200_TXC_ARG_A_TFACTOR_COLOR | 753 R200_TXC_BIAS_ARG_A | 754 R200_TXC_SCALE_ARG_A | 755 R200_TXC_ARG_B_R1_COLOR | 756 R200_TXC_BIAS_ARG_B | 757 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 758 R200_TXC_ARG_C_R0_COLOR | 759 R200_TXC_OP_MADD); 760 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 761 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 762 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 763 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 764 R200_TXA_ARG_A_ZERO | 765 R200_TXA_ARG_B_ZERO | 766 R200_TXA_ARG_C_ZERO | 767 R200_TXA_OP_MADD); 768 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 769 R200_TXA_OUTPUT_REG_NONE); 770 771 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 772 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 773 R200_TXC_ARG_A_TFACTOR_COLOR | 774 R200_TXC_BIAS_ARG_A | 775 R200_TXC_SCALE_ARG_A | 776 R200_TXC_ARG_B_R2_COLOR | 777 R200_TXC_BIAS_ARG_B | 778 (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 779 R200_TXC_ARG_C_R0_COLOR | 780 R200_TXC_OP_MADD); 781 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 782 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 783 R200_TXC_SCALE_2X | 784 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 785 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 786 R200_TXA_ARG_A_ZERO | 787 R200_TXA_ARG_B_ZERO | 788 R200_TXA_ARG_C_ZERO | 789 R200_TXA_COMP_ARG_C | 790 R200_TXA_OP_MADD); 791 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 792 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 793 794 /* shader constants */ 795 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 796 yco > 1.0 ? yco - 1.0: yco, 797 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 798 0.0)); 799 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 800 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 801 uco[2] * ucscale + 0.5, 802 0.0)); 803 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 804 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 805 vco[2] * vcscale + 0.5, 806 0.0)); 807 808 FINISH_ACCEL(); 809 } else { 810 vtx_count = 4; 811 812 BEGIN_ACCEL_RELOC(24, 1); 813 814 OUT_ACCEL_REG(RADEON_PP_CNTL, 815 RADEON_TEX_0_ENABLE | 816 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 817 RADEON_TEX_BLEND_2_ENABLE); 818 819 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 820 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 821 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 822 823 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 824 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 825 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 826 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 827 (pPriv->w - 1) | 828 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 829 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 830 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 831 832 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 833 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 834 R200_TXC_ARG_A_TFACTOR_COLOR | 835 R200_TXC_ARG_B_R0_COLOR | 836 R200_TXC_ARG_C_TFACTOR_COLOR | 837 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 838 R200_TXC_OP_DOT2_ADD); 839 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 840 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 841 R200_TXC_SCALE_INV2 | 842 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 843 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 844 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 845 R200_TXA_ARG_A_ZERO | 846 R200_TXA_ARG_B_ZERO | 847 R200_TXA_ARG_C_ZERO | 848 R200_TXA_OP_MADD); 849 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 850 R200_TXA_OUTPUT_REG_NONE); 851 852 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 853 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 854 R200_TXC_ARG_A_TFACTOR_COLOR | 855 R200_TXC_BIAS_ARG_A | 856 R200_TXC_SCALE_ARG_A | 857 R200_TXC_ARG_B_R0_COLOR | 858 R200_TXC_BIAS_ARG_B | 859 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 860 R200_TXC_ARG_C_R1_COLOR | 861 R200_TXC_OP_MADD); 862 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 863 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 864 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 865 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 866 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 867 R200_TXA_ARG_A_ZERO | 868 R200_TXA_ARG_B_ZERO | 869 R200_TXA_ARG_C_ZERO | 870 R200_TXA_OP_MADD); 871 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 872 R200_TXA_OUTPUT_REG_NONE); 873 874 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 875 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 876 R200_TXC_ARG_A_TFACTOR_COLOR | 877 R200_TXC_BIAS_ARG_A | 878 R200_TXC_SCALE_ARG_A | 879 R200_TXC_ARG_B_R0_COLOR | 880 R200_TXC_BIAS_ARG_B | 881 (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 882 R200_TXC_ARG_C_R1_COLOR | 883 R200_TXC_OP_MADD); 884 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 885 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 886 R200_TXC_SCALE_2X | 887 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 888 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 889 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 890 R200_TXA_ARG_A_ZERO | 891 R200_TXA_ARG_B_ZERO | 892 R200_TXA_ARG_C_ZERO | 893 R200_TXA_COMP_ARG_C | 894 R200_TXA_OP_MADD); 895 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 896 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 897 898 /* shader constants */ 899 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 900 yco > 1.0 ? yco - 1.0: yco, 901 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 902 0.0)); 903 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 904 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 905 uco[2] * ucscale + 0.5, 906 0.0)); 907 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 908 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 909 vco[2] * vcscale + 0.5, 910 0.0)); 911 912 FINISH_ACCEL(); 913 } 914 915 { 916 int scissor_w, scissor_h; 917 scissor_w = MIN(pPixmap->drawable.width, 2047); 918 scissor_h = MIN(pPixmap->drawable.height, 2047); 919 BEGIN_ACCEL(2); 920 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 921 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 922 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 923 } 924 FINISH_ACCEL(); 925 926 if (pPriv->vsync) { 927 xf86CrtcPtr crtc; 928 if (pPriv->desired_crtc) 929 crtc = pPriv->desired_crtc; 930 else 931 crtc = radeon_pick_best_crtc(pScrn, 932 pPriv->drw_x, 933 pPriv->drw_x + pPriv->dst_w, 934 pPriv->drw_y, 935 pPriv->drw_y + pPriv->dst_h); 936 if (crtc) 937 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 938 crtc, 939 pPriv->drw_y - crtc->y, 940 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 941 } 942 /* 943 * Rendering of the actual polygon is done in two different 944 * ways depending on chip generation: 945 * 946 * < R300: 947 * 948 * These chips can render a rectangle in one pass, so 949 * handling is pretty straight-forward. 950 * 951 * >= R300: 952 * 953 * These chips can accept a quad, but will render it as 954 * two triangles which results in a diagonal tear. Instead 955 * We render a single, large triangle and use the scissor 956 * functionality to restrict it to the desired rectangle. 957 * Due to guardband limits on r3xx/r4xx, we can only use 958 * the single triangle up to 2560/4021 pixels; above that we 959 * render as a quad. 960 */ 961 962#ifdef ACCEL_CP 963 BEGIN_RING(nBox * 3 * vtx_count + 4); 964 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 965 nBox * 3 * vtx_count)); 966 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 967 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 968 ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 969#else /* ACCEL_CP */ 970 BEGIN_ACCEL(nBox * 3 * vtx_count + 2); 971 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 972 RADEON_VF_PRIM_WALK_DATA | 973 ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); 974 975#endif 976 977 while (nBox--) { 978 int srcX, srcY, srcw, srch; 979 int dstX, dstY, dstw, dsth; 980 dstX = pBox->x1 + dstxoff; 981 dstY = pBox->y1 + dstyoff; 982 dstw = pBox->x2 - pBox->x1; 983 dsth = pBox->y2 - pBox->y1; 984 985 srcX = pPriv->src_x; 986 srcX += ((pBox->x1 - pPriv->drw_x) * 987 pPriv->src_w) / pPriv->dst_w; 988 srcY = pPriv->src_y; 989 srcY += ((pBox->y1 - pPriv->drw_y) * 990 pPriv->src_h) / pPriv->dst_h; 991 992 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 993 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 994 995 if (isplanar) { 996 /* 997 * Just render a rect (using three coords). 998 */ 999 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 1000 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 1001 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 1002 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 1003 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 1004 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 1005 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 1006 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 1007 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 1008 } else { 1009 /* 1010 * Just render a rect (using three coords). 1011 */ 1012 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 1013 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 1014 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 1015 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 1016 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 1017 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 1018 } 1019 1020 pBox++; 1021 } 1022 1023 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 1024 1025#ifdef ACCEL_CP 1026 ADVANCE_RING(); 1027#else 1028 FINISH_ACCEL(); 1029#endif /* !ACCEL_CP */ 1030 1031 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 1032} 1033 1034static void 1035FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 1036{ 1037 RADEONInfoPtr info = RADEONPTR(pScrn); 1038 PixmapPtr pPixmap = pPriv->pPixmap; 1039 struct radeon_exa_pixmap_priv *driver_priv; 1040 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 1041 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 1042 uint32_t dst_pitch, dst_format; 1043 uint32_t txenable, colorpitch, bicubic_offset; 1044 uint32_t output_fmt; 1045 Bool isplanar = FALSE; 1046 int dstxoff, dstyoff, pixel_shift, vtx_count; 1047 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 1048 int nBox = REGION_NUM_RECTS(&pPriv->clip); 1049 ACCEL_PREAMBLE(); 1050 1051#ifdef XF86DRM_MODE 1052 if (info->cs) { 1053 int ret; 1054 1055 radeon_cs_space_reset_bos(info->cs); 1056 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1057 1058 if (pPriv->bicubic_enabled) 1059 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1060 1061 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 1062 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 1063 1064 ret = radeon_cs_space_check(info->cs); 1065 if (ret) { 1066 ErrorF("Not enough RAM to hw accel xv operation\n"); 1067 return; 1068 } 1069 } 1070#endif 1071 1072 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 1073 1074#ifdef USE_EXA 1075 if (info->useEXA) { 1076 dst_pitch = exaGetPixmapPitch(pPixmap); 1077 } else 1078#endif 1079 { 1080 dst_pitch = pPixmap->devKind; 1081 } 1082 1083#ifdef COMPOSITE 1084 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 1085 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 1086#else 1087 dstxoff = 0; 1088 dstyoff = 0; 1089#endif 1090 1091#ifdef USE_EXA 1092 if (info->useEXA) { 1093 RADEON_SWITCH_TO_3D(); 1094 } else 1095#endif 1096 { 1097 BEGIN_ACCEL(2); 1098 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 1099 /* We must wait for 3d to idle, in case source was just written as a dest. */ 1100 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 1101 RADEON_WAIT_HOST_IDLECLEAN | 1102 RADEON_WAIT_2D_IDLECLEAN | 1103 RADEON_WAIT_3D_IDLECLEAN | 1104 RADEON_WAIT_DMA_GUI_IDLE); 1105 FINISH_ACCEL(); 1106 1107 if (!info->accel_state->XInited3D) 1108 RADEONInit3DEngine(pScrn); 1109 } 1110 1111 if (pPriv->bicubic_enabled) 1112 vtx_count = 6; 1113 else 1114 vtx_count = 4; 1115 1116 switch (pPixmap->drawable.bitsPerPixel) { 1117 case 16: 1118 if (pPixmap->drawable.depth == 15) 1119 dst_format = R300_COLORFORMAT_ARGB1555; 1120 else 1121 dst_format = R300_COLORFORMAT_RGB565; 1122 break; 1123 case 32: 1124 dst_format = R300_COLORFORMAT_ARGB8888; 1125 break; 1126 default: 1127 return; 1128 } 1129 1130 output_fmt = (R300_OUT_FMT_C4_8 | 1131 R300_OUT_FMT_C0_SEL_BLUE | 1132 R300_OUT_FMT_C1_SEL_GREEN | 1133 R300_OUT_FMT_C2_SEL_RED | 1134 R300_OUT_FMT_C3_SEL_ALPHA); 1135 1136 colorpitch = dst_pitch >> pixel_shift; 1137 colorpitch |= dst_format; 1138 1139 if (RADEONTilingEnabled(pScrn, pPixmap)) 1140 colorpitch |= R300_COLORTILE; 1141 1142 1143 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1144 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1145 isplanar = TRUE; 1146 1147 if (isplanar) { 1148 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1149 txpitch = pPriv->src_pitch; 1150 } else { 1151 if (pPriv->id == FOURCC_UYVY) 1152 txformat1 = R300_TX_FORMAT_YVYU422; 1153 else 1154 txformat1 = R300_TX_FORMAT_VYUY422; 1155 1156 if (pPriv->bicubic_state != BICUBIC_OFF) 1157 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1158 1159 /* pitch is in pixels */ 1160 txpitch = pPriv->src_pitch / 2; 1161 } 1162 txpitch -= 1; 1163 1164 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1165 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1166 R300_TXPITCH_EN); 1167 1168 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1169 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1170 R300_TX_MAG_FILTER_LINEAR | 1171 R300_TX_MIN_FILTER_LINEAR | 1172 (0 << R300_TX_ID_SHIFT)); 1173 1174 txoffset = info->cs ? 0 : pPriv->src_offset; 1175 1176 BEGIN_ACCEL_RELOC(6, 1); 1177 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 1178 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 1179 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 1180 if (isplanar) 1181 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1182 else 1183 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 1184 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 1185 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 1186 FINISH_ACCEL(); 1187 1188 txenable = R300_TEX_0_ENABLE; 1189 1190 if (isplanar) { 1191 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1192 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1193 R300_TXPITCH_EN); 1194 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1195 txpitch -= 1; 1196 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1197 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1198 R300_TX_MIN_FILTER_LINEAR | 1199 R300_TX_MAG_FILTER_LINEAR); 1200 1201 BEGIN_ACCEL_RELOC(12, 2); 1202 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 1203 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1204 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1205 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 1206 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1207 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 1208 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 1209 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 1210 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 1211 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 1212 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 1213 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 1214 FINISH_ACCEL(); 1215 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1216 } 1217 1218 if (pPriv->bicubic_enabled) { 1219 /* Size is 128x1 */ 1220 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 1221 (0x0 << R300_TXHEIGHT_SHIFT) | 1222 R300_TXPITCH_EN); 1223 /* Format is 32-bit floats, 4bpp */ 1224 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1225 /* Pitch is 127 (128-1) */ 1226 txpitch = 0x7f; 1227 /* Tex filter */ 1228 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1229 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1230 R300_TX_MIN_FILTER_NEAREST | 1231 R300_TX_MAG_FILTER_NEAREST | 1232 (1 << R300_TX_ID_SHIFT)); 1233 1234 if (info->cs) 1235 bicubic_offset = 0; 1236 else 1237 bicubic_offset = pPriv->bicubic_src_offset; 1238 1239 BEGIN_ACCEL_RELOC(6, 1); 1240 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 1241 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1242 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1243 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 1244 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1245 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 1246 FINISH_ACCEL(); 1247 1248 /* Enable tex 1 */ 1249 txenable |= R300_TEX_1_ENABLE; 1250 } 1251 1252 /* setup the VAP */ 1253 if (info->accel_state->has_tcl) { 1254 if (pPriv->bicubic_enabled) 1255 BEGIN_ACCEL(7); 1256 else 1257 BEGIN_ACCEL(6); 1258 } else { 1259 if (pPriv->bicubic_enabled) 1260 BEGIN_ACCEL(5); 1261 else 1262 BEGIN_ACCEL(4); 1263 } 1264 1265 /* These registers define the number, type, and location of data submitted 1266 * to the PVS unit of GA input (when PVS is disabled) 1267 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1268 * enabled. This memory provides the imputs to the vertex shader program 1269 * and ordering is not important. When PVS/TCL is disabled, this field maps 1270 * directly to the GA input memory and the order is signifigant. In 1271 * PVS_BYPASS mode the order is as follows: 1272 * Position 1273 * Point Size 1274 * Color 0-3 1275 * Textures 0-7 1276 * Fog 1277 */ 1278 if (pPriv->bicubic_enabled) { 1279 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1280 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1281 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1282 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1283 R300_SIGNED_0 | 1284 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1285 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1286 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1287 R300_SIGNED_1)); 1288 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 1289 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1290 (0 << R300_SKIP_DWORDS_2_SHIFT) | 1291 (7 << R300_DST_VEC_LOC_2_SHIFT) | 1292 R300_LAST_VEC_2 | 1293 R300_SIGNED_2)); 1294 } else { 1295 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1296 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1297 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1298 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1299 R300_SIGNED_0 | 1300 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1301 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1302 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1303 R300_LAST_VEC_1 | 1304 R300_SIGNED_1)); 1305 } 1306 1307 /* load the vertex shader 1308 * We pre-load vertex programs in RADEONInit3DEngine(): 1309 * - exa 1310 * - Xv 1311 * - Xv bicubic 1312 * Here we select the offset of the vertex program we want to use 1313 */ 1314 if (info->accel_state->has_tcl) { 1315 if (pPriv->bicubic_enabled) { 1316 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1317 ((11 << R300_PVS_FIRST_INST_SHIFT) | 1318 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1319 (13 << R300_PVS_LAST_INST_SHIFT))); 1320 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1321 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1322 } else { 1323 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1324 ((9 << R300_PVS_FIRST_INST_SHIFT) | 1325 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1326 (10 << R300_PVS_LAST_INST_SHIFT))); 1327 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1328 (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1329 } 1330 } 1331 1332 /* Position and one set of 2 texture coordinates */ 1333 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1334 if (pPriv->bicubic_enabled) 1335 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1336 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1337 else 1338 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1339 1340 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 1341 FINISH_ACCEL(); 1342 1343 /* setup pixel shader */ 1344 if (pPriv->bicubic_state != BICUBIC_OFF) { 1345 if (pPriv->bicubic_enabled) { 1346 BEGIN_ACCEL(79); 1347 1348 /* 4 components: 2 for tex0 and 2 for tex1 */ 1349 OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1350 R300_RS_COUNT_HIRES_EN)); 1351 1352 /* R300_INST_COUNT_RS - highest RS instruction used */ 1353 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1354 1355 /* Pixel stack frame size. */ 1356 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 1357 1358 /* Indirection levels */ 1359 OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1360 R300_FIRST_TEX)); 1361 1362 /* Set nodes. */ 1363 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1364 R300_ALU_CODE_SIZE(14) | 1365 R300_TEX_CODE_OFFSET(0) | 1366 R300_TEX_CODE_SIZE(6))); 1367 1368 /* Nodes are allocated highest first, but executed lowest first */ 1369 OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); 1370 OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1371 R300_ALU_SIZE(0) | 1372 R300_TEX_START(0) | 1373 R300_TEX_SIZE(0))); 1374 OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1375 R300_ALU_SIZE(9) | 1376 R300_TEX_START(1) | 1377 R300_TEX_SIZE(0))); 1378 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1379 R300_ALU_SIZE(2) | 1380 R300_TEX_START(2) | 1381 R300_TEX_SIZE(3) | 1382 R300_RGBA_OUT)); 1383 1384 /* ** BICUBIC FP ** */ 1385 1386 /* texcoord0 => temp0 1387 * texcoord1 => temp1 */ 1388 1389 // first node 1390 /* TEX temp2, temp1.rrr0, tex1, 1D */ 1391 OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1392 R300_TEX_ID(1) | 1393 R300_TEX_SRC_ADDR(1) | 1394 R300_TEX_DST_ADDR(2))); 1395 1396 /* MOV temp1.r, temp1.ggg0 */ 1397 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1398 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1399 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1400 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1401 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1402 R300_ALU_RGB_ADDRD(1) | 1403 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1404 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1405 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1406 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1407 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1408 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1409 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1410 1411 1412 // second node 1413 /* TEX temp1, temp1, tex1, 1D */ 1414 OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1415 R300_TEX_ID(1) | 1416 R300_TEX_SRC_ADDR(1) | 1417 R300_TEX_DST_ADDR(1))); 1418 1419 /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 1420 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1421 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1422 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1423 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1424 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1425 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1426 R300_ALU_RGB_ADDRD(3) | 1427 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1428 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1429 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1430 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1431 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1432 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1433 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1434 1435 1436 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 1437 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1438 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1439 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1440 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1441 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1442 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1443 R300_ALU_RGB_ADDRD(2) | 1444 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1445 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1446 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1447 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1448 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1449 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1450 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1451 1452 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 1453 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1454 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1455 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1456 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1457 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1458 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1459 R300_ALU_RGB_ADDR2(3) | 1460 R300_ALU_RGB_ADDRD(4) | 1461 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1462 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1463 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1464 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1465 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1466 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1467 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1468 1469 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 1470 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1471 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1472 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1473 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1474 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1475 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1476 R300_ALU_RGB_ADDR2(2) | 1477 R300_ALU_RGB_ADDRD(5) | 1478 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1479 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1480 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1481 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1482 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1483 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1484 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1485 1486 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 1487 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1488 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1489 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1490 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1491 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1492 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1493 R300_ALU_RGB_ADDR2(3) | 1494 R300_ALU_RGB_ADDRD(3) | 1495 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1496 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1497 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1498 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1499 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1500 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1501 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1502 1503 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 1504 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1505 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1506 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1507 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1508 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1509 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1510 R300_ALU_RGB_ADDR2(2) | 1511 R300_ALU_RGB_ADDRD(1) | 1512 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1513 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1514 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1515 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1516 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1517 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1518 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1519 1520 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 1521 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1522 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1523 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1524 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1525 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1526 R300_ALU_RGB_ADDR2(1) | 1527 R300_ALU_RGB_ADDRD(1) | 1528 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1529 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1530 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1531 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1532 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1533 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1534 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1535 1536 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 1537 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1538 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1539 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1540 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1541 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1542 R300_ALU_RGB_ADDR2(3) | 1543 R300_ALU_RGB_ADDRD(2) | 1544 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1545 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1546 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1547 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1548 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1549 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1550 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1551 1552 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 1553 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1554 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1555 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1556 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1557 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1558 R300_ALU_RGB_ADDR2(5) | 1559 R300_ALU_RGB_ADDRD(3) | 1560 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1561 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1562 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1563 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1564 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1565 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1566 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1567 1568 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 1569 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1570 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1571 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1572 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1573 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1574 R300_ALU_RGB_ADDR2(4) | 1575 R300_ALU_RGB_ADDRD(0) | 1576 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1577 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1578 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1579 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1580 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1581 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1582 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1583 1584 1585 // third node 1586 /* TEX temp4, temp1.rg--, tex0, 1D */ 1587 OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1588 R300_TEX_ID(0) | 1589 R300_TEX_SRC_ADDR(1) | 1590 R300_TEX_DST_ADDR(4))); 1591 1592 /* TEX temp3, temp3.rg--, tex0, 1D */ 1593 OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1594 R300_TEX_ID(0) | 1595 R300_TEX_SRC_ADDR(3) | 1596 R300_TEX_DST_ADDR(3))); 1597 1598 /* TEX temp5, temp2.rg--, tex0, 1D */ 1599 OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1600 R300_TEX_ID(0) | 1601 R300_TEX_SRC_ADDR(2) | 1602 R300_TEX_DST_ADDR(5))); 1603 1604 /* TEX temp0, temp0.rg--, tex0, 1D */ 1605 OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1606 R300_TEX_ID(0) | 1607 R300_TEX_SRC_ADDR(0) | 1608 R300_TEX_DST_ADDR(0))); 1609 1610 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1611 * - PRESUB temps, temp4 - temp3 1612 * - MAD temp3, temp1.bbbb, temps, temp3 */ 1613 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1614 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1615 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1616 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1617 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1618 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1619 R300_ALU_RGB_ADDR1(4) | 1620 R300_ALU_RGB_ADDR2(1) | 1621 R300_ALU_RGB_ADDRD(3) | 1622 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1623 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1624 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1625 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1626 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1627 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1628 R300_ALU_ALPHA_ADDR1(4) | 1629 R300_ALU_ALPHA_ADDR2(1) | 1630 R300_ALU_ALPHA_ADDRD(3) | 1631 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1632 1633 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1634 * - PRESUB temps, temp5 - temp0 1635 * - MAD temp0, temp1.bbbb, temps, temp0 */ 1636 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1637 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1638 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1639 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1640 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1641 R300_ALU_RGB_INSERT_NOP)); 1642 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1643 R300_ALU_RGB_ADDR1(5) | 1644 R300_ALU_RGB_ADDR2(1) | 1645 R300_ALU_RGB_ADDRD(0) | 1646 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1647 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1648 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1649 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1650 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1651 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1652 R300_ALU_ALPHA_ADDR1(5) | 1653 R300_ALU_ALPHA_ADDR2(1) | 1654 R300_ALU_ALPHA_ADDRD(0) | 1655 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1656 1657 /* LRP output, temp2.bbbb, temp3, temp0 -> 1658 * - PRESUB temps, temp3 - temp0 1659 * - MAD output, temp2.bbbb, temps, temp0 */ 1660 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1661 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1662 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1663 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1664 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1665 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1666 R300_ALU_RGB_ADDR1(3) | 1667 R300_ALU_RGB_ADDR2(2) | 1668 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 1669 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1670 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1671 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1672 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1673 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1674 R300_ALU_ALPHA_ADDR1(3) | 1675 R300_ALU_ALPHA_ADDR2(2) | 1676 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1677 1678 /* Shader constants. */ 1679 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 1680 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); 1681 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); 1682 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); 1683 1684 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); 1685 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 1686 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); 1687 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); 1688 1689 FINISH_ACCEL(); 1690 } else { 1691 BEGIN_ACCEL(11); 1692 /* 2 components: 2 for tex0 */ 1693 OUT_ACCEL_REG(R300_RS_COUNT, 1694 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1695 R300_RS_COUNT_HIRES_EN)); 1696 /* R300_INST_COUNT_RS - highest RS instruction used */ 1697 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1698 1699 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1700 1701 /* Indirection levels */ 1702 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1703 R300_FIRST_TEX)); 1704 1705 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1706 R300_ALU_CODE_SIZE(1) | 1707 R300_TEX_CODE_OFFSET(0) | 1708 R300_TEX_CODE_SIZE(1))); 1709 1710 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1711 R300_ALU_SIZE(0) | 1712 R300_TEX_START(0) | 1713 R300_TEX_SIZE(0) | 1714 R300_RGBA_OUT)); 1715 1716 /* tex inst */ 1717 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1718 R300_TEX_DST_ADDR(0) | 1719 R300_TEX_ID(0) | 1720 R300_TEX_INST(R300_TEX_INST_LD))); 1721 1722 /* ALU inst */ 1723 /* RGB */ 1724 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1725 R300_ALU_RGB_ADDR1(0) | 1726 R300_ALU_RGB_ADDR2(0) | 1727 R300_ALU_RGB_ADDRD(0) | 1728 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1729 R300_ALU_RGB_MASK_G | 1730 R300_ALU_RGB_MASK_B)) | 1731 R300_ALU_RGB_TARGET_A)); 1732 OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1733 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1734 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1735 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1736 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1737 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1738 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1739 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1740 R300_ALU_RGB_CLAMP)); 1741 /* Alpha */ 1742 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1743 R300_ALU_ALPHA_ADDR1(0) | 1744 R300_ALU_ALPHA_ADDR2(0) | 1745 R300_ALU_ALPHA_ADDRD(0) | 1746 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1747 R300_ALU_ALPHA_TARGET_A | 1748 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 1749 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1750 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1751 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1752 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1753 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1754 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1755 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1756 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1757 R300_ALU_ALPHA_CLAMP)); 1758 FINISH_ACCEL(); 1759 } 1760 } else { 1761 /* 1762 * y' = y - .0625 1763 * u' = u - .5 1764 * v' = v - .5; 1765 * 1766 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1767 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1768 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1769 * 1770 * DP3 might look like the straightforward solution 1771 * but we'd need to move the texture yuv values in 1772 * the same reg for this to work. Therefore use MADs. 1773 * Brightness just adds to the off constant. 1774 * Contrast is multiplication of luminance. 1775 * Saturation and hue change the u and v coeffs. 1776 * Default values (before adjustments - depend on colorspace): 1777 * yco = 1.1643 1778 * uco = 0, -0.39173, 2.017 1779 * vco = 1.5958, -0.8129, 0 1780 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1781 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1782 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1783 * 1784 * temp = MAD(yco, yuv.yyyy, off) 1785 * temp = MAD(uco, yuv.uuuu, temp) 1786 * result = MAD(vco, yuv.vvvv, temp) 1787 */ 1788 /* TODO: don't recalc consts always */ 1789 const float Loff = -0.0627; 1790 const float Coff = -0.502; 1791 float uvcosf, uvsinf; 1792 float yco; 1793 float uco[3], vco[3], off[3]; 1794 float bright, cont, gamma; 1795 int ref = pPriv->transform_index; 1796 Bool needgamma = FALSE; 1797 1798 cont = RTFContrast(pPriv->contrast); 1799 bright = RTFBrightness(pPriv->brightness); 1800 gamma = (float)pPriv->gamma / 1000.0; 1801 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1802 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1803 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 1804 1805 yco = trans[ref].RefLuma * cont; 1806 uco[0] = -trans[ref].RefRCr * uvsinf; 1807 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1808 uco[2] = trans[ref].RefBCb * uvcosf; 1809 vco[0] = trans[ref].RefRCr * uvcosf; 1810 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1811 vco[2] = trans[ref].RefBCb * uvsinf; 1812 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1813 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1814 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1815 1816 if (gamma != 1.0) { 1817 needgamma = TRUE; 1818 /* note: gamma correction is out = in ^ gamma; 1819 gpu can only do LG2/EX2 therefore we transform into 1820 in ^ gamma = 2 ^ (log2(in) * gamma). 1821 Lots of scalar ops, unfortunately (better solution?) - 1822 without gamma that's 3 inst, with gamma it's 10... 1823 could use different gamma factors per channel, 1824 if that's of any use. */ 1825 } 1826 1827 if (isplanar) { 1828 BEGIN_ACCEL(needgamma ? 28 + 33 : 33); 1829 /* 2 components: same 2 for tex0/1/2 */ 1830 OUT_ACCEL_REG(R300_RS_COUNT, 1831 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1832 R300_RS_COUNT_HIRES_EN)); 1833 /* R300_INST_COUNT_RS - highest RS instruction used */ 1834 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1835 1836 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1837 1838 /* Indirection levels */ 1839 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1840 R300_FIRST_TEX)); 1841 1842 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1843 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1844 R300_TEX_CODE_OFFSET(0) | 1845 R300_TEX_CODE_SIZE(3))); 1846 1847 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1848 R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 1849 R300_TEX_START(0) | 1850 R300_TEX_SIZE(2) | 1851 R300_RGBA_OUT)); 1852 1853 /* tex inst */ 1854 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1855 R300_TEX_DST_ADDR(2) | 1856 R300_TEX_ID(0) | 1857 R300_TEX_INST(R300_TEX_INST_LD))); 1858 OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1859 R300_TEX_DST_ADDR(1) | 1860 R300_TEX_ID(1) | 1861 R300_TEX_INST(R300_TEX_INST_LD))); 1862 OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 1863 R300_TEX_DST_ADDR(0) | 1864 R300_TEX_ID(2) | 1865 R300_TEX_INST(R300_TEX_INST_LD))); 1866 1867 /* ALU inst */ 1868 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 1869 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1870 R300_ALU_RGB_ADDR1(2) | 1871 R300_ALU_RGB_ADDR2(0) | 1872 R300_ALU_RGB_ADDRD(2) | 1873 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1874 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1875 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1876 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1877 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1878 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1879 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1880 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1881 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1882 /* alpha nop, but need to set up alpha source for rgb usage */ 1883 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1884 R300_ALU_ALPHA_ADDR1(2) | 1885 R300_ALU_ALPHA_ADDR2(0) | 1886 R300_ALU_ALPHA_ADDRD(2) | 1887 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1888 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1889 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1890 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1891 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1892 1893 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 1894 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1895 R300_ALU_RGB_ADDR1(1) | 1896 R300_ALU_RGB_ADDR2(2) | 1897 R300_ALU_RGB_ADDRD(2) | 1898 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1899 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1900 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1901 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1902 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1903 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1904 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1905 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1906 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1907 /* alpha nop */ 1908 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 1909 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1910 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1911 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1912 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1913 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1914 1915 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 1916 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 1917 R300_ALU_RGB_ADDR1(0) | 1918 R300_ALU_RGB_ADDR2(2) | 1919 R300_ALU_RGB_ADDRD(0) | 1920 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 1921 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 1922 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1923 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1924 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1925 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1926 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1927 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1928 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1929 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1930 R300_ALU_RGB_CLAMP)); 1931 /* write alpha 1 */ 1932 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 1933 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1934 R300_ALU_ALPHA_TARGET_A)); 1935 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1936 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1937 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1938 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 1939 1940 if (needgamma) { 1941 /* rgb temp0.r = op_sop, set up src0 reg */ 1942 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 1943 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1944 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 1945 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1946 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1947 /* alpha lg2 temp0, temp0.r */ 1948 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 1949 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1950 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1951 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1952 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1953 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1954 1955 /* rgb temp0.g = op_sop, set up src0 reg */ 1956 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 1957 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 1958 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 1959 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1960 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1961 /* alpha lg2 temp0, temp0.g */ 1962 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 1963 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1964 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1965 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1966 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1967 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1968 1969 /* rgb temp0.b = op_sop, set up src0 reg */ 1970 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 1971 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 1972 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 1973 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1974 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1975 /* alpha lg2 temp0, temp0.b */ 1976 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 1977 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1978 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1979 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1980 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1981 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1982 1983 /* MUL const1, temp1, temp0 */ 1984 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 1985 R300_ALU_RGB_ADDR1(0) | 1986 R300_ALU_RGB_ADDR2(0) | 1987 R300_ALU_RGB_ADDRD(0) | 1988 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1989 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1990 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1991 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 1992 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1993 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1994 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1995 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1996 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1997 /* alpha nop, but set up const1 */ 1998 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 1999 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2000 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2001 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2002 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2003 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2004 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2005 2006 /* rgb out0.r = op_sop, set up src0 reg */ 2007 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2008 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2009 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2010 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2011 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2012 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2013 /* alpha ex2 temp0, temp0.r */ 2014 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2015 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2016 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2017 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2018 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2019 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2020 2021 /* rgb out0.g = op_sop, set up src0 reg */ 2022 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2023 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2024 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2025 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2026 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2027 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2028 /* alpha ex2 temp0, temp0.g */ 2029 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2030 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2031 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2032 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2033 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2034 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2035 2036 /* rgb out0.b = op_sop, set up src0 reg */ 2037 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2038 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2039 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2040 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2041 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2042 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2043 /* alpha ex2 temp0, temp0.b */ 2044 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2045 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2046 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2047 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2048 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2049 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2050 } 2051 } else { 2052 BEGIN_ACCEL(needgamma ? 28 + 33 : 33); 2053 /* 2 components */ 2054 OUT_ACCEL_REG(R300_RS_COUNT, 2055 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2056 R300_RS_COUNT_HIRES_EN)); 2057 /* R300_INST_COUNT_RS - highest RS instruction used */ 2058 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 2059 2060 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 2061 2062 /* Indirection levels */ 2063 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 2064 R300_FIRST_TEX)); 2065 2066 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 2067 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 2068 R300_TEX_CODE_OFFSET(0) | 2069 R300_TEX_CODE_SIZE(1))); 2070 2071 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 2072 R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 2073 R300_TEX_START(0) | 2074 R300_TEX_SIZE(0) | 2075 R300_RGBA_OUT)); 2076 2077 /* tex inst */ 2078 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 2079 R300_TEX_DST_ADDR(0) | 2080 R300_TEX_ID(0) | 2081 R300_TEX_INST(R300_TEX_INST_LD))); 2082 2083 /* ALU inst */ 2084 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 2085 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2086 R300_ALU_RGB_ADDR1(0) | 2087 R300_ALU_RGB_ADDR2(0) | 2088 R300_ALU_RGB_ADDRD(1) | 2089 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2090 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2091 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2092 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 2093 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2094 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2095 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2096 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2097 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2098 /* alpha nop, but need to set up alpha source for rgb usage */ 2099 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2100 R300_ALU_ALPHA_ADDR1(0) | 2101 R300_ALU_ALPHA_ADDR2(0) | 2102 R300_ALU_ALPHA_ADDRD(0) | 2103 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2104 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2105 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2106 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2107 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2108 2109 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 2110 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2111 R300_ALU_RGB_ADDR1(0) | 2112 R300_ALU_RGB_ADDR2(1) | 2113 R300_ALU_RGB_ADDRD(1) | 2114 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2115 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2116 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2117 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 2118 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2119 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2120 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2121 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2122 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2123 /* alpha nop */ 2124 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 2125 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2126 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2127 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2128 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2129 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2130 2131 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 2132 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2133 R300_ALU_RGB_ADDR1(0) | 2134 R300_ALU_RGB_ADDR2(1) | 2135 R300_ALU_RGB_ADDRD(0) | 2136 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2137 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2138 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2139 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2140 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2141 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2142 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2143 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2144 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2145 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2146 R300_ALU_RGB_CLAMP)); 2147 /* write alpha 1 */ 2148 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2149 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2150 R300_ALU_ALPHA_TARGET_A)); 2151 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2152 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2153 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2154 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2155 2156 if (needgamma) { 2157 /* rgb temp0.r = op_sop, set up src0 reg */ 2158 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2159 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2160 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2161 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2162 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2163 /* alpha lg2 temp0, temp0.r */ 2164 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2165 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2166 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2167 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2168 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2169 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2170 2171 /* rgb temp0.g = op_sop, set up src0 reg */ 2172 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2173 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2174 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2175 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2176 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2177 /* alpha lg2 temp0, temp0.g */ 2178 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2179 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2180 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2181 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2182 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2183 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2184 2185 /* rgb temp0.b = op_sop, set up src0 reg */ 2186 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2187 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2188 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2189 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2190 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2191 /* alpha lg2 temp0, temp0.b */ 2192 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2193 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2194 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2195 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2196 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2197 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2198 2199 /* MUL const1, temp1, temp0 */ 2200 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2201 R300_ALU_RGB_ADDR1(0) | 2202 R300_ALU_RGB_ADDR2(0) | 2203 R300_ALU_RGB_ADDRD(0) | 2204 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2205 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2206 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2207 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2208 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2209 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2210 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2211 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2212 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2213 /* alpha nop, but set up const1 */ 2214 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2215 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2216 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2217 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2218 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2219 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2220 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2221 2222 /* rgb out0.r = op_sop, set up src0 reg */ 2223 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2224 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2225 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2226 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2227 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2228 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2229 /* alpha ex2 temp0, temp0.r */ 2230 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2231 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2232 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2233 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2234 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2235 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2236 2237 /* rgb out0.g = op_sop, set up src0 reg */ 2238 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2239 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2240 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2241 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2242 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2243 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2244 /* alpha ex2 temp0, temp0.g */ 2245 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2246 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2247 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2248 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2249 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2250 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2251 2252 /* rgb out0.b = op_sop, set up src0 reg */ 2253 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2254 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2255 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2256 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2257 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2258 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2259 /* alpha ex2 temp0, temp0.b */ 2260 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2261 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2262 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2263 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2264 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2265 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2266 } 2267 } 2268 2269 /* Shader constants. */ 2270 /* constant 0: off, yco */ 2271 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 2272 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 2273 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 2274 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2275 /* constant 1: uco */ 2276 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 2277 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 2278 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 2279 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2280 /* constant 2: vco */ 2281 OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 2282 OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 2283 OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 2284 OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2285 2286 FINISH_ACCEL(); 2287 } 2288 2289 BEGIN_ACCEL_RELOC(6, 2); 2290 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 2291 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 2292 2293 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2294 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2295 2296 /* no need to enable blending */ 2297 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2298 2299 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); 2300 FINISH_ACCEL(); 2301 2302 if (pPriv->vsync) { 2303 xf86CrtcPtr crtc; 2304 if (pPriv->desired_crtc) 2305 crtc = pPriv->desired_crtc; 2306 else 2307 crtc = radeon_pick_best_crtc(pScrn, 2308 pPriv->drw_x, 2309 pPriv->drw_x + pPriv->dst_w, 2310 pPriv->drw_y, 2311 pPriv->drw_y + pPriv->dst_h); 2312 if (crtc) 2313 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 2314 crtc, 2315 pPriv->drw_y - crtc->y, 2316 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2317 } 2318 /* 2319 * Rendering of the actual polygon is done in two different 2320 * ways depending on chip generation: 2321 * 2322 * < R300: 2323 * 2324 * These chips can render a rectangle in one pass, so 2325 * handling is pretty straight-forward. 2326 * 2327 * >= R300: 2328 * 2329 * These chips can accept a quad, but will render it as 2330 * two triangles which results in a diagonal tear. Instead 2331 * We render a single, large triangle and use the scissor 2332 * functionality to restrict it to the desired rectangle. 2333 * Due to guardband limits on r3xx/r4xx, we can only use 2334 * the single triangle up to 2560/4021 pixels; above that we 2335 * render as a quad. 2336 */ 2337 2338 while (nBox--) { 2339 int srcX, srcY, srcw, srch; 2340 int dstX, dstY, dstw, dsth; 2341 Bool use_quad = FALSE; 2342 dstX = pBox->x1 + dstxoff; 2343 dstY = pBox->y1 + dstyoff; 2344 dstw = pBox->x2 - pBox->x1; 2345 dsth = pBox->y2 - pBox->y1; 2346 2347 srcX = pPriv->src_x; 2348 srcX += ((pBox->x1 - pPriv->drw_x) * 2349 pPriv->src_w) / pPriv->dst_w; 2350 srcY = pPriv->src_y; 2351 srcY += ((pBox->y1 - pPriv->drw_y) * 2352 pPriv->src_h) / pPriv->dst_h; 2353 2354 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 2355 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 2356 2357#if 0 2358 ErrorF("dst: %d, %d, %d, %d\n", dstX, dstY, dstw, dsth); 2359 ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch); 2360#endif 2361 2362 if (IS_R400_3D) { 2363 if ((dstw+dsth) > 4021) 2364 use_quad = TRUE; 2365 } else { 2366 if ((dstw+dsth) > 2560) 2367 use_quad = TRUE; 2368 } 2369 /* 2370 * Set up the scissor area to that of the output size. 2371 */ 2372 BEGIN_ACCEL(2); 2373 /* R300 has an offset */ 2374 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) | 2375 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT))); 2376 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) | 2377 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); 2378 FINISH_ACCEL(); 2379 2380#ifdef ACCEL_CP 2381 if (use_quad) { 2382 BEGIN_RING(4 * vtx_count + 4); 2383 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2384 4 * vtx_count)); 2385 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | 2386 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2387 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2388 } else { 2389 BEGIN_RING(3 * vtx_count + 4); 2390 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2391 3 * vtx_count)); 2392 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 2393 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2394 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2395 } 2396#else /* ACCEL_CP */ 2397 if (use_quad) 2398 BEGIN_ACCEL(2 + vtx_count * 4); 2399 else 2400 BEGIN_ACCEL(2 + vtx_count * 3); 2401 2402 if (use_quad) 2403 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | 2404 RADEON_VF_PRIM_WALK_DATA | 2405 (4 << RADEON_VF_NUM_VERTICES_SHIFT))); 2406 else 2407 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 2408 RADEON_VF_PRIM_WALK_DATA | 2409 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 2410#endif 2411 if (pPriv->bicubic_enabled) { 2412 /* 2413 * This code is only executed on >= R300, so we don't 2414 * have to deal with the legacy handling. 2415 */ 2416 if (use_quad) { 2417 VTX_OUT_6((float)dstX, (float)dstY, 2418 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2419 (float)srcX + 0.5, (float)srcY + 0.5); 2420 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 2421 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 2422 (float)srcX + 0.5, (float)(srcY + srch) + 0.5); 2423 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 2424 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 2425 (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); 2426 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 2427 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 2428 (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); 2429 } else { 2430 VTX_OUT_6((float)dstX, (float)dstY, 2431 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2432 (float)srcX + 0.5, (float)srcY + 0.5); 2433 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 2434 (float)srcX / pPriv->w, 2435 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 2436 (float)srcX + 0.5, 2437 (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 2438 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 2439 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2440 (float)srcY / pPriv->h, 2441 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 2442 (float)srcY + 0.5); 2443 } 2444 } else { 2445 if (use_quad) { 2446 VTX_OUT_4((float)dstX, (float)dstY, 2447 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2448 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 2449 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 2450 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 2451 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 2452 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 2453 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 2454 } else { 2455 /* 2456 * Render a big, scissored triangle. This means 2457 * increasing the triangle size and adjusting 2458 * texture coordinates. 2459 */ 2460 VTX_OUT_4((float)dstX, (float)dstY, 2461 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2462 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 2463 (float)srcX / pPriv->w, 2464 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 2465 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 2466 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2467 (float)srcY / pPriv->h); 2468 } 2469 } 2470 2471 /* flushing is pipelined, free/finish is not */ 2472 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2473 2474#ifdef ACCEL_CP 2475 ADVANCE_RING(); 2476#else 2477 FINISH_ACCEL(); 2478#endif /* !ACCEL_CP */ 2479 2480 pBox++; 2481 } 2482 2483 BEGIN_ACCEL(3); 2484 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 2485 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 2486 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 2487 FINISH_ACCEL(); 2488 2489 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 2490} 2491 2492static void 2493FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2494{ 2495 RADEONInfoPtr info = RADEONPTR(pScrn); 2496 PixmapPtr pPixmap = pPriv->pPixmap; 2497 struct radeon_exa_pixmap_priv *driver_priv; 2498 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 2499 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 2500 uint32_t dst_pitch, dst_format; 2501 uint32_t txenable, colorpitch, bicubic_offset; 2502 uint32_t output_fmt; 2503 Bool isplanar = FALSE; 2504 int dstxoff, dstyoff, pixel_shift, vtx_count; 2505 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 2506 int nBox = REGION_NUM_RECTS(&pPriv->clip); 2507 ACCEL_PREAMBLE(); 2508 2509#ifdef XF86DRM_MODE 2510 if (info->cs) { 2511 int ret; 2512 2513 radeon_cs_space_reset_bos(info->cs); 2514 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2515 2516 if (pPriv->bicubic_enabled) 2517 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2518 2519 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 2520 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 2521 2522 ret = radeon_cs_space_check(info->cs); 2523 if (ret) { 2524 ErrorF("Not enough RAM to hw accel xv operation\n"); 2525 return; 2526 } 2527 } 2528#endif 2529 2530 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2531 2532#ifdef USE_EXA 2533 if (info->useEXA) { 2534 dst_pitch = exaGetPixmapPitch(pPixmap); 2535 } else 2536#endif 2537 { 2538 dst_pitch = pPixmap->devKind; 2539 } 2540 2541#ifdef COMPOSITE 2542 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 2543 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 2544#else 2545 dstxoff = 0; 2546 dstyoff = 0; 2547#endif 2548 2549#ifdef USE_EXA 2550 if (info->useEXA) { 2551 RADEON_SWITCH_TO_3D(); 2552 } else 2553#endif 2554 { 2555 BEGIN_ACCEL(2); 2556 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2557 /* We must wait for 3d to idle, in case source was just written as a dest. */ 2558 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 2559 RADEON_WAIT_HOST_IDLECLEAN | 2560 RADEON_WAIT_2D_IDLECLEAN | 2561 RADEON_WAIT_3D_IDLECLEAN | 2562 RADEON_WAIT_DMA_GUI_IDLE); 2563 FINISH_ACCEL(); 2564 2565 if (!info->accel_state->XInited3D) 2566 RADEONInit3DEngine(pScrn); 2567 } 2568 2569 if (pPriv->bicubic_enabled) 2570 vtx_count = 6; 2571 else 2572 vtx_count = 4; 2573 2574 switch (pPixmap->drawable.bitsPerPixel) { 2575 case 16: 2576 if (pPixmap->drawable.depth == 15) 2577 dst_format = R300_COLORFORMAT_ARGB1555; 2578 else 2579 dst_format = R300_COLORFORMAT_RGB565; 2580 break; 2581 case 32: 2582 dst_format = R300_COLORFORMAT_ARGB8888; 2583 break; 2584 default: 2585 return; 2586 } 2587 2588 output_fmt = (R300_OUT_FMT_C4_8 | 2589 R300_OUT_FMT_C0_SEL_BLUE | 2590 R300_OUT_FMT_C1_SEL_GREEN | 2591 R300_OUT_FMT_C2_SEL_RED | 2592 R300_OUT_FMT_C3_SEL_ALPHA); 2593 2594 colorpitch = dst_pitch >> pixel_shift; 2595 colorpitch |= dst_format; 2596 2597 if (RADEONTilingEnabled(pScrn, pPixmap)) 2598 colorpitch |= R300_COLORTILE; 2599 2600 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2601 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2602 isplanar = TRUE; 2603 2604 if (isplanar) { 2605 txformat1 = R300_TX_FORMAT_X8; 2606 txpitch = pPriv->src_pitch; 2607 } else { 2608 if (pPriv->id == FOURCC_UYVY) 2609 txformat1 = R300_TX_FORMAT_YVYU422; 2610 else 2611 txformat1 = R300_TX_FORMAT_VYUY422; 2612 2613 if (pPriv->bicubic_state != BICUBIC_OFF) 2614 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2615 2616 /* pitch is in pixels */ 2617 txpitch = pPriv->src_pitch / 2; 2618 } 2619 txpitch -= 1; 2620 2621 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2622 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2623 R300_TXPITCH_EN); 2624 2625 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2626 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2627 R300_TX_MAG_FILTER_LINEAR | 2628 R300_TX_MIN_FILTER_LINEAR | 2629 (0 << R300_TX_ID_SHIFT)); 2630 2631 2632 if ((pPriv->w - 1) & 0x800) 2633 txpitch |= R500_TXWIDTH_11; 2634 2635 if ((pPriv->h - 1) & 0x800) 2636 txpitch |= R500_TXHEIGHT_11; 2637 2638 txoffset = info->cs ? 0 : pPriv->src_offset; 2639 2640 BEGIN_ACCEL_RELOC(6, 1); 2641 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 2642 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 2643 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 2644 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 2645 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 2646 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 2647 FINISH_ACCEL(); 2648 2649 txenable = R300_TEX_0_ENABLE; 2650 2651 if (isplanar) { 2652 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2653 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2654 R300_TXPITCH_EN); 2655 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2656 txpitch -= 1; 2657 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2658 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2659 R300_TX_MIN_FILTER_LINEAR | 2660 R300_TX_MAG_FILTER_LINEAR); 2661 2662 BEGIN_ACCEL_RELOC(12, 2); 2663 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 2664 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2665 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2666 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 2667 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2668 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 2669 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 2670 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 2671 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 2672 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 2673 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 2674 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 2675 FINISH_ACCEL(); 2676 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2677 } 2678 2679 if (pPriv->bicubic_enabled) { 2680 /* Size is 128x1 */ 2681 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2682 (0x0 << R300_TXHEIGHT_SHIFT) | 2683 R300_TXPITCH_EN); 2684 /* Format is 32-bit floats, 4bpp */ 2685 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2686 /* Pitch is 127 (128-1) */ 2687 txpitch = 0x7f; 2688 /* Tex filter */ 2689 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2690 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2691 R300_TX_MIN_FILTER_NEAREST | 2692 R300_TX_MAG_FILTER_NEAREST | 2693 (1 << R300_TX_ID_SHIFT)); 2694 2695 if (info->cs) 2696 bicubic_offset = 0; 2697 else 2698 bicubic_offset = pPriv->bicubic_src_offset; 2699 2700 BEGIN_ACCEL_RELOC(6, 1); 2701 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 2702 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2703 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2704 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 2705 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2706 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 2707 FINISH_ACCEL(); 2708 2709 /* Enable tex 1 */ 2710 txenable |= R300_TEX_1_ENABLE; 2711 } 2712 2713 /* setup the VAP */ 2714 if (info->accel_state->has_tcl) { 2715 if (pPriv->bicubic_enabled) 2716 BEGIN_ACCEL(7); 2717 else 2718 BEGIN_ACCEL(6); 2719 } else { 2720 if (pPriv->bicubic_enabled) 2721 BEGIN_ACCEL(5); 2722 else 2723 BEGIN_ACCEL(4); 2724 } 2725 2726 /* These registers define the number, type, and location of data submitted 2727 * to the PVS unit of GA input (when PVS is disabled) 2728 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2729 * enabled. This memory provides the imputs to the vertex shader program 2730 * and ordering is not important. When PVS/TCL is disabled, this field maps 2731 * directly to the GA input memory and the order is signifigant. In 2732 * PVS_BYPASS mode the order is as follows: 2733 * Position 2734 * Point Size 2735 * Color 0-3 2736 * Textures 0-7 2737 * Fog 2738 */ 2739 if (pPriv->bicubic_enabled) { 2740 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2741 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2742 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2743 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2744 R300_SIGNED_0 | 2745 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2746 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2747 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2748 R300_SIGNED_1)); 2749 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 2750 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2751 (0 << R300_SKIP_DWORDS_2_SHIFT) | 2752 (7 << R300_DST_VEC_LOC_2_SHIFT) | 2753 R300_LAST_VEC_2 | 2754 R300_SIGNED_2)); 2755 } else { 2756 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2757 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2758 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2759 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2760 R300_SIGNED_0 | 2761 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2762 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2763 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2764 R300_LAST_VEC_1 | 2765 R300_SIGNED_1)); 2766 } 2767 2768 /* load the vertex shader 2769 * We pre-load vertex programs in RADEONInit3DEngine(): 2770 * - exa 2771 * - Xv 2772 * - Xv bicubic 2773 * Here we select the offset of the vertex program we want to use 2774 */ 2775 if (info->accel_state->has_tcl) { 2776 if (pPriv->bicubic_enabled) { 2777 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2778 ((11 << R300_PVS_FIRST_INST_SHIFT) | 2779 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2780 (13 << R300_PVS_LAST_INST_SHIFT))); 2781 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2782 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2783 } else { 2784 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2785 ((9 << R300_PVS_FIRST_INST_SHIFT) | 2786 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2787 (10 << R300_PVS_LAST_INST_SHIFT))); 2788 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2789 (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2790 } 2791 } 2792 2793 /* Position and one set of 2 texture coordinates */ 2794 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2795 if (pPriv->bicubic_enabled) 2796 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2797 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2798 else 2799 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2800 2801 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 2802 FINISH_ACCEL(); 2803 2804 /* setup pixel shader */ 2805 if (pPriv->bicubic_state != BICUBIC_OFF) { 2806 if (pPriv->bicubic_enabled) { 2807 BEGIN_ACCEL(7); 2808 2809 /* 4 components: 2 for tex0 and 2 for tex1 */ 2810 OUT_ACCEL_REG(R300_RS_COUNT, 2811 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2812 R300_RS_COUNT_HIRES_EN)); 2813 2814 /* R300_INST_COUNT_RS - highest RS instruction used */ 2815 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 2816 2817 /* Pixel stack frame size. */ 2818 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 2819 2820 /* FP length. */ 2821 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 2822 R500_US_CODE_END_ADDR(13))); 2823 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 2824 R500_US_CODE_RANGE_SIZE(13))); 2825 2826 /* Prepare for FP emission. */ 2827 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 2828 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 2829 FINISH_ACCEL(); 2830 2831 BEGIN_ACCEL(89); 2832 /* Pixel shader. 2833 * I've gone ahead and annotated each instruction, since this 2834 * thing is MASSIVE. :3 2835 * Note: In order to avoid buggies with temps and multiple 2836 * inputs, all temps are offset by 2. temp0 -> register2. */ 2837 2838 /* TEX temp2, input1.xxxx, tex1, 1D */ 2839 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2840 R500_INST_RGB_WMASK_R | 2841 R500_INST_RGB_WMASK_G | 2842 R500_INST_RGB_WMASK_B)); 2843 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2844 R500_TEX_INST_LD | 2845 R500_TEX_IGNORE_UNCOVERED)); 2846 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2847 R500_TEX_SRC_S_SWIZ_R | 2848 R500_TEX_SRC_T_SWIZ_R | 2849 R500_TEX_SRC_R_SWIZ_R | 2850 R500_TEX_SRC_Q_SWIZ_R | 2851 R500_TEX_DST_ADDR(2) | 2852 R500_TEX_DST_R_SWIZ_R | 2853 R500_TEX_DST_G_SWIZ_G | 2854 R500_TEX_DST_B_SWIZ_B | 2855 R500_TEX_DST_A_SWIZ_A)); 2856 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2857 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2858 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2859 2860 /* TEX temp5, input1.yyyy, tex1, 1D */ 2861 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2862 R500_INST_TEX_SEM_WAIT | 2863 R500_INST_RGB_WMASK_R | 2864 R500_INST_RGB_WMASK_G | 2865 R500_INST_RGB_WMASK_B)); 2866 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2867 R500_TEX_INST_LD | 2868 R500_TEX_SEM_ACQUIRE | 2869 R500_TEX_IGNORE_UNCOVERED)); 2870 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2871 R500_TEX_SRC_S_SWIZ_G | 2872 R500_TEX_SRC_T_SWIZ_G | 2873 R500_TEX_SRC_R_SWIZ_G | 2874 R500_TEX_SRC_Q_SWIZ_G | 2875 R500_TEX_DST_ADDR(5) | 2876 R500_TEX_DST_R_SWIZ_R | 2877 R500_TEX_DST_G_SWIZ_G | 2878 R500_TEX_DST_B_SWIZ_B | 2879 R500_TEX_DST_A_SWIZ_A)); 2880 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2881 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2882 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2883 2884 /* MUL temp4, const0.x0x0, temp2.yyxx */ 2885 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2886 R500_INST_TEX_SEM_WAIT | 2887 R500_INST_RGB_WMASK_R | 2888 R500_INST_RGB_WMASK_G | 2889 R500_INST_RGB_WMASK_B | 2890 R500_INST_ALPHA_WMASK)); 2891 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2892 R500_RGB_ADDR0_CONST | 2893 R500_RGB_ADDR1(2))); 2894 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2895 R500_ALPHA_ADDR0_CONST | 2896 R500_ALPHA_ADDR1(2))); 2897 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2898 R500_ALU_RGB_R_SWIZ_A_R | 2899 R500_ALU_RGB_G_SWIZ_A_0 | 2900 R500_ALU_RGB_B_SWIZ_A_R | 2901 R500_ALU_RGB_SEL_B_SRC1 | 2902 R500_ALU_RGB_R_SWIZ_B_G | 2903 R500_ALU_RGB_G_SWIZ_B_G | 2904 R500_ALU_RGB_B_SWIZ_B_R)); 2905 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2906 R500_ALPHA_OP_MAD | 2907 R500_ALPHA_SEL_A_SRC0 | 2908 R500_ALPHA_SWIZ_A_0 | 2909 R500_ALPHA_SEL_B_SRC1 | 2910 R500_ALPHA_SWIZ_B_R)); 2911 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2912 R500_ALU_RGBA_OP_MAD | 2913 R500_ALU_RGBA_R_SWIZ_0 | 2914 R500_ALU_RGBA_G_SWIZ_0 | 2915 R500_ALU_RGBA_B_SWIZ_0 | 2916 R500_ALU_RGBA_A_SWIZ_0)); 2917 2918 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 2919 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2920 R500_INST_RGB_WMASK_R | 2921 R500_INST_RGB_WMASK_G | 2922 R500_INST_RGB_WMASK_B | 2923 R500_INST_ALPHA_WMASK)); 2924 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2925 R500_RGB_ADDR0_CONST | 2926 R500_RGB_ADDR1(5) | 2927 R500_RGB_ADDR2(4))); 2928 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2929 R500_ALPHA_ADDR0_CONST | 2930 R500_ALPHA_ADDR1(5) | 2931 R500_ALPHA_ADDR2(4))); 2932 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2933 R500_ALU_RGB_R_SWIZ_A_0 | 2934 R500_ALU_RGB_G_SWIZ_A_G | 2935 R500_ALU_RGB_B_SWIZ_A_0 | 2936 R500_ALU_RGB_SEL_B_SRC1 | 2937 R500_ALU_RGB_R_SWIZ_B_R | 2938 R500_ALU_RGB_G_SWIZ_B_R | 2939 R500_ALU_RGB_B_SWIZ_B_R)); 2940 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2941 R500_ALPHA_OP_MAD | 2942 R500_ALPHA_SEL_A_SRC0 | 2943 R500_ALPHA_SWIZ_A_G | 2944 R500_ALPHA_SEL_B_SRC1 | 2945 R500_ALPHA_SWIZ_B_R)); 2946 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2947 R500_ALU_RGBA_OP_MAD | 2948 R500_ALU_RGBA_SEL_C_SRC2 | 2949 R500_ALU_RGBA_R_SWIZ_R | 2950 R500_ALU_RGBA_G_SWIZ_G | 2951 R500_ALU_RGBA_B_SWIZ_B | 2952 R500_ALU_RGBA_A_SWIZ_A)); 2953 2954 /* ADD temp3, temp3, input0.xyxy */ 2955 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2956 R500_INST_RGB_WMASK_R | 2957 R500_INST_RGB_WMASK_G | 2958 R500_INST_RGB_WMASK_B | 2959 R500_INST_ALPHA_WMASK)); 2960 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 2961 R500_RGB_ADDR2(0))); 2962 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 2963 R500_ALPHA_ADDR2(0))); 2964 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2965 R500_ALU_RGB_G_SWIZ_A_1 | 2966 R500_ALU_RGB_B_SWIZ_A_1 | 2967 R500_ALU_RGB_SEL_B_SRC1 | 2968 R500_ALU_RGB_R_SWIZ_B_R | 2969 R500_ALU_RGB_G_SWIZ_B_G | 2970 R500_ALU_RGB_B_SWIZ_B_B)); 2971 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2972 R500_ALPHA_OP_MAD | 2973 R500_ALPHA_SWIZ_A_1 | 2974 R500_ALPHA_SEL_B_SRC1 | 2975 R500_ALPHA_SWIZ_B_A)); 2976 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2977 R500_ALU_RGBA_OP_MAD | 2978 R500_ALU_RGBA_SEL_C_SRC2 | 2979 R500_ALU_RGBA_R_SWIZ_R | 2980 R500_ALU_RGBA_G_SWIZ_G | 2981 R500_ALU_RGBA_B_SWIZ_R | 2982 R500_ALU_RGBA_A_SWIZ_G)); 2983 2984 /* TEX temp1, temp3.zwxy, tex0, 2D */ 2985 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2986 R500_INST_RGB_WMASK_R | 2987 R500_INST_RGB_WMASK_G | 2988 R500_INST_RGB_WMASK_B | 2989 R500_INST_ALPHA_WMASK)); 2990 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2991 R500_TEX_INST_LD | 2992 R500_TEX_IGNORE_UNCOVERED)); 2993 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2994 R500_TEX_SRC_S_SWIZ_B | 2995 R500_TEX_SRC_T_SWIZ_A | 2996 R500_TEX_SRC_R_SWIZ_R | 2997 R500_TEX_SRC_Q_SWIZ_G | 2998 R500_TEX_DST_ADDR(1) | 2999 R500_TEX_DST_R_SWIZ_R | 3000 R500_TEX_DST_G_SWIZ_G | 3001 R500_TEX_DST_B_SWIZ_B | 3002 R500_TEX_DST_A_SWIZ_A)); 3003 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3004 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3005 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3006 3007 /* TEX temp3, temp3.xyzw, tex0, 2D */ 3008 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3009 R500_INST_TEX_SEM_WAIT | 3010 R500_INST_RGB_WMASK_R | 3011 R500_INST_RGB_WMASK_G | 3012 R500_INST_RGB_WMASK_B | 3013 R500_INST_ALPHA_WMASK)); 3014 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3015 R500_TEX_INST_LD | 3016 R500_TEX_SEM_ACQUIRE | 3017 R500_TEX_IGNORE_UNCOVERED)); 3018 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3019 R500_TEX_SRC_S_SWIZ_R | 3020 R500_TEX_SRC_T_SWIZ_G | 3021 R500_TEX_SRC_R_SWIZ_B | 3022 R500_TEX_SRC_Q_SWIZ_A | 3023 R500_TEX_DST_ADDR(3) | 3024 R500_TEX_DST_R_SWIZ_R | 3025 R500_TEX_DST_G_SWIZ_G | 3026 R500_TEX_DST_B_SWIZ_B | 3027 R500_TEX_DST_A_SWIZ_A)); 3028 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3029 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3030 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3031 3032 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 3033 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3034 R500_INST_RGB_WMASK_R | 3035 R500_INST_RGB_WMASK_G | 3036 R500_INST_RGB_WMASK_B | 3037 R500_INST_ALPHA_WMASK)); 3038 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3039 R500_RGB_ADDR0_CONST | 3040 R500_RGB_ADDR1(5) | 3041 R500_RGB_ADDR2(4))); 3042 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3043 R500_ALPHA_ADDR0_CONST | 3044 R500_ALPHA_ADDR1(5) | 3045 R500_ALPHA_ADDR2(4))); 3046 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3047 R500_ALU_RGB_R_SWIZ_A_0 | 3048 R500_ALU_RGB_G_SWIZ_A_G | 3049 R500_ALU_RGB_B_SWIZ_A_0 | 3050 R500_ALU_RGB_SEL_B_SRC1 | 3051 R500_ALU_RGB_R_SWIZ_B_G | 3052 R500_ALU_RGB_G_SWIZ_B_G | 3053 R500_ALU_RGB_B_SWIZ_B_G)); 3054 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3055 R500_ALPHA_OP_MAD | 3056 R500_ALPHA_SEL_A_SRC0 | 3057 R500_ALPHA_SWIZ_A_G | 3058 R500_ALPHA_SEL_B_SRC1 | 3059 R500_ALPHA_SWIZ_B_G)); 3060 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3061 R500_ALU_RGBA_OP_MAD | 3062 R500_ALU_RGBA_SEL_C_SRC2 | 3063 R500_ALU_RGBA_R_SWIZ_R | 3064 R500_ALU_RGBA_G_SWIZ_G | 3065 R500_ALU_RGBA_B_SWIZ_B | 3066 R500_ALU_RGBA_A_SWIZ_A)); 3067 3068 /* ADD temp0, temp4, input0.xyxy */ 3069 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3070 R500_INST_RGB_WMASK_R | 3071 R500_INST_RGB_WMASK_G | 3072 R500_INST_RGB_WMASK_B | 3073 R500_INST_ALPHA_WMASK)); 3074 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 3075 R500_RGB_ADDR2(0))); 3076 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 3077 R500_ALPHA_ADDR2(0))); 3078 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3079 R500_ALU_RGB_G_SWIZ_A_1 | 3080 R500_ALU_RGB_B_SWIZ_A_1 | 3081 R500_ALU_RGB_SEL_B_SRC1 | 3082 R500_ALU_RGB_R_SWIZ_B_R | 3083 R500_ALU_RGB_G_SWIZ_B_G | 3084 R500_ALU_RGB_B_SWIZ_B_B)); 3085 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3086 R500_ALPHA_OP_MAD | 3087 R500_ALPHA_SWIZ_A_1 | 3088 R500_ALPHA_SEL_B_SRC1 | 3089 R500_ALPHA_SWIZ_B_A)); 3090 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3091 R500_ALU_RGBA_OP_MAD | 3092 R500_ALU_RGBA_SEL_C_SRC2 | 3093 R500_ALU_RGBA_R_SWIZ_R | 3094 R500_ALU_RGBA_G_SWIZ_G | 3095 R500_ALU_RGBA_B_SWIZ_R | 3096 R500_ALU_RGBA_A_SWIZ_G)); 3097 3098 /* TEX temp4, temp0.zwzw, tex0, 2D */ 3099 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3100 R500_INST_TEX_SEM_WAIT | 3101 R500_INST_RGB_WMASK_R | 3102 R500_INST_RGB_WMASK_G | 3103 R500_INST_RGB_WMASK_B | 3104 R500_INST_ALPHA_WMASK)); 3105 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3106 R500_TEX_INST_LD | 3107 R500_TEX_IGNORE_UNCOVERED)); 3108 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3109 R500_TEX_SRC_S_SWIZ_B | 3110 R500_TEX_SRC_T_SWIZ_A | 3111 R500_TEX_SRC_R_SWIZ_B | 3112 R500_TEX_SRC_Q_SWIZ_A | 3113 R500_TEX_DST_ADDR(4) | 3114 R500_TEX_DST_R_SWIZ_R | 3115 R500_TEX_DST_G_SWIZ_G | 3116 R500_TEX_DST_B_SWIZ_B | 3117 R500_TEX_DST_A_SWIZ_A)); 3118 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3119 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3120 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3121 3122 /* TEX temp0, temp0.xyzw, tex0, 2D */ 3123 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3124 R500_INST_TEX_SEM_WAIT | 3125 R500_INST_RGB_WMASK_R | 3126 R500_INST_RGB_WMASK_G | 3127 R500_INST_RGB_WMASK_B | 3128 R500_INST_ALPHA_WMASK)); 3129 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3130 R500_TEX_INST_LD | 3131 R500_TEX_SEM_ACQUIRE | 3132 R500_TEX_IGNORE_UNCOVERED)); 3133 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3134 R500_TEX_SRC_S_SWIZ_R | 3135 R500_TEX_SRC_T_SWIZ_G | 3136 R500_TEX_SRC_R_SWIZ_B | 3137 R500_TEX_SRC_Q_SWIZ_A | 3138 R500_TEX_DST_ADDR(0) | 3139 R500_TEX_DST_R_SWIZ_R | 3140 R500_TEX_DST_G_SWIZ_G | 3141 R500_TEX_DST_B_SWIZ_B | 3142 R500_TEX_DST_A_SWIZ_A)); 3143 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3144 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3145 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3146 3147 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 3148 * - PRESUB temps, temp1 - temp3 3149 * - MAD temp2.zzzz, temps, temp3 */ 3150 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3151 R500_INST_RGB_WMASK_R | 3152 R500_INST_RGB_WMASK_G | 3153 R500_INST_RGB_WMASK_B | 3154 R500_INST_ALPHA_WMASK)); 3155 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3156 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3157 R500_RGB_ADDR1(1) | 3158 R500_RGB_ADDR2(2))); 3159 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3160 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3161 R500_ALPHA_ADDR1(1) | 3162 R500_ALPHA_ADDR2(2))); 3163 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3164 R500_ALU_RGB_R_SWIZ_A_B | 3165 R500_ALU_RGB_G_SWIZ_A_B | 3166 R500_ALU_RGB_B_SWIZ_A_B | 3167 R500_ALU_RGB_SEL_B_SRCP | 3168 R500_ALU_RGB_R_SWIZ_B_R | 3169 R500_ALU_RGB_G_SWIZ_B_G | 3170 R500_ALU_RGB_B_SWIZ_B_B)); 3171 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3172 R500_ALPHA_OP_MAD | 3173 R500_ALPHA_SEL_A_SRC2 | 3174 R500_ALPHA_SWIZ_A_B | 3175 R500_ALPHA_SEL_B_SRCP | 3176 R500_ALPHA_SWIZ_B_A)); 3177 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3178 R500_ALU_RGBA_OP_MAD | 3179 R500_ALU_RGBA_SEL_C_SRC0 | 3180 R500_ALU_RGBA_R_SWIZ_R | 3181 R500_ALU_RGBA_G_SWIZ_G | 3182 R500_ALU_RGBA_B_SWIZ_B | 3183 R500_ALU_RGBA_A_SWIZ_A)); 3184 3185 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3186 * - PRESUB temps, temp4 - temp1 3187 * - MAD temp2.zzzz, temps, temp0 */ 3188 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3189 R500_INST_TEX_SEM_WAIT | 3190 R500_INST_RGB_WMASK_R | 3191 R500_INST_RGB_WMASK_G | 3192 R500_INST_RGB_WMASK_B | 3193 R500_INST_ALPHA_WMASK)); 3194 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3195 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3196 R500_RGB_ADDR1(4) | 3197 R500_RGB_ADDR2(2))); 3198 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3199 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3200 R500_ALPHA_ADDR1(4) | 3201 R500_ALPHA_ADDR2(2))); 3202 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3203 R500_ALU_RGB_R_SWIZ_A_B | 3204 R500_ALU_RGB_G_SWIZ_A_B | 3205 R500_ALU_RGB_B_SWIZ_A_B | 3206 R500_ALU_RGB_SEL_B_SRCP | 3207 R500_ALU_RGB_R_SWIZ_B_R | 3208 R500_ALU_RGB_G_SWIZ_B_G | 3209 R500_ALU_RGB_B_SWIZ_B_B)); 3210 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3211 R500_ALPHA_OP_MAD | 3212 R500_ALPHA_SEL_A_SRC2 | 3213 R500_ALPHA_SWIZ_A_B | 3214 R500_ALPHA_SEL_B_SRCP | 3215 R500_ALPHA_SWIZ_B_A)); 3216 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3217 R500_ALU_RGBA_OP_MAD | 3218 R500_ALU_RGBA_SEL_C_SRC0 | 3219 R500_ALU_RGBA_R_SWIZ_R | 3220 R500_ALU_RGBA_G_SWIZ_G | 3221 R500_ALU_RGBA_B_SWIZ_B | 3222 R500_ALU_RGBA_A_SWIZ_A)); 3223 3224 /* LRP output, temp5.zzzz, temp3, temp0 -> 3225 * - PRESUB temps, temp3 - temp0 3226 * - MAD temp5.zzzz, temps, temp0 */ 3227 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3228 R500_INST_LAST | 3229 R500_INST_TEX_SEM_WAIT | 3230 R500_INST_RGB_WMASK_R | 3231 R500_INST_RGB_WMASK_G | 3232 R500_INST_RGB_WMASK_B | 3233 R500_INST_ALPHA_WMASK | 3234 R500_INST_RGB_OMASK_R | 3235 R500_INST_RGB_OMASK_G | 3236 R500_INST_RGB_OMASK_B | 3237 R500_INST_ALPHA_OMASK)); 3238 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3239 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3240 R500_RGB_ADDR1(3) | 3241 R500_RGB_ADDR2(5))); 3242 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3243 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3244 R500_ALPHA_ADDR1(3) | 3245 R500_ALPHA_ADDR2(5))); 3246 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3247 R500_ALU_RGB_R_SWIZ_A_B | 3248 R500_ALU_RGB_G_SWIZ_A_B | 3249 R500_ALU_RGB_B_SWIZ_A_B | 3250 R500_ALU_RGB_SEL_B_SRCP | 3251 R500_ALU_RGB_R_SWIZ_B_R | 3252 R500_ALU_RGB_G_SWIZ_B_G | 3253 R500_ALU_RGB_B_SWIZ_B_B)); 3254 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3255 R500_ALPHA_OP_MAD | 3256 R500_ALPHA_SEL_A_SRC2 | 3257 R500_ALPHA_SWIZ_A_B | 3258 R500_ALPHA_SEL_B_SRCP | 3259 R500_ALPHA_SWIZ_B_A)); 3260 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3261 R500_ALU_RGBA_OP_MAD | 3262 R500_ALU_RGBA_SEL_C_SRC0 | 3263 R500_ALU_RGBA_R_SWIZ_R | 3264 R500_ALU_RGBA_G_SWIZ_G | 3265 R500_ALU_RGBA_B_SWIZ_B | 3266 R500_ALU_RGBA_A_SWIZ_A)); 3267 3268 /* Shader constants. */ 3269 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3270 3271 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3272 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3273 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3274 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3275 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3276 3277 FINISH_ACCEL(); 3278 } else { 3279 BEGIN_ACCEL(19); 3280 /* 2 components: 2 for tex0 */ 3281 OUT_ACCEL_REG(R300_RS_COUNT, 3282 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3283 R300_RS_COUNT_HIRES_EN)); 3284 3285 /* R300_INST_COUNT_RS - highest RS instruction used */ 3286 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3287 3288 /* Pixel stack frame size. */ 3289 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3290 3291 /* FP length. */ 3292 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3293 R500_US_CODE_END_ADDR(1))); 3294 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3295 R500_US_CODE_RANGE_SIZE(1))); 3296 3297 /* Prepare for FP emission. */ 3298 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3299 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3300 3301 /* tex inst */ 3302 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3303 R500_INST_TEX_SEM_WAIT | 3304 R500_INST_RGB_WMASK_R | 3305 R500_INST_RGB_WMASK_G | 3306 R500_INST_RGB_WMASK_B | 3307 R500_INST_ALPHA_WMASK | 3308 R500_INST_RGB_CLAMP | 3309 R500_INST_ALPHA_CLAMP)); 3310 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3311 R500_TEX_INST_LD | 3312 R500_TEX_SEM_ACQUIRE | 3313 R500_TEX_IGNORE_UNCOVERED)); 3314 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3315 R500_TEX_SRC_S_SWIZ_R | 3316 R500_TEX_SRC_T_SWIZ_G | 3317 R500_TEX_DST_ADDR(0) | 3318 R500_TEX_DST_R_SWIZ_R | 3319 R500_TEX_DST_G_SWIZ_G | 3320 R500_TEX_DST_B_SWIZ_B | 3321 R500_TEX_DST_A_SWIZ_A)); 3322 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3323 R500_DX_S_SWIZ_R | 3324 R500_DX_T_SWIZ_R | 3325 R500_DX_R_SWIZ_R | 3326 R500_DX_Q_SWIZ_R | 3327 R500_DY_ADDR(0) | 3328 R500_DY_S_SWIZ_R | 3329 R500_DY_T_SWIZ_R | 3330 R500_DY_R_SWIZ_R | 3331 R500_DY_Q_SWIZ_R)); 3332 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3333 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3334 3335 /* ALU inst */ 3336 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3337 R500_INST_TEX_SEM_WAIT | 3338 R500_INST_LAST | 3339 R500_INST_RGB_OMASK_R | 3340 R500_INST_RGB_OMASK_G | 3341 R500_INST_RGB_OMASK_B | 3342 R500_INST_ALPHA_OMASK | 3343 R500_INST_RGB_CLAMP | 3344 R500_INST_ALPHA_CLAMP)); 3345 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3346 R500_RGB_ADDR1(0) | 3347 R500_RGB_ADDR1_CONST | 3348 R500_RGB_ADDR2(0) | 3349 R500_RGB_ADDR2_CONST)); 3350 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3351 R500_ALPHA_ADDR1(0) | 3352 R500_ALPHA_ADDR1_CONST | 3353 R500_ALPHA_ADDR2(0) | 3354 R500_ALPHA_ADDR2_CONST)); 3355 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3356 R500_ALU_RGB_R_SWIZ_A_R | 3357 R500_ALU_RGB_G_SWIZ_A_G | 3358 R500_ALU_RGB_B_SWIZ_A_B | 3359 R500_ALU_RGB_SEL_B_SRC0 | 3360 R500_ALU_RGB_R_SWIZ_B_1 | 3361 R500_ALU_RGB_B_SWIZ_B_1 | 3362 R500_ALU_RGB_G_SWIZ_B_1)); 3363 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3364 R500_ALPHA_SWIZ_A_A | 3365 R500_ALPHA_SWIZ_B_1)); 3366 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3367 R500_ALU_RGBA_R_SWIZ_0 | 3368 R500_ALU_RGBA_G_SWIZ_0 | 3369 R500_ALU_RGBA_B_SWIZ_0 | 3370 R500_ALU_RGBA_A_SWIZ_0)); 3371 FINISH_ACCEL(); 3372 } 3373 } else { 3374 /* 3375 * y' = y - .0625 3376 * u' = u - .5 3377 * v' = v - .5; 3378 * 3379 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3380 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3381 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3382 * 3383 * DP3 might look like the straightforward solution 3384 * but we'd need to move the texture yuv values in 3385 * the same reg for this to work. Therefore use MADs. 3386 * Brightness just adds to the off constant. 3387 * Contrast is multiplication of luminance. 3388 * Saturation and hue change the u and v coeffs. 3389 * Default values (before adjustments - depend on colorspace): 3390 * yco = 1.1643 3391 * uco = 0, -0.39173, 2.017 3392 * vco = 1.5958, -0.8129, 0 3393 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3394 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3395 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3396 * 3397 * temp = MAD(yco, yuv.yyyy, off) 3398 * temp = MAD(uco, yuv.uuuu, temp) 3399 * result = MAD(vco, yuv.vvvv, temp) 3400 */ 3401 /* TODO: don't recalc consts always */ 3402 const float Loff = -0.0627; 3403 const float Coff = -0.502; 3404 float uvcosf, uvsinf; 3405 float yco; 3406 float uco[3], vco[3], off[3]; 3407 float bright, cont, gamma; 3408 int ref = pPriv->transform_index; 3409 Bool needgamma = FALSE; 3410 3411 cont = RTFContrast(pPriv->contrast); 3412 bright = RTFBrightness(pPriv->brightness); 3413 gamma = (float)pPriv->gamma / 1000.0; 3414 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3415 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3416 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 3417 3418 yco = trans[ref].RefLuma * cont; 3419 uco[0] = -trans[ref].RefRCr * uvsinf; 3420 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3421 uco[2] = trans[ref].RefBCb * uvcosf; 3422 vco[0] = trans[ref].RefRCr * uvcosf; 3423 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3424 vco[2] = trans[ref].RefBCb * uvsinf; 3425 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3426 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3427 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3428 3429 //XXX gamma 3430 3431 if (gamma != 1.0) { 3432 needgamma = TRUE; 3433 /* note: gamma correction is out = in ^ gamma; 3434 gpu can only do LG2/EX2 therefore we transform into 3435 in ^ gamma = 2 ^ (log2(in) * gamma). 3436 Lots of scalar ops, unfortunately (better solution?) - 3437 without gamma that's 3 inst, with gamma it's 10... 3438 could use different gamma factors per channel, 3439 if that's of any use. */ 3440 } 3441 3442 if (isplanar) { 3443 BEGIN_ACCEL(56); 3444 /* 2 components: 2 for tex0 */ 3445 OUT_ACCEL_REG(R300_RS_COUNT, 3446 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3447 R300_RS_COUNT_HIRES_EN)); 3448 3449 /* R300_INST_COUNT_RS - highest RS instruction used */ 3450 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3451 3452 /* Pixel stack frame size. */ 3453 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3454 3455 /* FP length. */ 3456 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3457 R500_US_CODE_END_ADDR(5))); 3458 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3459 R500_US_CODE_RANGE_SIZE(5))); 3460 3461 /* Prepare for FP emission. */ 3462 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3463 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3464 3465 /* tex inst */ 3466 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3467 R500_INST_TEX_SEM_WAIT | 3468 R500_INST_RGB_WMASK_R | 3469 R500_INST_RGB_WMASK_G | 3470 R500_INST_RGB_WMASK_B | 3471 R500_INST_ALPHA_WMASK | 3472 R500_INST_RGB_CLAMP | 3473 R500_INST_ALPHA_CLAMP)); 3474 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3475 R500_TEX_INST_LD | 3476 R500_TEX_IGNORE_UNCOVERED)); 3477 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3478 R500_TEX_SRC_S_SWIZ_R | 3479 R500_TEX_SRC_T_SWIZ_G | 3480 R500_TEX_DST_ADDR(2) | 3481 R500_TEX_DST_R_SWIZ_R | 3482 R500_TEX_DST_G_SWIZ_G | 3483 R500_TEX_DST_B_SWIZ_B | 3484 R500_TEX_DST_A_SWIZ_A)); 3485 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3486 R500_DX_S_SWIZ_R | 3487 R500_DX_T_SWIZ_R | 3488 R500_DX_R_SWIZ_R | 3489 R500_DX_Q_SWIZ_R | 3490 R500_DY_ADDR(0) | 3491 R500_DY_S_SWIZ_R | 3492 R500_DY_T_SWIZ_R | 3493 R500_DY_R_SWIZ_R | 3494 R500_DY_Q_SWIZ_R)); 3495 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3496 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3497 3498 /* tex inst */ 3499 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3500 R500_INST_TEX_SEM_WAIT | 3501 R500_INST_RGB_WMASK_R | 3502 R500_INST_RGB_WMASK_G | 3503 R500_INST_RGB_WMASK_B | 3504 R500_INST_ALPHA_WMASK | 3505 R500_INST_RGB_CLAMP | 3506 R500_INST_ALPHA_CLAMP)); 3507 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3508 R500_TEX_INST_LD | 3509 R500_TEX_IGNORE_UNCOVERED)); 3510 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3511 R500_TEX_SRC_S_SWIZ_R | 3512 R500_TEX_SRC_T_SWIZ_G | 3513 R500_TEX_DST_ADDR(1) | 3514 R500_TEX_DST_R_SWIZ_R | 3515 R500_TEX_DST_G_SWIZ_G | 3516 R500_TEX_DST_B_SWIZ_B | 3517 R500_TEX_DST_A_SWIZ_A)); 3518 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3519 R500_DX_S_SWIZ_R | 3520 R500_DX_T_SWIZ_R | 3521 R500_DX_R_SWIZ_R | 3522 R500_DX_Q_SWIZ_R | 3523 R500_DY_ADDR(0) | 3524 R500_DY_S_SWIZ_R | 3525 R500_DY_T_SWIZ_R | 3526 R500_DY_R_SWIZ_R | 3527 R500_DY_Q_SWIZ_R)); 3528 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3529 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3530 3531 /* tex inst */ 3532 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3533 R500_INST_TEX_SEM_WAIT | 3534 R500_INST_RGB_WMASK_R | 3535 R500_INST_RGB_WMASK_G | 3536 R500_INST_RGB_WMASK_B | 3537 R500_INST_ALPHA_WMASK | 3538 R500_INST_RGB_CLAMP | 3539 R500_INST_ALPHA_CLAMP)); 3540 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3541 R500_TEX_INST_LD | 3542 R500_TEX_SEM_ACQUIRE | 3543 R500_TEX_IGNORE_UNCOVERED)); 3544 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3545 R500_TEX_SRC_S_SWIZ_R | 3546 R500_TEX_SRC_T_SWIZ_G | 3547 R500_TEX_DST_ADDR(0) | 3548 R500_TEX_DST_R_SWIZ_R | 3549 R500_TEX_DST_G_SWIZ_G | 3550 R500_TEX_DST_B_SWIZ_B | 3551 R500_TEX_DST_A_SWIZ_A)); 3552 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3553 R500_DX_S_SWIZ_R | 3554 R500_DX_T_SWIZ_R | 3555 R500_DX_R_SWIZ_R | 3556 R500_DX_Q_SWIZ_R | 3557 R500_DY_ADDR(0) | 3558 R500_DY_S_SWIZ_R | 3559 R500_DY_T_SWIZ_R | 3560 R500_DY_R_SWIZ_R | 3561 R500_DY_Q_SWIZ_R)); 3562 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3563 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3564 3565 /* ALU inst */ 3566 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 3567 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3568 R500_INST_TEX_SEM_WAIT | 3569 R500_INST_RGB_WMASK_R | 3570 R500_INST_RGB_WMASK_G | 3571 R500_INST_RGB_WMASK_B | 3572 R500_INST_ALPHA_WMASK)); 3573 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3574 R500_RGB_ADDR0_CONST | 3575 R500_RGB_ADDR1(2) | 3576 R500_RGB_ADDR2(0) | 3577 R500_RGB_ADDR2_CONST)); 3578 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3579 R500_ALPHA_ADDR0_CONST | 3580 R500_ALPHA_ADDR1(2) | 3581 R500_ALPHA_ADDR2(0) | 3582 R500_ALPHA_ADDR2_CONST)); 3583 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3584 R500_ALU_RGB_R_SWIZ_A_A | 3585 R500_ALU_RGB_G_SWIZ_A_A | 3586 R500_ALU_RGB_B_SWIZ_A_A | 3587 R500_ALU_RGB_SEL_B_SRC1 | 3588 R500_ALU_RGB_R_SWIZ_B_R | 3589 R500_ALU_RGB_B_SWIZ_B_G | 3590 R500_ALU_RGB_G_SWIZ_B_B)); 3591 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3592 R500_ALPHA_ADDRD(2) | 3593 R500_ALPHA_SWIZ_A_0 | 3594 R500_ALPHA_SWIZ_B_0)); 3595 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3596 R500_ALU_RGBA_ADDRD(2) | 3597 R500_ALU_RGBA_SEL_C_SRC0 | 3598 R500_ALU_RGBA_R_SWIZ_R | 3599 R500_ALU_RGBA_G_SWIZ_G | 3600 R500_ALU_RGBA_B_SWIZ_B | 3601 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3602 R500_ALU_RGBA_A_SWIZ_0)); 3603 3604 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 3605 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3606 R500_INST_TEX_SEM_WAIT | 3607 R500_INST_RGB_WMASK_R | 3608 R500_INST_RGB_WMASK_G | 3609 R500_INST_RGB_WMASK_B | 3610 R500_INST_ALPHA_WMASK)); 3611 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3612 R500_RGB_ADDR0_CONST | 3613 R500_RGB_ADDR1(1) | 3614 R500_RGB_ADDR2(2))); 3615 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3616 R500_ALPHA_ADDR0_CONST | 3617 R500_ALPHA_ADDR1(1) | 3618 R500_ALPHA_ADDR2(2))); 3619 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3620 R500_ALU_RGB_R_SWIZ_A_R | 3621 R500_ALU_RGB_G_SWIZ_A_G | 3622 R500_ALU_RGB_B_SWIZ_A_B | 3623 R500_ALU_RGB_SEL_B_SRC1 | 3624 R500_ALU_RGB_R_SWIZ_B_R | 3625 R500_ALU_RGB_B_SWIZ_B_G | 3626 R500_ALU_RGB_G_SWIZ_B_B)); 3627 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3628 R500_ALPHA_ADDRD(2) | 3629 R500_ALPHA_SWIZ_A_0 | 3630 R500_ALPHA_SWIZ_B_0)); 3631 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3632 R500_ALU_RGBA_ADDRD(2) | 3633 R500_ALU_RGBA_SEL_C_SRC2 | 3634 R500_ALU_RGBA_R_SWIZ_R | 3635 R500_ALU_RGBA_G_SWIZ_G | 3636 R500_ALU_RGBA_B_SWIZ_B | 3637 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3638 R500_ALU_RGBA_A_SWIZ_0)); 3639 3640 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 3641 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3642 R500_INST_TEX_SEM_WAIT | 3643 R500_INST_LAST | 3644 R500_INST_RGB_OMASK_R | 3645 R500_INST_RGB_OMASK_G | 3646 R500_INST_RGB_OMASK_B | 3647 R500_INST_ALPHA_OMASK | 3648 R500_INST_RGB_CLAMP | 3649 R500_INST_ALPHA_CLAMP)); 3650 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3651 R500_RGB_ADDR0_CONST | 3652 R500_RGB_ADDR1(0) | 3653 R500_RGB_ADDR2(2))); 3654 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3655 R500_ALPHA_ADDR0_CONST | 3656 R500_ALPHA_ADDR1(0) | 3657 R500_ALPHA_ADDR2(2))); 3658 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3659 R500_ALU_RGB_R_SWIZ_A_R | 3660 R500_ALU_RGB_G_SWIZ_A_G | 3661 R500_ALU_RGB_B_SWIZ_A_B | 3662 R500_ALU_RGB_SEL_B_SRC1 | 3663 R500_ALU_RGB_R_SWIZ_B_R | 3664 R500_ALU_RGB_B_SWIZ_B_G | 3665 R500_ALU_RGB_G_SWIZ_B_B)); 3666 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3667 R500_ALPHA_ADDRD(0) | 3668 R500_ALPHA_SWIZ_A_0 | 3669 R500_ALPHA_SWIZ_B_0)); 3670 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3671 R500_ALU_RGBA_ADDRD(0) | 3672 R500_ALU_RGBA_SEL_C_SRC2 | 3673 R500_ALU_RGBA_R_SWIZ_R | 3674 R500_ALU_RGBA_G_SWIZ_G | 3675 R500_ALU_RGBA_B_SWIZ_B | 3676 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3677 R500_ALU_RGBA_A_SWIZ_1)); 3678 3679 } else { 3680 BEGIN_ACCEL(44); 3681 /* 2 components: 2 for tex0/1/2 */ 3682 OUT_ACCEL_REG(R300_RS_COUNT, 3683 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3684 R300_RS_COUNT_HIRES_EN)); 3685 3686 /* R300_INST_COUNT_RS - highest RS instruction used */ 3687 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3688 3689 /* Pixel stack frame size. */ 3690 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3691 3692 /* FP length. */ 3693 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3694 R500_US_CODE_END_ADDR(3))); 3695 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3696 R500_US_CODE_RANGE_SIZE(3))); 3697 3698 /* Prepare for FP emission. */ 3699 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3700 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3701 3702 /* tex inst */ 3703 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3704 R500_INST_TEX_SEM_WAIT | 3705 R500_INST_RGB_WMASK_R | 3706 R500_INST_RGB_WMASK_G | 3707 R500_INST_RGB_WMASK_B | 3708 R500_INST_ALPHA_WMASK | 3709 R500_INST_RGB_CLAMP | 3710 R500_INST_ALPHA_CLAMP)); 3711 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3712 R500_TEX_INST_LD | 3713 R500_TEX_SEM_ACQUIRE | 3714 R500_TEX_IGNORE_UNCOVERED)); 3715 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3716 R500_TEX_SRC_S_SWIZ_R | 3717 R500_TEX_SRC_T_SWIZ_G | 3718 R500_TEX_DST_ADDR(0) | 3719 R500_TEX_DST_R_SWIZ_R | 3720 R500_TEX_DST_G_SWIZ_G | 3721 R500_TEX_DST_B_SWIZ_B | 3722 R500_TEX_DST_A_SWIZ_A)); 3723 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3724 R500_DX_S_SWIZ_R | 3725 R500_DX_T_SWIZ_R | 3726 R500_DX_R_SWIZ_R | 3727 R500_DX_Q_SWIZ_R | 3728 R500_DY_ADDR(0) | 3729 R500_DY_S_SWIZ_R | 3730 R500_DY_T_SWIZ_R | 3731 R500_DY_R_SWIZ_R | 3732 R500_DY_Q_SWIZ_R)); 3733 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3734 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3735 3736 /* ALU inst */ 3737 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 3738 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3739 R500_INST_TEX_SEM_WAIT | 3740 R500_INST_RGB_WMASK_R | 3741 R500_INST_RGB_WMASK_G | 3742 R500_INST_RGB_WMASK_B | 3743 R500_INST_ALPHA_WMASK)); 3744 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3745 R500_RGB_ADDR0_CONST | 3746 R500_RGB_ADDR1(0) | 3747 R500_RGB_ADDR2(0) | 3748 R500_RGB_ADDR2_CONST)); 3749 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3750 R500_ALPHA_ADDR0_CONST | 3751 R500_ALPHA_ADDR1(0) | 3752 R500_ALPHA_ADDR2(0) | 3753 R500_ALPHA_ADDR2_CONST)); 3754 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3755 R500_ALU_RGB_R_SWIZ_A_A | 3756 R500_ALU_RGB_G_SWIZ_A_A | 3757 R500_ALU_RGB_B_SWIZ_A_A | 3758 R500_ALU_RGB_SEL_B_SRC1 | 3759 R500_ALU_RGB_R_SWIZ_B_G | 3760 R500_ALU_RGB_B_SWIZ_B_G | 3761 R500_ALU_RGB_G_SWIZ_B_G)); 3762 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3763 R500_ALPHA_ADDRD(1) | 3764 R500_ALPHA_SWIZ_A_0 | 3765 R500_ALPHA_SWIZ_B_0)); 3766 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3767 R500_ALU_RGBA_ADDRD(1) | 3768 R500_ALU_RGBA_SEL_C_SRC0 | 3769 R500_ALU_RGBA_R_SWIZ_R | 3770 R500_ALU_RGBA_G_SWIZ_G | 3771 R500_ALU_RGBA_B_SWIZ_B | 3772 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3773 R500_ALU_RGBA_A_SWIZ_0)); 3774 3775 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 3776 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3777 R500_INST_TEX_SEM_WAIT | 3778 R500_INST_RGB_WMASK_R | 3779 R500_INST_RGB_WMASK_G | 3780 R500_INST_RGB_WMASK_B | 3781 R500_INST_ALPHA_WMASK)); 3782 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3783 R500_RGB_ADDR0_CONST | 3784 R500_RGB_ADDR1(0) | 3785 R500_RGB_ADDR2(1))); 3786 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3787 R500_ALPHA_ADDR0_CONST | 3788 R500_ALPHA_ADDR1(0) | 3789 R500_ALPHA_ADDR2(1))); 3790 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3791 R500_ALU_RGB_R_SWIZ_A_R | 3792 R500_ALU_RGB_G_SWIZ_A_G | 3793 R500_ALU_RGB_B_SWIZ_A_B | 3794 R500_ALU_RGB_SEL_B_SRC1 | 3795 R500_ALU_RGB_R_SWIZ_B_B | 3796 R500_ALU_RGB_B_SWIZ_B_B | 3797 R500_ALU_RGB_G_SWIZ_B_B)); 3798 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3799 R500_ALPHA_ADDRD(1) | 3800 R500_ALPHA_SWIZ_A_0 | 3801 R500_ALPHA_SWIZ_B_0)); 3802 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3803 R500_ALU_RGBA_ADDRD(1) | 3804 R500_ALU_RGBA_SEL_C_SRC2 | 3805 R500_ALU_RGBA_R_SWIZ_R | 3806 R500_ALU_RGBA_G_SWIZ_G | 3807 R500_ALU_RGBA_B_SWIZ_B | 3808 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3809 R500_ALU_RGBA_A_SWIZ_0)); 3810 3811 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 3812 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3813 R500_INST_TEX_SEM_WAIT | 3814 R500_INST_LAST | 3815 R500_INST_RGB_OMASK_R | 3816 R500_INST_RGB_OMASK_G | 3817 R500_INST_RGB_OMASK_B | 3818 R500_INST_ALPHA_OMASK | 3819 R500_INST_RGB_CLAMP | 3820 R500_INST_ALPHA_CLAMP)); 3821 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3822 R500_RGB_ADDR0_CONST | 3823 R500_RGB_ADDR1(0) | 3824 R500_RGB_ADDR2(1))); 3825 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3826 R500_ALPHA_ADDR0_CONST | 3827 R500_ALPHA_ADDR1(0) | 3828 R500_ALPHA_ADDR2(1))); 3829 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3830 R500_ALU_RGB_R_SWIZ_A_R | 3831 R500_ALU_RGB_G_SWIZ_A_G | 3832 R500_ALU_RGB_B_SWIZ_A_B | 3833 R500_ALU_RGB_SEL_B_SRC1 | 3834 R500_ALU_RGB_R_SWIZ_B_R | 3835 R500_ALU_RGB_B_SWIZ_B_R | 3836 R500_ALU_RGB_G_SWIZ_B_R)); 3837 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3838 R500_ALPHA_ADDRD(1) | 3839 R500_ALPHA_SWIZ_A_0 | 3840 R500_ALPHA_SWIZ_B_0)); 3841 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3842 R500_ALU_RGBA_ADDRD(1) | 3843 R500_ALU_RGBA_SEL_C_SRC2 | 3844 R500_ALU_RGBA_R_SWIZ_R | 3845 R500_ALU_RGBA_G_SWIZ_G | 3846 R500_ALU_RGBA_B_SWIZ_B | 3847 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3848 R500_ALU_RGBA_A_SWIZ_1)); 3849 } 3850 3851 /* Shader constants. */ 3852 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3853 3854 /* constant 0: off, yco */ 3855 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 3856 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 3857 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 3858 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 3859 /* constant 1: uco */ 3860 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 3861 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 3862 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 3863 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 3864 /* constant 2: vco */ 3865 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 3866 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 3867 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 3868 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 3869 3870 FINISH_ACCEL(); 3871 } 3872 3873 BEGIN_ACCEL_RELOC(6, 2); 3874 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 3875 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 3876 3877 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 3878 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 3879 3880 /* no need to enable blending */ 3881 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 3882 3883 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); 3884 FINISH_ACCEL(); 3885 3886 if (pPriv->vsync) { 3887 xf86CrtcPtr crtc; 3888 if (pPriv->desired_crtc) 3889 crtc = pPriv->desired_crtc; 3890 else 3891 crtc = radeon_pick_best_crtc(pScrn, 3892 pPriv->drw_x, 3893 pPriv->drw_x + pPriv->dst_w, 3894 pPriv->drw_y, 3895 pPriv->drw_y + pPriv->dst_h); 3896 if (crtc) 3897 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 3898 crtc, 3899 pPriv->drw_y - crtc->y, 3900 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 3901 } 3902 /* 3903 * Rendering of the actual polygon is done in two different 3904 * ways depending on chip generation: 3905 * 3906 * < R300: 3907 * 3908 * These chips can render a rectangle in one pass, so 3909 * handling is pretty straight-forward. 3910 * 3911 * >= R300: 3912 * 3913 * These chips can accept a quad, but will render it as 3914 * two triangles which results in a diagonal tear. Instead 3915 * We render a single, large triangle and use the scissor 3916 * functionality to restrict it to the desired rectangle. 3917 * Due to guardband limits on r3xx/r4xx, we can only use 3918 * the single triangle up to 2880 pixels; above that we 3919 * render as a quad. 3920 */ 3921 3922 while (nBox--) { 3923 int srcX, srcY, srcw, srch; 3924 int dstX, dstY, dstw, dsth; 3925 dstX = pBox->x1 + dstxoff; 3926 dstY = pBox->y1 + dstyoff; 3927 dstw = pBox->x2 - pBox->x1; 3928 dsth = pBox->y2 - pBox->y1; 3929 3930 srcX = pPriv->src_x; 3931 srcX += ((pBox->x1 - pPriv->drw_x) * 3932 pPriv->src_w) / pPriv->dst_w; 3933 srcY = pPriv->src_y; 3934 srcY += ((pBox->y1 - pPriv->drw_y) * 3935 pPriv->src_h) / pPriv->dst_h; 3936 3937 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 3938 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 3939 3940 BEGIN_ACCEL(2); 3941 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 3942 ((dstY) << R300_SCISSOR_Y_SHIFT))); 3943 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 3944 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 3945 FINISH_ACCEL(); 3946 3947#ifdef ACCEL_CP 3948 BEGIN_RING(3 * vtx_count + 4); 3949 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 3950 3 * vtx_count)); 3951 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 3952 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 3953 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 3954#else /* ACCEL_CP */ 3955 BEGIN_ACCEL(2 + vtx_count * 3); 3956 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 3957 RADEON_VF_PRIM_WALK_DATA | 3958 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 3959#endif 3960 if (pPriv->bicubic_enabled) { 3961 VTX_OUT_6((float)dstX, (float)dstY, 3962 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 3963 (float)srcX + 0.5, (float)srcY + 0.5); 3964 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 3965 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 3966 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 3967 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 3968 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3969 (float)srcY / pPriv->h, 3970 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 3971 (float)srcY + 0.5); 3972 } else { 3973 /* 3974 * Render a big, scissored triangle. This means 3975 * increasing the triangle size and adjusting 3976 * texture coordinates. 3977 */ 3978 VTX_OUT_4((float)dstX, (float)dstY, 3979 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 3980 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 3981 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 3982 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 3983 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3984 (float)srcY / pPriv->h); 3985 } 3986 3987 /* flushing is pipelined, free/finish is not */ 3988 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 3989 3990#ifdef ACCEL_CP 3991 ADVANCE_RING(); 3992#else 3993 FINISH_ACCEL(); 3994#endif /* !ACCEL_CP */ 3995 3996 pBox++; 3997 } 3998 3999 BEGIN_ACCEL(3); 4000 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 4001 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 4002 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 4003 FINISH_ACCEL(); 4004 4005 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 4006} 4007 4008#undef VTX_OUT_4 4009#undef VTX_OUT_6 4010#undef FUNC_NAME 4011