radeon_textured_videofuncs.c revision 921a55d8
1/* 2 * Copyright 2008 Alex Deucher 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * 24 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 25 * 26 */ 27 28#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 29#error Cannot define both MMIO and CP acceleration! 30#endif 31 32#if !defined(UNIXCPP) || defined(ANSICPP) 33#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 34#else 35#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 36#endif 37 38#ifdef ACCEL_MMIO 39#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 40#else 41#ifdef ACCEL_CP 42#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 43#else 44#error No accel type defined! 
45#endif 46#endif 47 48#ifdef ACCEL_CP 49 50#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 51do { \ 52 OUT_RING_F(_dstX); \ 53 OUT_RING_F(_dstY); \ 54 OUT_RING_F(_srcX); \ 55 OUT_RING_F(_srcY); \ 56 OUT_RING_F(_maskX); \ 57 OUT_RING_F(_maskY); \ 58} while (0) 59 60#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 61do { \ 62 OUT_RING_F(_dstX); \ 63 OUT_RING_F(_dstY); \ 64 OUT_RING_F(_srcX); \ 65 OUT_RING_F(_srcY); \ 66} while (0) 67 68#else /* ACCEL_CP */ 69 70#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 71do { \ 72 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 73 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 74 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 75 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 76 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX); \ 77 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ 78} while (0) 79 80#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 81do { \ 82 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 83 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 84 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 85 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 86} while (0) 87 88#endif /* !ACCEL_CP */ 89 90static Bool 91FUNC_NAME(RADEONPrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 92{ 93 RADEONInfoPtr info = RADEONPTR(pScrn); 94 PixmapPtr pPixmap = pPriv->pPixmap; 95 struct radeon_exa_pixmap_priv *driver_priv; 96 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 97 uint32_t txformat, txsize, txpitch, txoffset; 98 uint32_t dst_pitch, dst_format; 99 uint32_t colorpitch; 100 int pixel_shift; 101 int scissor_w = MIN(pPixmap->drawable.width, 2047); 102 int scissor_h = MIN(pPixmap->drawable.height, 2047); 103 ACCEL_PREAMBLE(); 104 105#ifdef XF86DRM_MODE 106 if (info->cs) { 107 int ret; 108 109 radeon_cs_space_reset_bos(info->cs); 110 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 111 112 if (pPriv->bicubic_enabled) 113 
radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 114 115 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 116 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 117 118 ret = radeon_cs_space_check(info->cs); 119 if (ret) { 120 ErrorF("Not enough RAM to hw accel xv operation\n"); 121 return FALSE; 122 } 123 } 124#endif 125 126 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 127 128 129#ifdef USE_EXA 130 if (info->useEXA) { 131 dst_pitch = exaGetPixmapPitch(pPixmap); 132 } else 133#endif 134 { 135 dst_pitch = pPixmap->devKind; 136 } 137 138#ifdef USE_EXA 139 if (info->useEXA) { 140 RADEON_SWITCH_TO_3D(); 141 } else 142#endif 143 { 144 BEGIN_ACCEL(2); 145 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 146 /* We must wait for 3d to idle, in case source was just written as a dest. */ 147 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 148 RADEON_WAIT_HOST_IDLECLEAN | 149 RADEON_WAIT_2D_IDLECLEAN | 150 RADEON_WAIT_3D_IDLECLEAN | 151 RADEON_WAIT_DMA_GUI_IDLE); 152 FINISH_ACCEL(); 153 154 if (!info->accel_state->XInited3D) 155 RADEONInit3DEngine(pScrn); 156 } 157 158 /* Same for R100/R200 */ 159 switch (pPixmap->drawable.bitsPerPixel) { 160 case 16: 161 if (pPixmap->drawable.depth == 15) 162 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 163 else 164 dst_format = RADEON_COLOR_FORMAT_RGB565; 165 break; 166 case 32: 167 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 168 break; 169 default: 170 return FALSE; 171 } 172 173 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 174 pPriv->is_planar = TRUE; 175 txformat = RADEON_TXFORMAT_Y8; 176 } else { 177 pPriv->is_planar = FALSE; 178 if (pPriv->id == FOURCC_UYVY) 179 txformat = RADEON_TXFORMAT_YVYU422; 180 else 181 txformat = RADEON_TXFORMAT_VYUY422; 182 } 183 184 txformat |= RADEON_TXFORMAT_NON_POWER2; 185 186 colorpitch = dst_pitch >> pixel_shift; 187 188 if (RADEONTilingEnabled(pScrn, pPixmap)) 189 colorpitch |= 
RADEON_COLOR_TILE_ENABLE; 190 191 txoffset = info->cs ? 0 : pPriv->src_offset; 192 193 BEGIN_ACCEL_RELOC(4,2); 194 195 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 196 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 197 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 198 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 199 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 200 201 FINISH_ACCEL(); 202 203 if (pPriv->is_planar) { 204 /* need 2 texcoord sets (even though they are identical) due 205 to denormalization! hw apparently can't premultiply 206 same coord set by different texture size */ 207 pPriv->vtx_count = 6; 208 209 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 210 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 211 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 212 txpitch -= 32; 213 214 BEGIN_ACCEL_RELOC(23, 3); 215 216 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 217 RADEON_SE_VTX_FMT_ST0 | 218 RADEON_SE_VTX_FMT_ST1)); 219 220 OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 221 RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 222 RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 223 RADEON_PLANAR_YUV_ENABLE)); 224 225 /* Y */ 226 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 227 RADEON_MAG_FILTER_LINEAR | 228 RADEON_MIN_FILTER_LINEAR | 229 RADEON_CLAMP_S_CLAMP_LAST | 230 RADEON_CLAMP_T_CLAMP_LAST | 231 RADEON_YUV_TO_RGB); 232 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 233 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 234 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 235 RADEON_COLOR_ARG_A_ZERO | 236 RADEON_COLOR_ARG_B_ZERO | 237 RADEON_COLOR_ARG_C_T0_COLOR | 238 RADEON_BLEND_CTL_ADD | 239 RADEON_CLAMP_TX); 240 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 241 RADEON_ALPHA_ARG_A_ZERO | 242 RADEON_ALPHA_ARG_B_ZERO | 243 RADEON_ALPHA_ARG_C_T0_ALPHA | 244 RADEON_BLEND_CTL_ADD | 245 RADEON_CLAMP_TX); 246 247 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 248 
(pPriv->w - 1) | 249 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 250 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 251 pPriv->src_pitch - 32); 252 253 /* U */ 254 OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, 255 RADEON_MAG_FILTER_LINEAR | 256 RADEON_MIN_FILTER_LINEAR | 257 RADEON_CLAMP_S_CLAMP_LAST | 258 RADEON_CLAMP_T_CLAMP_LAST); 259 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 260 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 261 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1, 262 RADEON_COLOR_ARG_A_ZERO | 263 RADEON_COLOR_ARG_B_ZERO | 264 RADEON_COLOR_ARG_C_T0_COLOR | 265 RADEON_BLEND_CTL_ADD | 266 RADEON_CLAMP_TX); 267 OUT_ACCEL_REG(RADEON_PP_TXABLEND_1, 268 RADEON_ALPHA_ARG_A_ZERO | 269 RADEON_ALPHA_ARG_B_ZERO | 270 RADEON_ALPHA_ARG_C_T0_ALPHA | 271 RADEON_BLEND_CTL_ADD | 272 RADEON_CLAMP_TX); 273 274 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize); 275 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch); 276 277 /* V */ 278 OUT_ACCEL_REG(RADEON_PP_TXFILTER_2, 279 RADEON_MAG_FILTER_LINEAR | 280 RADEON_MIN_FILTER_LINEAR | 281 RADEON_CLAMP_S_CLAMP_LAST | 282 RADEON_CLAMP_T_CLAMP_LAST); 283 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 284 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 285 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2, 286 RADEON_COLOR_ARG_A_ZERO | 287 RADEON_COLOR_ARG_B_ZERO | 288 RADEON_COLOR_ARG_C_T0_COLOR | 289 RADEON_BLEND_CTL_ADD | 290 RADEON_CLAMP_TX); 291 OUT_ACCEL_REG(RADEON_PP_TXABLEND_2, 292 RADEON_ALPHA_ARG_A_ZERO | 293 RADEON_ALPHA_ARG_B_ZERO | 294 RADEON_ALPHA_ARG_C_T0_ALPHA | 295 RADEON_BLEND_CTL_ADD | 296 RADEON_CLAMP_TX); 297 298 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize); 299 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch); 300 FINISH_ACCEL(); 301 } else { 302 pPriv->vtx_count = 4; 303 BEGIN_ACCEL_RELOC(9, 1); 304 305 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 306 RADEON_SE_VTX_FMT_ST0)); 307 308 OUT_ACCEL_REG(RADEON_PP_CNTL, 
RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 309 310 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 311 RADEON_MAG_FILTER_LINEAR | 312 RADEON_MIN_FILTER_LINEAR | 313 RADEON_CLAMP_S_CLAMP_LAST | 314 RADEON_CLAMP_T_CLAMP_LAST | 315 RADEON_YUV_TO_RGB); 316 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 317 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 318 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 319 RADEON_COLOR_ARG_A_ZERO | 320 RADEON_COLOR_ARG_B_ZERO | 321 RADEON_COLOR_ARG_C_T0_COLOR | 322 RADEON_BLEND_CTL_ADD | 323 RADEON_CLAMP_TX); 324 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 325 RADEON_ALPHA_ARG_A_ZERO | 326 RADEON_ALPHA_ARG_B_ZERO | 327 RADEON_ALPHA_ARG_C_T0_ALPHA | 328 RADEON_BLEND_CTL_ADD | 329 RADEON_CLAMP_TX); 330 331 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 332 (pPriv->w - 1) | 333 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 334 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 335 pPriv->src_pitch - 32); 336 FINISH_ACCEL(); 337 } 338 339 BEGIN_ACCEL(2); 340 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 341 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 342 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 343 FINISH_ACCEL(); 344 345 if (pPriv->vsync) { 346 xf86CrtcPtr crtc; 347 if (pPriv->desired_crtc) 348 crtc = pPriv->desired_crtc; 349 else 350 crtc = radeon_pick_best_crtc(pScrn, 351 pPriv->drw_x, 352 pPriv->drw_x + pPriv->dst_w, 353 pPriv->drw_y, 354 pPriv->drw_y + pPriv->dst_h); 355 if (crtc) 356 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 357 crtc, 358 pPriv->drw_y - crtc->y, 359 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 360 } 361 362 return TRUE; 363} 364 365static void 366FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 367{ 368 RADEONInfoPtr info = RADEONPTR(pScrn); 369 PixmapPtr pPixmap = pPriv->pPixmap; 370 int dstxoff, dstyoff; 371 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 372 int nBox = REGION_NUM_RECTS(&pPriv->clip); 373 ACCEL_PREAMBLE(); 374 375#ifdef COMPOSITE 376 dstxoff = 
-pPixmap->screen_x + pPixmap->drawable.x; 377 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 378#else 379 dstxoff = 0; 380 dstyoff = 0; 381#endif 382 383 if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 384 return; 385 386 /* 387 * Rendering of the actual polygon is done in two different 388 * ways depending on chip generation: 389 * 390 * < R300: 391 * 392 * These chips can render a rectangle in one pass, so 393 * handling is pretty straight-forward. 394 * 395 * >= R300: 396 * 397 * These chips can accept a quad, but will render it as 398 * two triangles which results in a diagonal tear. Instead 399 * We render a single, large triangle and use the scissor 400 * functionality to restrict it to the desired rectangle. 401 * Due to guardband limits on r3xx/r4xx, we can only use 402 * the single triangle up to 2560/4021 pixels; above that we 403 * render as a quad. 404 */ 405#ifdef ACCEL_CP 406 while (nBox) { 407 int draw_size = 3 * pPriv->vtx_count + 5; 408 int loop_boxes; 409 410 if (draw_size > radeon_cs_space_remaining(pScrn)) { 411 if (info->cs) 412 radeon_cs_flush_indirect(pScrn); 413 else 414 RADEONCPFlushIndirect(pScrn, 1); 415 if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 416 return; 417 } 418 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 419 nBox -= loop_boxes; 420 421 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 422 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 423 loop_boxes * 3 * pPriv->vtx_count + 1)); 424 if (pPriv->is_planar) 425 OUT_RING(RADEON_CP_VC_FRMT_XY | 426 RADEON_CP_VC_FRMT_ST0 | 427 RADEON_CP_VC_FRMT_ST1); 428 else 429 OUT_RING(RADEON_CP_VC_FRMT_XY | 430 RADEON_CP_VC_FRMT_ST0); 431 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 432 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 433 RADEON_CP_VC_CNTL_MAOS_ENABLE | 434 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 435 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 436 437 while (loop_boxes--) { 438 int srcX, srcY, srcw, srch; 439 int dstX, dstY, 
dstw, dsth; 440 dstX = pBox->x1 + dstxoff; 441 dstY = pBox->y1 + dstyoff; 442 dstw = pBox->x2 - pBox->x1; 443 dsth = pBox->y2 - pBox->y1; 444 445 srcX = pPriv->src_x; 446 srcX += ((pBox->x1 - pPriv->drw_x) * 447 pPriv->src_w) / pPriv->dst_w; 448 srcY = pPriv->src_y; 449 srcY += ((pBox->y1 - pPriv->drw_y) * 450 pPriv->src_h) / pPriv->dst_h; 451 452 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 453 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 454 455 456 if (pPriv->is_planar) { 457 /* 458 * Just render a rect (using three coords). 459 */ 460 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 461 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 462 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 463 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 464 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 465 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 466 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 467 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 468 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 469 } else { 470 /* 471 * Just render a rect (using three coords). 
472 */ 473 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 474 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 475 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 476 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 477 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 478 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 479 } 480 481 pBox++; 482 } 483 484 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 485 ADVANCE_RING(); 486 } 487#else /* ACCEL_CP */ 488 BEGIN_ACCEL(nBox * pPriv->vtx_count * 3 + 2); 489 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 490 RADEON_VF_PRIM_WALK_DATA | 491 RADEON_VF_RADEON_MODE | 492 ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); 493 while (nBox--) { 494 int srcX, srcY, srcw, srch; 495 int dstX, dstY, dstw, dsth; 496 dstX = pBox->x1 + dstxoff; 497 dstY = pBox->y1 + dstyoff; 498 dstw = pBox->x2 - pBox->x1; 499 dsth = pBox->y2 - pBox->y1; 500 501 srcX = pPriv->src_x; 502 srcX += ((pBox->x1 - pPriv->drw_x) * 503 pPriv->src_w) / pPriv->dst_w; 504 srcY = pPriv->src_y; 505 srcY += ((pBox->y1 - pPriv->drw_y) * 506 pPriv->src_h) / pPriv->dst_h; 507 508 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 509 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 510 511 512 if (pPriv->is_planar) { 513 /* 514 * Just render a rect (using three coords). 515 */ 516 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 517 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 518 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 519 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 520 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 521 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 522 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 523 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 524 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 525 } else { 526 /* 527 * Just render a rect (using three coords). 
528 */ 529 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 530 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 531 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 532 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 533 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 534 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 535 } 536 537 pBox++; 538 } 539 540 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 541 FINISH_ACCEL(); 542#endif /* !ACCEL_CP */ 543 544 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 545} 546 547static Bool 548FUNC_NAME(R200PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 549{ 550 RADEONInfoPtr info = RADEONPTR(pScrn); 551 PixmapPtr pPixmap = pPriv->pPixmap; 552 struct radeon_exa_pixmap_priv *driver_priv; 553 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 554 uint32_t txformat; 555 uint32_t txfilter, txsize, txpitch, txoffset; 556 uint32_t dst_pitch, dst_format; 557 uint32_t colorpitch; 558 int pixel_shift; 559 int scissor_w = MIN(pPixmap->drawable.width, 2047); 560 int scissor_h = MIN(pPixmap->drawable.height, 2047); 561 /* note: in contrast to r300, use input biasing on uv components */ 562 const float Loff = -0.0627; 563 float uvcosf, uvsinf; 564 float yco, yoff; 565 float uco[3], vco[3]; 566 float bright, cont, sat; 567 int ref = pPriv->transform_index; 568 float ucscale = 0.25, vcscale = 0.25; 569 Bool needux8 = FALSE, needvx8 = FALSE; 570 ACCEL_PREAMBLE(); 571 572#ifdef XF86DRM_MODE 573 if (info->cs) { 574 int ret; 575 576 radeon_cs_space_reset_bos(info->cs); 577 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 578 579 if (pPriv->bicubic_enabled) 580 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 581 582 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 583 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, 
RADEON_GEM_DOMAIN_VRAM); 584 585 ret = radeon_cs_space_check(info->cs); 586 if (ret) { 587 ErrorF("Not enough RAM to hw accel xv operation\n"); 588 return FALSE; 589 } 590 } 591#endif 592 593 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 594 595#ifdef USE_EXA 596 if (info->useEXA) { 597 dst_pitch = exaGetPixmapPitch(pPixmap); 598 } else 599#endif 600 { 601 dst_pitch = pPixmap->devKind; 602 } 603 604#ifdef USE_EXA 605 if (info->useEXA) { 606 RADEON_SWITCH_TO_3D(); 607 } else 608#endif 609 { 610 BEGIN_ACCEL(2); 611 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 612 /* We must wait for 3d to idle, in case source was just written as a dest. */ 613 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 614 RADEON_WAIT_HOST_IDLECLEAN | 615 RADEON_WAIT_2D_IDLECLEAN | 616 RADEON_WAIT_3D_IDLECLEAN | 617 RADEON_WAIT_DMA_GUI_IDLE); 618 FINISH_ACCEL(); 619 620 if (!info->accel_state->XInited3D) 621 RADEONInit3DEngine(pScrn); 622 } 623 624 /* Same for R100/R200 */ 625 switch (pPixmap->drawable.bitsPerPixel) { 626 case 16: 627 if (pPixmap->drawable.depth == 15) 628 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 629 else 630 dst_format = RADEON_COLOR_FORMAT_RGB565; 631 break; 632 case 32: 633 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 634 break; 635 default: 636 return FALSE; 637 } 638 639 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 640 pPriv->is_planar = TRUE; 641 txformat = RADEON_TXFORMAT_I8; 642 } else { 643 pPriv->is_planar = FALSE; 644 if (pPriv->id == FOURCC_UYVY) 645 txformat = RADEON_TXFORMAT_YVYU422; 646 else 647 txformat = RADEON_TXFORMAT_VYUY422; 648 } 649 650 txformat |= RADEON_TXFORMAT_NON_POWER2; 651 652 colorpitch = dst_pitch >> pixel_shift; 653 654 if (RADEONTilingEnabled(pScrn, pPixmap)) 655 colorpitch |= RADEON_COLOR_TILE_ENABLE; 656 657 BEGIN_ACCEL_RELOC(4,2); 658 659 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 660 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 661 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 662 663 
OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 664 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 665 666 FINISH_ACCEL(); 667 668 txfilter = R200_MAG_FILTER_LINEAR | 669 R200_MIN_FILTER_LINEAR | 670 R200_CLAMP_S_CLAMP_LAST | 671 R200_CLAMP_T_CLAMP_LAST; 672 673 /* contrast can cause constant overflow, clamp */ 674 cont = RTFContrast(pPriv->contrast); 675 if (cont * trans[ref].RefLuma > 2.0) 676 cont = 2.0 / trans[ref].RefLuma; 677 /* brightness is only from -0.5 to 0.5 should be safe */ 678 bright = RTFBrightness(pPriv->brightness); 679 /* saturation can also cause overflow, clamp */ 680 sat = RTFSaturation(pPriv->saturation); 681 if (sat * trans[ref].RefBCb > 4.0) 682 sat = 4.0 / trans[ref].RefBCb; 683 uvcosf = sat * cos(RTFHue(pPriv->hue)); 684 uvsinf = sat * sin(RTFHue(pPriv->hue)); 685 686 yco = trans[ref].RefLuma * cont; 687 uco[0] = -trans[ref].RefRCr * uvsinf; 688 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 689 uco[2] = trans[ref].RefBCb * uvcosf; 690 vco[0] = trans[ref].RefRCr * uvcosf; 691 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 692 vco[2] = trans[ref].RefBCb * uvsinf; 693 yoff = Loff * yco + bright; 694 695 if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 696 needux8 = TRUE; 697 ucscale = 0.125; 698 } 699 if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 700 needvx8 = TRUE; 701 vcscale = 0.125; 702 } 703 704 txoffset = info->cs ? 0 : pPriv->src_offset; 705 706 if (pPriv->is_planar) { 707 /* need 2 texcoord sets (even though they are identical) due 708 to denormalization! 
hw apparently can't premultiply 709 same coord set by different texture size */ 710 pPriv->vtx_count = 6; 711 712 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 713 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 714 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 715 txpitch -= 32; 716 717 BEGIN_ACCEL_RELOC(36, 3); 718 719 OUT_ACCEL_REG(RADEON_PP_CNTL, 720 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 721 RADEON_TEX_BLEND_0_ENABLE | 722 RADEON_TEX_BLEND_1_ENABLE | 723 RADEON_TEX_BLEND_2_ENABLE); 724 725 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 726 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 727 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 728 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 729 730 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 731 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 732 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 733 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 734 (pPriv->w - 1) | 735 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 736 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 737 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 738 739 OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); 740 OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 741 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); 742 OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize); 743 OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); 744 OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 745 746 OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); 747 OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 748 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); 749 OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize); 750 OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); 751 OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 752 753 /* similar to r300 code. Note the big problem is that hardware constants 754 * are 8 bits only, representing 0.0-1.0. 
We can get that up (using bias 755 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 756 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 757 * the constants not. To get larger range can use output scale, but for 758 * that 2.018 value we need a total scale by 8, which means the constants 759 * really have no accuracy whatsoever (5 fractional bits only). 760 * The only direct way to get high precision "constants" into the fragment 761 * pipe I know of is to use the texcoord interpolator (not color, this one 762 * is 8 bit only too), which seems a bit expensive. We're lucky though it 763 * seems the values we need seem to fit better than worst case (get about 764 * 6 fractional bits for this instead of 5, at least when not correcting for 765 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 766 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 767 * even with non-default saturation/hue/contrast/brightness adjustments, 768 * it gets a little crazy and ultimately precision might still be lacking. 769 * 770 * A higher precision (8 fractional bits) version might just put uco into 771 * a texcoord, and calculate a new vcoconst in the shader, like so: 772 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 773 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 774 * vcocalc = ADD temp, bias/scale(cohelper), vco 775 * would in total use 4 tex units, 4 instructions which seems fairly 776 * balanced for this architecture (instead of 3 + 3 for the solution here) 777 * 778 * temp = MAD(yco, yuv.yyyy, yoff) 779 * temp = MAD(uco, yuv.uuuu, temp) 780 * result = MAD(vco, yuv.vvvv, temp) 781 * 782 * note first mad produces actually scalar, hence we transform 783 * it into a dp2a to get 8 bit precision of yco instead of 7 - 784 * That's assuming hw correctly expands consts to internal precision. 
785 * (y * 1 + y * (yco - 1) + yoff) 786 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 787 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 788 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 789 * 790 * vco, uco need bias (and hence scale too) 791 * 792 */ 793 794 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 795 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 796 R200_TXC_ARG_A_TFACTOR_COLOR | 797 R200_TXC_ARG_B_R0_COLOR | 798 R200_TXC_ARG_C_TFACTOR_COLOR | 799 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 800 R200_TXC_OP_DOT2_ADD); 801 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 802 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 803 R200_TXC_SCALE_INV2 | 804 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 805 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 806 R200_TXA_ARG_A_ZERO | 807 R200_TXA_ARG_B_ZERO | 808 R200_TXA_ARG_C_ZERO | 809 R200_TXA_OP_MADD); 810 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 811 R200_TXA_OUTPUT_REG_NONE); 812 813 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 814 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 815 R200_TXC_ARG_A_TFACTOR_COLOR | 816 R200_TXC_BIAS_ARG_A | 817 R200_TXC_SCALE_ARG_A | 818 R200_TXC_ARG_B_R1_COLOR | 819 R200_TXC_BIAS_ARG_B | 820 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 821 R200_TXC_ARG_C_R0_COLOR | 822 R200_TXC_OP_MADD); 823 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 824 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 825 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 826 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 827 R200_TXA_ARG_A_ZERO | 828 R200_TXA_ARG_B_ZERO | 829 R200_TXA_ARG_C_ZERO | 830 R200_TXA_OP_MADD); 831 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 832 R200_TXA_OUTPUT_REG_NONE); 833 834 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 835 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 836 R200_TXC_ARG_A_TFACTOR_COLOR | 837 R200_TXC_BIAS_ARG_A | 838 R200_TXC_SCALE_ARG_A | 839 R200_TXC_ARG_B_R2_COLOR | 840 R200_TXC_BIAS_ARG_B | 841 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 842 R200_TXC_ARG_C_R0_COLOR | 843 R200_TXC_OP_MADD); 844 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 845 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 846 R200_TXC_SCALE_2X | 847 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 848 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 849 R200_TXA_ARG_A_ZERO | 850 R200_TXA_ARG_B_ZERO | 851 R200_TXA_ARG_C_ZERO | 852 R200_TXA_COMP_ARG_C | 853 R200_TXA_OP_MADD); 854 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 855 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 856 857 /* shader constants */ 858 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 859 yco > 1.0 ? yco - 1.0: yco, 860 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 861 0.0)); 862 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 863 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 864 uco[2] * ucscale + 0.5, 865 0.0)); 866 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 867 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 868 vco[2] * vcscale + 0.5, 869 0.0)); 870 871 FINISH_ACCEL(); 872 } else { 873 pPriv->vtx_count = 4; 874 875 BEGIN_ACCEL_RELOC(24, 1); 876 877 OUT_ACCEL_REG(RADEON_PP_CNTL, 878 RADEON_TEX_0_ENABLE | 879 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 880 RADEON_TEX_BLEND_2_ENABLE); 881 882 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 883 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 884 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 885 886 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 887 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 888 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 889 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 890 (pPriv->w - 1) | 891 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 892 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 893 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 894 895 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 896 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 897 R200_TXC_ARG_A_TFACTOR_COLOR | 898 
R200_TXC_ARG_B_R0_COLOR | 899 R200_TXC_ARG_C_TFACTOR_COLOR | 900 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 901 R200_TXC_OP_DOT2_ADD); 902 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 903 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 904 R200_TXC_SCALE_INV2 | 905 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 906 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 907 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 908 R200_TXA_ARG_A_ZERO | 909 R200_TXA_ARG_B_ZERO | 910 R200_TXA_ARG_C_ZERO | 911 R200_TXA_OP_MADD); 912 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 913 R200_TXA_OUTPUT_REG_NONE); 914 915 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 916 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 917 R200_TXC_ARG_A_TFACTOR_COLOR | 918 R200_TXC_BIAS_ARG_A | 919 R200_TXC_SCALE_ARG_A | 920 R200_TXC_ARG_B_R0_COLOR | 921 R200_TXC_BIAS_ARG_B | 922 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 923 R200_TXC_ARG_C_R1_COLOR | 924 R200_TXC_OP_MADD); 925 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 926 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 927 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 928 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 929 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 930 R200_TXA_ARG_A_ZERO | 931 R200_TXA_ARG_B_ZERO | 932 R200_TXA_ARG_C_ZERO | 933 R200_TXA_OP_MADD); 934 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 935 R200_TXA_OUTPUT_REG_NONE); 936 937 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 938 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 939 R200_TXC_ARG_A_TFACTOR_COLOR | 940 R200_TXC_BIAS_ARG_A | 941 R200_TXC_SCALE_ARG_A | 942 R200_TXC_ARG_B_R0_COLOR | 943 R200_TXC_BIAS_ARG_B | 944 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 945 R200_TXC_ARG_C_R1_COLOR | 946 R200_TXC_OP_MADD); 947 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 948 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 949 R200_TXC_SCALE_2X | 950 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 951 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 952 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 953 R200_TXA_ARG_A_ZERO | 954 R200_TXA_ARG_B_ZERO | 955 R200_TXA_ARG_C_ZERO | 956 R200_TXA_COMP_ARG_C | 957 R200_TXA_OP_MADD); 958 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 959 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 960 961 /* shader constants */ 962 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 963 yco > 1.0 ? yco - 1.0: yco, 964 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 965 0.0)); 966 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 967 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 968 uco[2] * ucscale + 0.5, 969 0.0)); 970 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 971 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 972 vco[2] * vcscale + 0.5, 973 0.0)); 974 975 FINISH_ACCEL(); 976 } 977 978 BEGIN_ACCEL(2); 979 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 980 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 981 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 982 FINISH_ACCEL(); 983 984 if (pPriv->vsync) { 985 xf86CrtcPtr crtc; 986 if (pPriv->desired_crtc) 987 crtc = pPriv->desired_crtc; 988 else 989 crtc = radeon_pick_best_crtc(pScrn, 990 pPriv->drw_x, 991 pPriv->drw_x + pPriv->dst_w, 992 pPriv->drw_y, 993 pPriv->drw_y + pPriv->dst_h); 994 if (crtc) 995 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 996 crtc, 997 pPriv->drw_y - crtc->y, 998 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 999 } 1000 1001 return TRUE; 1002} 1003

/*
 * R200DisplayTexturedVideo - queue the draw commands that paint the video
 * frame into the destination pixmap, one textured rectangle per clip box.
 *
 * pScrn: screen the port belongs to.
 * pPriv: Xv port state; supplies the destination pixmap, the clip region
 *        and the source/destination geometry used for the texture
 *        coordinates.
 *
 * R200PrepareTexturedVideo() is called first; if it fails nothing is drawn
 * and the function returns.  Every clip box is emitted as three vertices of
 * a rectangle-list primitive (bottom-left, bottom-right, top-right).  For
 * planar sources each vertex carries its texture coordinate pair twice
 * (the src and mask slots of VTX_OUT_6); otherwise once (VTX_OUT_4).
 * After the vertices are queued the clip region is reported as damage on
 * pPriv->pDraw.
 */
static void
FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
    int nBox = REGION_NUM_RECTS(&pPriv->clip);
    /* Offsets added to every clip box; they stay 0 without COMPOSITE. */
    int dstxoff = 0;
    int dstyoff = 0;
    ACCEL_PREAMBLE();

#ifdef COMPOSITE
    /* NOTE(review): presumably maps screen coordinates to pixmap-relative
     * ones for redirected windows - confirm against the callers. */
    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
#endif

    if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
        return;

#ifdef ACCEL_CP
    /*
     * CP path: the boxes are sent in batches sized to the space left in
     * the command stream.
     */
    while (nBox) {
        /* Space needed to draw a single box: 3 vertices plus the 4 extra
         * dwords that BEGIN_RING() reserves below. */
        int one_box_dwords = 3 * pPriv->vtx_count + 4;
        int batch;

        if (one_box_dwords > radeon_cs_space_remaining(pScrn)) {
            /* Not even one box fits: flush, then set the state up again. */
            if (info->cs)
                radeon_cs_flush_indirect(pScrn);
            else
                RADEONCPFlushIndirect(pScrn, 1);
            if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
                return;
        }
        batch = MIN(radeon_cs_space_remaining(pScrn) / one_box_dwords, nBox);
        nBox -= batch;

        BEGIN_RING(batch * 3 * pPriv->vtx_count + 4);
        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
                            batch * 3 * pPriv->vtx_count));
        OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
                 ((batch * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));

        while (batch--) {
            /* Destination rectangle, in pixels. */
            const int box_w = pBox->x2 - pBox->x1;
            const int box_h = pBox->y2 - pBox->y1;
            const int dst_left = pBox->x1 + dstxoff;
            const int dst_top = pBox->y1 + dstyoff;
            /* Matching source rectangle, in texels (integer arithmetic). */
            int src_left, src_top, src_span_w, src_span_h;
            /* Values handed to the vertex macros. */
            float vx_left, vx_right, vy_top, vy_bottom;
            float ts_left, ts_right, tt_top, tt_bottom;

            src_left = pPriv->src_x;
            src_left += ((pBox->x1 - pPriv->drw_x) *
                         pPriv->src_w) / pPriv->dst_w;
            src_top = pPriv->src_y;
            src_top += ((pBox->y1 - pPriv->drw_y) *
                        pPriv->src_h) / pPriv->dst_h;

            src_span_w = (pPriv->src_w * box_w) / pPriv->dst_w;
            src_span_h = (pPriv->src_h * box_h) / pPriv->dst_h;

            vx_left = (float)dst_left;
            vx_right = (float)(dst_left + box_w);
            vy_top = (float)dst_top;
            vy_bottom = (float)(dst_top + box_h);

            /* Texture coordinates are scaled by pPriv->w and pPriv->h. */
            ts_left = (float)src_left / pPriv->w;
            ts_right = (float)(src_left + src_span_w) / pPriv->w;
            tt_top = (float)src_top / pPriv->h;
            tt_bottom = (float)(src_top + src_span_h) / pPriv->h;

            /* A rectangle is described by three of its corners. */
            if (pPriv->is_planar) {
                VTX_OUT_6(vx_left, vy_bottom,
                          ts_left, tt_bottom,
                          ts_left, tt_bottom);
                VTX_OUT_6(vx_right, vy_bottom,
                          ts_right, tt_bottom,
                          ts_right, tt_bottom);
                VTX_OUT_6(vx_right, vy_top,
                          ts_right, tt_top,
                          ts_right, tt_top);
            } else {
                VTX_OUT_4(vx_left, vy_bottom,
                          ts_left, tt_bottom);
                VTX_OUT_4(vx_right, vy_bottom,
                          ts_right, tt_bottom);
                VTX_OUT_4(vx_right, vy_top,
                          ts_right, tt_top);
            }

            pBox++;
        }

        OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
    }
#else /* ACCEL_CP */
    /*
     * MMIO path: all boxes go out in a single rectangle list.
     */
    BEGIN_ACCEL(nBox * 3 * pPriv->vtx_count + 2);
    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
                                      RADEON_VF_PRIM_WALK_DATA |
                                      ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
    while (nBox--) {
        /* Destination rectangle, in pixels. */
        const int box_w = pBox->x2 - pBox->x1;
        const int box_h = pBox->y2 - pBox->y1;
        const int dst_left = pBox->x1 + dstxoff;
        const int dst_top = pBox->y1 + dstyoff;
        /* Matching source rectangle, in texels (integer arithmetic). */
        int src_left, src_top, src_span_w, src_span_h;
        /* Values handed to the vertex macros. */
        float vx_left, vx_right, vy_top, vy_bottom;
        float ts_left, ts_right, tt_top, tt_bottom;

        src_left = pPriv->src_x;
        src_left += ((pBox->x1 - pPriv->drw_x) *
                     pPriv->src_w) / pPriv->dst_w;
        src_top = pPriv->src_y;
        src_top += ((pBox->y1 - pPriv->drw_y) *
                    pPriv->src_h) / pPriv->dst_h;

        src_span_w = (pPriv->src_w * box_w) / pPriv->dst_w;
        src_span_h = (pPriv->src_h * box_h) / pPriv->dst_h;

        vx_left = (float)dst_left;
        vx_right = (float)(dst_left + box_w);
        vy_top = (float)dst_top;
        vy_bottom = (float)(dst_top + box_h);

        /* Texture coordinates are scaled by pPriv->w and pPriv->h. */
        ts_left = (float)src_left / pPriv->w;
        ts_right = (float)(src_left + src_span_w) / pPriv->w;
        tt_top = (float)src_top / pPriv->h;
        tt_bottom = (float)(src_top + src_span_h) / pPriv->h;

        /* A rectangle is described by three of its corners. */
        if (pPriv->is_planar) {
            VTX_OUT_6(vx_left, vy_bottom,
                      ts_left, tt_bottom,
                      ts_left, tt_bottom);
            VTX_OUT_6(vx_right, vy_bottom,
                      ts_right, tt_bottom,
                      ts_right, tt_bottom);
            VTX_OUT_6(vx_right, vy_top,
                      ts_right, tt_top,
                      ts_right, tt_top);
        } else {
            VTX_OUT_4(vx_left, vy_bottom,
                      ts_left, tt_bottom);
            VTX_OUT_4(vx_right, vy_bottom,
                      ts_right, tt_bottom);
            VTX_OUT_4(vx_right, vy_top,
                      ts_right, tt_top);
        }

        pBox++;
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();
#endif /* !ACCEL_CP */

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

1174 1175static Bool 1176FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 1177{ 1178 RADEONInfoPtr info = RADEONPTR(pScrn); 1179 PixmapPtr pPixmap = pPriv->pPixmap; 1180 struct radeon_exa_pixmap_priv *driver_priv; 1181 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 1182 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 1183 uint32_t dst_pitch, dst_format; 1184 uint32_t txenable, colorpitch, bicubic_offset; 1185 uint32_t output_fmt; 1186 int pixel_shift; 1187 ACCEL_PREAMBLE(); 1188 1189#ifdef XF86DRM_MODE 1190 if (info->cs) { 1191 int ret; 1192 1193 radeon_cs_space_reset_bos(info->cs); 1194 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1195 1196 if
(pPriv->bicubic_enabled) 1197 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1198 1199 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 1200 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 1201 1202 ret = radeon_cs_space_check(info->cs); 1203 if (ret) { 1204 ErrorF("Not enough RAM to hw accel xv operation\n"); 1205 return FALSE; 1206 } 1207 } 1208#endif 1209 1210 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 1211 1212#ifdef USE_EXA 1213 if (info->useEXA) { 1214 dst_pitch = exaGetPixmapPitch(pPixmap); 1215 } else 1216#endif 1217 { 1218 dst_pitch = pPixmap->devKind; 1219 } 1220 1221#ifdef USE_EXA 1222 if (info->useEXA) { 1223 RADEON_SWITCH_TO_3D(); 1224 } else 1225#endif 1226 { 1227 BEGIN_ACCEL(2); 1228 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 1229 /* We must wait for 3d to idle, in case source was just written as a dest. */ 1230 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 1231 RADEON_WAIT_HOST_IDLECLEAN | 1232 RADEON_WAIT_2D_IDLECLEAN | 1233 RADEON_WAIT_3D_IDLECLEAN | 1234 RADEON_WAIT_DMA_GUI_IDLE); 1235 FINISH_ACCEL(); 1236 1237 if (!info->accel_state->XInited3D) 1238 RADEONInit3DEngine(pScrn); 1239 } 1240 1241 if (pPriv->bicubic_enabled) 1242 pPriv->vtx_count = 6; 1243 else 1244 pPriv->vtx_count = 4; 1245 1246 switch (pPixmap->drawable.bitsPerPixel) { 1247 case 16: 1248 if (pPixmap->drawable.depth == 15) 1249 dst_format = R300_COLORFORMAT_ARGB1555; 1250 else 1251 dst_format = R300_COLORFORMAT_RGB565; 1252 break; 1253 case 32: 1254 dst_format = R300_COLORFORMAT_ARGB8888; 1255 break; 1256 default: 1257 return FALSE; 1258 } 1259 1260 output_fmt = (R300_OUT_FMT_C4_8 | 1261 R300_OUT_FMT_C0_SEL_BLUE | 1262 R300_OUT_FMT_C1_SEL_GREEN | 1263 R300_OUT_FMT_C2_SEL_RED | 1264 R300_OUT_FMT_C3_SEL_ALPHA); 1265 1266 colorpitch = dst_pitch >> pixel_shift; 1267 colorpitch |= dst_format; 1268 1269 if (RADEONTilingEnabled(pScrn, pPixmap)) 1270 colorpitch |= 
R300_COLORTILE; 1271 1272 1273 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1274 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1275 pPriv->is_planar = TRUE; 1276 else 1277 pPriv->is_planar = FALSE; 1278 1279 if (pPriv->is_planar) { 1280 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1281 txpitch = pPriv->src_pitch; 1282 } else { 1283 if (pPriv->id == FOURCC_UYVY) 1284 txformat1 = R300_TX_FORMAT_YVYU422; 1285 else 1286 txformat1 = R300_TX_FORMAT_VYUY422; 1287 1288 if (pPriv->bicubic_state != BICUBIC_OFF) 1289 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1290 1291 /* pitch is in pixels */ 1292 txpitch = pPriv->src_pitch / 2; 1293 } 1294 txpitch -= 1; 1295 1296 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1297 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1298 R300_TXPITCH_EN); 1299 1300 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1301 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1302 R300_TX_MAG_FILTER_LINEAR | 1303 R300_TX_MIN_FILTER_LINEAR | 1304 (0 << R300_TX_ID_SHIFT)); 1305 1306 txoffset = info->cs ? 
0 : pPriv->src_offset; 1307 1308 BEGIN_ACCEL_RELOC(6, 1); 1309 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 1310 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 1311 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 1312 if (pPriv->is_planar) 1313 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1314 else 1315 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 1316 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 1317 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 1318 FINISH_ACCEL(); 1319 1320 txenable = R300_TEX_0_ENABLE; 1321 1322 if (pPriv->is_planar) { 1323 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1324 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1325 R300_TXPITCH_EN); 1326 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1327 txpitch -= 1; 1328 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1329 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1330 R300_TX_MIN_FILTER_LINEAR | 1331 R300_TX_MAG_FILTER_LINEAR); 1332 1333 BEGIN_ACCEL_RELOC(12, 2); 1334 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 1335 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1336 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1337 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 1338 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1339 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 1340 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 1341 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 1342 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 1343 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 1344 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 1345 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 1346 FINISH_ACCEL(); 1347 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1348 } 1349 1350 if (pPriv->bicubic_enabled) { 1351 /* Size is 128x1 */ 1352 txformat0 = ((0x7f 
<< R300_TXWIDTH_SHIFT) | 1353 (0x0 << R300_TXHEIGHT_SHIFT) | 1354 R300_TXPITCH_EN); 1355 /* Format is 32-bit floats, 4bpp */ 1356 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1357 /* Pitch is 127 (128-1) */ 1358 txpitch = 0x7f; 1359 /* Tex filter */ 1360 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1361 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1362 R300_TX_MIN_FILTER_NEAREST | 1363 R300_TX_MAG_FILTER_NEAREST | 1364 (1 << R300_TX_ID_SHIFT)); 1365 1366 if (info->cs) 1367 bicubic_offset = 0; 1368 else 1369 bicubic_offset = pPriv->bicubic_src_offset; 1370 1371 BEGIN_ACCEL_RELOC(6, 1); 1372 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 1373 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1374 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1375 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 1376 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1377 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 1378 FINISH_ACCEL(); 1379 1380 /* Enable tex 1 */ 1381 txenable |= R300_TEX_1_ENABLE; 1382 } 1383 1384 /* setup the VAP */ 1385 if (info->accel_state->has_tcl) { 1386 if (pPriv->bicubic_enabled) 1387 BEGIN_ACCEL(7); 1388 else 1389 BEGIN_ACCEL(6); 1390 } else { 1391 if (pPriv->bicubic_enabled) 1392 BEGIN_ACCEL(5); 1393 else 1394 BEGIN_ACCEL(4); 1395 } 1396 1397 /* These registers define the number, type, and location of data submitted 1398 * to the PVS unit of GA input (when PVS is disabled) 1399 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1400 * enabled. This memory provides the imputs to the vertex shader program 1401 * and ordering is not important. When PVS/TCL is disabled, this field maps 1402 * directly to the GA input memory and the order is signifigant. 
In 1403 * PVS_BYPASS mode the order is as follows: 1404 * Position 1405 * Point Size 1406 * Color 0-3 1407 * Textures 0-7 1408 * Fog 1409 */ 1410 if (pPriv->bicubic_enabled) { 1411 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1412 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1413 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1414 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1415 R300_SIGNED_0 | 1416 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1417 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1418 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1419 R300_SIGNED_1)); 1420 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 1421 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1422 (0 << R300_SKIP_DWORDS_2_SHIFT) | 1423 (7 << R300_DST_VEC_LOC_2_SHIFT) | 1424 R300_LAST_VEC_2 | 1425 R300_SIGNED_2)); 1426 } else { 1427 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1428 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1429 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1430 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1431 R300_SIGNED_0 | 1432 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1433 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1434 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1435 R300_LAST_VEC_1 | 1436 R300_SIGNED_1)); 1437 } 1438 1439 /* load the vertex shader 1440 * We pre-load vertex programs in RADEONInit3DEngine(): 1441 * - exa 1442 * - Xv 1443 * - Xv bicubic 1444 * Here we select the offset of the vertex program we want to use 1445 */ 1446 if (info->accel_state->has_tcl) { 1447 if (pPriv->bicubic_enabled) { 1448 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1449 ((11 << R300_PVS_FIRST_INST_SHIFT) | 1450 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1451 (13 << R300_PVS_LAST_INST_SHIFT))); 1452 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1453 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1454 } else { 1455 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1456 ((9 << R300_PVS_FIRST_INST_SHIFT) | 1457 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1458 (10 << R300_PVS_LAST_INST_SHIFT))); 1459 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1460 (10 << 
R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1461 } 1462 } 1463 1464 /* Position and one set of 2 texture coordinates */ 1465 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1466 if (pPriv->bicubic_enabled) 1467 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1468 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1469 else 1470 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1471 1472 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 1473 FINISH_ACCEL(); 1474 1475 /* setup pixel shader */ 1476 if (pPriv->bicubic_state != BICUBIC_OFF) { 1477 if (pPriv->bicubic_enabled) { 1478 BEGIN_ACCEL(79); 1479 1480 /* 4 components: 2 for tex0 and 2 for tex1 */ 1481 OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1482 R300_RS_COUNT_HIRES_EN)); 1483 1484 /* R300_INST_COUNT_RS - highest RS instruction used */ 1485 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1486 1487 /* Pixel stack frame size. */ 1488 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 1489 1490 /* Indirection levels */ 1491 OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1492 R300_FIRST_TEX)); 1493 1494 /* Set nodes. 
*/ 1495 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1496 R300_ALU_CODE_SIZE(14) | 1497 R300_TEX_CODE_OFFSET(0) | 1498 R300_TEX_CODE_SIZE(6))); 1499 1500 /* Nodes are allocated highest first, but executed lowest first */ 1501 OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); 1502 OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1503 R300_ALU_SIZE(0) | 1504 R300_TEX_START(0) | 1505 R300_TEX_SIZE(0))); 1506 OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1507 R300_ALU_SIZE(9) | 1508 R300_TEX_START(1) | 1509 R300_TEX_SIZE(0))); 1510 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1511 R300_ALU_SIZE(2) | 1512 R300_TEX_START(2) | 1513 R300_TEX_SIZE(3) | 1514 R300_RGBA_OUT)); 1515 1516 /* ** BICUBIC FP ** */ 1517 1518 /* texcoord0 => temp0 1519 * texcoord1 => temp1 */ 1520 1521 // first node 1522 /* TEX temp2, temp1.rrr0, tex1, 1D */ 1523 OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1524 R300_TEX_ID(1) | 1525 R300_TEX_SRC_ADDR(1) | 1526 R300_TEX_DST_ADDR(2))); 1527 1528 /* MOV temp1.r, temp1.ggg0 */ 1529 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1530 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1531 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1532 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1533 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1534 R300_ALU_RGB_ADDRD(1) | 1535 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1536 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1537 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1538 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1539 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1540 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1541 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1542 1543 1544 // second node 1545 /* TEX temp1, temp1, tex1, 1D */ 1546 OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1547 R300_TEX_ID(1) | 1548 R300_TEX_SRC_ADDR(1) | 1549 
R300_TEX_DST_ADDR(1))); 1550 1551 /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 1552 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1553 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1554 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1555 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1556 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1557 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1558 R300_ALU_RGB_ADDRD(3) | 1559 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1560 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1561 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1562 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1563 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1564 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1565 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1566 1567 1568 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 1569 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1570 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1571 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1572 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1573 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1574 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1575 R300_ALU_RGB_ADDRD(2) | 1576 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1577 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1578 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1579 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1580 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1581 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1582 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1583 1584 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 1585 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1586 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1587 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 
1588 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1589 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1590 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1591 R300_ALU_RGB_ADDR2(3) | 1592 R300_ALU_RGB_ADDRD(4) | 1593 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1594 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1595 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1596 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1597 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1598 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1599 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1600 1601 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 1602 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1603 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1604 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1605 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1606 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1607 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1608 R300_ALU_RGB_ADDR2(2) | 1609 R300_ALU_RGB_ADDRD(5) | 1610 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1611 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1612 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1613 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1614 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1615 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1616 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1617 1618 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 1619 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1620 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1621 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1622 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1623 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1624 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1625 
R300_ALU_RGB_ADDR2(3) | 1626 R300_ALU_RGB_ADDRD(3) | 1627 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1628 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1629 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1630 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1631 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1632 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1633 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1634 1635 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 1636 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1637 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1638 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1639 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1640 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1641 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1642 R300_ALU_RGB_ADDR2(2) | 1643 R300_ALU_RGB_ADDRD(1) | 1644 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1645 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1646 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1647 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1648 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1649 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1650 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1651 1652 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 1653 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1654 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1655 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1656 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1657 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1658 R300_ALU_RGB_ADDR2(1) | 1659 R300_ALU_RGB_ADDRD(1) | 1660 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1661 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1662 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1663 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1664 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1665 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1666 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1667 1668 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 1669 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1670 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1671 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1672 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1673 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1674 R300_ALU_RGB_ADDR2(3) | 1675 R300_ALU_RGB_ADDRD(2) | 1676 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1677 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1678 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1679 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1680 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1681 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1682 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1683 1684 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 1685 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1686 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1687 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1688 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1689 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1690 R300_ALU_RGB_ADDR2(5) | 1691 R300_ALU_RGB_ADDRD(3) | 1692 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1693 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1694 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1695 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1696 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1697 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1698 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1699 1700 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 
*/ 1701 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1702 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1703 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1704 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1705 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1706 R300_ALU_RGB_ADDR2(4) | 1707 R300_ALU_RGB_ADDRD(0) | 1708 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1709 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1710 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1711 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1712 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1713 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1714 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1715 1716 1717 // third node 1718 /* TEX temp4, temp1.rg--, tex0, 1D */ 1719 OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1720 R300_TEX_ID(0) | 1721 R300_TEX_SRC_ADDR(1) | 1722 R300_TEX_DST_ADDR(4))); 1723 1724 /* TEX temp3, temp3.rg--, tex0, 1D */ 1725 OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1726 R300_TEX_ID(0) | 1727 R300_TEX_SRC_ADDR(3) | 1728 R300_TEX_DST_ADDR(3))); 1729 1730 /* TEX temp5, temp2.rg--, tex0, 1D */ 1731 OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1732 R300_TEX_ID(0) | 1733 R300_TEX_SRC_ADDR(2) | 1734 R300_TEX_DST_ADDR(5))); 1735 1736 /* TEX temp0, temp0.rg--, tex0, 1D */ 1737 OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1738 R300_TEX_ID(0) | 1739 R300_TEX_SRC_ADDR(0) | 1740 R300_TEX_DST_ADDR(0))); 1741 1742 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1743 * - PRESUB temps, temp4 - temp3 1744 * - MAD temp3, temp1.bbbb, temps, temp3 */ 1745 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1746 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1747 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1748 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1749 
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1750 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1751 R300_ALU_RGB_ADDR1(4) | 1752 R300_ALU_RGB_ADDR2(1) | 1753 R300_ALU_RGB_ADDRD(3) | 1754 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1755 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1756 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1757 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1758 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1759 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1760 R300_ALU_ALPHA_ADDR1(4) | 1761 R300_ALU_ALPHA_ADDR2(1) | 1762 R300_ALU_ALPHA_ADDRD(3) | 1763 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1764 1765 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1766 * - PRESUB temps, temp5 - temp0 1767 * - MAD temp0, temp1.bbbb, temps, temp0 */ 1768 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1769 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1770 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1771 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1772 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1773 R300_ALU_RGB_INSERT_NOP)); 1774 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1775 R300_ALU_RGB_ADDR1(5) | 1776 R300_ALU_RGB_ADDR2(1) | 1777 R300_ALU_RGB_ADDRD(0) | 1778 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1779 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1780 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1781 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1782 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1783 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1784 R300_ALU_ALPHA_ADDR1(5) | 1785 R300_ALU_ALPHA_ADDR2(1) | 1786 R300_ALU_ALPHA_ADDRD(0) | 1787 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1788 1789 /* LRP output, temp2.bbbb, temp3, temp0 -> 1790 * - PRESUB temps, temp3 - temp0 1791 * - MAD output, temp2.bbbb, 
temps, temp0 */ 1792 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1793 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1794 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1795 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1796 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1797 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1798 R300_ALU_RGB_ADDR1(3) | 1799 R300_ALU_RGB_ADDR2(2) | 1800 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 1801 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1802 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1803 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1804 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1805 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1806 R300_ALU_ALPHA_ADDR1(3) | 1807 R300_ALU_ALPHA_ADDR2(2) | 1808 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1809 1810 /* Shader constants. */ 1811 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 1812 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); 1813 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); 1814 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); 1815 1816 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); 1817 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 1818 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); 1819 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); 1820 1821 FINISH_ACCEL(); 1822 } else { 1823 BEGIN_ACCEL(11); 1824 /* 2 components: 2 for tex0 */ 1825 OUT_ACCEL_REG(R300_RS_COUNT, 1826 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1827 R300_RS_COUNT_HIRES_EN)); 1828 /* R300_INST_COUNT_RS - highest RS instruction used */ 1829 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1830 1831 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1832 1833 /* Indirection levels */ 1834 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1835 R300_FIRST_TEX)); 1836 1837 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1838 
R300_ALU_CODE_SIZE(1) | 1839 R300_TEX_CODE_OFFSET(0) | 1840 R300_TEX_CODE_SIZE(1))); 1841 1842 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1843 R300_ALU_SIZE(0) | 1844 R300_TEX_START(0) | 1845 R300_TEX_SIZE(0) | 1846 R300_RGBA_OUT)); 1847 1848 /* tex inst */ 1849 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1850 R300_TEX_DST_ADDR(0) | 1851 R300_TEX_ID(0) | 1852 R300_TEX_INST(R300_TEX_INST_LD))); 1853 1854 /* ALU inst */ 1855 /* RGB */ 1856 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1857 R300_ALU_RGB_ADDR1(0) | 1858 R300_ALU_RGB_ADDR2(0) | 1859 R300_ALU_RGB_ADDRD(0) | 1860 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1861 R300_ALU_RGB_MASK_G | 1862 R300_ALU_RGB_MASK_B)) | 1863 R300_ALU_RGB_TARGET_A)); 1864 OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1865 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1866 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1867 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1868 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1869 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1870 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1871 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1872 R300_ALU_RGB_CLAMP)); 1873 /* Alpha */ 1874 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1875 R300_ALU_ALPHA_ADDR1(0) | 1876 R300_ALU_ALPHA_ADDR2(0) | 1877 R300_ALU_ALPHA_ADDRD(0) | 1878 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1879 R300_ALU_ALPHA_TARGET_A | 1880 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 1881 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1882 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1883 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1884 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1885 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1886 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1887 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1888 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1889 R300_ALU_ALPHA_CLAMP)); 1890 
FINISH_ACCEL(); 1891 } 1892 } else { 1893 /* 1894 * y' = y - .0625 1895 * u' = u - .5 1896 * v' = v - .5; 1897 * 1898 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1899 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1900 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1901 * 1902 * DP3 might look like the straightforward solution 1903 * but we'd need to move the texture yuv values in 1904 * the same reg for this to work. Therefore use MADs. 1905 * Brightness just adds to the off constant. 1906 * Contrast is multiplication of luminance. 1907 * Saturation and hue change the u and v coeffs. 1908 * Default values (before adjustments - depend on colorspace): 1909 * yco = 1.1643 1910 * uco = 0, -0.39173, 2.017 1911 * vco = 1.5958, -0.8129, 0 1912 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1913 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1914 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1915 * 1916 * temp = MAD(yco, yuv.yyyy, off) 1917 * temp = MAD(uco, yuv.uuuu, temp) 1918 * result = MAD(vco, yuv.vvvv, temp) 1919 */ 1920 /* TODO: don't recalc consts always */ 1921 const float Loff = -0.0627; 1922 const float Coff = -0.502; 1923 float uvcosf, uvsinf; 1924 float yco; 1925 float uco[3], vco[3], off[3]; 1926 float bright, cont, gamma; 1927 int ref = pPriv->transform_index; 1928 Bool needgamma = FALSE; 1929 1930 cont = RTFContrast(pPriv->contrast); 1931 bright = RTFBrightness(pPriv->brightness); 1932 gamma = (float)pPriv->gamma / 1000.0; 1933 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1934 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1935 /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 1936 1937 yco = trans[ref].RefLuma * cont; 1938 uco[0] = -trans[ref].RefRCr * uvsinf; 1939 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1940 uco[2] = trans[ref].RefBCb * uvcosf; 1941 vco[0] = trans[ref].RefRCr * uvcosf; 1942 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1943 vco[2] = trans[ref].RefBCb * uvsinf; 1944 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1945 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1946 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1947 1948 if (gamma != 1.0) { 1949 needgamma = TRUE; 1950 /* note: gamma correction is out = in ^ gamma; 1951 gpu can only do LG2/EX2 therefore we transform into 1952 in ^ gamma = 2 ^ (log2(in) * gamma). 1953 Lots of scalar ops, unfortunately (better solution?) - 1954 without gamma that's 3 inst, with gamma it's 10... 1955 could use different gamma factors per channel, 1956 if that's of any use. */ 1957 } 1958 1959 if (pPriv->is_planar) { 1960 BEGIN_ACCEL(needgamma ? 28 + 33 : 33); 1961 /* 2 components: same 2 for tex0/1/2 */ 1962 OUT_ACCEL_REG(R300_RS_COUNT, 1963 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1964 R300_RS_COUNT_HIRES_EN)); 1965 /* R300_INST_COUNT_RS - highest RS instruction used */ 1966 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1967 1968 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1969 1970 /* Indirection levels */ 1971 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1972 R300_FIRST_TEX)); 1973 1974 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1975 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1976 R300_TEX_CODE_OFFSET(0) | 1977 R300_TEX_CODE_SIZE(3))); 1978 1979 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1980 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 1981 R300_TEX_START(0) | 1982 R300_TEX_SIZE(2) | 1983 R300_RGBA_OUT)); 1984 1985 /* tex inst */ 1986 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1987 R300_TEX_DST_ADDR(2) | 1988 R300_TEX_ID(0) | 1989 R300_TEX_INST(R300_TEX_INST_LD))); 1990 OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1991 R300_TEX_DST_ADDR(1) | 1992 R300_TEX_ID(1) | 1993 R300_TEX_INST(R300_TEX_INST_LD))); 1994 OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 1995 R300_TEX_DST_ADDR(0) | 1996 R300_TEX_ID(2) | 1997 R300_TEX_INST(R300_TEX_INST_LD))); 1998 1999 /* ALU inst */ 2000 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 2001 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2002 R300_ALU_RGB_ADDR1(2) | 2003 R300_ALU_RGB_ADDR2(0) | 2004 R300_ALU_RGB_ADDRD(2) | 2005 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2006 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2007 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2008 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2009 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2010 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2011 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2012 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2013 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2014 /* alpha nop, but need to set up alpha source for rgb usage */ 2015 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2016 R300_ALU_ALPHA_ADDR1(2) | 2017 R300_ALU_ALPHA_ADDR2(0) | 2018 R300_ALU_ALPHA_ADDRD(2) | 2019 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2020 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2021 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2022 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2023 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2024 2025 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 2026 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), 
(R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2027 R300_ALU_RGB_ADDR1(1) | 2028 R300_ALU_RGB_ADDR2(2) | 2029 R300_ALU_RGB_ADDRD(2) | 2030 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2031 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2032 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2033 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2034 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2035 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2036 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2037 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2038 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2039 /* alpha nop */ 2040 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 2041 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2042 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2043 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2044 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2045 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2046 2047 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 2048 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2049 R300_ALU_RGB_ADDR1(0) | 2050 R300_ALU_RGB_ADDR2(2) | 2051 R300_ALU_RGB_ADDRD(0) | 2052 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2053 (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2054 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2055 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2056 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2057 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2058 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2059 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2060 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2061 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2062 R300_ALU_RGB_CLAMP)); 2063 /* write alpha 1 */ 2064 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2065 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2066 R300_ALU_ALPHA_TARGET_A)); 2067 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2068 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2069 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2070 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2071 2072 if (needgamma) { 2073 /* rgb temp0.r = op_sop, set up src0 reg */ 2074 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2075 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2076 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2077 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2078 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2079 /* alpha lg2 temp0, temp0.r */ 2080 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2081 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2082 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2083 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2084 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2085 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2086 2087 /* rgb temp0.g = op_sop, set up src0 reg */ 2088 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2089 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2090 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2091 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2092 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2093 /* alpha lg2 temp0, temp0.g */ 2094 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2095 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2096 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2097 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2098 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2099 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2100 2101 /* rgb temp0.b = op_sop, set up src0 reg */ 2102 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2103 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2104 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2105 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2106 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2107 /* alpha lg2 temp0, temp0.b */ 2108 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2109 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2110 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2111 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2112 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2113 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2114 2115 /* MUL const1, temp1, temp0 */ 2116 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2117 R300_ALU_RGB_ADDR1(0) | 2118 R300_ALU_RGB_ADDR2(0) | 2119 R300_ALU_RGB_ADDRD(0) | 2120 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2121 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2122 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2123 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2124 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2125 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2126 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2127 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2128 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2129 /* alpha nop, but set up const1 */ 2130 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2131 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2132 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2133 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2134 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2135 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2136 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2137 2138 /* rgb out0.r = op_sop, set up src0 reg */ 2139 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2140 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2141 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2142 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2143 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2144 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2145 /* alpha ex2 temp0, temp0.r */ 2146 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2147 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2148 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2149 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2150 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2151 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2152 2153 /* rgb out0.g = op_sop, set up src0 reg */ 2154 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2155 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2156 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2157 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2158 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2159 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2160 /* alpha ex2 temp0, temp0.g */ 2161 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2162 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2163 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2164 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2165 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2166 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2167 2168 /* rgb out0.b = op_sop, set up src0 reg */ 2169 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2170 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2171 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2172 
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2173 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2174 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2175 /* alpha ex2 temp0, temp0.b */ 2176 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2177 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2178 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2179 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2180 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2181 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2182 } 2183 } else { 2184 BEGIN_ACCEL(needgamma ? 28 + 31 : 31); 2185 /* 2 components */ 2186 OUT_ACCEL_REG(R300_RS_COUNT, 2187 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2188 R300_RS_COUNT_HIRES_EN)); 2189 /* R300_INST_COUNT_RS - highest RS instruction used */ 2190 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 2191 2192 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 2193 2194 /* Indirection levels */ 2195 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 2196 R300_FIRST_TEX)); 2197 2198 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 2199 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 2200 R300_TEX_CODE_OFFSET(0) | 2201 R300_TEX_CODE_SIZE(1))); 2202 2203 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 2204 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 2205 R300_TEX_START(0) | 2206 R300_TEX_SIZE(0) | 2207 R300_RGBA_OUT)); 2208 2209 /* tex inst */ 2210 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 2211 R300_TEX_DST_ADDR(0) | 2212 R300_TEX_ID(0) | 2213 R300_TEX_INST(R300_TEX_INST_LD))); 2214 2215 /* ALU inst */ 2216 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 2217 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2218 R300_ALU_RGB_ADDR1(0) | 2219 R300_ALU_RGB_ADDR2(0) | 2220 R300_ALU_RGB_ADDRD(1) | 2221 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2222 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2223 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2224 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 2225 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2226 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2227 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2228 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2229 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2230 /* alpha nop, but need to set up alpha source for rgb usage */ 2231 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2232 R300_ALU_ALPHA_ADDR1(0) | 2233 R300_ALU_ALPHA_ADDR2(0) | 2234 R300_ALU_ALPHA_ADDRD(0) | 2235 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2236 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2237 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2238 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2239 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2240 2241 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 2242 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2243 R300_ALU_RGB_ADDR1(0) | 2244 R300_ALU_RGB_ADDR2(1) | 2245 R300_ALU_RGB_ADDRD(1) | 2246 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2247 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2248 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2249 
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 2250 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2251 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2252 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2253 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2254 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2255 /* alpha nop */ 2256 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 2257 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2258 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2259 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2260 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2261 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2262 2263 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 2264 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2265 R300_ALU_RGB_ADDR1(0) | 2266 R300_ALU_RGB_ADDR2(1) | 2267 R300_ALU_RGB_ADDRD(0) | 2268 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2269 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2270 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2271 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2272 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2273 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2274 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2275 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2276 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2277 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2278 R300_ALU_RGB_CLAMP)); 2279 /* write alpha 1 */ 2280 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2281 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2282 R300_ALU_ALPHA_TARGET_A)); 2283 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2284 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2285 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2286 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2287 2288 if (needgamma) { 2289 /* rgb temp0.r = op_sop, set up src0 reg */ 2290 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2291 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2292 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2293 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2294 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2295 /* alpha lg2 temp0, temp0.r */ 2296 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2297 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2298 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2299 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2300 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2301 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2302 2303 /* rgb temp0.g = op_sop, set up src0 reg */ 2304 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2305 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2306 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2307 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2308 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2309 /* alpha lg2 temp0, temp0.g */ 2310 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2311 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2312 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2313 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2314 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2315 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2316 2317 /* rgb temp0.b = op_sop, set up src0 reg */ 2318 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2319 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2320 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2321 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2322 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2323 /* alpha lg2 temp0, temp0.b */ 2324 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2325 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2326 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2327 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2328 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2329 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2330 2331 /* MUL const1, temp1, temp0 */ 2332 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2333 R300_ALU_RGB_ADDR1(0) | 2334 R300_ALU_RGB_ADDR2(0) | 2335 R300_ALU_RGB_ADDRD(0) | 2336 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2337 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2338 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2339 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2340 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2341 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2342 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2343 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2344 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2345 /* alpha nop, but set up const1 */ 2346 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2347 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2348 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2349 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2350 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2351 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2352 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2353 2354 /* rgb out0.r = op_sop, set up src0 reg */ 2355 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2356 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2357 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2358 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2359 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2360 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2361 /* alpha ex2 temp0, temp0.r */ 2362 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2363 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2364 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2365 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2366 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2367 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2368 2369 /* rgb out0.g = 
op_sop, set up src0 reg */ 2370 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2371 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2372 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2373 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2374 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2375 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2376 /* alpha ex2 temp0, temp0.g */ 2377 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2378 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2379 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2380 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2381 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2382 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2383 2384 /* rgb out0.b = op_sop, set up src0 reg */ 2385 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2386 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2387 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2388 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2389 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2390 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2391 /* alpha ex2 temp0, temp0.b */ 2392 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2393 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2394 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2395 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2396 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2397 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2398 } 2399 } 2400 2401 /* Shader constants. 
*/ 2402 /* constant 0: off, yco */ 2403 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 2404 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 2405 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 2406 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2407 /* constant 1: uco */ 2408 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 2409 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 2410 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 2411 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2412 /* constant 2: vco */ 2413 OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 2414 OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 2415 OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 2416 OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2417 2418 FINISH_ACCEL(); 2419 } 2420 2421 BEGIN_ACCEL_RELOC(6, 2); 2422 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 2423 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 2424 2425 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2426 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2427 2428 /* no need to enable blending */ 2429 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2430 2431 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 2432 FINISH_ACCEL(); 2433 2434 if (pPriv->vsync) { 2435 xf86CrtcPtr crtc; 2436 if (pPriv->desired_crtc) 2437 crtc = pPriv->desired_crtc; 2438 else 2439 crtc = radeon_pick_best_crtc(pScrn, 2440 pPriv->drw_x, 2441 pPriv->drw_x + pPriv->dst_w, 2442 pPriv->drw_y, 2443 pPriv->drw_y + pPriv->dst_h); 2444 if (crtc) 2445 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 2446 crtc, 2447 pPriv->drw_y - crtc->y, 2448 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2449 } 2450 2451 return TRUE; 2452} 2453 /* Draw the video frame into pPriv->pPixmap: runs R300PrepareTexturedVideo first (returning without drawing if it fails), issues one draw per rectangle of pPriv->clip, then flushes the destination cache, waits for 3D idle and reports damage on pPriv->pDraw. Returns nothing; failures are silent. */ 2454static void 2455FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2456{ 2457 RADEONInfoPtr info = RADEONPTR(pScrn); 2458 PixmapPtr pPixmap = pPriv->pPixmap; 2459 int dstxoff, 
dstyoff; 2460 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 2461 int nBox = REGION_NUM_RECTS(&pPriv->clip); 2462 ACCEL_PREAMBLE(); 2463 /* dstxoff/dstyoff are added to each clip box origin to form dstX/dstY; without COMPOSITE they are zero. */ 2464#ifdef COMPOSITE 2465 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 2466 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 2467#else 2468 dstxoff = 0; 2469 dstyoff = 0; 2470#endif 2471 2472 if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv)) 2473 return; 2474 2475 /* 2476 * Rendering of the actual polygon is done in two different 2477 * ways depending on chip generation: 2478 * 2479 * < R300: 2480 * 2481 * These chips can render a rectangle in one pass, so 2482 * handling is pretty straight-forward. 2483 * 2484 * >= R300: 2485 * 2486 * These chips can accept a quad, but will render it as 2487 * two triangles which results in a diagonal tear. Instead 2488 * we render a single, large triangle and use the scissor 2489 * functionality to restrict it to the desired rectangle. 2490 * Due to guardband limits on r3xx/r4xx, we can only use 2491 * the single triangle up to 2560/4021 pixels; above that we 2492 * render as a quad. 
2493 */ 2494 2495 while (nBox--) { 2496 int srcX, srcY, srcw, srch; 2497 int dstX, dstY, dstw, dsth; 2498 Bool use_quad = FALSE; 2499#ifdef ACCEL_CP /* 4 * vtx_count + 4 is the larger (quad) BEGIN_RING size used below; the extra 2 + 3 presumably reserve room for the scissor block and the register block after the loop -- TODO confirm. If the ring lacks that much space it is flushed and R300PrepareTexturedVideo is run again. */ 2500 int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3; 2501 2502 if (draw_size > radeon_cs_space_remaining(pScrn)) { 2503 if (info->cs) 2504 radeon_cs_flush_indirect(pScrn); 2505 else 2506 RADEONCPFlushIndirect(pScrn, 1); 2507 if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv)) 2508 return; 2509 } 2510#endif 2511 2512 dstX = pBox->x1 + dstxoff; 2513 dstY = pBox->y1 + dstyoff; 2514 dstw = pBox->x2 - pBox->x1; 2515 dsth = pBox->y2 - pBox->y1; 2516 /* Source rectangle for this box: the box offset and size are scaled by src/dst. srcX/srcY/srcw/srch are ints, so any fractional part is dropped on assignment. */ 2517 srcX = pPriv->src_x; 2518 srcX += ((pBox->x1 - pPriv->drw_x) * 2519 pPriv->src_w) / pPriv->dst_w; 2520 srcY = pPriv->src_y; 2521 srcY += ((pBox->y1 - pPriv->drw_y) * 2522 pPriv->src_h) / pPriv->dst_h; 2523 2524 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 2525 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 2526 /* Fall back to a quad when dstw + dsth exceeds the limit checked here (4021 if IS_R400_3D, otherwise 2560). */ 2527 if (IS_R400_3D) { 2528 if ((dstw+dsth) > 4021) 2529 use_quad = TRUE; 2530 } else { 2531 if ((dstw+dsth) > 2560) 2532 use_quad = TRUE; 2533 } 2534 /* 2535 * Set up the scissor area to that of the output size. 
2536 */ 2537 BEGIN_ACCEL(2); 2538 /* R300 has an offset */ 2539 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) | 2540 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT))); 2541 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) | 2542 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); 2543 FINISH_ACCEL(); 2544 2545#ifdef ACCEL_CP /* BEGIN_RING size is (vertices * vtx_count) + 4: two of the four are the OUT_RINGs issued here, the other two presumably cover the cache flush write emitted before ADVANCE_RING -- TODO confirm. */ 2546 if (use_quad) { 2547 BEGIN_RING(4 * pPriv->vtx_count + 4); 2548 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2549 4 * pPriv->vtx_count)); 2550 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | 2551 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2552 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2553 } else { 2554 BEGIN_RING(3 * pPriv->vtx_count + 4); 2555 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 2556 3 * pPriv->vtx_count)); 2557 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 2558 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 2559 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 2560 } 2561#else /* ACCEL_CP */ 2562 if (use_quad) 2563 BEGIN_ACCEL(2 + pPriv->vtx_count * 4); 2564 else 2565 BEGIN_ACCEL(2 + pPriv->vtx_count * 3); 2566 2567 if (use_quad) 2568 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | 2569 RADEON_VF_PRIM_WALK_DATA | 2570 (4 << RADEON_VF_NUM_VERTICES_SHIFT))); 2571 else 2572 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 2573 RADEON_VF_PRIM_WALK_DATA | 2574 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 2575#endif 2576 if (pPriv->bicubic_enabled) { 2577 /* 2578 * This code is only executed on >= R300, so we don't 2579 * have to deal with the legacy handling. 
2580 */ /* Each VTX_OUT_6 vertex: destination x/y, source coords divided by pPriv->w / pPriv->h, then the same source coords undivided plus 0.5. */ 2581 if (use_quad) { 2582 VTX_OUT_6((float)dstX, (float)dstY, 2583 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2584 (float)srcX + 0.5, (float)srcY + 0.5); 2585 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 2586 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 2587 (float)srcX + 0.5, (float)(srcY + srch) + 0.5); 2588 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 2589 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 2590 (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); 2591 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 2592 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 2593 (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); 2594 } else { /* Single triangle: legs of length dstw + dsth along x and y from (dstX, dstY); the source extents are stretched by the matching (dstw + dsth) / dsth and (dstw + dsth) / dstw ratios. */ 2595 VTX_OUT_6((float)dstX, (float)dstY, 2596 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 2597 (float)srcX + 0.5, (float)srcY + 0.5); 2598 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 2599 (float)srcX / pPriv->w, 2600 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 2601 (float)srcX + 0.5, 2602 (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 2603 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 2604 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2605 (float)srcY / pPriv->h, 2606 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 2607 (float)srcY + 0.5); 2608 } 2609 } else { 2610 if (use_quad) { 2611 VTX_OUT_4((float)dstX, (float)dstY, 2612 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2613 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 2614 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 2615 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 2616 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 2617 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 2618 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 2619 } else { 2620 /* 2621 * Render a big, scissored triangle. 
This means 2622 * increasing the triangle size and adjusting 2623 * texture coordinates. 2624 */ 2625 VTX_OUT_4((float)dstX, (float)dstY, 2626 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 2627 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 2628 (float)srcX / pPriv->w, 2629 ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 2630 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 2631 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 2632 (float)srcY / pPriv->h); 2633 } 2634 } 2635 2636 /* flushing is pipelined, free/finish is not */ 2637 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2638 2639#ifdef ACCEL_CP 2640 ADVANCE_RING(); 2641#else 2642 FINISH_ACCEL(); 2643#endif /* !ACCEL_CP */ 2644 2645 pBox++; 2646 } 2647 /* After all boxes: write R300_SC_CLIP_RULE, flush the destination cache and wait for 3D idle before reporting damage. */ 2648 BEGIN_ACCEL(3); 2649 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 2650 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 2651 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 2652 FINISH_ACCEL(); 2653 2654 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 2655} 2656 2657static Bool 2658FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2659{ 2660 RADEONInfoPtr info = RADEONPTR(pScrn); 2661 PixmapPtr pPixmap = pPriv->pPixmap; 2662 struct radeon_exa_pixmap_priv *driver_priv; 2663 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 2664 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 2665 uint32_t dst_pitch, dst_format; 2666 uint32_t txenable, colorpitch, bicubic_offset; 2667 uint32_t output_fmt; 2668 int pixel_shift; 2669 ACCEL_PREAMBLE(); 2670 2671#ifdef XF86DRM_MODE 2672 if (info->cs) { 2673 int ret; 2674 2675 radeon_cs_space_reset_bos(info->cs); 2676 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2677 2678 if (pPriv->bicubic_enabled) 2679 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | 
RADEON_GEM_DOMAIN_VRAM, 0); 2680 2681 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 2682 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 2683 2684 ret = radeon_cs_space_check(info->cs); 2685 if (ret) { 2686 ErrorF("Not enough RAM to hw accel xv operation\n"); 2687 return FALSE; 2688 } 2689 } 2690#endif 2691 2692 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2693 2694#ifdef USE_EXA 2695 if (info->useEXA) { 2696 dst_pitch = exaGetPixmapPitch(pPixmap); 2697 } else 2698#endif 2699 { 2700 dst_pitch = pPixmap->devKind; 2701 } 2702 2703#ifdef USE_EXA 2704 if (info->useEXA) { 2705 RADEON_SWITCH_TO_3D(); 2706 } else 2707#endif 2708 { 2709 BEGIN_ACCEL(2); 2710 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2711 /* We must wait for 3d to idle, in case source was just written as a dest. */ 2712 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 2713 RADEON_WAIT_HOST_IDLECLEAN | 2714 RADEON_WAIT_2D_IDLECLEAN | 2715 RADEON_WAIT_3D_IDLECLEAN | 2716 RADEON_WAIT_DMA_GUI_IDLE); 2717 FINISH_ACCEL(); 2718 2719 if (!info->accel_state->XInited3D) 2720 RADEONInit3DEngine(pScrn); 2721 } 2722 2723 if (pPriv->bicubic_enabled) 2724 pPriv->vtx_count = 6; 2725 else 2726 pPriv->vtx_count = 4; 2727 2728 switch (pPixmap->drawable.bitsPerPixel) { 2729 case 16: 2730 if (pPixmap->drawable.depth == 15) 2731 dst_format = R300_COLORFORMAT_ARGB1555; 2732 else 2733 dst_format = R300_COLORFORMAT_RGB565; 2734 break; 2735 case 32: 2736 dst_format = R300_COLORFORMAT_ARGB8888; 2737 break; 2738 default: 2739 return FALSE; 2740 } 2741 2742 output_fmt = (R300_OUT_FMT_C4_8 | 2743 R300_OUT_FMT_C0_SEL_BLUE | 2744 R300_OUT_FMT_C1_SEL_GREEN | 2745 R300_OUT_FMT_C2_SEL_RED | 2746 R300_OUT_FMT_C3_SEL_ALPHA); 2747 2748 colorpitch = dst_pitch >> pixel_shift; 2749 colorpitch |= dst_format; 2750 2751 if (RADEONTilingEnabled(pScrn, pPixmap)) 2752 colorpitch |= R300_COLORTILE; 2753 2754 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2755 (pPriv->id == FOURCC_I420 || pPriv->id == 
FOURCC_YV12)) 2756 pPriv->is_planar = TRUE; 2757 else 2758 pPriv->is_planar = FALSE; 2759 2760 if (pPriv->is_planar) { 2761 txformat1 = R300_TX_FORMAT_X8; 2762 txpitch = pPriv->src_pitch; 2763 } else { 2764 if (pPriv->id == FOURCC_UYVY) 2765 txformat1 = R300_TX_FORMAT_YVYU422; 2766 else 2767 txformat1 = R300_TX_FORMAT_VYUY422; 2768 2769 if (pPriv->bicubic_state != BICUBIC_OFF) 2770 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2771 2772 /* pitch is in pixels */ 2773 txpitch = pPriv->src_pitch / 2; 2774 } 2775 txpitch -= 1; 2776 2777 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2778 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2779 R300_TXPITCH_EN); 2780 2781 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2782 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2783 R300_TX_MAG_FILTER_LINEAR | 2784 R300_TX_MIN_FILTER_LINEAR | 2785 (0 << R300_TX_ID_SHIFT)); 2786 2787 2788 if ((pPriv->w - 1) & 0x800) 2789 txpitch |= R500_TXWIDTH_11; 2790 2791 if ((pPriv->h - 1) & 0x800) 2792 txpitch |= R500_TXHEIGHT_11; 2793 2794 txoffset = info->cs ? 
0 : pPriv->src_offset; 2795 2796 BEGIN_ACCEL_RELOC(6, 1); 2797 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 2798 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 2799 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 2800 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 2801 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 2802 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 2803 FINISH_ACCEL(); 2804 2805 txenable = R300_TEX_0_ENABLE; 2806 2807 if (pPriv->is_planar) { 2808 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2809 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2810 R300_TXPITCH_EN); 2811 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2812 txpitch -= 1; 2813 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2814 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2815 R300_TX_MIN_FILTER_LINEAR | 2816 R300_TX_MAG_FILTER_LINEAR); 2817 2818 BEGIN_ACCEL_RELOC(12, 2); 2819 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 2820 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2821 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2822 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 2823 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2824 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 2825 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 2826 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 2827 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 2828 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 2829 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 2830 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 2831 FINISH_ACCEL(); 2832 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2833 } 2834 2835 if (pPriv->bicubic_enabled) { 2836 /* Size is 128x1 */ 2837 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2838 (0x0 << R300_TXHEIGHT_SHIFT) | 2839 R300_TXPITCH_EN); 2840 /* Format is 32-bit floats, 4bpp */ 2841 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2842 /* 
Pitch is 127 (128-1) */ 2843 txpitch = 0x7f; 2844 /* Tex filter */ 2845 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2846 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2847 R300_TX_MIN_FILTER_NEAREST | 2848 R300_TX_MAG_FILTER_NEAREST | 2849 (1 << R300_TX_ID_SHIFT)); 2850 2851 if (info->cs) 2852 bicubic_offset = 0; 2853 else 2854 bicubic_offset = pPriv->bicubic_src_offset; 2855 2856 BEGIN_ACCEL_RELOC(6, 1); 2857 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 2858 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2859 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2860 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 2861 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2862 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 2863 FINISH_ACCEL(); 2864 2865 /* Enable tex 1 */ 2866 txenable |= R300_TEX_1_ENABLE; 2867 } 2868 2869 /* setup the VAP */ 2870 if (info->accel_state->has_tcl) { 2871 if (pPriv->bicubic_enabled) 2872 BEGIN_ACCEL(7); 2873 else 2874 BEGIN_ACCEL(6); 2875 } else { 2876 if (pPriv->bicubic_enabled) 2877 BEGIN_ACCEL(5); 2878 else 2879 BEGIN_ACCEL(4); 2880 } 2881 2882 /* These registers define the number, type, and location of data submitted 2883 * to the PVS unit of GA input (when PVS is disabled) 2884 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2885 * enabled. This memory provides the inputs to the vertex shader program 2886 * and ordering is not important. When PVS/TCL is disabled, this field maps 2887 * directly to the GA input memory and the order is significant. 
In 2888 * PVS_BYPASS mode the order is as follows: 2889 * Position 2890 * Point Size 2891 * Color 0-3 2892 * Textures 0-7 2893 * Fog 2894 */ 2895 if (pPriv->bicubic_enabled) { 2896 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2897 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2898 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2899 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2900 R300_SIGNED_0 | 2901 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2902 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2903 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2904 R300_SIGNED_1)); 2905 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 2906 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2907 (0 << R300_SKIP_DWORDS_2_SHIFT) | 2908 (7 << R300_DST_VEC_LOC_2_SHIFT) | 2909 R300_LAST_VEC_2 | 2910 R300_SIGNED_2)); 2911 } else { 2912 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2913 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2914 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2915 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2916 R300_SIGNED_0 | 2917 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2918 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2919 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2920 R300_LAST_VEC_1 | 2921 R300_SIGNED_1)); 2922 } 2923 2924 /* load the vertex shader 2925 * We pre-load vertex programs in RADEONInit3DEngine(): 2926 * - exa 2927 * - Xv 2928 * - Xv bicubic 2929 * Here we select the offset of the vertex program we want to use 2930 */ 2931 if (info->accel_state->has_tcl) { 2932 if (pPriv->bicubic_enabled) { 2933 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2934 ((11 << R300_PVS_FIRST_INST_SHIFT) | 2935 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2936 (13 << R300_PVS_LAST_INST_SHIFT))); 2937 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2938 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2939 } else { 2940 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2941 ((9 << R300_PVS_FIRST_INST_SHIFT) | 2942 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2943 (10 << R300_PVS_LAST_INST_SHIFT))); 2944 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2945 (10 << 
R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2946 } 2947 } 2948 2949 /* Position and one set of 2 texture coordinates */ 2950 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2951 if (pPriv->bicubic_enabled) 2952 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2953 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2954 else 2955 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2956 2957 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 2958 FINISH_ACCEL(); 2959 2960 /* setup pixel shader */ 2961 if (pPriv->bicubic_state != BICUBIC_OFF) { 2962 if (pPriv->bicubic_enabled) { 2963 BEGIN_ACCEL(7); 2964 2965 /* 4 components: 2 for tex0 and 2 for tex1 */ 2966 OUT_ACCEL_REG(R300_RS_COUNT, 2967 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2968 R300_RS_COUNT_HIRES_EN)); 2969 2970 /* R300_INST_COUNT_RS - highest RS instruction used */ 2971 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 2972 2973 /* Pixel stack frame size. */ 2974 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 2975 2976 /* FP length. */ 2977 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 2978 R500_US_CODE_END_ADDR(13))); 2979 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 2980 R500_US_CODE_RANGE_SIZE(13))); 2981 2982 /* Prepare for FP emission. */ 2983 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 2984 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 2985 FINISH_ACCEL(); 2986 2987 BEGIN_ACCEL(89); 2988 /* Pixel shader. 2989 * I've gone ahead and annotated each instruction, since this 2990 * thing is MASSIVE. :3 2991 * Note: In order to avoid buggies with temps and multiple 2992 * inputs, all temps are offset by 2. temp0 -> register2. 
*/ 2993 2994 /* TEX temp2, input1.xxxx, tex1, 1D */ 2995 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2996 R500_INST_RGB_WMASK_R | 2997 R500_INST_RGB_WMASK_G | 2998 R500_INST_RGB_WMASK_B)); 2999 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3000 R500_TEX_INST_LD | 3001 R500_TEX_IGNORE_UNCOVERED)); 3002 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3003 R500_TEX_SRC_S_SWIZ_R | 3004 R500_TEX_SRC_T_SWIZ_R | 3005 R500_TEX_SRC_R_SWIZ_R | 3006 R500_TEX_SRC_Q_SWIZ_R | 3007 R500_TEX_DST_ADDR(2) | 3008 R500_TEX_DST_R_SWIZ_R | 3009 R500_TEX_DST_G_SWIZ_G | 3010 R500_TEX_DST_B_SWIZ_B | 3011 R500_TEX_DST_A_SWIZ_A)); 3012 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3013 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3014 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3015 3016 /* TEX temp5, input1.yyyy, tex1, 1D */ 3017 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3018 R500_INST_TEX_SEM_WAIT | 3019 R500_INST_RGB_WMASK_R | 3020 R500_INST_RGB_WMASK_G | 3021 R500_INST_RGB_WMASK_B)); 3022 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3023 R500_TEX_INST_LD | 3024 R500_TEX_SEM_ACQUIRE | 3025 R500_TEX_IGNORE_UNCOVERED)); 3026 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3027 R500_TEX_SRC_S_SWIZ_G | 3028 R500_TEX_SRC_T_SWIZ_G | 3029 R500_TEX_SRC_R_SWIZ_G | 3030 R500_TEX_SRC_Q_SWIZ_G | 3031 R500_TEX_DST_ADDR(5) | 3032 R500_TEX_DST_R_SWIZ_R | 3033 R500_TEX_DST_G_SWIZ_G | 3034 R500_TEX_DST_B_SWIZ_B | 3035 R500_TEX_DST_A_SWIZ_A)); 3036 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3037 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3038 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3039 3040 /* MUL temp4, const0.x0x0, temp2.yyxx */ 3041 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3042 R500_INST_TEX_SEM_WAIT | 3043 R500_INST_RGB_WMASK_R | 3044 R500_INST_RGB_WMASK_G | 3045 R500_INST_RGB_WMASK_B | 3046 R500_INST_ALPHA_WMASK)); 3047 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3048 R500_RGB_ADDR0_CONST | 3049 R500_RGB_ADDR1(2))); 3050 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3051 R500_ALPHA_ADDR0_CONST | 3052 R500_ALPHA_ADDR1(2))); 3053 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3054 R500_ALU_RGB_R_SWIZ_A_R | 3055 R500_ALU_RGB_G_SWIZ_A_0 | 3056 R500_ALU_RGB_B_SWIZ_A_R | 3057 R500_ALU_RGB_SEL_B_SRC1 | 3058 R500_ALU_RGB_R_SWIZ_B_G | 3059 R500_ALU_RGB_G_SWIZ_B_G | 3060 R500_ALU_RGB_B_SWIZ_B_R)); 3061 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3062 R500_ALPHA_OP_MAD | 3063 R500_ALPHA_SEL_A_SRC0 | 3064 R500_ALPHA_SWIZ_A_0 | 3065 R500_ALPHA_SEL_B_SRC1 | 3066 R500_ALPHA_SWIZ_B_R)); 3067 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3068 R500_ALU_RGBA_OP_MAD | 3069 R500_ALU_RGBA_R_SWIZ_0 | 3070 R500_ALU_RGBA_G_SWIZ_0 | 3071 R500_ALU_RGBA_B_SWIZ_0 | 3072 R500_ALU_RGBA_A_SWIZ_0)); 3073 3074 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 3075 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3076 R500_INST_RGB_WMASK_R | 3077 R500_INST_RGB_WMASK_G | 3078 R500_INST_RGB_WMASK_B | 3079 R500_INST_ALPHA_WMASK)); 3080 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3081 R500_RGB_ADDR0_CONST | 3082 R500_RGB_ADDR1(5) | 3083 R500_RGB_ADDR2(4))); 3084 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3085 R500_ALPHA_ADDR0_CONST | 3086 R500_ALPHA_ADDR1(5) | 3087 R500_ALPHA_ADDR2(4))); 3088 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3089 R500_ALU_RGB_R_SWIZ_A_0 | 3090 R500_ALU_RGB_G_SWIZ_A_G | 3091 R500_ALU_RGB_B_SWIZ_A_0 | 3092 R500_ALU_RGB_SEL_B_SRC1 | 3093 R500_ALU_RGB_R_SWIZ_B_R | 3094 R500_ALU_RGB_G_SWIZ_B_R | 3095 R500_ALU_RGB_B_SWIZ_B_R)); 3096 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3097 R500_ALPHA_OP_MAD | 3098 R500_ALPHA_SEL_A_SRC0 | 3099 R500_ALPHA_SWIZ_A_G | 3100 R500_ALPHA_SEL_B_SRC1 | 3101 R500_ALPHA_SWIZ_B_R)); 3102 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3103 R500_ALU_RGBA_OP_MAD | 3104 R500_ALU_RGBA_SEL_C_SRC2 | 3105 R500_ALU_RGBA_R_SWIZ_R | 3106 R500_ALU_RGBA_G_SWIZ_G | 3107 R500_ALU_RGBA_B_SWIZ_B | 3108 R500_ALU_RGBA_A_SWIZ_A)); 3109 3110 /* ADD temp3, temp3, input0.xyxy */ 3111 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3112 R500_INST_RGB_WMASK_R | 3113 R500_INST_RGB_WMASK_G | 3114 R500_INST_RGB_WMASK_B | 3115 R500_INST_ALPHA_WMASK)); 3116 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 3117 R500_RGB_ADDR2(0))); 3118 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 3119 R500_ALPHA_ADDR2(0))); 3120 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3121 R500_ALU_RGB_G_SWIZ_A_1 | 3122 R500_ALU_RGB_B_SWIZ_A_1 | 3123 R500_ALU_RGB_SEL_B_SRC1 | 3124 R500_ALU_RGB_R_SWIZ_B_R | 3125 R500_ALU_RGB_G_SWIZ_B_G | 3126 R500_ALU_RGB_B_SWIZ_B_B)); 3127 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3128 R500_ALPHA_OP_MAD | 3129 R500_ALPHA_SWIZ_A_1 | 3130 R500_ALPHA_SEL_B_SRC1 | 3131 R500_ALPHA_SWIZ_B_A)); 3132 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3133 R500_ALU_RGBA_OP_MAD | 3134 R500_ALU_RGBA_SEL_C_SRC2 | 3135 R500_ALU_RGBA_R_SWIZ_R | 3136 R500_ALU_RGBA_G_SWIZ_G | 3137 R500_ALU_RGBA_B_SWIZ_R | 3138 R500_ALU_RGBA_A_SWIZ_G)); 3139 3140 /* TEX temp1, temp3.zwxy, tex0, 2D */ 3141 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3142 R500_INST_RGB_WMASK_R | 3143 R500_INST_RGB_WMASK_G | 3144 R500_INST_RGB_WMASK_B | 3145 R500_INST_ALPHA_WMASK)); 3146 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3147 R500_TEX_INST_LD | 3148 R500_TEX_IGNORE_UNCOVERED)); 3149 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3150 R500_TEX_SRC_S_SWIZ_B | 3151 R500_TEX_SRC_T_SWIZ_A | 3152 R500_TEX_SRC_R_SWIZ_R | 3153 R500_TEX_SRC_Q_SWIZ_G | 3154 R500_TEX_DST_ADDR(1) | 3155 R500_TEX_DST_R_SWIZ_R | 3156 R500_TEX_DST_G_SWIZ_G | 3157 R500_TEX_DST_B_SWIZ_B | 3158 
R500_TEX_DST_A_SWIZ_A)); 3159 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3160 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3161 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3162 3163 /* TEX temp3, temp3.xyzw, tex0, 2D */ 3164 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3165 R500_INST_TEX_SEM_WAIT | 3166 R500_INST_RGB_WMASK_R | 3167 R500_INST_RGB_WMASK_G | 3168 R500_INST_RGB_WMASK_B | 3169 R500_INST_ALPHA_WMASK)); 3170 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3171 R500_TEX_INST_LD | 3172 R500_TEX_SEM_ACQUIRE | 3173 R500_TEX_IGNORE_UNCOVERED)); 3174 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3175 R500_TEX_SRC_S_SWIZ_R | 3176 R500_TEX_SRC_T_SWIZ_G | 3177 R500_TEX_SRC_R_SWIZ_B | 3178 R500_TEX_SRC_Q_SWIZ_A | 3179 R500_TEX_DST_ADDR(3) | 3180 R500_TEX_DST_R_SWIZ_R | 3181 R500_TEX_DST_G_SWIZ_G | 3182 R500_TEX_DST_B_SWIZ_B | 3183 R500_TEX_DST_A_SWIZ_A)); 3184 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3185 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3186 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3187 3188 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 3189 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3190 R500_INST_RGB_WMASK_R | 3191 R500_INST_RGB_WMASK_G | 3192 R500_INST_RGB_WMASK_B | 3193 R500_INST_ALPHA_WMASK)); 3194 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3195 R500_RGB_ADDR0_CONST | 3196 R500_RGB_ADDR1(5) | 3197 R500_RGB_ADDR2(4))); 3198 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3199 R500_ALPHA_ADDR0_CONST | 3200 R500_ALPHA_ADDR1(5) | 3201 R500_ALPHA_ADDR2(4))); 3202 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3203 R500_ALU_RGB_R_SWIZ_A_0 | 3204 R500_ALU_RGB_G_SWIZ_A_G | 3205 R500_ALU_RGB_B_SWIZ_A_0 | 3206 R500_ALU_RGB_SEL_B_SRC1 | 3207 R500_ALU_RGB_R_SWIZ_B_G | 3208 R500_ALU_RGB_G_SWIZ_B_G | 3209 R500_ALU_RGB_B_SWIZ_B_G)); 3210 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3211 
R500_ALPHA_OP_MAD | 3212 R500_ALPHA_SEL_A_SRC0 | 3213 R500_ALPHA_SWIZ_A_G | 3214 R500_ALPHA_SEL_B_SRC1 | 3215 R500_ALPHA_SWIZ_B_G)); 3216 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3217 R500_ALU_RGBA_OP_MAD | 3218 R500_ALU_RGBA_SEL_C_SRC2 | 3219 R500_ALU_RGBA_R_SWIZ_R | 3220 R500_ALU_RGBA_G_SWIZ_G | 3221 R500_ALU_RGBA_B_SWIZ_B | 3222 R500_ALU_RGBA_A_SWIZ_A)); 3223 3224 /* ADD temp0, temp4, input0.xyxy */ 3225 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3226 R500_INST_RGB_WMASK_R | 3227 R500_INST_RGB_WMASK_G | 3228 R500_INST_RGB_WMASK_B | 3229 R500_INST_ALPHA_WMASK)); 3230 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 3231 R500_RGB_ADDR2(0))); 3232 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 3233 R500_ALPHA_ADDR2(0))); 3234 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3235 R500_ALU_RGB_G_SWIZ_A_1 | 3236 R500_ALU_RGB_B_SWIZ_A_1 | 3237 R500_ALU_RGB_SEL_B_SRC1 | 3238 R500_ALU_RGB_R_SWIZ_B_R | 3239 R500_ALU_RGB_G_SWIZ_B_G | 3240 R500_ALU_RGB_B_SWIZ_B_B)); 3241 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3242 R500_ALPHA_OP_MAD | 3243 R500_ALPHA_SWIZ_A_1 | 3244 R500_ALPHA_SEL_B_SRC1 | 3245 R500_ALPHA_SWIZ_B_A)); 3246 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3247 R500_ALU_RGBA_OP_MAD | 3248 R500_ALU_RGBA_SEL_C_SRC2 | 3249 R500_ALU_RGBA_R_SWIZ_R | 3250 R500_ALU_RGBA_G_SWIZ_G | 3251 R500_ALU_RGBA_B_SWIZ_R | 3252 R500_ALU_RGBA_A_SWIZ_G)); 3253 3254 /* TEX temp4, temp0.zwzw, tex0, 2D */ 3255 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3256 R500_INST_TEX_SEM_WAIT | 3257 R500_INST_RGB_WMASK_R | 3258 R500_INST_RGB_WMASK_G | 3259 R500_INST_RGB_WMASK_B | 3260 R500_INST_ALPHA_WMASK)); 3261 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3262 R500_TEX_INST_LD | 3263 R500_TEX_IGNORE_UNCOVERED)); 3264 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3265 R500_TEX_SRC_S_SWIZ_B | 3266 R500_TEX_SRC_T_SWIZ_A | 3267 
R500_TEX_SRC_R_SWIZ_B | 3268 R500_TEX_SRC_Q_SWIZ_A | 3269 R500_TEX_DST_ADDR(4) | 3270 R500_TEX_DST_R_SWIZ_R | 3271 R500_TEX_DST_G_SWIZ_G | 3272 R500_TEX_DST_B_SWIZ_B | 3273 R500_TEX_DST_A_SWIZ_A)); 3274 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3275 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3276 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3277 3278 /* TEX temp0, temp0.xyzw, tex0, 2D */ 3279 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3280 R500_INST_TEX_SEM_WAIT | 3281 R500_INST_RGB_WMASK_R | 3282 R500_INST_RGB_WMASK_G | 3283 R500_INST_RGB_WMASK_B | 3284 R500_INST_ALPHA_WMASK)); 3285 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3286 R500_TEX_INST_LD | 3287 R500_TEX_SEM_ACQUIRE | 3288 R500_TEX_IGNORE_UNCOVERED)); 3289 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3290 R500_TEX_SRC_S_SWIZ_R | 3291 R500_TEX_SRC_T_SWIZ_G | 3292 R500_TEX_SRC_R_SWIZ_B | 3293 R500_TEX_SRC_Q_SWIZ_A | 3294 R500_TEX_DST_ADDR(0) | 3295 R500_TEX_DST_R_SWIZ_R | 3296 R500_TEX_DST_G_SWIZ_G | 3297 R500_TEX_DST_B_SWIZ_B | 3298 R500_TEX_DST_A_SWIZ_A)); 3299 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3300 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3301 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3302 3303 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 3304 * - PRESUB temps, temp1 - temp3 3305 * - MAD temp2.zzzz, temps, temp3 */ 3306 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3307 R500_INST_RGB_WMASK_R | 3308 R500_INST_RGB_WMASK_G | 3309 R500_INST_RGB_WMASK_B | 3310 R500_INST_ALPHA_WMASK)); 3311 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3312 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3313 R500_RGB_ADDR1(1) | 3314 R500_RGB_ADDR2(2))); 3315 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3316 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3317 R500_ALPHA_ADDR1(1) | 3318 R500_ALPHA_ADDR2(2))); 3319 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3320 R500_ALU_RGB_R_SWIZ_A_B | 
3321 R500_ALU_RGB_G_SWIZ_A_B | 3322 R500_ALU_RGB_B_SWIZ_A_B | 3323 R500_ALU_RGB_SEL_B_SRCP | 3324 R500_ALU_RGB_R_SWIZ_B_R | 3325 R500_ALU_RGB_G_SWIZ_B_G | 3326 R500_ALU_RGB_B_SWIZ_B_B)); 3327 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3328 R500_ALPHA_OP_MAD | 3329 R500_ALPHA_SEL_A_SRC2 | 3330 R500_ALPHA_SWIZ_A_B | 3331 R500_ALPHA_SEL_B_SRCP | 3332 R500_ALPHA_SWIZ_B_A)); 3333 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3334 R500_ALU_RGBA_OP_MAD | 3335 R500_ALU_RGBA_SEL_C_SRC0 | 3336 R500_ALU_RGBA_R_SWIZ_R | 3337 R500_ALU_RGBA_G_SWIZ_G | 3338 R500_ALU_RGBA_B_SWIZ_B | 3339 R500_ALU_RGBA_A_SWIZ_A)); 3340 3341 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3342 * - PRESUB temps, temp4 - temp0 3343 * - MAD temp2.zzzz, temps, temp0 */ 3344 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3345 R500_INST_TEX_SEM_WAIT | 3346 R500_INST_RGB_WMASK_R | 3347 R500_INST_RGB_WMASK_G | 3348 R500_INST_RGB_WMASK_B | 3349 R500_INST_ALPHA_WMASK)); 3350 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3351 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3352 R500_RGB_ADDR1(4) | 3353 R500_RGB_ADDR2(2))); 3354 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3355 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3356 R500_ALPHA_ADDR1(4) | 3357 R500_ALPHA_ADDR2(2))); 3358 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3359 R500_ALU_RGB_R_SWIZ_A_B | 3360 R500_ALU_RGB_G_SWIZ_A_B | 3361 R500_ALU_RGB_B_SWIZ_A_B | 3362 R500_ALU_RGB_SEL_B_SRCP | 3363 R500_ALU_RGB_R_SWIZ_B_R | 3364 R500_ALU_RGB_G_SWIZ_B_G | 3365 R500_ALU_RGB_B_SWIZ_B_B)); 3366 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3367 R500_ALPHA_OP_MAD | 3368 R500_ALPHA_SEL_A_SRC2 | 3369 R500_ALPHA_SWIZ_A_B | 3370 R500_ALPHA_SEL_B_SRCP | 3371 R500_ALPHA_SWIZ_B_A)); 3372 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3373 R500_ALU_RGBA_OP_MAD | 3374 R500_ALU_RGBA_SEL_C_SRC0 | 3375 R500_ALU_RGBA_R_SWIZ_R | 3376 R500_ALU_RGBA_G_SWIZ_G | 3377 
R500_ALU_RGBA_B_SWIZ_B | 3378 R500_ALU_RGBA_A_SWIZ_A)); 3379 3380 /* LRP output, temp5.zzzz, temp3, temp0 -> 3381 * - PRESUB temps, temp3 - temp0 3382 * - MAD temp5.zzzz, temps, temp0 */ 3383 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3384 R500_INST_LAST | 3385 R500_INST_TEX_SEM_WAIT | 3386 R500_INST_RGB_WMASK_R | 3387 R500_INST_RGB_WMASK_G | 3388 R500_INST_RGB_WMASK_B | 3389 R500_INST_ALPHA_WMASK | 3390 R500_INST_RGB_OMASK_R | 3391 R500_INST_RGB_OMASK_G | 3392 R500_INST_RGB_OMASK_B | 3393 R500_INST_ALPHA_OMASK)); 3394 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3395 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3396 R500_RGB_ADDR1(3) | 3397 R500_RGB_ADDR2(5))); 3398 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3399 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3400 R500_ALPHA_ADDR1(3) | 3401 R500_ALPHA_ADDR2(5))); 3402 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3403 R500_ALU_RGB_R_SWIZ_A_B | 3404 R500_ALU_RGB_G_SWIZ_A_B | 3405 R500_ALU_RGB_B_SWIZ_A_B | 3406 R500_ALU_RGB_SEL_B_SRCP | 3407 R500_ALU_RGB_R_SWIZ_B_R | 3408 R500_ALU_RGB_G_SWIZ_B_G | 3409 R500_ALU_RGB_B_SWIZ_B_B)); 3410 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3411 R500_ALPHA_OP_MAD | 3412 R500_ALPHA_SEL_A_SRC2 | 3413 R500_ALPHA_SWIZ_A_B | 3414 R500_ALPHA_SEL_B_SRCP | 3415 R500_ALPHA_SWIZ_B_A)); 3416 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3417 R500_ALU_RGBA_OP_MAD | 3418 R500_ALU_RGBA_SEL_C_SRC0 | 3419 R500_ALU_RGBA_R_SWIZ_R | 3420 R500_ALU_RGBA_G_SWIZ_G | 3421 R500_ALU_RGBA_B_SWIZ_B | 3422 R500_ALU_RGBA_A_SWIZ_A)); 3423 3424 /* Shader constants. 
*/ 3425 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3426 3427 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3428 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3429 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3430 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3431 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3432 3433 FINISH_ACCEL(); 3434 } else { 3435 BEGIN_ACCEL(19); 3436 /* 2 components: 2 for tex0 */ 3437 OUT_ACCEL_REG(R300_RS_COUNT, 3438 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3439 R300_RS_COUNT_HIRES_EN)); 3440 3441 /* R300_INST_COUNT_RS - highest RS instruction used */ 3442 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3443 3444 /* Pixel stack frame size. */ 3445 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3446 3447 /* FP length. */ 3448 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3449 R500_US_CODE_END_ADDR(1))); 3450 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3451 R500_US_CODE_RANGE_SIZE(1))); 3452 3453 /* Prepare for FP emission. 
*/ 3454 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3455 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3456 3457 /* tex inst */ 3458 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3459 R500_INST_TEX_SEM_WAIT | 3460 R500_INST_RGB_WMASK_R | 3461 R500_INST_RGB_WMASK_G | 3462 R500_INST_RGB_WMASK_B | 3463 R500_INST_ALPHA_WMASK | 3464 R500_INST_RGB_CLAMP | 3465 R500_INST_ALPHA_CLAMP)); 3466 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3467 R500_TEX_INST_LD | 3468 R500_TEX_SEM_ACQUIRE | 3469 R500_TEX_IGNORE_UNCOVERED)); 3470 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3471 R500_TEX_SRC_S_SWIZ_R | 3472 R500_TEX_SRC_T_SWIZ_G | 3473 R500_TEX_DST_ADDR(0) | 3474 R500_TEX_DST_R_SWIZ_R | 3475 R500_TEX_DST_G_SWIZ_G | 3476 R500_TEX_DST_B_SWIZ_B | 3477 R500_TEX_DST_A_SWIZ_A)); 3478 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3479 R500_DX_S_SWIZ_R | 3480 R500_DX_T_SWIZ_R | 3481 R500_DX_R_SWIZ_R | 3482 R500_DX_Q_SWIZ_R | 3483 R500_DY_ADDR(0) | 3484 R500_DY_S_SWIZ_R | 3485 R500_DY_T_SWIZ_R | 3486 R500_DY_R_SWIZ_R | 3487 R500_DY_Q_SWIZ_R)); 3488 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3489 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3490 3491 /* ALU inst */ 3492 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3493 R500_INST_TEX_SEM_WAIT | 3494 R500_INST_LAST | 3495 R500_INST_RGB_OMASK_R | 3496 R500_INST_RGB_OMASK_G | 3497 R500_INST_RGB_OMASK_B | 3498 R500_INST_ALPHA_OMASK | 3499 R500_INST_RGB_CLAMP | 3500 R500_INST_ALPHA_CLAMP)); 3501 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3502 R500_RGB_ADDR1(0) | 3503 R500_RGB_ADDR1_CONST | 3504 R500_RGB_ADDR2(0) | 3505 R500_RGB_ADDR2_CONST)); 3506 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3507 R500_ALPHA_ADDR1(0) | 3508 R500_ALPHA_ADDR1_CONST | 3509 R500_ALPHA_ADDR2(0) | 3510 R500_ALPHA_ADDR2_CONST)); 3511 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3512 R500_ALU_RGB_R_SWIZ_A_R | 3513 
R500_ALU_RGB_G_SWIZ_A_G | 3514 R500_ALU_RGB_B_SWIZ_A_B | 3515 R500_ALU_RGB_SEL_B_SRC0 | 3516 R500_ALU_RGB_R_SWIZ_B_1 | 3517 R500_ALU_RGB_B_SWIZ_B_1 | 3518 R500_ALU_RGB_G_SWIZ_B_1)); 3519 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3520 R500_ALPHA_SWIZ_A_A | 3521 R500_ALPHA_SWIZ_B_1)); 3522 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3523 R500_ALU_RGBA_R_SWIZ_0 | 3524 R500_ALU_RGBA_G_SWIZ_0 | 3525 R500_ALU_RGBA_B_SWIZ_0 | 3526 R500_ALU_RGBA_A_SWIZ_0)); 3527 FINISH_ACCEL(); 3528 } 3529 } else { 3530 /* 3531 * y' = y - .0625 3532 * u' = u - .5 3533 * v' = v - .5; 3534 * 3535 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3536 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3537 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3538 * 3539 * DP3 might look like the straightforward solution 3540 * but we'd need to move the texture yuv values in 3541 * the same reg for this to work. Therefore use MADs. 3542 * Brightness just adds to the off constant. 3543 * Contrast is multiplication of luminance. 3544 * Saturation and hue change the u and v coeffs. 
3545 * Default values (before adjustments - depend on colorspace): 3546 * yco = 1.1643 3547 * uco = 0, -0.39173, 2.017 3548 * vco = 1.5958, -0.8129, 0 3549 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3550 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3551 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3552 * 3553 * temp = MAD(yco, yuv.yyyy, off) 3554 * temp = MAD(uco, yuv.uuuu, temp) 3555 * result = MAD(vco, yuv.vvvv, temp) 3556 */ 3557 /* TODO: don't recalc consts always */ 3558 const float Loff = -0.0627; 3559 const float Coff = -0.502; 3560 float uvcosf, uvsinf; 3561 float yco; 3562 float uco[3], vco[3], off[3]; 3563 float bright, cont, gamma; 3564 int ref = pPriv->transform_index; 3565 Bool needgamma = FALSE; 3566 3567 cont = RTFContrast(pPriv->contrast); 3568 bright = RTFBrightness(pPriv->brightness); 3569 gamma = (float)pPriv->gamma / 1000.0; 3570 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3571 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3572 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 3573 3574 yco = trans[ref].RefLuma * cont; 3575 uco[0] = -trans[ref].RefRCr * uvsinf; 3576 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3577 uco[2] = trans[ref].RefBCb * uvcosf; 3578 vco[0] = trans[ref].RefRCr * uvcosf; 3579 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3580 vco[2] = trans[ref].RefBCb * uvsinf; 3581 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3582 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3583 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3584 3585 //XXX gamma 3586 3587 if (gamma != 1.0) { 3588 needgamma = TRUE; 3589 /* note: gamma correction is out = in ^ gamma; 3590 gpu can only do LG2/EX2 therefore we transform into 3591 in ^ gamma = 2 ^ (log2(in) * gamma). 3592 Lots of scalar ops, unfortunately (better solution?) - 3593 without gamma that's 3 inst, with gamma it's 10... 
3594 could use different gamma factors per channel, 3595 if that's of any use. */ 3596 } 3597 3598 if (pPriv->is_planar) { 3599 BEGIN_ACCEL(56); 3600 /* 2 components: 2 for tex0 */ 3601 OUT_ACCEL_REG(R300_RS_COUNT, 3602 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3603 R300_RS_COUNT_HIRES_EN)); 3604 3605 /* R300_INST_COUNT_RS - highest RS instruction used */ 3606 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3607 3608 /* Pixel stack frame size. */ 3609 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3610 3611 /* FP length. */ 3612 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3613 R500_US_CODE_END_ADDR(5))); 3614 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3615 R500_US_CODE_RANGE_SIZE(5))); 3616 3617 /* Prepare for FP emission. */ 3618 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3619 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3620 3621 /* tex inst */ 3622 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3623 R500_INST_TEX_SEM_WAIT | 3624 R500_INST_RGB_WMASK_R | 3625 R500_INST_RGB_WMASK_G | 3626 R500_INST_RGB_WMASK_B | 3627 R500_INST_ALPHA_WMASK | 3628 R500_INST_RGB_CLAMP | 3629 R500_INST_ALPHA_CLAMP)); 3630 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3631 R500_TEX_INST_LD | 3632 R500_TEX_IGNORE_UNCOVERED)); 3633 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3634 R500_TEX_SRC_S_SWIZ_R | 3635 R500_TEX_SRC_T_SWIZ_G | 3636 R500_TEX_DST_ADDR(2) | 3637 R500_TEX_DST_R_SWIZ_R | 3638 R500_TEX_DST_G_SWIZ_G | 3639 R500_TEX_DST_B_SWIZ_B | 3640 R500_TEX_DST_A_SWIZ_A)); 3641 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3642 R500_DX_S_SWIZ_R | 3643 R500_DX_T_SWIZ_R | 3644 R500_DX_R_SWIZ_R | 3645 R500_DX_Q_SWIZ_R | 3646 R500_DY_ADDR(0) | 3647 R500_DY_S_SWIZ_R | 3648 R500_DY_T_SWIZ_R | 3649 R500_DY_R_SWIZ_R | 3650 R500_DY_Q_SWIZ_R)); 3651 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3652 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3653 3654 /* 
tex inst */ 3655 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3656 R500_INST_TEX_SEM_WAIT | 3657 R500_INST_RGB_WMASK_R | 3658 R500_INST_RGB_WMASK_G | 3659 R500_INST_RGB_WMASK_B | 3660 R500_INST_ALPHA_WMASK | 3661 R500_INST_RGB_CLAMP | 3662 R500_INST_ALPHA_CLAMP)); 3663 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3664 R500_TEX_INST_LD | 3665 R500_TEX_IGNORE_UNCOVERED)); 3666 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3667 R500_TEX_SRC_S_SWIZ_R | 3668 R500_TEX_SRC_T_SWIZ_G | 3669 R500_TEX_DST_ADDR(1) | 3670 R500_TEX_DST_R_SWIZ_R | 3671 R500_TEX_DST_G_SWIZ_G | 3672 R500_TEX_DST_B_SWIZ_B | 3673 R500_TEX_DST_A_SWIZ_A)); 3674 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3675 R500_DX_S_SWIZ_R | 3676 R500_DX_T_SWIZ_R | 3677 R500_DX_R_SWIZ_R | 3678 R500_DX_Q_SWIZ_R | 3679 R500_DY_ADDR(0) | 3680 R500_DY_S_SWIZ_R | 3681 R500_DY_T_SWIZ_R | 3682 R500_DY_R_SWIZ_R | 3683 R500_DY_Q_SWIZ_R)); 3684 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3685 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3686 3687 /* tex inst */ 3688 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3689 R500_INST_TEX_SEM_WAIT | 3690 R500_INST_RGB_WMASK_R | 3691 R500_INST_RGB_WMASK_G | 3692 R500_INST_RGB_WMASK_B | 3693 R500_INST_ALPHA_WMASK | 3694 R500_INST_RGB_CLAMP | 3695 R500_INST_ALPHA_CLAMP)); 3696 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3697 R500_TEX_INST_LD | 3698 R500_TEX_SEM_ACQUIRE | 3699 R500_TEX_IGNORE_UNCOVERED)); 3700 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3701 R500_TEX_SRC_S_SWIZ_R | 3702 R500_TEX_SRC_T_SWIZ_G | 3703 R500_TEX_DST_ADDR(0) | 3704 R500_TEX_DST_R_SWIZ_R | 3705 R500_TEX_DST_G_SWIZ_G | 3706 R500_TEX_DST_B_SWIZ_B | 3707 R500_TEX_DST_A_SWIZ_A)); 3708 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3709 R500_DX_S_SWIZ_R | 3710 R500_DX_T_SWIZ_R | 3711 R500_DX_R_SWIZ_R | 3712 R500_DX_Q_SWIZ_R | 3713 R500_DY_ADDR(0) | 3714 R500_DY_S_SWIZ_R | 3715 R500_DY_T_SWIZ_R | 
3716 R500_DY_R_SWIZ_R | 3717 R500_DY_Q_SWIZ_R)); 3718 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3719 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3720 3721 /* ALU inst */ 3722 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 3723 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3724 R500_INST_TEX_SEM_WAIT | 3725 R500_INST_RGB_WMASK_R | 3726 R500_INST_RGB_WMASK_G | 3727 R500_INST_RGB_WMASK_B | 3728 R500_INST_ALPHA_WMASK)); 3729 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3730 R500_RGB_ADDR0_CONST | 3731 R500_RGB_ADDR1(2) | 3732 R500_RGB_ADDR2(0) | 3733 R500_RGB_ADDR2_CONST)); 3734 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3735 R500_ALPHA_ADDR0_CONST | 3736 R500_ALPHA_ADDR1(2) | 3737 R500_ALPHA_ADDR2(0) | 3738 R500_ALPHA_ADDR2_CONST)); 3739 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3740 R500_ALU_RGB_R_SWIZ_A_A | 3741 R500_ALU_RGB_G_SWIZ_A_A | 3742 R500_ALU_RGB_B_SWIZ_A_A | 3743 R500_ALU_RGB_SEL_B_SRC1 | 3744 R500_ALU_RGB_R_SWIZ_B_R | 3745 R500_ALU_RGB_B_SWIZ_B_G | 3746 R500_ALU_RGB_G_SWIZ_B_B)); 3747 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3748 R500_ALPHA_ADDRD(2) | 3749 R500_ALPHA_SWIZ_A_0 | 3750 R500_ALPHA_SWIZ_B_0)); 3751 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3752 R500_ALU_RGBA_ADDRD(2) | 3753 R500_ALU_RGBA_SEL_C_SRC0 | 3754 R500_ALU_RGBA_R_SWIZ_R | 3755 R500_ALU_RGBA_G_SWIZ_G | 3756 R500_ALU_RGBA_B_SWIZ_B | 3757 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3758 R500_ALU_RGBA_A_SWIZ_0)); 3759 3760 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 3761 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3762 R500_INST_TEX_SEM_WAIT | 3763 R500_INST_RGB_WMASK_R | 3764 R500_INST_RGB_WMASK_G | 3765 R500_INST_RGB_WMASK_B | 3766 R500_INST_ALPHA_WMASK)); 3767 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3768 R500_RGB_ADDR0_CONST | 3769 R500_RGB_ADDR1(1) | 3770 R500_RGB_ADDR2(2))); 3771 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 
(R500_ALPHA_ADDR0(1) | 3772 R500_ALPHA_ADDR0_CONST | 3773 R500_ALPHA_ADDR1(1) | 3774 R500_ALPHA_ADDR2(2))); 3775 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3776 R500_ALU_RGB_R_SWIZ_A_R | 3777 R500_ALU_RGB_G_SWIZ_A_G | 3778 R500_ALU_RGB_B_SWIZ_A_B | 3779 R500_ALU_RGB_SEL_B_SRC1 | 3780 R500_ALU_RGB_R_SWIZ_B_R | 3781 R500_ALU_RGB_B_SWIZ_B_G | 3782 R500_ALU_RGB_G_SWIZ_B_B)); 3783 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3784 R500_ALPHA_ADDRD(2) | 3785 R500_ALPHA_SWIZ_A_0 | 3786 R500_ALPHA_SWIZ_B_0)); 3787 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3788 R500_ALU_RGBA_ADDRD(2) | 3789 R500_ALU_RGBA_SEL_C_SRC2 | 3790 R500_ALU_RGBA_R_SWIZ_R | 3791 R500_ALU_RGBA_G_SWIZ_G | 3792 R500_ALU_RGBA_B_SWIZ_B | 3793 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3794 R500_ALU_RGBA_A_SWIZ_0)); 3795 3796 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 3797 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3798 R500_INST_TEX_SEM_WAIT | 3799 R500_INST_LAST | 3800 R500_INST_RGB_OMASK_R | 3801 R500_INST_RGB_OMASK_G | 3802 R500_INST_RGB_OMASK_B | 3803 R500_INST_ALPHA_OMASK | 3804 R500_INST_RGB_CLAMP | 3805 R500_INST_ALPHA_CLAMP)); 3806 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3807 R500_RGB_ADDR0_CONST | 3808 R500_RGB_ADDR1(0) | 3809 R500_RGB_ADDR2(2))); 3810 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3811 R500_ALPHA_ADDR0_CONST | 3812 R500_ALPHA_ADDR1(0) | 3813 R500_ALPHA_ADDR2(2))); 3814 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3815 R500_ALU_RGB_R_SWIZ_A_R | 3816 R500_ALU_RGB_G_SWIZ_A_G | 3817 R500_ALU_RGB_B_SWIZ_A_B | 3818 R500_ALU_RGB_SEL_B_SRC1 | 3819 R500_ALU_RGB_R_SWIZ_B_R | 3820 R500_ALU_RGB_B_SWIZ_B_G | 3821 R500_ALU_RGB_G_SWIZ_B_B)); 3822 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3823 R500_ALPHA_ADDRD(0) | 3824 R500_ALPHA_SWIZ_A_0 | 3825 R500_ALPHA_SWIZ_B_0)); 3826 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3827 
R500_ALU_RGBA_ADDRD(0) | 3828 R500_ALU_RGBA_SEL_C_SRC2 | 3829 R500_ALU_RGBA_R_SWIZ_R | 3830 R500_ALU_RGBA_G_SWIZ_G | 3831 R500_ALU_RGBA_B_SWIZ_B | 3832 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3833 R500_ALU_RGBA_A_SWIZ_1)); 3834 3835 } else { 3836 BEGIN_ACCEL(44); 3837 /* 2 components: 2 for tex0/1/2 */ 3838 OUT_ACCEL_REG(R300_RS_COUNT, 3839 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3840 R300_RS_COUNT_HIRES_EN)); 3841 3842 /* R300_INST_COUNT_RS - highest RS instruction used */ 3843 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3844 3845 /* Pixel stack frame size. */ 3846 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3847 3848 /* FP length. */ 3849 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3850 R500_US_CODE_END_ADDR(3))); 3851 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3852 R500_US_CODE_RANGE_SIZE(3))); 3853 3854 /* Prepare for FP emission. */ 3855 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3856 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3857 3858 /* tex inst */ 3859 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3860 R500_INST_TEX_SEM_WAIT | 3861 R500_INST_RGB_WMASK_R | 3862 R500_INST_RGB_WMASK_G | 3863 R500_INST_RGB_WMASK_B | 3864 R500_INST_ALPHA_WMASK | 3865 R500_INST_RGB_CLAMP | 3866 R500_INST_ALPHA_CLAMP)); 3867 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3868 R500_TEX_INST_LD | 3869 R500_TEX_SEM_ACQUIRE | 3870 R500_TEX_IGNORE_UNCOVERED)); 3871 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3872 R500_TEX_SRC_S_SWIZ_R | 3873 R500_TEX_SRC_T_SWIZ_G | 3874 R500_TEX_DST_ADDR(0) | 3875 R500_TEX_DST_R_SWIZ_R | 3876 R500_TEX_DST_G_SWIZ_G | 3877 R500_TEX_DST_B_SWIZ_B | 3878 R500_TEX_DST_A_SWIZ_A)); 3879 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3880 R500_DX_S_SWIZ_R | 3881 R500_DX_T_SWIZ_R | 3882 R500_DX_R_SWIZ_R | 3883 R500_DX_Q_SWIZ_R | 3884 R500_DY_ADDR(0) | 3885 R500_DY_S_SWIZ_R | 3886 R500_DY_T_SWIZ_R | 3887 R500_DY_R_SWIZ_R | 3888 
R500_DY_Q_SWIZ_R)); 3889 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3890 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3891 3892 /* ALU inst */ 3893 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 3894 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3895 R500_INST_TEX_SEM_WAIT | 3896 R500_INST_RGB_WMASK_R | 3897 R500_INST_RGB_WMASK_G | 3898 R500_INST_RGB_WMASK_B | 3899 R500_INST_ALPHA_WMASK)); 3900 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3901 R500_RGB_ADDR0_CONST | 3902 R500_RGB_ADDR1(0) | 3903 R500_RGB_ADDR2(0) | 3904 R500_RGB_ADDR2_CONST)); 3905 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3906 R500_ALPHA_ADDR0_CONST | 3907 R500_ALPHA_ADDR1(0) | 3908 R500_ALPHA_ADDR2(0) | 3909 R500_ALPHA_ADDR2_CONST)); 3910 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3911 R500_ALU_RGB_R_SWIZ_A_A | 3912 R500_ALU_RGB_G_SWIZ_A_A | 3913 R500_ALU_RGB_B_SWIZ_A_A | 3914 R500_ALU_RGB_SEL_B_SRC1 | 3915 R500_ALU_RGB_R_SWIZ_B_G | 3916 R500_ALU_RGB_B_SWIZ_B_G | 3917 R500_ALU_RGB_G_SWIZ_B_G)); 3918 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3919 R500_ALPHA_ADDRD(1) | 3920 R500_ALPHA_SWIZ_A_0 | 3921 R500_ALPHA_SWIZ_B_0)); 3922 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3923 R500_ALU_RGBA_ADDRD(1) | 3924 R500_ALU_RGBA_SEL_C_SRC0 | 3925 R500_ALU_RGBA_R_SWIZ_R | 3926 R500_ALU_RGBA_G_SWIZ_G | 3927 R500_ALU_RGBA_B_SWIZ_B | 3928 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3929 R500_ALU_RGBA_A_SWIZ_0)); 3930 3931 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 3932 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3933 R500_INST_TEX_SEM_WAIT | 3934 R500_INST_RGB_WMASK_R | 3935 R500_INST_RGB_WMASK_G | 3936 R500_INST_RGB_WMASK_B | 3937 R500_INST_ALPHA_WMASK)); 3938 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3939 R500_RGB_ADDR0_CONST | 3940 R500_RGB_ADDR1(0) | 3941 R500_RGB_ADDR2(1))); 3942 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3943 
R500_ALPHA_ADDR0_CONST | 3944 R500_ALPHA_ADDR1(0) | 3945 R500_ALPHA_ADDR2(1))); 3946 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3947 R500_ALU_RGB_R_SWIZ_A_R | 3948 R500_ALU_RGB_G_SWIZ_A_G | 3949 R500_ALU_RGB_B_SWIZ_A_B | 3950 R500_ALU_RGB_SEL_B_SRC1 | 3951 R500_ALU_RGB_R_SWIZ_B_B | 3952 R500_ALU_RGB_B_SWIZ_B_B | 3953 R500_ALU_RGB_G_SWIZ_B_B)); 3954 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3955 R500_ALPHA_ADDRD(1) | 3956 R500_ALPHA_SWIZ_A_0 | 3957 R500_ALPHA_SWIZ_B_0)); 3958 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3959 R500_ALU_RGBA_ADDRD(1) | 3960 R500_ALU_RGBA_SEL_C_SRC2 | 3961 R500_ALU_RGBA_R_SWIZ_R | 3962 R500_ALU_RGBA_G_SWIZ_G | 3963 R500_ALU_RGBA_B_SWIZ_B | 3964 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3965 R500_ALU_RGBA_A_SWIZ_0)); 3966 3967 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 3968 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3969 R500_INST_TEX_SEM_WAIT | 3970 R500_INST_LAST | 3971 R500_INST_RGB_OMASK_R | 3972 R500_INST_RGB_OMASK_G | 3973 R500_INST_RGB_OMASK_B | 3974 R500_INST_ALPHA_OMASK | 3975 R500_INST_RGB_CLAMP | 3976 R500_INST_ALPHA_CLAMP)); 3977 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3978 R500_RGB_ADDR0_CONST | 3979 R500_RGB_ADDR1(0) | 3980 R500_RGB_ADDR2(1))); 3981 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3982 R500_ALPHA_ADDR0_CONST | 3983 R500_ALPHA_ADDR1(0) | 3984 R500_ALPHA_ADDR2(1))); 3985 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3986 R500_ALU_RGB_R_SWIZ_A_R | 3987 R500_ALU_RGB_G_SWIZ_A_G | 3988 R500_ALU_RGB_B_SWIZ_A_B | 3989 R500_ALU_RGB_SEL_B_SRC1 | 3990 R500_ALU_RGB_R_SWIZ_B_R | 3991 R500_ALU_RGB_B_SWIZ_B_R | 3992 R500_ALU_RGB_G_SWIZ_B_R)); 3993 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3994 R500_ALPHA_ADDRD(1) | 3995 R500_ALPHA_SWIZ_A_0 | 3996 R500_ALPHA_SWIZ_B_0)); 3997 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3998 R500_ALU_RGBA_ADDRD(1) | 3999 
R500_ALU_RGBA_SEL_C_SRC2 | 4000 R500_ALU_RGBA_R_SWIZ_R | 4001 R500_ALU_RGBA_G_SWIZ_G | 4002 R500_ALU_RGBA_B_SWIZ_B | 4003 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 4004 R500_ALU_RGBA_A_SWIZ_1)); 4005 } 4006 4007 /* Shader constants. */ 4008 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 4009 4010 /* constant 0: off, yco */ 4011 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 4012 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 4013 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 4014 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 4015 /* constant 1: uco */ 4016 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 4017 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 4018 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 4019 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 4020 /* constant 2: vco */ 4021 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 4022 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 4023 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 4024 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 4025 4026 FINISH_ACCEL(); 4027 } 4028 4029 BEGIN_ACCEL_RELOC(6, 2); 4030 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 4031 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 4032 4033 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 4034 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 4035 4036 /* no need to enable blending */ 4037 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 4038 4039 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 4040 FINISH_ACCEL(); 4041 4042 if (pPriv->vsync) { 4043 xf86CrtcPtr crtc; 4044 if (pPriv->desired_crtc) 4045 crtc = pPriv->desired_crtc; 4046 else 4047 crtc = radeon_pick_best_crtc(pScrn, 4048 pPriv->drw_x, 4049 pPriv->drw_x + pPriv->dst_w, 4050 pPriv->drw_y, 4051 pPriv->drw_y + pPriv->dst_h); 4052 if (crtc) 4053 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 4054 crtc, 4055 pPriv->drw_y - crtc->y, 4056 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 4057 } 4058 4059 return TRUE; 
4060} 4061 4062static void 4063FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 4064{ 4065 RADEONInfoPtr info = RADEONPTR(pScrn); 4066 PixmapPtr pPixmap = pPriv->pPixmap; 4067 int dstxoff, dstyoff; 4068 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 4069 int nBox = REGION_NUM_RECTS(&pPriv->clip); 4070 ACCEL_PREAMBLE(); 4071 4072#ifdef COMPOSITE 4073 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 4074 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 4075#else 4076 dstxoff = 0; 4077 dstyoff = 0; 4078#endif 4079 4080 if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4081 return; 4082 4083 /* 4084 * Rendering of the actual polygon is done in two different 4085 * ways depending on chip generation: 4086 * 4087 * < R300: 4088 * 4089 * These chips can render a rectangle in one pass, so 4090 * handling is pretty straight-forward. 4091 * 4092 * >= R300: 4093 * 4094 * These chips can accept a quad, but will render it as 4095 * two triangles which results in a diagonal tear. Instead 4096 * We render a single, large triangle and use the scissor 4097 * functionality to restrict it to the desired rectangle. 4098 * Due to guardband limits on r3xx/r4xx, we can only use 4099 * the single triangle up to 2880 pixels; above that we 4100 * render as a quad. 
4101 */ 4102 4103 while (nBox--) { 4104 int srcX, srcY, srcw, srch; 4105 int dstX, dstY, dstw, dsth; 4106#ifdef ACCEL_CP 4107 int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 4108 4109 if (draw_size > radeon_cs_space_remaining(pScrn)) { 4110 if (info->cs) 4111 radeon_cs_flush_indirect(pScrn); 4112 else 4113 RADEONCPFlushIndirect(pScrn, 1); 4114 if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4115 return; 4116 } 4117#endif 4118 4119 dstX = pBox->x1 + dstxoff; 4120 dstY = pBox->y1 + dstyoff; 4121 dstw = pBox->x2 - pBox->x1; 4122 dsth = pBox->y2 - pBox->y1; 4123 4124 srcX = pPriv->src_x; 4125 srcX += ((pBox->x1 - pPriv->drw_x) * 4126 pPriv->src_w) / pPriv->dst_w; 4127 srcY = pPriv->src_y; 4128 srcY += ((pBox->y1 - pPriv->drw_y) * 4129 pPriv->src_h) / pPriv->dst_h; 4130 4131 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 4132 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 4133 4134 BEGIN_ACCEL(2); 4135 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 4136 ((dstY) << R300_SCISSOR_Y_SHIFT))); 4137 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 4138 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 4139 FINISH_ACCEL(); 4140 4141#ifdef ACCEL_CP 4142 BEGIN_RING(3 * pPriv->vtx_count + 4); 4143 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 4144 3 * pPriv->vtx_count)); 4145 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 4146 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 4147 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 4148#else /* ACCEL_CP */ 4149 BEGIN_ACCEL(2 + pPriv->vtx_count * 3); 4150 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 4151 RADEON_VF_PRIM_WALK_DATA | 4152 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 4153#endif 4154 if (pPriv->bicubic_enabled) { 4155 VTX_OUT_6((float)dstX, (float)dstY, 4156 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 4157 (float)srcX + 0.5, (float)srcY + 0.5); 4158 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 4159 (float)srcX / pPriv->w, ((float)srcY + (float)srch * 
(((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 4160 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 4161 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 4162 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4163 (float)srcY / pPriv->h, 4164 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 4165 (float)srcY + 0.5); 4166 } else { 4167 /* 4168 * Render a big, scissored triangle. This means 4169 * increasing the triangle size and adjusting 4170 * texture coordinates. 4171 */ 4172 VTX_OUT_4((float)dstX, (float)dstY, 4173 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 4174 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 4175 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 4176 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 4177 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4178 (float)srcY / pPriv->h); 4179 } 4180 4181 /* flushing is pipelined, free/finish is not */ 4182 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 4183 4184#ifdef ACCEL_CP 4185 ADVANCE_RING(); 4186#else 4187 FINISH_ACCEL(); 4188#endif /* !ACCEL_CP */ 4189 4190 pBox++; 4191 } 4192 4193 BEGIN_ACCEL(3); 4194 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 4195 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 4196 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 4197 FINISH_ACCEL(); 4198 4199 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 4200} 4201 4202#undef VTX_OUT_4 4203#undef VTX_OUT_6 4204#undef FUNC_NAME 4205