1/* 2 * Copyright 2008 Alex Deucher 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * 24 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 25 * 26 */ 27 28#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 29#error Cannot define both MMIO and CP acceleration! 30#endif 31 32#if !defined(UNIXCPP) || defined(ANSICPP) 33#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 34#else 35#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 36#endif 37 38#ifdef ACCEL_MMIO 39#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 40#else 41#ifdef ACCEL_CP 42#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 43#else 44#error No accel type defined! 
45#endif 46#endif 47 48#ifdef ACCEL_CP 49 50#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 51do { \ 52 OUT_RING_F(_dstX); \ 53 OUT_RING_F(_dstY); \ 54 OUT_RING_F(_srcX); \ 55 OUT_RING_F(_srcY); \ 56 OUT_RING_F(_maskX); \ 57 OUT_RING_F(_maskY); \ 58} while (0) 59 60#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 61do { \ 62 OUT_RING_F(_dstX); \ 63 OUT_RING_F(_dstY); \ 64 OUT_RING_F(_srcX); \ 65 OUT_RING_F(_srcY); \ 66} while (0) 67 68#else /* ACCEL_CP */ 69 70#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 71do { \ 72 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 73 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 74 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 75 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 76 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX); \ 77 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ 78} while (0) 79 80#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 81do { \ 82 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ 83 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ 84 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ 85 OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ 86} while (0) 87 88#endif /* !ACCEL_CP */ 89 90static Bool 91FUNC_NAME(RADEONPrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 92{ 93 RADEONInfoPtr info = RADEONPTR(pScrn); 94 PixmapPtr pPixmap = pPriv->pPixmap; 95 struct radeon_exa_pixmap_priv *driver_priv; 96 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 97 uint32_t txformat, txsize, txpitch, txoffset; 98 uint32_t dst_pitch, dst_format; 99 uint32_t colorpitch; 100 int pixel_shift; 101 int scissor_w = MIN(pPixmap->drawable.width, 2047); 102 int scissor_h = MIN(pPixmap->drawable.height, 2047); 103 ACCEL_PREAMBLE(); 104 105#ifdef XF86DRM_MODE 106 if (info->cs) { 107 int ret; 108 109 radeon_cs_space_reset_bos(info->cs); 110 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 111 112 if (pPriv->bicubic_enabled) 113 
radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 114 115 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 116 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 117 118 ret = radeon_cs_space_check(info->cs); 119 if (ret) { 120 ErrorF("Not enough RAM to hw accel xv operation\n"); 121 return FALSE; 122 } 123 } 124#else 125 (void)src_bo; 126#endif 127 128 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 129 130 131#ifdef USE_EXA 132 if (info->useEXA) { 133 dst_pitch = exaGetPixmapPitch(pPixmap); 134 } else 135#endif 136 { 137 dst_pitch = pPixmap->devKind; 138 } 139 140#ifdef USE_EXA 141 if (info->useEXA) { 142 RADEON_SWITCH_TO_3D(); 143 } else 144#endif 145 { 146 BEGIN_ACCEL(2); 147 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 148 /* We must wait for 3d to idle, in case source was just written as a dest. */ 149 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 150 RADEON_WAIT_HOST_IDLECLEAN | 151 RADEON_WAIT_2D_IDLECLEAN | 152 RADEON_WAIT_3D_IDLECLEAN | 153 RADEON_WAIT_DMA_GUI_IDLE); 154 FINISH_ACCEL(); 155 156 if (!info->accel_state->XInited3D) 157 RADEONInit3DEngine(pScrn); 158 } 159 160 /* Same for R100/R200 */ 161 switch (pPixmap->drawable.bitsPerPixel) { 162 case 16: 163 if (pPixmap->drawable.depth == 15) 164 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 165 else 166 dst_format = RADEON_COLOR_FORMAT_RGB565; 167 break; 168 case 32: 169 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 170 break; 171 default: 172 return FALSE; 173 } 174 175 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 176 pPriv->is_planar = TRUE; 177 txformat = RADEON_TXFORMAT_Y8; 178 } else { 179 pPriv->is_planar = FALSE; 180 if (pPriv->id == FOURCC_UYVY) 181 txformat = RADEON_TXFORMAT_YVYU422; 182 else 183 txformat = RADEON_TXFORMAT_VYUY422; 184 } 185 186 txformat |= RADEON_TXFORMAT_NON_POWER2; 187 188 colorpitch = dst_pitch >> pixel_shift; 189 190 if (RADEONTilingEnabled(pScrn, 
pPixmap)) 191 colorpitch |= RADEON_COLOR_TILE_ENABLE; 192 193 txoffset = info->cs ? 0 : pPriv->src_offset; 194 195 BEGIN_ACCEL_RELOC(4,2); 196 197 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 198 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 199 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 200 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 201 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 202 203 FINISH_ACCEL(); 204 205 if (pPriv->is_planar) { 206 /* need 2 texcoord sets (even though they are identical) due 207 to denormalization! hw apparently can't premultiply 208 same coord set by different texture size */ 209 pPriv->vtx_count = 6; 210 211 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 212 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 213 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 214 txpitch -= 32; 215 216 BEGIN_ACCEL_RELOC(23, 3); 217 218 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 219 RADEON_SE_VTX_FMT_ST0 | 220 RADEON_SE_VTX_FMT_ST1)); 221 222 OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 223 RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 224 RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 225 RADEON_PLANAR_YUV_ENABLE)); 226 227 /* Y */ 228 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 229 RADEON_MAG_FILTER_LINEAR | 230 RADEON_MIN_FILTER_LINEAR | 231 RADEON_CLAMP_S_CLAMP_LAST | 232 RADEON_CLAMP_T_CLAMP_LAST | 233 RADEON_YUV_TO_RGB); 234 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 235 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 236 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 237 RADEON_COLOR_ARG_A_ZERO | 238 RADEON_COLOR_ARG_B_ZERO | 239 RADEON_COLOR_ARG_C_T0_COLOR | 240 RADEON_BLEND_CTL_ADD | 241 RADEON_CLAMP_TX); 242 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 243 RADEON_ALPHA_ARG_A_ZERO | 244 RADEON_ALPHA_ARG_B_ZERO | 245 RADEON_ALPHA_ARG_C_T0_ALPHA | 246 RADEON_BLEND_CTL_ADD | 247 RADEON_CLAMP_TX); 248 249 
OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 250 (pPriv->w - 1) | 251 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 252 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 253 pPriv->src_pitch - 32); 254 255 /* U */ 256 OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, 257 RADEON_MAG_FILTER_LINEAR | 258 RADEON_MIN_FILTER_LINEAR | 259 RADEON_CLAMP_S_CLAMP_LAST | 260 RADEON_CLAMP_T_CLAMP_LAST); 261 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 262 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 263 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1, 264 RADEON_COLOR_ARG_A_ZERO | 265 RADEON_COLOR_ARG_B_ZERO | 266 RADEON_COLOR_ARG_C_T0_COLOR | 267 RADEON_BLEND_CTL_ADD | 268 RADEON_CLAMP_TX); 269 OUT_ACCEL_REG(RADEON_PP_TXABLEND_1, 270 RADEON_ALPHA_ARG_A_ZERO | 271 RADEON_ALPHA_ARG_B_ZERO | 272 RADEON_ALPHA_ARG_C_T0_ALPHA | 273 RADEON_BLEND_CTL_ADD | 274 RADEON_CLAMP_TX); 275 276 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize); 277 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch); 278 279 /* V */ 280 OUT_ACCEL_REG(RADEON_PP_TXFILTER_2, 281 RADEON_MAG_FILTER_LINEAR | 282 RADEON_MIN_FILTER_LINEAR | 283 RADEON_CLAMP_S_CLAMP_LAST | 284 RADEON_CLAMP_T_CLAMP_LAST); 285 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 286 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 287 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2, 288 RADEON_COLOR_ARG_A_ZERO | 289 RADEON_COLOR_ARG_B_ZERO | 290 RADEON_COLOR_ARG_C_T0_COLOR | 291 RADEON_BLEND_CTL_ADD | 292 RADEON_CLAMP_TX); 293 OUT_ACCEL_REG(RADEON_PP_TXABLEND_2, 294 RADEON_ALPHA_ARG_A_ZERO | 295 RADEON_ALPHA_ARG_B_ZERO | 296 RADEON_ALPHA_ARG_C_T0_ALPHA | 297 RADEON_BLEND_CTL_ADD | 298 RADEON_CLAMP_TX); 299 300 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize); 301 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch); 302 FINISH_ACCEL(); 303 } else { 304 pPriv->vtx_count = 4; 305 BEGIN_ACCEL_RELOC(9, 1); 306 307 OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 308 RADEON_SE_VTX_FMT_ST0)); 309 
310 OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 311 312 OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, 313 RADEON_MAG_FILTER_LINEAR | 314 RADEON_MIN_FILTER_LINEAR | 315 RADEON_CLAMP_S_CLAMP_LAST | 316 RADEON_CLAMP_T_CLAMP_LAST | 317 RADEON_YUV_TO_RGB); 318 OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 319 OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo); 320 OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, 321 RADEON_COLOR_ARG_A_ZERO | 322 RADEON_COLOR_ARG_B_ZERO | 323 RADEON_COLOR_ARG_C_T0_COLOR | 324 RADEON_BLEND_CTL_ADD | 325 RADEON_CLAMP_TX); 326 OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, 327 RADEON_ALPHA_ARG_A_ZERO | 328 RADEON_ALPHA_ARG_B_ZERO | 329 RADEON_ALPHA_ARG_C_T0_ALPHA | 330 RADEON_BLEND_CTL_ADD | 331 RADEON_CLAMP_TX); 332 333 OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, 334 (pPriv->w - 1) | 335 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 336 OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, 337 pPriv->src_pitch - 32); 338 FINISH_ACCEL(); 339 } 340 341 BEGIN_ACCEL(2); 342 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 343 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 344 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 345 FINISH_ACCEL(); 346 347 if (pPriv->vsync) { 348 xf86CrtcPtr crtc; 349 if (pPriv->desired_crtc) 350 crtc = pPriv->desired_crtc; 351 else 352 crtc = radeon_pick_best_crtc(pScrn, 353 pPriv->drw_x, 354 pPriv->drw_x + pPriv->dst_w, 355 pPriv->drw_y, 356 pPriv->drw_y + pPriv->dst_h); 357 if (crtc) 358 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 359 crtc, 360 pPriv->drw_y - crtc->y, 361 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 362 } 363 364 return TRUE; 365} 366 367static void 368FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 369{ 370 RADEONInfoPtr info = RADEONPTR(pScrn); 371 PixmapPtr pPixmap = pPriv->pPixmap; 372 int dstxoff, dstyoff; 373 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 374 int nBox = REGION_NUM_RECTS(&pPriv->clip); 375 ACCEL_PREAMBLE(); 376 377#ifdef 
COMPOSITE 378 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 379 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 380#else 381 dstxoff = 0; 382 dstyoff = 0; 383#endif 384 385 if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 386 return; 387 388 /* 389 * Rendering of the actual polygon is done in two different 390 * ways depending on chip generation: 391 * 392 * < R300: 393 * 394 * These chips can render a rectangle in one pass, so 395 * handling is pretty straight-forward. 396 * 397 * >= R300: 398 * 399 * These chips can accept a quad, but will render it as 400 * two triangles which results in a diagonal tear. Instead 401 * We render a single, large triangle and use the scissor 402 * functionality to restrict it to the desired rectangle. 403 * Due to guardband limits on r3xx/r4xx, we can only use 404 * the single triangle up to 2560/4021 pixels; above that we 405 * render as a quad. 406 */ 407#ifdef ACCEL_CP 408 while (nBox) { 409 int draw_size = 3 * pPriv->vtx_count + 5; 410 int loop_boxes; 411 412 if (draw_size > radeon_cs_space_remaining(pScrn)) { 413 if (info->cs) 414 radeon_cs_flush_indirect(pScrn); 415 else 416 RADEONCPFlushIndirect(pScrn, 1); 417 if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv)) 418 return; 419 } 420 loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 421 nBox -= loop_boxes; 422 423 BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 424 OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 425 loop_boxes * 3 * pPriv->vtx_count + 1)); 426 if (pPriv->is_planar) 427 OUT_RING(RADEON_CP_VC_FRMT_XY | 428 RADEON_CP_VC_FRMT_ST0 | 429 RADEON_CP_VC_FRMT_ST1); 430 else 431 OUT_RING(RADEON_CP_VC_FRMT_XY | 432 RADEON_CP_VC_FRMT_ST0); 433 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 434 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 435 RADEON_CP_VC_CNTL_MAOS_ENABLE | 436 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 437 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 438 439 while (loop_boxes--) { 440 float srcX, srcY, srcw, 
srch; 441 int dstX, dstY, dstw, dsth; 442 dstX = pBox->x1 + dstxoff; 443 dstY = pBox->y1 + dstyoff; 444 dstw = pBox->x2 - pBox->x1; 445 dsth = pBox->y2 - pBox->y1; 446 447 srcX = pPriv->src_x; 448 srcX += ((pBox->x1 - pPriv->drw_x) * 449 pPriv->src_w) / (float)pPriv->dst_w; 450 srcY = pPriv->src_y; 451 srcY += ((pBox->y1 - pPriv->drw_y) * 452 pPriv->src_h) / (float)pPriv->dst_h; 453 454 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 455 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 456 457 458 if (pPriv->is_planar) { 459 /* 460 * Just render a rect (using three coords). 461 */ 462 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 463 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 464 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 465 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 466 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 467 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 468 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 469 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 470 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 471 } else { 472 /* 473 * Just render a rect (using three coords). 
474 */ 475 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 476 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 477 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 478 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 479 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 480 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 481 } 482 483 pBox++; 484 } 485 486 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 487 ADVANCE_RING(); 488 } 489#else /* ACCEL_CP */ 490 BEGIN_ACCEL(nBox * pPriv->vtx_count * 3 + 2); 491 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | 492 RADEON_VF_PRIM_WALK_DATA | 493 RADEON_VF_RADEON_MODE | 494 ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); 495 while (nBox--) { 496 float srcX, srcY, srcw, srch; 497 int dstX, dstY, dstw, dsth; 498 dstX = pBox->x1 + dstxoff; 499 dstY = pBox->y1 + dstyoff; 500 dstw = pBox->x2 - pBox->x1; 501 dsth = pBox->y2 - pBox->y1; 502 503 srcX = pPriv->src_x; 504 srcX += ((pBox->x1 - pPriv->drw_x) * 505 pPriv->src_w) / (float)pPriv->dst_w; 506 srcY = pPriv->src_y; 507 srcY += ((pBox->y1 - pPriv->drw_y) * 508 pPriv->src_h) / (float)pPriv->dst_h; 509 510 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 511 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 512 513 514 if (pPriv->is_planar) { 515 /* 516 * Just render a rect (using three coords). 517 */ 518 VTX_OUT_6((float)dstX, (float)(dstY + dsth), 519 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 520 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 521 VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 522 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 523 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 524 VTX_OUT_6((float)(dstX + dstw), (float)dstY, 525 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 526 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 527 } else { 528 /* 529 * Just render a rect (using three coords). 
530 */ 531 VTX_OUT_4((float)dstX, (float)(dstY + dsth), 532 (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 533 VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 534 (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 535 VTX_OUT_4((float)(dstX + dstw), (float)dstY, 536 (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 537 } 538 539 pBox++; 540 } 541 542 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 543 FINISH_ACCEL(); 544#endif /* !ACCEL_CP */ 545 546 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 547} 548 549static Bool 550FUNC_NAME(R200PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 551{ 552 RADEONInfoPtr info = RADEONPTR(pScrn); 553 PixmapPtr pPixmap = pPriv->pPixmap; 554 struct radeon_exa_pixmap_priv *driver_priv; 555 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 556 uint32_t txformat; 557 uint32_t txfilter, txsize, txpitch, txoffset; 558 uint32_t dst_pitch, dst_format; 559 uint32_t colorpitch; 560 int pixel_shift; 561 int scissor_w = MIN(pPixmap->drawable.width, 2047); 562 int scissor_h = MIN(pPixmap->drawable.height, 2047); 563 /* note: in contrast to r300, use input biasing on uv components */ 564 const float Loff = -0.0627; 565 float uvcosf, uvsinf; 566 float yco, yoff; 567 float uco[3], vco[3]; 568 float bright, cont, sat; 569 int ref = pPriv->transform_index; 570 float ucscale = 0.25, vcscale = 0.25; 571 Bool needux8 = FALSE, needvx8 = FALSE; 572 ACCEL_PREAMBLE(); 573 574#ifdef XF86DRM_MODE 575 if (info->cs) { 576 int ret; 577 578 radeon_cs_space_reset_bos(info->cs); 579 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 580 581 if (pPriv->bicubic_enabled) 582 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 583 584 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 585 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, 
RADEON_GEM_DOMAIN_VRAM); 586 587 ret = radeon_cs_space_check(info->cs); 588 if (ret) { 589 ErrorF("Not enough RAM to hw accel xv operation\n"); 590 return FALSE; 591 } 592 } 593#else 594 (void)src_bo; 595#endif 596 597 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 598 599#ifdef USE_EXA 600 if (info->useEXA) { 601 dst_pitch = exaGetPixmapPitch(pPixmap); 602 } else 603#endif 604 { 605 dst_pitch = pPixmap->devKind; 606 } 607 608#ifdef USE_EXA 609 if (info->useEXA) { 610 RADEON_SWITCH_TO_3D(); 611 } else 612#endif 613 { 614 BEGIN_ACCEL(2); 615 OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); 616 /* We must wait for 3d to idle, in case source was just written as a dest. */ 617 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 618 RADEON_WAIT_HOST_IDLECLEAN | 619 RADEON_WAIT_2D_IDLECLEAN | 620 RADEON_WAIT_3D_IDLECLEAN | 621 RADEON_WAIT_DMA_GUI_IDLE); 622 FINISH_ACCEL(); 623 624 if (!info->accel_state->XInited3D) 625 RADEONInit3DEngine(pScrn); 626 } 627 628 /* Same for R100/R200 */ 629 switch (pPixmap->drawable.bitsPerPixel) { 630 case 16: 631 if (pPixmap->drawable.depth == 15) 632 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 633 else 634 dst_format = RADEON_COLOR_FORMAT_RGB565; 635 break; 636 case 32: 637 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 638 break; 639 default: 640 return FALSE; 641 } 642 643 if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 644 pPriv->is_planar = TRUE; 645 txformat = RADEON_TXFORMAT_I8; 646 } else { 647 pPriv->is_planar = FALSE; 648 if (pPriv->id == FOURCC_UYVY) 649 txformat = RADEON_TXFORMAT_YVYU422; 650 else 651 txformat = RADEON_TXFORMAT_VYUY422; 652 } 653 654 txformat |= RADEON_TXFORMAT_NON_POWER2; 655 656 colorpitch = dst_pitch >> pixel_shift; 657 658 if (RADEONTilingEnabled(pScrn, pPixmap)) 659 colorpitch |= RADEON_COLOR_TILE_ENABLE; 660 661 BEGIN_ACCEL_RELOC(4,2); 662 663 OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); 664 EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 665 EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, 
colorpitch, pPixmap); 666 667 OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, 668 RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 669 670 FINISH_ACCEL(); 671 672 txfilter = R200_MAG_FILTER_LINEAR | 673 R200_MIN_FILTER_LINEAR | 674 R200_CLAMP_S_CLAMP_LAST | 675 R200_CLAMP_T_CLAMP_LAST; 676 677 /* contrast can cause constant overflow, clamp */ 678 cont = RTFContrast(pPriv->contrast); 679 if (cont * trans[ref].RefLuma > 2.0) 680 cont = 2.0 / trans[ref].RefLuma; 681 /* brightness is only from -0.5 to 0.5 should be safe */ 682 bright = RTFBrightness(pPriv->brightness); 683 /* saturation can also cause overflow, clamp */ 684 sat = RTFSaturation(pPriv->saturation); 685 if (sat * trans[ref].RefBCb > 4.0) 686 sat = 4.0 / trans[ref].RefBCb; 687 uvcosf = sat * cos(RTFHue(pPriv->hue)); 688 uvsinf = sat * sin(RTFHue(pPriv->hue)); 689 690 yco = trans[ref].RefLuma * cont; 691 uco[0] = -trans[ref].RefRCr * uvsinf; 692 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 693 uco[2] = trans[ref].RefBCb * uvcosf; 694 vco[0] = trans[ref].RefRCr * uvcosf; 695 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 696 vco[2] = trans[ref].RefBCb * uvsinf; 697 yoff = Loff * yco + bright; 698 699 if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 700 needux8 = TRUE; 701 ucscale = 0.125; 702 } 703 if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 704 needvx8 = TRUE; 705 vcscale = 0.125; 706 } 707 708 txoffset = info->cs ? 0 : pPriv->src_offset; 709 710 if (pPriv->is_planar) { 711 /* need 2 texcoord sets (even though they are identical) due 712 to denormalization! 
hw apparently can't premultiply 713 same coord set by different texture size */ 714 pPriv->vtx_count = 6; 715 716 txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 717 (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 718 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 719 txpitch -= 32; 720 721 BEGIN_ACCEL_RELOC(36, 3); 722 723 OUT_ACCEL_REG(RADEON_PP_CNTL, 724 RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 725 RADEON_TEX_BLEND_0_ENABLE | 726 RADEON_TEX_BLEND_1_ENABLE | 727 RADEON_TEX_BLEND_2_ENABLE); 728 729 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 730 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 731 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 732 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 733 734 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 735 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 736 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 737 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 738 (pPriv->w - 1) | 739 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 740 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 741 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 742 743 OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); 744 OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 745 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); 746 OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize); 747 OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); 748 OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 749 750 OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); 751 OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 752 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); 753 OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize); 754 OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); 755 OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo); 756 757 /* similar to r300 code. Note the big problem is that hardware constants 758 * are 8 bits only, representing 0.0-1.0. 
We can get that up (using bias 759 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 760 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 761 * the constants not. To get larger range can use output scale, but for 762 * that 2.018 value we need a total scale by 8, which means the constants 763 * really have no accuracy whatsoever (5 fractional bits only). 764 * The only direct way to get high precision "constants" into the fragment 765 * pipe I know of is to use the texcoord interpolator (not color, this one 766 * is 8 bit only too), which seems a bit expensive. We're lucky though it 767 * seems the values we need seem to fit better than worst case (get about 768 * 6 fractional bits for this instead of 5, at least when not correcting for 769 * hue/saturation/contrast/brightness, which is the same as for vco - yco and 770 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 771 * even with non-default saturation/hue/contrast/brightness adjustments, 772 * it gets a little crazy and ultimately precision might still be lacking. 773 * 774 * A higher precision (8 fractional bits) version might just put uco into 775 * a texcoord, and calculate a new vcoconst in the shader, like so: 776 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 777 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 778 * vcocalc = ADD temp, bias/scale(cohelper), vco 779 * would in total use 4 tex units, 4 instructions which seems fairly 780 * balanced for this architecture (instead of 3 + 3 for the solution here) 781 * 782 * temp = MAD(yco, yuv.yyyy, yoff) 783 * temp = MAD(uco, yuv.uuuu, temp) 784 * result = MAD(vco, yuv.vvvv, temp) 785 * 786 * note first mad produces actually scalar, hence we transform 787 * it into a dp2a to get 8 bit precision of yco instead of 7 - 788 * That's assuming hw correctly expands consts to internal precision. 
789 * (y * 1 + y * (yco - 1) + yoff) 790 * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 791 * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 792 * result = MAD x2 (vco / 2, yuv.vvvv, temp) 793 * 794 * vco, uco need bias (and hence scale too) 795 * 796 */ 797 798 /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 799 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 800 R200_TXC_ARG_A_TFACTOR_COLOR | 801 R200_TXC_ARG_B_R0_COLOR | 802 R200_TXC_ARG_C_TFACTOR_COLOR | 803 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 804 R200_TXC_OP_DOT2_ADD); 805 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 806 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 807 R200_TXC_SCALE_INV2 | 808 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 809 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 810 R200_TXA_ARG_A_ZERO | 811 R200_TXA_ARG_B_ZERO | 812 R200_TXA_ARG_C_ZERO | 813 R200_TXA_OP_MADD); 814 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 815 R200_TXA_OUTPUT_REG_NONE); 816 817 /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 818 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 819 R200_TXC_ARG_A_TFACTOR_COLOR | 820 R200_TXC_BIAS_ARG_A | 821 R200_TXC_SCALE_ARG_A | 822 R200_TXC_ARG_B_R1_COLOR | 823 R200_TXC_BIAS_ARG_B | 824 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 825 R200_TXC_ARG_C_R0_COLOR | 826 R200_TXC_OP_MADD); 827 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 828 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 829 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 830 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 831 R200_TXA_ARG_A_ZERO | 832 R200_TXA_ARG_B_ZERO | 833 R200_TXA_ARG_C_ZERO | 834 R200_TXA_OP_MADD); 835 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 836 R200_TXA_OUTPUT_REG_NONE); 837 838 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 839 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 840 R200_TXC_ARG_A_TFACTOR_COLOR | 841 R200_TXC_BIAS_ARG_A | 842 R200_TXC_SCALE_ARG_A | 843 R200_TXC_ARG_B_R2_COLOR | 844 R200_TXC_BIAS_ARG_B | 845 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 846 R200_TXC_ARG_C_R0_COLOR | 847 R200_TXC_OP_MADD); 848 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 849 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 850 R200_TXC_SCALE_2X | 851 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 852 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 853 R200_TXA_ARG_A_ZERO | 854 R200_TXA_ARG_B_ZERO | 855 R200_TXA_ARG_C_ZERO | 856 R200_TXA_COMP_ARG_C | 857 R200_TXA_OP_MADD); 858 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 859 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 860 861 /* shader constants */ 862 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 863 yco > 1.0 ? yco - 1.0: yco, 864 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 865 0.0)); 866 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 867 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 868 uco[2] * ucscale + 0.5, 869 0.0)); 870 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 871 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 872 vco[2] * vcscale + 0.5, 873 0.0)); 874 875 FINISH_ACCEL(); 876 } else { 877 pPriv->vtx_count = 4; 878 879 BEGIN_ACCEL_RELOC(24, 1); 880 881 OUT_ACCEL_REG(RADEON_PP_CNTL, 882 RADEON_TEX_0_ENABLE | 883 RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 884 RADEON_TEX_BLEND_2_ENABLE); 885 886 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 887 OUT_ACCEL_REG(R200_SE_VTX_FMT_1, 888 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 889 890 OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); 891 OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); 892 OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); 893 OUT_ACCEL_REG(R200_PP_TXSIZE_0, 894 (pPriv->w - 1) | 895 ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 896 OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 897 OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo); 898 899 /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 900 OUT_ACCEL_REG(R200_PP_TXCBLEND_0, 901 R200_TXC_ARG_A_TFACTOR_COLOR | 902 
R200_TXC_ARG_B_R0_COLOR | 903 R200_TXC_ARG_C_TFACTOR_COLOR | 904 (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 905 R200_TXC_OP_DOT2_ADD); 906 OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, 907 (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 908 R200_TXC_SCALE_INV2 | 909 (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 910 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 911 OUT_ACCEL_REG(R200_PP_TXABLEND_0, 912 R200_TXA_ARG_A_ZERO | 913 R200_TXA_ARG_B_ZERO | 914 R200_TXA_ARG_C_ZERO | 915 R200_TXA_OP_MADD); 916 OUT_ACCEL_REG(R200_PP_TXABLEND2_0, 917 R200_TXA_OUTPUT_REG_NONE); 918 919 /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 920 OUT_ACCEL_REG(R200_PP_TXCBLEND_1, 921 R200_TXC_ARG_A_TFACTOR_COLOR | 922 R200_TXC_BIAS_ARG_A | 923 R200_TXC_SCALE_ARG_A | 924 R200_TXC_ARG_B_R0_COLOR | 925 R200_TXC_BIAS_ARG_B | 926 (needux8 ? R200_TXC_SCALE_ARG_B : 0) | 927 R200_TXC_ARG_C_R1_COLOR | 928 R200_TXC_OP_MADD); 929 OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, 930 (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 931 (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 932 R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 933 OUT_ACCEL_REG(R200_PP_TXABLEND_1, 934 R200_TXA_ARG_A_ZERO | 935 R200_TXA_ARG_B_ZERO | 936 R200_TXA_ARG_C_ZERO | 937 R200_TXA_OP_MADD); 938 OUT_ACCEL_REG(R200_PP_TXABLEND2_1, 939 R200_TXA_OUTPUT_REG_NONE); 940 941 /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 942 OUT_ACCEL_REG(R200_PP_TXCBLEND_2, 943 R200_TXC_ARG_A_TFACTOR_COLOR | 944 R200_TXC_BIAS_ARG_A | 945 R200_TXC_SCALE_ARG_A | 946 R200_TXC_ARG_B_R0_COLOR | 947 R200_TXC_BIAS_ARG_B | 948 (needvx8 ? 
R200_TXC_SCALE_ARG_B : 0) | 949 R200_TXC_ARG_C_R1_COLOR | 950 R200_TXC_OP_MADD); 951 OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, 952 (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 953 R200_TXC_SCALE_2X | 954 (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 955 R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 956 OUT_ACCEL_REG(R200_PP_TXABLEND_2, 957 R200_TXA_ARG_A_ZERO | 958 R200_TXA_ARG_B_ZERO | 959 R200_TXA_ARG_C_ZERO | 960 R200_TXA_COMP_ARG_C | 961 R200_TXA_OP_MADD); 962 OUT_ACCEL_REG(R200_PP_TXABLEND2_2, 963 R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 964 965 /* shader constants */ 966 OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 967 yco > 1.0 ? yco - 1.0: yco, 968 yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ 969 0.0)); 970 OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 971 uco[1] * ucscale + 0.5, /* or [-2, 2] */ 972 uco[2] * ucscale + 0.5, 973 0.0)); 974 OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 975 vco[1] * vcscale + 0.5, /* or [-4, 4] */ 976 vco[2] * vcscale + 0.5, 977 0.0)); 978 979 FINISH_ACCEL(); 980 } 981 982 BEGIN_ACCEL(2); 983 OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); 984 OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 985 (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 986 FINISH_ACCEL(); 987 988 if (pPriv->vsync) { 989 xf86CrtcPtr crtc; 990 if (pPriv->desired_crtc) 991 crtc = pPriv->desired_crtc; 992 else 993 crtc = radeon_pick_best_crtc(pScrn, 994 pPriv->drw_x, 995 pPriv->drw_x + pPriv->dst_w, 996 pPriv->drw_y, 997 pPriv->drw_y + pPriv->dst_h); 998 if (crtc) 999 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 1000 crtc, 1001 pPriv->drw_y - crtc->y, 1002 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 1003 } 1004 1005 return TRUE; 1006} 1007
/*
 * Draw the current Xv frame into the destination pixmap through the R200
 * texture pipeline.
 *
 * The 3D state is programmed by R200PrepareTexturedVideo(); when that
 * fails nothing is drawn.  Every box of pPriv->clip is then emitted as one
 * entry of a rectangle list, i.e. three vertices (bottom-left, bottom-right,
 * top-right).  The texture coordinates of a vertex are found by mapping the
 * box back into the source image and normalising by the texture size
 * (pPriv->w x pPriv->h).  Planar sources send the same coordinate pair
 * twice per vertex (VTX_OUT_6), packed sources send it once (VTX_OUT_4).
 * Once all boxes are queued the clip region is reported as damaged on
 * pPriv->pDraw.
 */
static void
FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr box = REGION_RECTS(&pPriv->clip);
    int boxes_left = REGION_NUM_RECTS(&pPriv->clip);
    int x_off = 0;
    int y_off = 0;
    ACCEL_PREAMBLE();

#ifdef COMPOSITE
    /* Translate box coordinates into the pixmap's own coordinate space. */
    x_off = pPixmap->drawable.x - pPixmap->screen_x;
    y_off = pPixmap->drawable.y - pPixmap->screen_y;
#endif

    if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
        return;

#ifdef ACCEL_CP
    /*
     * CP path: queue as many boxes per draw packet as the command stream
     * has room for, and start a new packet for whatever is left.
     */
    while (boxes_left != 0) {
        /*
         * Space needed to draw a single box: three vertices of
         * pPriv->vtx_count values each, plus 4 for the packet itself
         * (presumably the two header dwords and the trailing WAIT_UNTIL
         * register write -- TODO confirm against the ring macros).
         * Recomputed on every pass, as in the original code.
         */
        const int space_per_box = 3 * pPriv->vtx_count + 4;
        int batch;

        if (radeon_cs_space_remaining(pScrn) < space_per_box) {
            /* Not even one box fits: submit what is queued, then redo the state setup. */
            if (info->cs)
                radeon_cs_flush_indirect(pScrn);
            else
                RADEONCPFlushIndirect(pScrn, 1);

            if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
                return;
        }

        batch = MIN(radeon_cs_space_remaining(pScrn) / space_per_box, boxes_left);
        boxes_left -= batch;

        BEGIN_RING(batch * 3 * pPriv->vtx_count + 4);
        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
                            batch * 3 * pPriv->vtx_count));
        OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
                 ((batch * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));

        for (; batch != 0; batch--, box++) {
            /* Destination rectangle in pixmap coordinates. */
            const int dst_w = box->x2 - box->x1;
            const int dst_h = box->y2 - box->y1;
            const int dst_left = box->x1 + x_off;
            const int dst_top = box->y1 + y_off;
            const int dst_right = dst_left + dst_w;
            const int dst_bottom = dst_top + dst_h;
            float src_left, src_top, src_w, src_h;
            float src_right, src_bottom;
            float u0, u1, v0, v1;

            /* Map the box back into the source image. */
            src_left = pPriv->src_x;
            src_left += ((box->x1 - pPriv->drw_x) *
                         pPriv->src_w) / (float)pPriv->dst_w;
            src_top = pPriv->src_y;
            src_top += ((box->y1 - pPriv->drw_y) *
                        pPriv->src_h) / (float)pPriv->dst_h;

            src_w = (pPriv->src_w * dst_w) / (float)pPriv->dst_w;
            src_h = (pPriv->src_h * dst_h) / (float)pPriv->dst_h;

            src_right = src_left + src_w;
            src_bottom = src_top + src_h;

            /* Texture coordinates, normalised by the texture size. */
            u0 = src_left / pPriv->w;
            u1 = src_right / pPriv->w;
            v0 = src_top / pPriv->h;
            v1 = src_bottom / pPriv->h;

            /* One rectangle, described by three of its corners. */
            if (pPriv->is_planar) {
                VTX_OUT_6((float)dst_left, (float)dst_bottom,
                          u0, v1,
                          u0, v1);
                VTX_OUT_6((float)dst_right, (float)dst_bottom,
                          u1, v1,
                          u1, v1);
                VTX_OUT_6((float)dst_right, (float)dst_top,
                          u1, v0,
                          u1, v0);
            } else {
                VTX_OUT_4((float)dst_left, (float)dst_bottom,
                          u0, v1);
                VTX_OUT_4((float)dst_right, (float)dst_bottom,
                          u1, v1);
                VTX_OUT_4((float)dst_right, (float)dst_top,
                          u1, v0);
            }
        }

        OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
    }
#else /* ACCEL_CP */
    /* MMIO path: all boxes go out in a single rectangle-list draw. */
    BEGIN_ACCEL(boxes_left * 3 * pPriv->vtx_count + 2);
    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
                                      RADEON_VF_PRIM_WALK_DATA |
                                      ((boxes_left * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));

    for (; boxes_left != 0; boxes_left--, box++) {
        /* Destination rectangle in pixmap coordinates. */
        const int dst_w = box->x2 - box->x1;
        const int dst_h = box->y2 - box->y1;
        const int dst_left = box->x1 + x_off;
        const int dst_top = box->y1 + y_off;
        const int dst_right = dst_left + dst_w;
        const int dst_bottom = dst_top + dst_h;
        float src_left, src_top, src_w, src_h;
        float src_right, src_bottom;
        float u0, u1, v0, v1;

        /* Map the box back into the source image. */
        src_left = pPriv->src_x;
        src_left += ((box->x1 - pPriv->drw_x) *
                     pPriv->src_w) / (float)pPriv->dst_w;
        src_top = pPriv->src_y;
        src_top += ((box->y1 - pPriv->drw_y) *
                    pPriv->src_h) / (float)pPriv->dst_h;

        src_w = (pPriv->src_w * dst_w) / (float)pPriv->dst_w;
        src_h = (pPriv->src_h * dst_h) / (float)pPriv->dst_h;

        src_right = src_left + src_w;
        src_bottom = src_top + src_h;

        /* Texture coordinates, normalised by the texture size. */
        u0 = src_left / pPriv->w;
        u1 = src_right / pPriv->w;
        v0 = src_top / pPriv->h;
        v1 = src_bottom / pPriv->h;

        /* One rectangle, described by three of its corners. */
        if (pPriv->is_planar) {
            VTX_OUT_6((float)dst_left, (float)dst_bottom,
                      u0, v1,
                      u0, v1);
            VTX_OUT_6((float)dst_right, (float)dst_bottom,
                      u1, v1,
                      u1, v1);
            VTX_OUT_6((float)dst_right, (float)dst_top,
                      u1, v0,
                      u1, v0);
        } else {
            VTX_OUT_4((float)dst_left, (float)dst_bottom,
                      u0, v1);
            VTX_OUT_4((float)dst_right, (float)dst_bottom,
                      u1, v1);
            VTX_OUT_4((float)dst_right, (float)dst_top,
                      u1, v0);
        }
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();
#endif /* !ACCEL_CP */

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}
1178 1179static Bool 1180FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 1181{ 1182 RADEONInfoPtr info = RADEONPTR(pScrn); 1183 PixmapPtr pPixmap = pPriv->pPixmap; 1184 struct radeon_exa_pixmap_priv *driver_priv; 1185 struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 1186 uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; 1187 uint32_t dst_pitch, dst_format; 1188 uint32_t txenable, colorpitch, bicubic_offset; 1189 uint32_t output_fmt; 1190 int pixel_shift; 1191 ACCEL_PREAMBLE(); 1192 1193#ifdef XF86DRM_MODE 1194 if (info->cs) { 1195 int ret; 1196 1197 radeon_cs_space_reset_bos(info->cs); 1198 radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1199 1200 
if (pPriv->bicubic_enabled) 1201 radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 1202 1203 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 1204 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 1205 1206 ret = radeon_cs_space_check(info->cs); 1207 if (ret) { 1208 ErrorF("Not enough RAM to hw accel xv operation\n"); 1209 return FALSE; 1210 } 1211 } 1212#else 1213 (void)src_bo; 1214#endif 1215 1216 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 1217 1218#ifdef USE_EXA 1219 if (info->useEXA) { 1220 dst_pitch = exaGetPixmapPitch(pPixmap); 1221 } else 1222#endif 1223 { 1224 dst_pitch = pPixmap->devKind; 1225 } 1226 1227#ifdef USE_EXA 1228 if (info->useEXA) { 1229 RADEON_SWITCH_TO_3D(); 1230 } else 1231#endif 1232 { 1233 BEGIN_ACCEL(2); 1234 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 1235 /* We must wait for 3d to idle, in case source was just written as a dest. */ 1236 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 1237 RADEON_WAIT_HOST_IDLECLEAN | 1238 RADEON_WAIT_2D_IDLECLEAN | 1239 RADEON_WAIT_3D_IDLECLEAN | 1240 RADEON_WAIT_DMA_GUI_IDLE); 1241 FINISH_ACCEL(); 1242 1243 if (!info->accel_state->XInited3D) 1244 RADEONInit3DEngine(pScrn); 1245 } 1246 1247 if (pPriv->bicubic_enabled) 1248 pPriv->vtx_count = 6; 1249 else 1250 pPriv->vtx_count = 4; 1251 1252 switch (pPixmap->drawable.bitsPerPixel) { 1253 case 16: 1254 if (pPixmap->drawable.depth == 15) 1255 dst_format = R300_COLORFORMAT_ARGB1555; 1256 else 1257 dst_format = R300_COLORFORMAT_RGB565; 1258 break; 1259 case 32: 1260 dst_format = R300_COLORFORMAT_ARGB8888; 1261 break; 1262 default: 1263 return FALSE; 1264 } 1265 1266 output_fmt = (R300_OUT_FMT_C4_8 | 1267 R300_OUT_FMT_C0_SEL_BLUE | 1268 R300_OUT_FMT_C1_SEL_GREEN | 1269 R300_OUT_FMT_C2_SEL_RED | 1270 R300_OUT_FMT_C3_SEL_ALPHA); 1271 1272 colorpitch = dst_pitch >> pixel_shift; 1273 colorpitch |= dst_format; 1274 1275 if (RADEONTilingEnabled(pScrn, 
pPixmap)) 1276 colorpitch |= R300_COLORTILE; 1277 1278 1279 if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1280 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1281 pPriv->is_planar = TRUE; 1282 else 1283 pPriv->is_planar = FALSE; 1284 1285 if (pPriv->is_planar) { 1286 txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1287 txpitch = pPriv->src_pitch; 1288 } else { 1289 if (pPriv->id == FOURCC_UYVY) 1290 txformat1 = R300_TX_FORMAT_YVYU422; 1291 else 1292 txformat1 = R300_TX_FORMAT_VYUY422; 1293 1294 if (pPriv->bicubic_state != BICUBIC_OFF) 1295 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1296 1297 /* pitch is in pixels */ 1298 txpitch = pPriv->src_pitch / 2; 1299 } 1300 txpitch -= 1; 1301 1302 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1303 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1304 R300_TXPITCH_EN); 1305 1306 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1307 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1308 R300_TX_MAG_FILTER_LINEAR | 1309 R300_TX_MIN_FILTER_LINEAR | 1310 (0 << R300_TX_ID_SHIFT)); 1311 1312 txoffset = info->cs ? 
0 : pPriv->src_offset; 1313 1314 BEGIN_ACCEL_RELOC(6, 1); 1315 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 1316 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 1317 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 1318 if (pPriv->is_planar) 1319 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1320 else 1321 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 1322 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 1323 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 1324 FINISH_ACCEL(); 1325 1326 txenable = R300_TEX_0_ENABLE; 1327 1328 if (pPriv->is_planar) { 1329 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1330 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1331 R300_TXPITCH_EN); 1332 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1333 txpitch -= 1; 1334 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1335 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1336 R300_TX_MIN_FILTER_LINEAR | 1337 R300_TX_MAG_FILTER_LINEAR); 1338 1339 BEGIN_ACCEL_RELOC(12, 2); 1340 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 1341 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1342 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1343 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 1344 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1345 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 1346 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 1347 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 1348 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 1349 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 1350 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 1351 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 1352 FINISH_ACCEL(); 1353 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1354 } 1355 1356 if (pPriv->bicubic_enabled) { 1357 /* Size is 128x1 */ 1358 txformat0 = ((0x7f 
<< R300_TXWIDTH_SHIFT) | 1359 (0x0 << R300_TXHEIGHT_SHIFT) | 1360 R300_TXPITCH_EN); 1361 /* Format is 32-bit floats, 4bpp */ 1362 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1363 /* Pitch is 127 (128-1) */ 1364 txpitch = 0x7f; 1365 /* Tex filter */ 1366 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1367 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1368 R300_TX_MIN_FILTER_NEAREST | 1369 R300_TX_MAG_FILTER_NEAREST | 1370 (1 << R300_TX_ID_SHIFT)); 1371 1372 if (info->cs) 1373 bicubic_offset = 0; 1374 else 1375 bicubic_offset = pPriv->bicubic_src_offset; 1376 1377 BEGIN_ACCEL_RELOC(6, 1); 1378 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 1379 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 1380 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 1381 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 1382 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 1383 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 1384 FINISH_ACCEL(); 1385 1386 /* Enable tex 1 */ 1387 txenable |= R300_TEX_1_ENABLE; 1388 } 1389 1390 /* setup the VAP */ 1391 if (info->accel_state->has_tcl) { 1392 if (pPriv->bicubic_enabled) 1393 BEGIN_ACCEL(7); 1394 else 1395 BEGIN_ACCEL(6); 1396 } else { 1397 if (pPriv->bicubic_enabled) 1398 BEGIN_ACCEL(5); 1399 else 1400 BEGIN_ACCEL(4); 1401 } 1402 1403 /* These registers define the number, type, and location of data submitted 1404 * to the PVS unit of GA input (when PVS is disabled) 1405 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1406 * enabled. This memory provides the imputs to the vertex shader program 1407 * and ordering is not important. When PVS/TCL is disabled, this field maps 1408 * directly to the GA input memory and the order is signifigant. 
In 1409 * PVS_BYPASS mode the order is as follows: 1410 * Position 1411 * Point Size 1412 * Color 0-3 1413 * Textures 0-7 1414 * Fog 1415 */ 1416 if (pPriv->bicubic_enabled) { 1417 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1418 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1419 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1420 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1421 R300_SIGNED_0 | 1422 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1423 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1424 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1425 R300_SIGNED_1)); 1426 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 1427 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1428 (0 << R300_SKIP_DWORDS_2_SHIFT) | 1429 (7 << R300_DST_VEC_LOC_2_SHIFT) | 1430 R300_LAST_VEC_2 | 1431 R300_SIGNED_2)); 1432 } else { 1433 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 1434 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1435 (0 << R300_SKIP_DWORDS_0_SHIFT) | 1436 (0 << R300_DST_VEC_LOC_0_SHIFT) | 1437 R300_SIGNED_0 | 1438 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1439 (0 << R300_SKIP_DWORDS_1_SHIFT) | 1440 (6 << R300_DST_VEC_LOC_1_SHIFT) | 1441 R300_LAST_VEC_1 | 1442 R300_SIGNED_1)); 1443 } 1444 1445 /* load the vertex shader 1446 * We pre-load vertex programs in RADEONInit3DEngine(): 1447 * - exa 1448 * - Xv 1449 * - Xv bicubic 1450 * Here we select the offset of the vertex program we want to use 1451 */ 1452 if (info->accel_state->has_tcl) { 1453 if (pPriv->bicubic_enabled) { 1454 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1455 ((11 << R300_PVS_FIRST_INST_SHIFT) | 1456 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1457 (13 << R300_PVS_LAST_INST_SHIFT))); 1458 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1459 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1460 } else { 1461 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 1462 ((9 << R300_PVS_FIRST_INST_SHIFT) | 1463 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1464 (10 << R300_PVS_LAST_INST_SHIFT))); 1465 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 1466 (10 << 
R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1467 } 1468 } 1469 1470 /* Position and one set of 2 texture coordinates */ 1471 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1472 if (pPriv->bicubic_enabled) 1473 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1474 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1475 else 1476 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1477 1478 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 1479 FINISH_ACCEL(); 1480 1481 /* setup pixel shader */ 1482 if (pPriv->bicubic_state != BICUBIC_OFF) { 1483 if (pPriv->bicubic_enabled) { 1484 BEGIN_ACCEL(79); 1485 1486 /* 4 components: 2 for tex0 and 2 for tex1 */ 1487 OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1488 R300_RS_COUNT_HIRES_EN)); 1489 1490 /* R300_INST_COUNT_RS - highest RS instruction used */ 1491 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1492 1493 /* Pixel stack frame size. */ 1494 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 1495 1496 /* Indirection levels */ 1497 OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1498 R300_FIRST_TEX)); 1499 1500 /* Set nodes. 
*/ 1501 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1502 R300_ALU_CODE_SIZE(14) | 1503 R300_TEX_CODE_OFFSET(0) | 1504 R300_TEX_CODE_SIZE(6))); 1505 1506 /* Nodes are allocated highest first, but executed lowest first */ 1507 OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); 1508 OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1509 R300_ALU_SIZE(0) | 1510 R300_TEX_START(0) | 1511 R300_TEX_SIZE(0))); 1512 OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1513 R300_ALU_SIZE(9) | 1514 R300_TEX_START(1) | 1515 R300_TEX_SIZE(0))); 1516 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1517 R300_ALU_SIZE(2) | 1518 R300_TEX_START(2) | 1519 R300_TEX_SIZE(3) | 1520 R300_RGBA_OUT)); 1521 1522 /* ** BICUBIC FP ** */ 1523 1524 /* texcoord0 => temp0 1525 * texcoord1 => temp1 */ 1526 1527 // first node 1528 /* TEX temp2, temp1.rrr0, tex1, 1D */ 1529 OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 1530 R300_TEX_ID(1) | 1531 R300_TEX_SRC_ADDR(1) | 1532 R300_TEX_DST_ADDR(2))); 1533 1534 /* MOV temp1.r, temp1.ggg0 */ 1535 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1536 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1537 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1538 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1539 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1540 R300_ALU_RGB_ADDRD(1) | 1541 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 1542 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1543 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1544 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1545 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1546 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1547 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1548 1549 1550 // second node 1551 /* TEX temp1, temp1, tex1, 1D */ 1552 OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1553 R300_TEX_ID(1) | 1554 R300_TEX_SRC_ADDR(1) | 1555 
R300_TEX_DST_ADDR(1))); 1556 1557 /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 1558 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1559 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1560 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1561 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1562 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1563 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1564 R300_ALU_RGB_ADDRD(3) | 1565 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1566 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1567 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1568 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1569 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1570 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1571 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1572 1573 1574 /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 1575 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1576 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1577 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1578 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 1579 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1580 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1581 R300_ALU_RGB_ADDRD(2) | 1582 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1583 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1584 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1585 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1586 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1587 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1588 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1589 1590 /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 1591 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1592 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1593 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 
1594 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1595 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1596 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1597 R300_ALU_RGB_ADDR2(3) | 1598 R300_ALU_RGB_ADDRD(4) | 1599 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1600 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1601 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1602 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1603 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1604 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1605 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1606 1607 /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 1608 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1609 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1610 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1611 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1612 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1613 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1614 R300_ALU_RGB_ADDR2(2) | 1615 R300_ALU_RGB_ADDRD(5) | 1616 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1617 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1618 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1619 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1620 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1621 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1622 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1623 1624 /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 1625 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1626 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1627 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1628 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1629 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1630 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1631 
R300_ALU_RGB_ADDR2(3) | 1632 R300_ALU_RGB_ADDRD(3) | 1633 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1634 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1635 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1636 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1637 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1638 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1639 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1640 1641 /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 1642 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1643 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1644 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1645 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1646 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1647 R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1648 R300_ALU_RGB_ADDR2(2) | 1649 R300_ALU_RGB_ADDRD(1) | 1650 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1651 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1652 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1653 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1654 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1655 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1656 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1657 1658 /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 1659 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1660 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1661 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1662 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1663 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1664 R300_ALU_RGB_ADDR2(1) | 1665 R300_ALU_RGB_ADDRD(1) | 1666 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1667 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1668 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1669 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1670 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1671 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1672 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1673 1674 /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 1675 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1676 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1677 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1678 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1679 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1680 R300_ALU_RGB_ADDR2(3) | 1681 R300_ALU_RGB_ADDRD(2) | 1682 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1683 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1684 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1685 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1686 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1687 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1688 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1689 1690 /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 1691 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1692 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1693 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1694 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1695 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1696 R300_ALU_RGB_ADDR2(5) | 1697 R300_ALU_RGB_ADDRD(3) | 1698 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1699 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1700 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1701 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1702 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1703 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1704 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1705 1706 /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 
*/ 1707 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1708 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1709 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1710 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 1711 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1712 R300_ALU_RGB_ADDR2(4) | 1713 R300_ALU_RGB_ADDRD(0) | 1714 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 1715 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1716 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1717 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1718 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1719 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1720 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1721 1722 1723 // third node 1724 /* TEX temp4, temp1.rg--, tex0, 1D */ 1725 OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1726 R300_TEX_ID(0) | 1727 R300_TEX_SRC_ADDR(1) | 1728 R300_TEX_DST_ADDR(4))); 1729 1730 /* TEX temp3, temp3.rg--, tex0, 1D */ 1731 OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1732 R300_TEX_ID(0) | 1733 R300_TEX_SRC_ADDR(3) | 1734 R300_TEX_DST_ADDR(3))); 1735 1736 /* TEX temp5, temp2.rg--, tex0, 1D */ 1737 OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1738 R300_TEX_ID(0) | 1739 R300_TEX_SRC_ADDR(2) | 1740 R300_TEX_DST_ADDR(5))); 1741 1742 /* TEX temp0, temp0.rg--, tex0, 1D */ 1743 OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1744 R300_TEX_ID(0) | 1745 R300_TEX_SRC_ADDR(0) | 1746 R300_TEX_DST_ADDR(0))); 1747 1748 /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1749 * - PRESUB temps, temp4 - temp3 1750 * - MAD temp3, temp1.bbbb, temps, temp3 */ 1751 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1752 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1753 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1754 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1755 
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1756 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1757 R300_ALU_RGB_ADDR1(4) | 1758 R300_ALU_RGB_ADDR2(1) | 1759 R300_ALU_RGB_ADDRD(3) | 1760 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1761 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1762 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1763 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1764 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1765 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1766 R300_ALU_ALPHA_ADDR1(4) | 1767 R300_ALU_ALPHA_ADDR2(1) | 1768 R300_ALU_ALPHA_ADDRD(3) | 1769 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1770 1771 /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1772 * - PRESUB temps, temp5 - temp0 1773 * - MAD temp0, temp1.bbbb, temps, temp0 */ 1774 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1775 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1776 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1777 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1778 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1779 R300_ALU_RGB_INSERT_NOP)); 1780 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1781 R300_ALU_RGB_ADDR1(5) | 1782 R300_ALU_RGB_ADDR2(1) | 1783 R300_ALU_RGB_ADDRD(0) | 1784 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 1785 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1786 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1787 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1788 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1789 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1790 R300_ALU_ALPHA_ADDR1(5) | 1791 R300_ALU_ALPHA_ADDR2(1) | 1792 R300_ALU_ALPHA_ADDRD(0) | 1793 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1794 1795 /* LRP output, temp2.bbbb, temp3, temp0 -> 1796 * - PRESUB temps, temp3 - temp0 1797 * - MAD output, temp2.bbbb, 
temps, temp0 */ 1798 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1799 R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1800 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1801 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1802 R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 1803 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1804 R300_ALU_RGB_ADDR1(3) | 1805 R300_ALU_RGB_ADDR2(2) | 1806 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 1807 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1808 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1809 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1810 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 1811 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1812 R300_ALU_ALPHA_ADDR1(3) | 1813 R300_ALU_ALPHA_ADDR2(2) | 1814 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1815 1816 /* Shader constants. */ 1817 OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 1818 OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); 1819 OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); 1820 OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); 1821 1822 OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); 1823 OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 1824 OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); 1825 OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); 1826 1827 FINISH_ACCEL(); 1828 } else { 1829 BEGIN_ACCEL(11); 1830 /* 2 components: 2 for tex0 */ 1831 OUT_ACCEL_REG(R300_RS_COUNT, 1832 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1833 R300_RS_COUNT_HIRES_EN)); 1834 /* R300_INST_COUNT_RS - highest RS instruction used */ 1835 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1836 1837 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1838 1839 /* Indirection levels */ 1840 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1841 R300_FIRST_TEX)); 1842 1843 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1844 
R300_ALU_CODE_SIZE(1) | 1845 R300_TEX_CODE_OFFSET(0) | 1846 R300_TEX_CODE_SIZE(1))); 1847 1848 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1849 R300_ALU_SIZE(0) | 1850 R300_TEX_START(0) | 1851 R300_TEX_SIZE(0) | 1852 R300_RGBA_OUT)); 1853 1854 /* tex inst */ 1855 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1856 R300_TEX_DST_ADDR(0) | 1857 R300_TEX_ID(0) | 1858 R300_TEX_INST(R300_TEX_INST_LD))); 1859 1860 /* ALU inst */ 1861 /* RGB */ 1862 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1863 R300_ALU_RGB_ADDR1(0) | 1864 R300_ALU_RGB_ADDR2(0) | 1865 R300_ALU_RGB_ADDRD(0) | 1866 R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1867 R300_ALU_RGB_MASK_G | 1868 R300_ALU_RGB_MASK_B)) | 1869 R300_ALU_RGB_TARGET_A)); 1870 OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1871 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1872 R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1873 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1874 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1875 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1876 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1877 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1878 R300_ALU_RGB_CLAMP)); 1879 /* Alpha */ 1880 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1881 R300_ALU_ALPHA_ADDR1(0) | 1882 R300_ALU_ALPHA_ADDR2(0) | 1883 R300_ALU_ALPHA_ADDRD(0) | 1884 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1885 R300_ALU_ALPHA_TARGET_A | 1886 R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 1887 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1888 R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1889 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1890 R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1891 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1892 R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1893 R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1894 R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1895 R300_ALU_ALPHA_CLAMP)); 1896 
FINISH_ACCEL(); 1897 } 1898 } else { 1899 /* 1900 * y' = y - .0625 1901 * u' = u - .5 1902 * v' = v - .5; 1903 * 1904 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1905 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1906 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1907 * 1908 * DP3 might look like the straightforward solution 1909 * but we'd need to move the texture yuv values in 1910 * the same reg for this to work. Therefore use MADs. 1911 * Brightness just adds to the off constant. 1912 * Contrast is multiplication of luminance. 1913 * Saturation and hue change the u and v coeffs. 1914 * Default values (before adjustments - depend on colorspace): 1915 * yco = 1.1643 1916 * uco = 0, -0.39173, 2.017 1917 * vco = 1.5958, -0.8129, 0 1918 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1919 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1920 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1921 * 1922 * temp = MAD(yco, yuv.yyyy, off) 1923 * temp = MAD(uco, yuv.uuuu, temp) 1924 * result = MAD(vco, yuv.vvvv, temp) 1925 */ 1926 /* TODO: don't recalc consts always */ 1927 const float Loff = -0.0627; 1928 const float Coff = -0.502; 1929 float uvcosf, uvsinf; 1930 float yco; 1931 float uco[3], vco[3], off[3]; 1932 float bright, cont, gamma; 1933 int ref = pPriv->transform_index; 1934 Bool needgamma = FALSE; 1935 1936 cont = RTFContrast(pPriv->contrast); 1937 bright = RTFBrightness(pPriv->brightness); 1938 gamma = (float)pPriv->gamma / 1000.0; 1939 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1940 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1941 /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 1942 1943 yco = trans[ref].RefLuma * cont; 1944 uco[0] = -trans[ref].RefRCr * uvsinf; 1945 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1946 uco[2] = trans[ref].RefBCb * uvcosf; 1947 vco[0] = trans[ref].RefRCr * uvcosf; 1948 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1949 vco[2] = trans[ref].RefBCb * uvsinf; 1950 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1951 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1952 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1953 1954 if (gamma != 1.0) { 1955 needgamma = TRUE; 1956 /* note: gamma correction is out = in ^ gamma; 1957 gpu can only do LG2/EX2 therefore we transform into 1958 in ^ gamma = 2 ^ (log2(in) * gamma). 1959 Lots of scalar ops, unfortunately (better solution?) - 1960 without gamma that's 3 inst, with gamma it's 10... 1961 could use different gamma factors per channel, 1962 if that's of any use. */ 1963 } 1964 1965 if (pPriv->is_planar) { 1966 BEGIN_ACCEL(needgamma ? 28 + 33 : 33); 1967 /* 2 components: same 2 for tex0/1/2 */ 1968 OUT_ACCEL_REG(R300_RS_COUNT, 1969 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1970 R300_RS_COUNT_HIRES_EN)); 1971 /* R300_INST_COUNT_RS - highest RS instruction used */ 1972 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1973 1974 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1975 1976 /* Indirection levels */ 1977 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1978 R300_FIRST_TEX)); 1979 1980 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1981 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1982 R300_TEX_CODE_OFFSET(0) | 1983 R300_TEX_CODE_SIZE(3))); 1984 1985 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1986 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 1987 R300_TEX_START(0) | 1988 R300_TEX_SIZE(2) | 1989 R300_RGBA_OUT)); 1990 1991 /* tex inst */ 1992 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1993 R300_TEX_DST_ADDR(2) | 1994 R300_TEX_ID(0) | 1995 R300_TEX_INST(R300_TEX_INST_LD))); 1996 OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1997 R300_TEX_DST_ADDR(1) | 1998 R300_TEX_ID(1) | 1999 R300_TEX_INST(R300_TEX_INST_LD))); 2000 OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 2001 R300_TEX_DST_ADDR(0) | 2002 R300_TEX_ID(2) | 2003 R300_TEX_INST(R300_TEX_INST_LD))); 2004 2005 /* ALU inst */ 2006 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 2007 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2008 R300_ALU_RGB_ADDR1(2) | 2009 R300_ALU_RGB_ADDR2(0) | 2010 R300_ALU_RGB_ADDRD(2) | 2011 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2012 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2013 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2014 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2015 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2016 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2017 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2018 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2019 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2020 /* alpha nop, but need to set up alpha source for rgb usage */ 2021 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2022 R300_ALU_ALPHA_ADDR1(2) | 2023 R300_ALU_ALPHA_ADDR2(0) | 2024 R300_ALU_ALPHA_ADDRD(2) | 2025 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2026 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2027 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2028 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2029 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2030 2031 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 2032 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), 
(R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2033 R300_ALU_RGB_ADDR1(1) | 2034 R300_ALU_RGB_ADDR2(2) | 2035 R300_ALU_RGB_ADDRD(2) | 2036 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2037 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2038 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2039 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2040 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2041 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2042 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2043 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2044 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2045 /* alpha nop */ 2046 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 2047 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2048 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2049 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2050 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2051 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2052 2053 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 2054 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2055 R300_ALU_RGB_ADDR1(0) | 2056 R300_ALU_RGB_ADDR2(2) | 2057 R300_ALU_RGB_ADDRD(0) | 2058 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2059 (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2060 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2061 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2062 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 2063 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2064 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2065 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2066 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2067 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2068 R300_ALU_RGB_CLAMP)); 2069 /* write alpha 1 */ 2070 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2071 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2072 R300_ALU_ALPHA_TARGET_A)); 2073 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2074 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2075 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2076 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2077 2078 if (needgamma) { 2079 /* rgb temp0.r = op_sop, set up src0 reg */ 2080 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2081 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2082 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2083 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2084 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2085 /* alpha lg2 temp0, temp0.r */ 2086 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2087 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2088 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2089 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2090 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2091 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2092 2093 /* rgb temp0.g = op_sop, set up src0 reg */ 2094 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2095 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2096 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2097 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2098 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2099 /* alpha lg2 temp0, temp0.g */ 2100 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2101 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2102 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2103 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2104 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2105 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2106 2107 /* rgb temp0.b = op_sop, set up src0 reg */ 2108 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2109 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2110 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2111 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2112 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2113 /* alpha lg2 temp0, temp0.b */ 2114 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2115 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2116 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2117 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2118 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2119 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2120 2121 /* MUL const1, temp1, temp0 */ 2122 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2123 R300_ALU_RGB_ADDR1(0) | 2124 R300_ALU_RGB_ADDR2(0) | 2125 R300_ALU_RGB_ADDRD(0) | 2126 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2127 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2128 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2129 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2130 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2131 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2132 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2133 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2134 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2135 /* alpha nop, but set up const1 */ 2136 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2137 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2138 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2139 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2140 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2141 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2142 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2143 2144 /* rgb out0.r = op_sop, set up src0 reg */ 2145 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2146 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2147 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2148 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2149 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2150 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2151 /* alpha ex2 temp0, temp0.r */ 2152 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2153 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2154 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2155 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2156 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2157 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2158 2159 /* rgb out0.g = op_sop, set up src0 reg */ 2160 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2161 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2162 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2163 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2164 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2165 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2166 /* alpha ex2 temp0, temp0.g */ 2167 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2168 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2169 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2170 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2171 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2172 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2173 2174 /* rgb out0.b = op_sop, set up src0 reg */ 2175 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2176 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2177 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2178 
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2179 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2180 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2181 /* alpha ex2 temp0, temp0.b */ 2182 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2183 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2184 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2185 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2186 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2187 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2188 } 2189 } else { 2190 BEGIN_ACCEL(needgamma ? 28 + 31 : 31); 2191 /* 2 components */ 2192 OUT_ACCEL_REG(R300_RS_COUNT, 2193 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2194 R300_RS_COUNT_HIRES_EN)); 2195 /* R300_INST_COUNT_RS - highest RS instruction used */ 2196 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 2197 2198 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 2199 2200 /* Indirection levels */ 2201 OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 2202 R300_FIRST_TEX)); 2203 2204 OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 2205 R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 2206 R300_TEX_CODE_OFFSET(0) | 2207 R300_TEX_CODE_SIZE(1))); 2208 2209 OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 2210 R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 2211 R300_TEX_START(0) | 2212 R300_TEX_SIZE(0) | 2213 R300_RGBA_OUT)); 2214 2215 /* tex inst */ 2216 OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 2217 R300_TEX_DST_ADDR(0) | 2218 R300_TEX_ID(0) | 2219 R300_TEX_INST(R300_TEX_INST_LD))); 2220 2221 /* ALU inst */ 2222 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 2223 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 2224 R300_ALU_RGB_ADDR1(0) | 2225 R300_ALU_RGB_ADDR2(0) | 2226 R300_ALU_RGB_ADDRD(1) | 2227 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2228 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 2229 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2230 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 2231 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2232 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 2233 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2234 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2235 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2236 /* alpha nop, but need to set up alpha source for rgb usage */ 2237 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 2238 R300_ALU_ALPHA_ADDR1(0) | 2239 R300_ALU_ALPHA_ADDR2(0) | 2240 R300_ALU_ALPHA_ADDRD(0) | 2241 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2242 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2243 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2244 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2245 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2246 2247 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 2248 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 2249 R300_ALU_RGB_ADDR1(0) | 2250 R300_ALU_RGB_ADDR2(1) | 2251 R300_ALU_RGB_ADDRD(1) | 2252 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2253 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2254 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2255 
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 2256 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2257 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2258 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2259 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2260 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2261 /* alpha nop */ 2262 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 2263 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2264 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2265 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2266 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2267 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2268 2269 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 2270 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2271 R300_ALU_RGB_ADDR1(0) | 2272 R300_ALU_RGB_ADDR2(1) | 2273 R300_ALU_RGB_ADDRD(0) | 2274 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2275 (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 2276 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2277 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2278 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2279 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2280 R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2281 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2282 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2283 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2284 R300_ALU_RGB_CLAMP)); 2285 /* write alpha 1 */ 2286 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2287 R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2288 R300_ALU_ALPHA_TARGET_A)); 2289 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2290 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2291 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2292 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2293 2294 if (needgamma) { 2295 /* rgb temp0.r = op_sop, set up src0 reg */ 2296 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2297 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 2298 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), 2299 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2300 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2301 /* alpha lg2 temp0, temp0.r */ 2302 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2303 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2304 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2305 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2306 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2307 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2308 2309 /* rgb temp0.g = op_sop, set up src0 reg */ 2310 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2311 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 2312 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), 2313 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2314 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2315 /* alpha lg2 temp0, temp0.g */ 2316 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2317 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2318 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2319 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2320 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2321 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2322 2323 /* rgb temp0.b = op_sop, set up src0 reg */ 2324 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2325 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 2326 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 2327 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2328 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2329 /* alpha lg2 temp0, temp0.b */ 2330 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2331 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2332 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2333 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2334 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2335 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2336 2337 /* MUL const1, temp1, temp0 */ 2338 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2339 R300_ALU_RGB_ADDR1(0) | 2340 R300_ALU_RGB_ADDR2(0) | 2341 R300_ALU_RGB_ADDRD(0) | 2342 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 2343 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2344 R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2345 R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2346 R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2347 R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2348 R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2349 R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2350 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2351 /* alpha nop, but set up const1 */ 2352 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2353 R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2354 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2355 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2356 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2357 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2358 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2359 2360 /* rgb out0.r = op_sop, set up src0 reg */ 2361 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2362 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2363 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 2364 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), 2365 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2366 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2367 /* alpha ex2 temp0, temp0.r */ 2368 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2369 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2370 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2371 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2372 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2373 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2374 2375 /* rgb out0.g = 
op_sop, set up src0 reg */ 2376 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2377 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2378 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 2379 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), 2380 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2381 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2382 /* alpha ex2 temp0, temp0.g */ 2383 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2384 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2385 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2386 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2387 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2388 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2389 2390 /* rgb out0.b = op_sop, set up src0 reg */ 2391 OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2392 R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2393 R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 2394 OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), 2395 R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2396 R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2397 /* alpha ex2 temp0, temp0.b */ 2398 OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2399 R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 2400 OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2401 R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2402 R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2403 R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2404 } 2405 } 2406 2407 /* Shader constants. 
*/
2408        /* constant 0: off, yco */
2409        OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
2410        OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
2411        OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
2412        OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
2413        /* constant 1: uco */
2414        OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
2415        OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
2416        OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
2417        OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
2418        /* constant 2: vco */
2419        OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
2420        OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
2421        OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
2422        OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
2423
2424        FINISH_ACCEL();
2425    }
2426
2427    BEGIN_ACCEL_RELOC(6, 2);
2428    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
2429    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
2430
2431    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
2432    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap);
2433
2434    /* no need to enable blending */
2435    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
2436
2437    OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count);
2438    FINISH_ACCEL();
2439
2440    if (pPriv->vsync) {
2441        xf86CrtcPtr crtc;
2442        if (pPriv->desired_crtc)
2443            crtc = pPriv->desired_crtc;
2444        else
2445            crtc = radeon_pick_best_crtc(pScrn,
2446                                         pPriv->drw_x,
2447                                         pPriv->drw_x + pPriv->dst_w,
2448                                         pPriv->drw_y,
2449                                         pPriv->drw_y + pPriv->dst_h);
2450        if (crtc)
2451            FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
2452                                          crtc,
2453                                          pPriv->drw_y - crtc->y,
2454                                          (pPriv->drw_y - crtc->y) + pPriv->dst_h);
2455    }
2456
2457    return TRUE;
2458}
2459
/*
 * R300DisplayTexturedVideo - draw the textured-video image into the
 * destination pixmap, one clip rectangle at a time.
 *
 * pScrn: screen being drawn on; handed to the prepare and flush helpers and
 *        used (through RADEONPTR) to reach the driver state in "info".
 * pPriv: Xv port state.  Fields read here: pPixmap, clip, pDraw, vtx_count,
 *        bicubic_enabled, w, h, src_x/src_y/src_w/src_h and
 *        drw_x/drw_y/dst_w/dst_h.
 *
 * Returns nothing.  If R300PrepareTexturedVideo() fails, either up front or
 * after a mid-loop command-stream flush, the function returns immediately
 * and the damage report at the end is not issued.
 *
 * For every box in pPriv->clip the scissor is programmed to the box and the
 * box is drawn either as a quad or as a single oversized triangle that the
 * scissor trims back to the box, followed by a destination-cache flush.
 * After the last box the clip-rule, destination-cache and WAIT_UNTIL
 * registers are written and the clip region is reported as damaged on
 * pPriv->pDraw.
 */
2460static void
2461FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2462{
2463    RADEONInfoPtr info = RADEONPTR(pScrn);
2464    PixmapPtr pPixmap = pPriv->pPixmap;
2465    int dstxoff, dstyoff;
2466    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
2467    int nBox = REGION_NUM_RECTS(&pPriv->clip);
2468    ACCEL_PREAMBLE();
2469
    /*
     * Offset added to each clip box to obtain coordinates inside pPixmap.
     * With COMPOSITE it is the pixmap's drawable origin minus its screen
     * position; without COMPOSITE box coordinates are used unchanged.
     */
2470#ifdef COMPOSITE
2471    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
2472    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
2473#else
2474    dstxoff = 0;
2475    dstyoff = 0;
2476#endif
2477
    /* Emit the render state first; draw nothing if that fails. */
2478    if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
2479        return;
2480
2481    /*
2482     * Rendering of the actual polygon is done in two different
2483     * ways depending on chip generation:
2484     *
2485     * < R300:
2486     *
2487     *     These chips can render a rectangle in one pass, so
2488     *     handling is pretty straight-forward.
2489     *
2490     * >= R300:
2491     *
2492     *     These chips can accept a quad, but will render it as
2493     *     two triangles which results in a diagonal tear. Instead
2494     *     We render a single, large triangle and use the scissor
2495     *     functionality to restrict it to the desired rectangle.
2496     *     Due to guardband limits on r3xx/r4xx, we can only use
2497     *     the single triangle up to 2560/4021 pixels; above that we
2498     *     render as a quad.
2499     */
2500
    /* One pass per clip box; pBox is advanced at the bottom of the loop. */
2501    while (nBox--) {
2502        float srcX, srcY, srcw, srch;
2503        int dstX, dstY, dstw, dsth;
2504        Bool use_quad = FALSE;
2505#ifdef ACCEL_CP
        /*
         * Ring space required before starting this box: the quad path's
         * BEGIN_RING size used further down (4 * vtx_count + 4) plus
         * 2 + 3 more dwords.
         * NOTE(review): the "+ 2 + 3" presumably accounts for the scissor
         * and other register writes around the draw - confirm.
         */
2506        int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3;
2507
        /*
         * Not enough room: flush what has been queued (through the
         * command-stream path when info->cs is set, the indirect-buffer
         * path otherwise) and run the prepare step again before drawing.
         * NOTE(review): presumably the emitted state does not survive the
         * flush - confirm.
         */
2508        if (draw_size > radeon_cs_space_remaining(pScrn)) {
2509            if (info->cs)
2510                radeon_cs_flush_indirect(pScrn);
2511            else
2512                RADEONCPFlushIndirect(pScrn, 1);
2513            if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
2514                return;
2515        }
2516#endif
2517
        /* Destination rectangle: box origin shifted into the pixmap, box size. */
2518        dstX = pBox->x1 + dstxoff;
2519        dstY = pBox->y1 + dstyoff;
2520        dstw = pBox->x2 - pBox->x1;
2521        dsth = pBox->y2 - pBox->y1;
2522
        /*
         * Matching source rectangle: the box's offset from the drawing
         * origin (drw_x, drw_y) and the box's size are both scaled by
         * src_w / dst_w horizontally and src_h / dst_h vertically.
         */
2523        srcX = pPriv->src_x;
2524        srcX += ((pBox->x1 - pPriv->drw_x) *
2525                 pPriv->src_w) / (float)pPriv->dst_w;
2526        srcY = pPriv->src_y;
2527        srcY += ((pBox->y1 - pPriv->drw_y) *
2528                 pPriv->src_h) / (float)pPriv->dst_h;
2529
2530        srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
2531        srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
2532
        /*
         * The oversized triangle has legs of dstw + dsth pixels; once that
         * exceeds the limit for the chip family (4021 when IS_R400_3D,
         * 2560 otherwise) fall back to drawing a quad.
         */
2533        if (IS_R400_3D) {
2534            if ((dstw+dsth) > 4021)
2535                use_quad = TRUE;
2536        } else {
2537            if ((dstw+dsth) > 2560)
2538                use_quad = TRUE;
2539        }
2540        /*
2541         * Set up the scissor area to that of the output size.
2542         */
2543        BEGIN_ACCEL(2);
2544        /* R300 has an offset */
2545        OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) |
2546                                         ((dstY + 1440) << R300_SCISSOR_Y_SHIFT)));
        /* Second corner is the last pixel of the box, hence the "- 1". */
2547        OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2548                                         ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2549        FINISH_ACCEL();
2550
        /*
         * Open the draw.  CP build: an immediate-mode draw packet whose
         * payload is (number of vertices) * vtx_count dwords; the ring
         * reservation adds 4 for the two header words written here and,
         * NOTE(review), presumably for the destination-cache register write
         * issued before ADVANCE_RING - confirm.  MMIO build: one register
         * write selecting the primitive; the reservation of 2 covers that
         * write and the destination-cache write, the rest is vertex data.
         */
2551#ifdef ACCEL_CP
2552        if (use_quad) {
2553            BEGIN_RING(4 * pPriv->vtx_count + 4);
2554            OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2555                                4 * pPriv->vtx_count));
2556            OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2557                     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2558                     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2559        } else {
2560            BEGIN_RING(3 * pPriv->vtx_count + 4);
2561            OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2562                                3 * pPriv->vtx_count));
2563            OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
2564                     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2565                     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2566        }
2567#else /* ACCEL_CP */
2568        if (use_quad)
2569            BEGIN_ACCEL(2 + pPriv->vtx_count * 4);
2570        else
2571            BEGIN_ACCEL(2 + pPriv->vtx_count * 3);
2572
2573        if (use_quad)
2574            OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2575                                              RADEON_VF_PRIM_WALK_DATA |
2576                                              (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2577        else
2578            OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
2579                                              RADEON_VF_PRIM_WALK_DATA |
2580                                              (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2581#endif
        /*
         * Vertex data.  Each vertex is: destination x, y; source x, y
         * divided by pPriv->w / pPriv->h (NOTE(review): presumably the
         * texture dimensions, giving normalized coordinates - confirm);
         * and, in the bicubic case only, a third pair holding the
         * undivided source coordinates plus 0.5.
         */
2582        if (pPriv->bicubic_enabled) {
2583            /*
2584             * This code is only executed on >= R300, so we don't
2585             * have to deal with the legacy handling.
2586             */
2587            if (use_quad) {
                /* Corners in order: top-left, bottom-left, bottom-right, top-right. */
2588                VTX_OUT_6((float)dstX,                                      (float)dstY,
2589                          (float)srcX / pPriv->w,                           (float)srcY / pPriv->h,
2590                          (float)srcX + 0.5,                                (float)srcY + 0.5);
2591                VTX_OUT_6((float)dstX,                                      (float)(dstY + dsth),
2592                          (float)srcX / pPriv->w,                           (float)(srcY + srch) / pPriv->h,
2593                          (float)srcX + 0.5,                                (float)(srcY + srch) + 0.5);
2594                VTX_OUT_6((float)(dstX + dstw),                             (float)(dstY + dsth),
2595                          (float)(srcX + srcw) / pPriv->w,                  (float)(srcY + srch) / pPriv->h,
2596                          (float)(srcX + srcw) + 0.5,                       (float)(srcY + srch) + 0.5);
2597                VTX_OUT_6((float)(dstX + dstw),                             (float)dstY,
2598                          (float)(srcX + srcw) / pPriv->w,                  (float)srcY / pPriv->h,
2599                          (float)(srcX + srcw) + 0.5,                       (float)srcY + 0.5);
2600            } else {
                /*
                 * Right triangle anchored at the box's top-left corner with
                 * both legs dstw + dsth long, so its hypotenuse passes
                 * through the box's bottom-right corner.  The source extent
                 * is stretched by the same ratio: srch * (dstw / dsth + 1)
                 * equals srch * (dstw + dsth) / dsth, and likewise for srcw.
                 */
2601                VTX_OUT_6((float)dstX,                                      (float)dstY,
2602                          (float)srcX / pPriv->w,                           (float)srcY / pPriv->h,
2603                          (float)srcX + 0.5,                                (float)srcY + 0.5);
2604                VTX_OUT_6((float)dstX,                                      (float)(dstY + dstw + dsth),
2605                          (float)srcX / pPriv->w,
2606                          ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h,
2607                          (float)srcX + 0.5,
2608                          (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
2609                VTX_OUT_6((float)(dstX + dstw + dsth),                      (float)dstY,
2610                          ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2611                          (float)srcY / pPriv->h,
2612                          (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
2613                          (float)srcY + 0.5);
2614            }
2615        } else {
2616            if (use_quad) {
                /* Corners in order: top-left, bottom-left, bottom-right, top-right. */
2617                VTX_OUT_4((float)dstX,                                      (float)dstY,
2618                          (float)srcX / pPriv->w,                           (float)srcY / pPriv->h);
2619                VTX_OUT_4((float)dstX,                                      (float)(dstY + dsth),
2620                          (float)srcX / pPriv->w,                           (float)(srcY + srch) / pPriv->h);
2621                VTX_OUT_4((float)(dstX + dstw),                             (float)(dstY + dsth),
2622                          (float)(srcX + srcw) / pPriv->w,                  (float)(srcY + srch) / pPriv->h);
2623                VTX_OUT_4((float)(dstX + dstw),                             (float)dstY,
2624                          (float)(srcX + srcw) / pPriv->w,                  (float)srcY / pPriv->h);
2625            } else {
2626                /*
2627                 * Render a big, scissored triangle.  This means
2628                 * increasing the triangle size and adjusting
2629                 * texture coordinates.
2630                 */
2631                VTX_OUT_4((float)dstX,                                      (float)dstY,
2632                          (float)srcX / pPriv->w,                           (float)srcY / pPriv->h);
2633                VTX_OUT_4((float)dstX,                                      (float)(dstY + dsth + dstw),
2634                          (float)srcX / pPriv->w,
2635                          ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h);
2636                VTX_OUT_4((float)(dstX + dstw + dsth),                      (float)dstY,
2637                          ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2638                          (float)srcY / pPriv->h);
2639            }
2640        }
2641
2642        /* flushing is pipelined, free/finish is not */
2643        OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2644
        /* Close the reservation opened by BEGIN_RING / BEGIN_ACCEL above. */
2645#ifdef ACCEL_CP
2646        ADVANCE_RING();
2647#else
2648        FINISH_ACCEL();
2649#endif /* !ACCEL_CP */
2650
2651        pBox++;
2652    }
2653
    /*
     * After the last box: rewrite the clip rule register, flush the
     * destination cache and queue a wait on RADEON_WAIT_3D_IDLECLEAN.
     * NOTE(review): the meaning of the 0xAAAA clip rule is not visible
     * here - check the register reference.
     */
2654    BEGIN_ACCEL(3);
2655    OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2656    OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2657    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2658    FINISH_ACCEL();
2659
    /* Report the whole clip region as damaged on the target drawable. */
2660    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
2661}
2662
2663static Bool
2664FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2665{
2666    RADEONInfoPtr info = RADEONPTR(pScrn);
2667    PixmapPtr pPixmap = pPriv->pPixmap;
2668    struct radeon_exa_pixmap_priv *driver_priv;
2669    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
2670    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
2671    uint32_t dst_pitch, dst_format;
2672    uint32_t txenable, colorpitch, bicubic_offset;
2673    uint32_t output_fmt;
2674    int pixel_shift, out_size = 6;
2675    ACCEL_PREAMBLE();
2676
2677#ifdef XF86DRM_MODE
2678    if (info->cs) {
2679        int ret;
2680
2681        radeon_cs_space_reset_bos(info->cs);
2682        radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
2683
2684        if (pPriv->bicubic_enabled)
2685            radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 2686 2687 driver_priv = exaGetPixmapDriverPrivate(pPixmap); 2688 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 2689 2690 ret = radeon_cs_space_check(info->cs); 2691 if (ret) { 2692 ErrorF("Not enough RAM to hw accel xv operation\n"); 2693 return FALSE; 2694 } 2695 } 2696#else 2697 (void)src_bo; 2698#endif 2699 2700 pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2701 2702#ifdef USE_EXA 2703 if (info->useEXA) { 2704 dst_pitch = exaGetPixmapPitch(pPixmap); 2705 } else 2706#endif 2707 { 2708 dst_pitch = pPixmap->devKind; 2709 } 2710 2711#ifdef USE_EXA 2712 if (info->useEXA) { 2713 RADEON_SWITCH_TO_3D(); 2714 } else 2715#endif 2716 { 2717 BEGIN_ACCEL(2); 2718 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 2719 /* We must wait for 3d to idle, in case source was just written as a dest. */ 2720 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 2721 RADEON_WAIT_HOST_IDLECLEAN | 2722 RADEON_WAIT_2D_IDLECLEAN | 2723 RADEON_WAIT_3D_IDLECLEAN | 2724 RADEON_WAIT_DMA_GUI_IDLE); 2725 FINISH_ACCEL(); 2726 2727 if (!info->accel_state->XInited3D) 2728 RADEONInit3DEngine(pScrn); 2729 } 2730 2731 if (pPriv->bicubic_enabled) 2732 pPriv->vtx_count = 6; 2733 else 2734 pPriv->vtx_count = 4; 2735 2736 switch (pPixmap->drawable.bitsPerPixel) { 2737 case 16: 2738 if (pPixmap->drawable.depth == 15) 2739 dst_format = R300_COLORFORMAT_ARGB1555; 2740 else 2741 dst_format = R300_COLORFORMAT_RGB565; 2742 break; 2743 case 32: 2744 dst_format = R300_COLORFORMAT_ARGB8888; 2745 break; 2746 default: 2747 return FALSE; 2748 } 2749 2750 output_fmt = (R300_OUT_FMT_C4_8 | 2751 R300_OUT_FMT_C0_SEL_BLUE | 2752 R300_OUT_FMT_C1_SEL_GREEN | 2753 R300_OUT_FMT_C2_SEL_RED | 2754 R300_OUT_FMT_C3_SEL_ALPHA); 2755 2756 colorpitch = dst_pitch >> pixel_shift; 2757 colorpitch |= dst_format; 2758 2759 if (RADEONTilingEnabled(pScrn, pPixmap)) 2760 colorpitch |= R300_COLORTILE; 2761 2762 if (((pPriv->bicubic_state == 
BICUBIC_OFF)) && 2763 (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2764 pPriv->is_planar = TRUE; 2765 else 2766 pPriv->is_planar = FALSE; 2767 2768 if (pPriv->is_planar) { 2769 txformat1 = R300_TX_FORMAT_X8; 2770 txpitch = pPriv->src_pitch; 2771 } else { 2772 if (pPriv->id == FOURCC_UYVY) 2773 txformat1 = R300_TX_FORMAT_YVYU422; 2774 else 2775 txformat1 = R300_TX_FORMAT_VYUY422; 2776 2777 if (pPriv->bicubic_state != BICUBIC_OFF) 2778 txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2779 2780 /* pitch is in pixels */ 2781 txpitch = pPriv->src_pitch / 2; 2782 } 2783 txpitch -= 1; 2784 2785 txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2786 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2787 R300_TXPITCH_EN); 2788 2789 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2790 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2791 R300_TX_MAG_FILTER_LINEAR | 2792 R300_TX_MIN_FILTER_LINEAR | 2793 (0 << R300_TX_ID_SHIFT)); 2794 2795 2796 if ((pPriv->w - 1) & 0x800) 2797 txpitch |= R500_TXWIDTH_11; 2798 2799 if ((pPriv->h - 1) & 0x800) 2800 txpitch |= R500_TXHEIGHT_11; 2801 2802 if (info->ChipFamily == CHIP_FAMILY_R520) { 2803 unsigned us_width = (pPriv->w - 1) & 0x7ff; 2804 unsigned us_height = (pPriv->h - 1) & 0x7ff; 2805 unsigned us_depth = 0; 2806 2807 if (pPriv->w > 2048) { 2808 us_width = (0x7ff + us_width) >> 1; 2809 us_depth |= 0x0d; 2810 } 2811 if (pPriv->h > 2048) { 2812 us_height = (0x7ff + us_height) >> 1; 2813 us_depth |= 0x0e; 2814 } 2815 us_format = (us_width << R300_TXWIDTH_SHIFT) | 2816 (us_height << R300_TXHEIGHT_SHIFT) | 2817 (us_depth << R300_TXDEPTH_SHIFT); 2818 out_size++; 2819 } 2820 2821 txoffset = info->cs ? 
0 : pPriv->src_offset; 2822 2823 BEGIN_ACCEL_RELOC(out_size, 1); 2824 OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); 2825 OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); 2826 OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); 2827 OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); 2828 OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); 2829 OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo); 2830 if (info->ChipFamily == CHIP_FAMILY_R520) 2831 OUT_ACCEL_REG(R500_US_FORMAT0_0, us_format); 2832 FINISH_ACCEL(); 2833 2834 txenable = R300_TEX_0_ENABLE; 2835 2836 if (pPriv->is_planar) { 2837 txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2838 (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2839 R300_TXPITCH_EN); 2840 txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2841 txpitch -= 1; 2842 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2843 R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2844 R300_TX_MIN_FILTER_LINEAR | 2845 R300_TX_MAG_FILTER_LINEAR); 2846 2847 BEGIN_ACCEL_RELOC(12, 2); 2848 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 2849 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2850 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2851 OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 2852 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2853 OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo); 2854 OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 2855 OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); 2856 OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); 2857 OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 2858 OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); 2859 OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo); 2860 FINISH_ACCEL(); 2861 txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2862 } 2863 2864 if (pPriv->bicubic_enabled) { 2865 /* Size is 128x1 */ 2866 txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2867 (0x0 << R300_TXHEIGHT_SHIFT) | 2868 R300_TXPITCH_EN); 2869 /* Format 
is 32-bit floats, 4bpp */ 2870 txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2871 /* Pitch is 127 (128-1) */ 2872 txpitch = 0x7f; 2873 /* Tex filter */ 2874 txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2875 R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2876 R300_TX_MIN_FILTER_NEAREST | 2877 R300_TX_MAG_FILTER_NEAREST | 2878 (1 << R300_TX_ID_SHIFT)); 2879 2880 if (info->cs) 2881 bicubic_offset = 0; 2882 else 2883 bicubic_offset = pPriv->bicubic_src_offset; 2884 2885 BEGIN_ACCEL_RELOC(6, 1); 2886 OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); 2887 OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); 2888 OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); 2889 OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); 2890 OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); 2891 OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); 2892 FINISH_ACCEL(); 2893 2894 /* Enable tex 1 */ 2895 txenable |= R300_TEX_1_ENABLE; 2896 } 2897 2898 /* setup the VAP */ 2899 if (info->accel_state->has_tcl) { 2900 if (pPriv->bicubic_enabled) 2901 BEGIN_ACCEL(7); 2902 else 2903 BEGIN_ACCEL(6); 2904 } else { 2905 if (pPriv->bicubic_enabled) 2906 BEGIN_ACCEL(5); 2907 else 2908 BEGIN_ACCEL(4); 2909 } 2910 2911 /* These registers define the number, type, and location of data submitted 2912 * to the PVS unit of GA input (when PVS is disabled) 2913 * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2914 * enabled. This memory provides the imputs to the vertex shader program 2915 * and ordering is not important. When PVS/TCL is disabled, this field maps 2916 * directly to the GA input memory and the order is signifigant. 
In 2917 * PVS_BYPASS mode the order is as follows: 2918 * Position 2919 * Point Size 2920 * Color 0-3 2921 * Textures 0-7 2922 * Fog 2923 */ 2924 if (pPriv->bicubic_enabled) { 2925 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2926 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2927 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2928 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2929 R300_SIGNED_0 | 2930 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2931 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2932 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2933 R300_SIGNED_1)); 2934 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 2935 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2936 (0 << R300_SKIP_DWORDS_2_SHIFT) | 2937 (7 << R300_DST_VEC_LOC_2_SHIFT) | 2938 R300_LAST_VEC_2 | 2939 R300_SIGNED_2)); 2940 } else { 2941 OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 2942 ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2943 (0 << R300_SKIP_DWORDS_0_SHIFT) | 2944 (0 << R300_DST_VEC_LOC_0_SHIFT) | 2945 R300_SIGNED_0 | 2946 (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2947 (0 << R300_SKIP_DWORDS_1_SHIFT) | 2948 (6 << R300_DST_VEC_LOC_1_SHIFT) | 2949 R300_LAST_VEC_1 | 2950 R300_SIGNED_1)); 2951 } 2952 2953 /* load the vertex shader 2954 * We pre-load vertex programs in RADEONInit3DEngine(): 2955 * - exa 2956 * - Xv 2957 * - Xv bicubic 2958 * Here we select the offset of the vertex program we want to use 2959 */ 2960 if (info->accel_state->has_tcl) { 2961 if (pPriv->bicubic_enabled) { 2962 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2963 ((11 << R300_PVS_FIRST_INST_SHIFT) | 2964 (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2965 (13 << R300_PVS_LAST_INST_SHIFT))); 2966 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2967 (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2968 } else { 2969 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 2970 ((9 << R300_PVS_FIRST_INST_SHIFT) | 2971 (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2972 (10 << R300_PVS_LAST_INST_SHIFT))); 2973 OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 2974 (10 << 
R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2975 } 2976 } 2977 2978 /* Position and one set of 2 texture coordinates */ 2979 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2980 if (pPriv->bicubic_enabled) 2981 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2982 (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2983 else 2984 OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2985 2986 OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); 2987 FINISH_ACCEL(); 2988 2989 /* setup pixel shader */ 2990 if (pPriv->bicubic_state != BICUBIC_OFF) { 2991 if (pPriv->bicubic_enabled) { 2992 BEGIN_ACCEL(7); 2993 2994 /* 4 components: 2 for tex0 and 2 for tex1 */ 2995 OUT_ACCEL_REG(R300_RS_COUNT, 2996 ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2997 R300_RS_COUNT_HIRES_EN)); 2998 2999 /* R300_INST_COUNT_RS - highest RS instruction used */ 3000 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 3001 3002 /* Pixel stack frame size. */ 3003 OUT_ACCEL_REG(R300_US_PIXSIZE, 5); 3004 3005 /* FP length. */ 3006 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3007 R500_US_CODE_END_ADDR(13))); 3008 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3009 R500_US_CODE_RANGE_SIZE(13))); 3010 3011 /* Prepare for FP emission. */ 3012 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3013 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3014 FINISH_ACCEL(); 3015 3016 BEGIN_ACCEL(89); 3017 /* Pixel shader. 3018 * I've gone ahead and annotated each instruction, since this 3019 * thing is MASSIVE. :3 3020 * Note: In order to avoid buggies with temps and multiple 3021 * inputs, all temps are offset by 2. temp0 -> register2. 
*/ 3022 3023 /* TEX temp2, input1.xxxx, tex1, 1D */ 3024 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3025 R500_INST_RGB_WMASK_R | 3026 R500_INST_RGB_WMASK_G | 3027 R500_INST_RGB_WMASK_B)); 3028 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3029 R500_TEX_INST_LD | 3030 R500_TEX_IGNORE_UNCOVERED)); 3031 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3032 R500_TEX_SRC_S_SWIZ_R | 3033 R500_TEX_SRC_T_SWIZ_R | 3034 R500_TEX_SRC_R_SWIZ_R | 3035 R500_TEX_SRC_Q_SWIZ_R | 3036 R500_TEX_DST_ADDR(2) | 3037 R500_TEX_DST_R_SWIZ_R | 3038 R500_TEX_DST_G_SWIZ_G | 3039 R500_TEX_DST_B_SWIZ_B | 3040 R500_TEX_DST_A_SWIZ_A)); 3041 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3042 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3043 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3044 3045 /* TEX temp5, input1.yyyy, tex1, 1D */ 3046 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3047 R500_INST_TEX_SEM_WAIT | 3048 R500_INST_RGB_WMASK_R | 3049 R500_INST_RGB_WMASK_G | 3050 R500_INST_RGB_WMASK_B)); 3051 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3052 R500_TEX_INST_LD | 3053 R500_TEX_SEM_ACQUIRE | 3054 R500_TEX_IGNORE_UNCOVERED)); 3055 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 3056 R500_TEX_SRC_S_SWIZ_G | 3057 R500_TEX_SRC_T_SWIZ_G | 3058 R500_TEX_SRC_R_SWIZ_G | 3059 R500_TEX_SRC_Q_SWIZ_G | 3060 R500_TEX_DST_ADDR(5) | 3061 R500_TEX_DST_R_SWIZ_R | 3062 R500_TEX_DST_G_SWIZ_G | 3063 R500_TEX_DST_B_SWIZ_B | 3064 R500_TEX_DST_A_SWIZ_A)); 3065 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3066 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3067 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3068 3069 /* MUL temp4, const0.x0x0, temp2.yyxx */ 3070 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3071 R500_INST_TEX_SEM_WAIT | 3072 R500_INST_RGB_WMASK_R | 3073 R500_INST_RGB_WMASK_G | 3074 R500_INST_RGB_WMASK_B | 3075 R500_INST_ALPHA_WMASK)); 3076 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3077 R500_RGB_ADDR0_CONST | 3078 R500_RGB_ADDR1(2))); 3079 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3080 R500_ALPHA_ADDR0_CONST | 3081 R500_ALPHA_ADDR1(2))); 3082 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3083 R500_ALU_RGB_R_SWIZ_A_R | 3084 R500_ALU_RGB_G_SWIZ_A_0 | 3085 R500_ALU_RGB_B_SWIZ_A_R | 3086 R500_ALU_RGB_SEL_B_SRC1 | 3087 R500_ALU_RGB_R_SWIZ_B_G | 3088 R500_ALU_RGB_G_SWIZ_B_G | 3089 R500_ALU_RGB_B_SWIZ_B_R)); 3090 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3091 R500_ALPHA_OP_MAD | 3092 R500_ALPHA_SEL_A_SRC0 | 3093 R500_ALPHA_SWIZ_A_0 | 3094 R500_ALPHA_SEL_B_SRC1 | 3095 R500_ALPHA_SWIZ_B_R)); 3096 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3097 R500_ALU_RGBA_OP_MAD | 3098 R500_ALU_RGBA_R_SWIZ_0 | 3099 R500_ALU_RGBA_G_SWIZ_0 | 3100 R500_ALU_RGBA_B_SWIZ_0 | 3101 R500_ALU_RGBA_A_SWIZ_0)); 3102 3103 /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 3104 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3105 R500_INST_RGB_WMASK_R | 3106 R500_INST_RGB_WMASK_G | 3107 R500_INST_RGB_WMASK_B | 3108 R500_INST_ALPHA_WMASK)); 3109 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3110 R500_RGB_ADDR0_CONST | 3111 R500_RGB_ADDR1(5) | 3112 R500_RGB_ADDR2(4))); 3113 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3114 R500_ALPHA_ADDR0_CONST | 3115 R500_ALPHA_ADDR1(5) | 3116 R500_ALPHA_ADDR2(4))); 3117 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3118 R500_ALU_RGB_R_SWIZ_A_0 | 3119 R500_ALU_RGB_G_SWIZ_A_G | 3120 R500_ALU_RGB_B_SWIZ_A_0 | 3121 R500_ALU_RGB_SEL_B_SRC1 | 3122 R500_ALU_RGB_R_SWIZ_B_R | 3123 R500_ALU_RGB_G_SWIZ_B_R | 3124 R500_ALU_RGB_B_SWIZ_B_R)); 3125 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3126 R500_ALPHA_OP_MAD | 3127 R500_ALPHA_SEL_A_SRC0 | 3128 R500_ALPHA_SWIZ_A_G | 3129 R500_ALPHA_SEL_B_SRC1 | 3130 R500_ALPHA_SWIZ_B_R)); 3131 
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3132 R500_ALU_RGBA_OP_MAD | 3133 R500_ALU_RGBA_SEL_C_SRC2 | 3134 R500_ALU_RGBA_R_SWIZ_R | 3135 R500_ALU_RGBA_G_SWIZ_G | 3136 R500_ALU_RGBA_B_SWIZ_B | 3137 R500_ALU_RGBA_A_SWIZ_A)); 3138 3139 /* ADD temp3, temp3, input0.xyxy */ 3140 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3141 R500_INST_RGB_WMASK_R | 3142 R500_INST_RGB_WMASK_G | 3143 R500_INST_RGB_WMASK_B | 3144 R500_INST_ALPHA_WMASK)); 3145 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 3146 R500_RGB_ADDR2(0))); 3147 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 3148 R500_ALPHA_ADDR2(0))); 3149 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3150 R500_ALU_RGB_G_SWIZ_A_1 | 3151 R500_ALU_RGB_B_SWIZ_A_1 | 3152 R500_ALU_RGB_SEL_B_SRC1 | 3153 R500_ALU_RGB_R_SWIZ_B_R | 3154 R500_ALU_RGB_G_SWIZ_B_G | 3155 R500_ALU_RGB_B_SWIZ_B_B)); 3156 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3157 R500_ALPHA_OP_MAD | 3158 R500_ALPHA_SWIZ_A_1 | 3159 R500_ALPHA_SEL_B_SRC1 | 3160 R500_ALPHA_SWIZ_B_A)); 3161 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3162 R500_ALU_RGBA_OP_MAD | 3163 R500_ALU_RGBA_SEL_C_SRC2 | 3164 R500_ALU_RGBA_R_SWIZ_R | 3165 R500_ALU_RGBA_G_SWIZ_G | 3166 R500_ALU_RGBA_B_SWIZ_R | 3167 R500_ALU_RGBA_A_SWIZ_G)); 3168 3169 /* TEX temp1, temp3.zwxy, tex0, 2D */ 3170 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3171 R500_INST_RGB_WMASK_R | 3172 R500_INST_RGB_WMASK_G | 3173 R500_INST_RGB_WMASK_B | 3174 R500_INST_ALPHA_WMASK)); 3175 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3176 R500_TEX_INST_LD | 3177 R500_TEX_IGNORE_UNCOVERED)); 3178 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3179 R500_TEX_SRC_S_SWIZ_B | 3180 R500_TEX_SRC_T_SWIZ_A | 3181 R500_TEX_SRC_R_SWIZ_R | 3182 R500_TEX_SRC_Q_SWIZ_G | 3183 R500_TEX_DST_ADDR(1) | 3184 R500_TEX_DST_R_SWIZ_R | 3185 R500_TEX_DST_G_SWIZ_G | 3186 R500_TEX_DST_B_SWIZ_B | 3187 
R500_TEX_DST_A_SWIZ_A)); 3188 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3189 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3190 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3191 3192 /* TEX temp3, temp3.xyzw, tex0, 2D */ 3193 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3194 R500_INST_TEX_SEM_WAIT | 3195 R500_INST_RGB_WMASK_R | 3196 R500_INST_RGB_WMASK_G | 3197 R500_INST_RGB_WMASK_B | 3198 R500_INST_ALPHA_WMASK)); 3199 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3200 R500_TEX_INST_LD | 3201 R500_TEX_SEM_ACQUIRE | 3202 R500_TEX_IGNORE_UNCOVERED)); 3203 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 3204 R500_TEX_SRC_S_SWIZ_R | 3205 R500_TEX_SRC_T_SWIZ_G | 3206 R500_TEX_SRC_R_SWIZ_B | 3207 R500_TEX_SRC_Q_SWIZ_A | 3208 R500_TEX_DST_ADDR(3) | 3209 R500_TEX_DST_R_SWIZ_R | 3210 R500_TEX_DST_G_SWIZ_G | 3211 R500_TEX_DST_B_SWIZ_B | 3212 R500_TEX_DST_A_SWIZ_A)); 3213 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3214 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3215 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3216 3217 /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 3218 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3219 R500_INST_RGB_WMASK_R | 3220 R500_INST_RGB_WMASK_G | 3221 R500_INST_RGB_WMASK_B | 3222 R500_INST_ALPHA_WMASK)); 3223 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3224 R500_RGB_ADDR0_CONST | 3225 R500_RGB_ADDR1(5) | 3226 R500_RGB_ADDR2(4))); 3227 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3228 R500_ALPHA_ADDR0_CONST | 3229 R500_ALPHA_ADDR1(5) | 3230 R500_ALPHA_ADDR2(4))); 3231 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3232 R500_ALU_RGB_R_SWIZ_A_0 | 3233 R500_ALU_RGB_G_SWIZ_A_G | 3234 R500_ALU_RGB_B_SWIZ_A_0 | 3235 R500_ALU_RGB_SEL_B_SRC1 | 3236 R500_ALU_RGB_R_SWIZ_B_G | 3237 R500_ALU_RGB_G_SWIZ_B_G | 3238 R500_ALU_RGB_B_SWIZ_B_G)); 3239 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 3240 
R500_ALPHA_OP_MAD | 3241 R500_ALPHA_SEL_A_SRC0 | 3242 R500_ALPHA_SWIZ_A_G | 3243 R500_ALPHA_SEL_B_SRC1 | 3244 R500_ALPHA_SWIZ_B_G)); 3245 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 3246 R500_ALU_RGBA_OP_MAD | 3247 R500_ALU_RGBA_SEL_C_SRC2 | 3248 R500_ALU_RGBA_R_SWIZ_R | 3249 R500_ALU_RGBA_G_SWIZ_G | 3250 R500_ALU_RGBA_B_SWIZ_B | 3251 R500_ALU_RGBA_A_SWIZ_A)); 3252 3253 /* ADD temp0, temp4, input0.xyxy */ 3254 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3255 R500_INST_RGB_WMASK_R | 3256 R500_INST_RGB_WMASK_G | 3257 R500_INST_RGB_WMASK_B | 3258 R500_INST_ALPHA_WMASK)); 3259 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 3260 R500_RGB_ADDR2(0))); 3261 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 3262 R500_ALPHA_ADDR2(0))); 3263 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 3264 R500_ALU_RGB_G_SWIZ_A_1 | 3265 R500_ALU_RGB_B_SWIZ_A_1 | 3266 R500_ALU_RGB_SEL_B_SRC1 | 3267 R500_ALU_RGB_R_SWIZ_B_R | 3268 R500_ALU_RGB_G_SWIZ_B_G | 3269 R500_ALU_RGB_B_SWIZ_B_B)); 3270 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3271 R500_ALPHA_OP_MAD | 3272 R500_ALPHA_SWIZ_A_1 | 3273 R500_ALPHA_SEL_B_SRC1 | 3274 R500_ALPHA_SWIZ_B_A)); 3275 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3276 R500_ALU_RGBA_OP_MAD | 3277 R500_ALU_RGBA_SEL_C_SRC2 | 3278 R500_ALU_RGBA_R_SWIZ_R | 3279 R500_ALU_RGBA_G_SWIZ_G | 3280 R500_ALU_RGBA_B_SWIZ_R | 3281 R500_ALU_RGBA_A_SWIZ_G)); 3282 3283 /* TEX temp4, temp0.zwzw, tex0, 2D */ 3284 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3285 R500_INST_TEX_SEM_WAIT | 3286 R500_INST_RGB_WMASK_R | 3287 R500_INST_RGB_WMASK_G | 3288 R500_INST_RGB_WMASK_B | 3289 R500_INST_ALPHA_WMASK)); 3290 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3291 R500_TEX_INST_LD | 3292 R500_TEX_IGNORE_UNCOVERED)); 3293 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3294 R500_TEX_SRC_S_SWIZ_B | 3295 R500_TEX_SRC_T_SWIZ_A | 3296 
R500_TEX_SRC_R_SWIZ_B | 3297 R500_TEX_SRC_Q_SWIZ_A | 3298 R500_TEX_DST_ADDR(4) | 3299 R500_TEX_DST_R_SWIZ_R | 3300 R500_TEX_DST_G_SWIZ_G | 3301 R500_TEX_DST_B_SWIZ_B | 3302 R500_TEX_DST_A_SWIZ_A)); 3303 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3304 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3305 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3306 3307 /* TEX temp0, temp0.xyzw, tex0, 2D */ 3308 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3309 R500_INST_TEX_SEM_WAIT | 3310 R500_INST_RGB_WMASK_R | 3311 R500_INST_RGB_WMASK_G | 3312 R500_INST_RGB_WMASK_B | 3313 R500_INST_ALPHA_WMASK)); 3314 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3315 R500_TEX_INST_LD | 3316 R500_TEX_SEM_ACQUIRE | 3317 R500_TEX_IGNORE_UNCOVERED)); 3318 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3319 R500_TEX_SRC_S_SWIZ_R | 3320 R500_TEX_SRC_T_SWIZ_G | 3321 R500_TEX_SRC_R_SWIZ_B | 3322 R500_TEX_SRC_Q_SWIZ_A | 3323 R500_TEX_DST_ADDR(0) | 3324 R500_TEX_DST_R_SWIZ_R | 3325 R500_TEX_DST_G_SWIZ_G | 3326 R500_TEX_DST_B_SWIZ_B | 3327 R500_TEX_DST_A_SWIZ_A)); 3328 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3329 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3330 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3331 3332 /* LRP temp3, temp2.zzzz, temp1, temp3 -> 3333 * - PRESUB temps, temp1 - temp3 3334 * - MAD temp2.zzzz, temps, temp3 */ 3335 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3336 R500_INST_RGB_WMASK_R | 3337 R500_INST_RGB_WMASK_G | 3338 R500_INST_RGB_WMASK_B | 3339 R500_INST_ALPHA_WMASK)); 3340 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3341 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3342 R500_RGB_ADDR1(1) | 3343 R500_RGB_ADDR2(2))); 3344 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3345 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3346 R500_ALPHA_ADDR1(1) | 3347 R500_ALPHA_ADDR2(2))); 3348 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3349 R500_ALU_RGB_R_SWIZ_A_B | 
3350 R500_ALU_RGB_G_SWIZ_A_B | 3351 R500_ALU_RGB_B_SWIZ_A_B | 3352 R500_ALU_RGB_SEL_B_SRCP | 3353 R500_ALU_RGB_R_SWIZ_B_R | 3354 R500_ALU_RGB_G_SWIZ_B_G | 3355 R500_ALU_RGB_B_SWIZ_B_B)); 3356 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3357 R500_ALPHA_OP_MAD | 3358 R500_ALPHA_SEL_A_SRC2 | 3359 R500_ALPHA_SWIZ_A_B | 3360 R500_ALPHA_SEL_B_SRCP | 3361 R500_ALPHA_SWIZ_B_A)); 3362 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3363 R500_ALU_RGBA_OP_MAD | 3364 R500_ALU_RGBA_SEL_C_SRC0 | 3365 R500_ALU_RGBA_R_SWIZ_R | 3366 R500_ALU_RGBA_G_SWIZ_G | 3367 R500_ALU_RGBA_B_SWIZ_B | 3368 R500_ALU_RGBA_A_SWIZ_A)); 3369 3370 /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3371 * - PRESUB temps, temp4 - temp1 3372 * - MAD temp2.zzzz, temps, temp0 */ 3373 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3374 R500_INST_TEX_SEM_WAIT | 3375 R500_INST_RGB_WMASK_R | 3376 R500_INST_RGB_WMASK_G | 3377 R500_INST_RGB_WMASK_B | 3378 R500_INST_ALPHA_WMASK)); 3379 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3380 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3381 R500_RGB_ADDR1(4) | 3382 R500_RGB_ADDR2(2))); 3383 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3384 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3385 R500_ALPHA_ADDR1(4) | 3386 R500_ALPHA_ADDR2(2))); 3387 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3388 R500_ALU_RGB_R_SWIZ_A_B | 3389 R500_ALU_RGB_G_SWIZ_A_B | 3390 R500_ALU_RGB_B_SWIZ_A_B | 3391 R500_ALU_RGB_SEL_B_SRCP | 3392 R500_ALU_RGB_R_SWIZ_B_R | 3393 R500_ALU_RGB_G_SWIZ_B_G | 3394 R500_ALU_RGB_B_SWIZ_B_B)); 3395 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3396 R500_ALPHA_OP_MAD | 3397 R500_ALPHA_SEL_A_SRC2 | 3398 R500_ALPHA_SWIZ_A_B | 3399 R500_ALPHA_SEL_B_SRCP | 3400 R500_ALPHA_SWIZ_B_A)); 3401 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3402 R500_ALU_RGBA_OP_MAD | 3403 R500_ALU_RGBA_SEL_C_SRC0 | 3404 R500_ALU_RGBA_R_SWIZ_R | 3405 R500_ALU_RGBA_G_SWIZ_G | 3406 
R500_ALU_RGBA_B_SWIZ_B | 3407 R500_ALU_RGBA_A_SWIZ_A)); 3408 3409 /* LRP output, temp5.zzzz, temp3, temp0 -> 3410 * - PRESUB temps, temp3 - temp0 3411 * - MAD temp5.zzzz, temps, temp0 */ 3412 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3413 R500_INST_LAST | 3414 R500_INST_TEX_SEM_WAIT | 3415 R500_INST_RGB_WMASK_R | 3416 R500_INST_RGB_WMASK_G | 3417 R500_INST_RGB_WMASK_B | 3418 R500_INST_ALPHA_WMASK | 3419 R500_INST_RGB_OMASK_R | 3420 R500_INST_RGB_OMASK_G | 3421 R500_INST_RGB_OMASK_B | 3422 R500_INST_ALPHA_OMASK)); 3423 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3424 R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3425 R500_RGB_ADDR1(3) | 3426 R500_RGB_ADDR2(5))); 3427 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3428 R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3429 R500_ALPHA_ADDR1(3) | 3430 R500_ALPHA_ADDR2(5))); 3431 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3432 R500_ALU_RGB_R_SWIZ_A_B | 3433 R500_ALU_RGB_G_SWIZ_A_B | 3434 R500_ALU_RGB_B_SWIZ_A_B | 3435 R500_ALU_RGB_SEL_B_SRCP | 3436 R500_ALU_RGB_R_SWIZ_B_R | 3437 R500_ALU_RGB_G_SWIZ_B_G | 3438 R500_ALU_RGB_B_SWIZ_B_B)); 3439 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3440 R500_ALPHA_OP_MAD | 3441 R500_ALPHA_SEL_A_SRC2 | 3442 R500_ALPHA_SWIZ_A_B | 3443 R500_ALPHA_SEL_B_SRCP | 3444 R500_ALPHA_SWIZ_B_A)); 3445 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3446 R500_ALU_RGBA_OP_MAD | 3447 R500_ALU_RGBA_SEL_C_SRC0 | 3448 R500_ALU_RGBA_R_SWIZ_R | 3449 R500_ALU_RGBA_G_SWIZ_G | 3450 R500_ALU_RGBA_B_SWIZ_B | 3451 R500_ALU_RGBA_A_SWIZ_A)); 3452 3453 /* Shader constants. 
*/ 3454 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3455 3456 /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3457 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3458 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3459 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3460 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3461 3462 FINISH_ACCEL(); 3463 } else { 3464 BEGIN_ACCEL(19); 3465 /* 2 components: 2 for tex0 */ 3466 OUT_ACCEL_REG(R300_RS_COUNT, 3467 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3468 R300_RS_COUNT_HIRES_EN)); 3469 3470 /* R300_INST_COUNT_RS - highest RS instruction used */ 3471 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3472 3473 /* Pixel stack frame size. */ 3474 OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3475 3476 /* FP length. */ 3477 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3478 R500_US_CODE_END_ADDR(1))); 3479 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3480 R500_US_CODE_RANGE_SIZE(1))); 3481 3482 /* Prepare for FP emission. 
*/ 3483 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3484 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3485 3486 /* tex inst */ 3487 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3488 R500_INST_TEX_SEM_WAIT | 3489 R500_INST_RGB_WMASK_R | 3490 R500_INST_RGB_WMASK_G | 3491 R500_INST_RGB_WMASK_B | 3492 R500_INST_ALPHA_WMASK | 3493 R500_INST_RGB_CLAMP | 3494 R500_INST_ALPHA_CLAMP)); 3495 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3496 R500_TEX_INST_LD | 3497 R500_TEX_SEM_ACQUIRE | 3498 R500_TEX_IGNORE_UNCOVERED)); 3499 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3500 R500_TEX_SRC_S_SWIZ_R | 3501 R500_TEX_SRC_T_SWIZ_G | 3502 R500_TEX_DST_ADDR(0) | 3503 R500_TEX_DST_R_SWIZ_R | 3504 R500_TEX_DST_G_SWIZ_G | 3505 R500_TEX_DST_B_SWIZ_B | 3506 R500_TEX_DST_A_SWIZ_A)); 3507 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3508 R500_DX_S_SWIZ_R | 3509 R500_DX_T_SWIZ_R | 3510 R500_DX_R_SWIZ_R | 3511 R500_DX_Q_SWIZ_R | 3512 R500_DY_ADDR(0) | 3513 R500_DY_S_SWIZ_R | 3514 R500_DY_T_SWIZ_R | 3515 R500_DY_R_SWIZ_R | 3516 R500_DY_Q_SWIZ_R)); 3517 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3518 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3519 3520 /* ALU inst */ 3521 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3522 R500_INST_TEX_SEM_WAIT | 3523 R500_INST_LAST | 3524 R500_INST_RGB_OMASK_R | 3525 R500_INST_RGB_OMASK_G | 3526 R500_INST_RGB_OMASK_B | 3527 R500_INST_ALPHA_OMASK | 3528 R500_INST_RGB_CLAMP | 3529 R500_INST_ALPHA_CLAMP)); 3530 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3531 R500_RGB_ADDR1(0) | 3532 R500_RGB_ADDR1_CONST | 3533 R500_RGB_ADDR2(0) | 3534 R500_RGB_ADDR2_CONST)); 3535 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3536 R500_ALPHA_ADDR1(0) | 3537 R500_ALPHA_ADDR1_CONST | 3538 R500_ALPHA_ADDR2(0) | 3539 R500_ALPHA_ADDR2_CONST)); 3540 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3541 R500_ALU_RGB_R_SWIZ_A_R | 3542 
R500_ALU_RGB_G_SWIZ_A_G | 3543 R500_ALU_RGB_B_SWIZ_A_B | 3544 R500_ALU_RGB_SEL_B_SRC0 | 3545 R500_ALU_RGB_R_SWIZ_B_1 | 3546 R500_ALU_RGB_B_SWIZ_B_1 | 3547 R500_ALU_RGB_G_SWIZ_B_1)); 3548 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3549 R500_ALPHA_SWIZ_A_A | 3550 R500_ALPHA_SWIZ_B_1)); 3551 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3552 R500_ALU_RGBA_R_SWIZ_0 | 3553 R500_ALU_RGBA_G_SWIZ_0 | 3554 R500_ALU_RGBA_B_SWIZ_0 | 3555 R500_ALU_RGBA_A_SWIZ_0)); 3556 FINISH_ACCEL(); 3557 } 3558 } else { 3559 /* 3560 * y' = y - .0625 3561 * u' = u - .5 3562 * v' = v - .5; 3563 * 3564 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3565 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3566 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3567 * 3568 * DP3 might look like the straightforward solution 3569 * but we'd need to move the texture yuv values in 3570 * the same reg for this to work. Therefore use MADs. 3571 * Brightness just adds to the off constant. 3572 * Contrast is multiplication of luminance. 3573 * Saturation and hue change the u and v coeffs. 
3574 * Default values (before adjustments - depend on colorspace): 3575 * yco = 1.1643 3576 * uco = 0, -0.39173, 2.017 3577 * vco = 1.5958, -0.8129, 0 3578 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3579 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3580 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3581 * 3582 * temp = MAD(yco, yuv.yyyy, off) 3583 * temp = MAD(uco, yuv.uuuu, temp) 3584 * result = MAD(vco, yuv.vvvv, temp) 3585 */ 3586 /* TODO: don't recalc consts always */ 3587 const float Loff = -0.0627; 3588 const float Coff = -0.502; 3589 float uvcosf, uvsinf; 3590 float yco; 3591 float uco[3], vco[3], off[3]; 3592 float bright, cont, gamma; 3593 int ref = pPriv->transform_index; 3594 Bool needgamma = FALSE; 3595 3596 cont = RTFContrast(pPriv->contrast); 3597 bright = RTFBrightness(pPriv->brightness); 3598 gamma = (float)pPriv->gamma / 1000.0; 3599 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3600 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3601 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 3602 3603 yco = trans[ref].RefLuma * cont; 3604 uco[0] = -trans[ref].RefRCr * uvsinf; 3605 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3606 uco[2] = trans[ref].RefBCb * uvcosf; 3607 vco[0] = trans[ref].RefRCr * uvcosf; 3608 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3609 vco[2] = trans[ref].RefBCb * uvsinf; 3610 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3611 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3612 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3613 3614 //XXX gamma 3615 3616 if (gamma != 1.0) { 3617 needgamma = TRUE; 3618 /* note: gamma correction is out = in ^ gamma; 3619 gpu can only do LG2/EX2 therefore we transform into 3620 in ^ gamma = 2 ^ (log2(in) * gamma). 3621 Lots of scalar ops, unfortunately (better solution?) - 3622 without gamma that's 3 inst, with gamma it's 10... 
3623 could use different gamma factors per channel, 3624 if that's of any use. */ 3625 } 3626 3627 if (pPriv->is_planar) { 3628 BEGIN_ACCEL(56); 3629 /* 2 components: 2 for tex0 */ 3630 OUT_ACCEL_REG(R300_RS_COUNT, 3631 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3632 R300_RS_COUNT_HIRES_EN)); 3633 3634 /* R300_INST_COUNT_RS - highest RS instruction used */ 3635 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3636 3637 /* Pixel stack frame size. */ 3638 OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3639 3640 /* FP length. */ 3641 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3642 R500_US_CODE_END_ADDR(5))); 3643 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3644 R500_US_CODE_RANGE_SIZE(5))); 3645 3646 /* Prepare for FP emission. */ 3647 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3648 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3649 3650 /* tex inst */ 3651 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3652 R500_INST_TEX_SEM_WAIT | 3653 R500_INST_RGB_WMASK_R | 3654 R500_INST_RGB_WMASK_G | 3655 R500_INST_RGB_WMASK_B | 3656 R500_INST_ALPHA_WMASK | 3657 R500_INST_RGB_CLAMP | 3658 R500_INST_ALPHA_CLAMP)); 3659 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3660 R500_TEX_INST_LD | 3661 R500_TEX_IGNORE_UNCOVERED)); 3662 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3663 R500_TEX_SRC_S_SWIZ_R | 3664 R500_TEX_SRC_T_SWIZ_G | 3665 R500_TEX_DST_ADDR(2) | 3666 R500_TEX_DST_R_SWIZ_R | 3667 R500_TEX_DST_G_SWIZ_G | 3668 R500_TEX_DST_B_SWIZ_B | 3669 R500_TEX_DST_A_SWIZ_A)); 3670 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3671 R500_DX_S_SWIZ_R | 3672 R500_DX_T_SWIZ_R | 3673 R500_DX_R_SWIZ_R | 3674 R500_DX_Q_SWIZ_R | 3675 R500_DY_ADDR(0) | 3676 R500_DY_S_SWIZ_R | 3677 R500_DY_T_SWIZ_R | 3678 R500_DY_R_SWIZ_R | 3679 R500_DY_Q_SWIZ_R)); 3680 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3681 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3682 3683 /* 
tex inst */ 3684 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3685 R500_INST_TEX_SEM_WAIT | 3686 R500_INST_RGB_WMASK_R | 3687 R500_INST_RGB_WMASK_G | 3688 R500_INST_RGB_WMASK_B | 3689 R500_INST_ALPHA_WMASK | 3690 R500_INST_RGB_CLAMP | 3691 R500_INST_ALPHA_CLAMP)); 3692 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3693 R500_TEX_INST_LD | 3694 R500_TEX_IGNORE_UNCOVERED)); 3695 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3696 R500_TEX_SRC_S_SWIZ_R | 3697 R500_TEX_SRC_T_SWIZ_G | 3698 R500_TEX_DST_ADDR(1) | 3699 R500_TEX_DST_R_SWIZ_R | 3700 R500_TEX_DST_G_SWIZ_G | 3701 R500_TEX_DST_B_SWIZ_B | 3702 R500_TEX_DST_A_SWIZ_A)); 3703 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3704 R500_DX_S_SWIZ_R | 3705 R500_DX_T_SWIZ_R | 3706 R500_DX_R_SWIZ_R | 3707 R500_DX_Q_SWIZ_R | 3708 R500_DY_ADDR(0) | 3709 R500_DY_S_SWIZ_R | 3710 R500_DY_T_SWIZ_R | 3711 R500_DY_R_SWIZ_R | 3712 R500_DY_Q_SWIZ_R)); 3713 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3714 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3715 3716 /* tex inst */ 3717 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3718 R500_INST_TEX_SEM_WAIT | 3719 R500_INST_RGB_WMASK_R | 3720 R500_INST_RGB_WMASK_G | 3721 R500_INST_RGB_WMASK_B | 3722 R500_INST_ALPHA_WMASK | 3723 R500_INST_RGB_CLAMP | 3724 R500_INST_ALPHA_CLAMP)); 3725 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3726 R500_TEX_INST_LD | 3727 R500_TEX_SEM_ACQUIRE | 3728 R500_TEX_IGNORE_UNCOVERED)); 3729 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3730 R500_TEX_SRC_S_SWIZ_R | 3731 R500_TEX_SRC_T_SWIZ_G | 3732 R500_TEX_DST_ADDR(0) | 3733 R500_TEX_DST_R_SWIZ_R | 3734 R500_TEX_DST_G_SWIZ_G | 3735 R500_TEX_DST_B_SWIZ_B | 3736 R500_TEX_DST_A_SWIZ_A)); 3737 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3738 R500_DX_S_SWIZ_R | 3739 R500_DX_T_SWIZ_R | 3740 R500_DX_R_SWIZ_R | 3741 R500_DX_Q_SWIZ_R | 3742 R500_DY_ADDR(0) | 3743 R500_DY_S_SWIZ_R | 3744 R500_DY_T_SWIZ_R | 
3745 R500_DY_R_SWIZ_R | 3746 R500_DY_Q_SWIZ_R)); 3747 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3748 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3749 3750 /* ALU inst */ 3751 /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 3752 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3753 R500_INST_TEX_SEM_WAIT | 3754 R500_INST_RGB_WMASK_R | 3755 R500_INST_RGB_WMASK_G | 3756 R500_INST_RGB_WMASK_B | 3757 R500_INST_ALPHA_WMASK)); 3758 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3759 R500_RGB_ADDR0_CONST | 3760 R500_RGB_ADDR1(2) | 3761 R500_RGB_ADDR2(0) | 3762 R500_RGB_ADDR2_CONST)); 3763 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3764 R500_ALPHA_ADDR0_CONST | 3765 R500_ALPHA_ADDR1(2) | 3766 R500_ALPHA_ADDR2(0) | 3767 R500_ALPHA_ADDR2_CONST)); 3768 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3769 R500_ALU_RGB_R_SWIZ_A_A | 3770 R500_ALU_RGB_G_SWIZ_A_A | 3771 R500_ALU_RGB_B_SWIZ_A_A | 3772 R500_ALU_RGB_SEL_B_SRC1 | 3773 R500_ALU_RGB_R_SWIZ_B_R | 3774 R500_ALU_RGB_B_SWIZ_B_G | 3775 R500_ALU_RGB_G_SWIZ_B_B)); 3776 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3777 R500_ALPHA_ADDRD(2) | 3778 R500_ALPHA_SWIZ_A_0 | 3779 R500_ALPHA_SWIZ_B_0)); 3780 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3781 R500_ALU_RGBA_ADDRD(2) | 3782 R500_ALU_RGBA_SEL_C_SRC0 | 3783 R500_ALU_RGBA_R_SWIZ_R | 3784 R500_ALU_RGBA_G_SWIZ_G | 3785 R500_ALU_RGBA_B_SWIZ_B | 3786 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3787 R500_ALU_RGBA_A_SWIZ_0)); 3788 3789 /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 3790 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3791 R500_INST_TEX_SEM_WAIT | 3792 R500_INST_RGB_WMASK_R | 3793 R500_INST_RGB_WMASK_G | 3794 R500_INST_RGB_WMASK_B | 3795 R500_INST_ALPHA_WMASK)); 3796 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3797 R500_RGB_ADDR0_CONST | 3798 R500_RGB_ADDR1(1) | 3799 R500_RGB_ADDR2(2))); 3800 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 
(R500_ALPHA_ADDR0(1) | 3801 R500_ALPHA_ADDR0_CONST | 3802 R500_ALPHA_ADDR1(1) | 3803 R500_ALPHA_ADDR2(2))); 3804 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3805 R500_ALU_RGB_R_SWIZ_A_R | 3806 R500_ALU_RGB_G_SWIZ_A_G | 3807 R500_ALU_RGB_B_SWIZ_A_B | 3808 R500_ALU_RGB_SEL_B_SRC1 | 3809 R500_ALU_RGB_R_SWIZ_B_R | 3810 R500_ALU_RGB_B_SWIZ_B_G | 3811 R500_ALU_RGB_G_SWIZ_B_B)); 3812 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3813 R500_ALPHA_ADDRD(2) | 3814 R500_ALPHA_SWIZ_A_0 | 3815 R500_ALPHA_SWIZ_B_0)); 3816 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3817 R500_ALU_RGBA_ADDRD(2) | 3818 R500_ALU_RGBA_SEL_C_SRC2 | 3819 R500_ALU_RGBA_R_SWIZ_R | 3820 R500_ALU_RGBA_G_SWIZ_G | 3821 R500_ALU_RGBA_B_SWIZ_B | 3822 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3823 R500_ALU_RGBA_A_SWIZ_0)); 3824 3825 /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 3826 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3827 R500_INST_TEX_SEM_WAIT | 3828 R500_INST_LAST | 3829 R500_INST_RGB_OMASK_R | 3830 R500_INST_RGB_OMASK_G | 3831 R500_INST_RGB_OMASK_B | 3832 R500_INST_ALPHA_OMASK | 3833 R500_INST_RGB_CLAMP | 3834 R500_INST_ALPHA_CLAMP)); 3835 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3836 R500_RGB_ADDR0_CONST | 3837 R500_RGB_ADDR1(0) | 3838 R500_RGB_ADDR2(2))); 3839 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3840 R500_ALPHA_ADDR0_CONST | 3841 R500_ALPHA_ADDR1(0) | 3842 R500_ALPHA_ADDR2(2))); 3843 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3844 R500_ALU_RGB_R_SWIZ_A_R | 3845 R500_ALU_RGB_G_SWIZ_A_G | 3846 R500_ALU_RGB_B_SWIZ_A_B | 3847 R500_ALU_RGB_SEL_B_SRC1 | 3848 R500_ALU_RGB_R_SWIZ_B_R | 3849 R500_ALU_RGB_B_SWIZ_B_G | 3850 R500_ALU_RGB_G_SWIZ_B_B)); 3851 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3852 R500_ALPHA_ADDRD(0) | 3853 R500_ALPHA_SWIZ_A_0 | 3854 R500_ALPHA_SWIZ_B_0)); 3855 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3856 
R500_ALU_RGBA_ADDRD(0) | 3857 R500_ALU_RGBA_SEL_C_SRC2 | 3858 R500_ALU_RGBA_R_SWIZ_R | 3859 R500_ALU_RGBA_G_SWIZ_G | 3860 R500_ALU_RGBA_B_SWIZ_B | 3861 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3862 R500_ALU_RGBA_A_SWIZ_1)); 3863 3864 } else { 3865 BEGIN_ACCEL(44); 3866 /* 2 components: 2 for tex0/1/2 */ 3867 OUT_ACCEL_REG(R300_RS_COUNT, 3868 ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3869 R300_RS_COUNT_HIRES_EN)); 3870 3871 /* R300_INST_COUNT_RS - highest RS instruction used */ 3872 OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3873 3874 /* Pixel stack frame size. */ 3875 OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3876 3877 /* FP length. */ 3878 OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3879 R500_US_CODE_END_ADDR(3))); 3880 OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3881 R500_US_CODE_RANGE_SIZE(3))); 3882 3883 /* Prepare for FP emission. */ 3884 OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); 3885 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3886 3887 /* tex inst */ 3888 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3889 R500_INST_TEX_SEM_WAIT | 3890 R500_INST_RGB_WMASK_R | 3891 R500_INST_RGB_WMASK_G | 3892 R500_INST_RGB_WMASK_B | 3893 R500_INST_ALPHA_WMASK | 3894 R500_INST_RGB_CLAMP | 3895 R500_INST_ALPHA_CLAMP)); 3896 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3897 R500_TEX_INST_LD | 3898 R500_TEX_SEM_ACQUIRE | 3899 R500_TEX_IGNORE_UNCOVERED)); 3900 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3901 R500_TEX_SRC_S_SWIZ_R | 3902 R500_TEX_SRC_T_SWIZ_G | 3903 R500_TEX_DST_ADDR(0) | 3904 R500_TEX_DST_R_SWIZ_R | 3905 R500_TEX_DST_G_SWIZ_G | 3906 R500_TEX_DST_B_SWIZ_B | 3907 R500_TEX_DST_A_SWIZ_A)); 3908 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3909 R500_DX_S_SWIZ_R | 3910 R500_DX_T_SWIZ_R | 3911 R500_DX_R_SWIZ_R | 3912 R500_DX_Q_SWIZ_R | 3913 R500_DY_ADDR(0) | 3914 R500_DY_S_SWIZ_R | 3915 R500_DY_T_SWIZ_R | 3916 R500_DY_R_SWIZ_R | 3917 
R500_DY_Q_SWIZ_R)); 3918 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3919 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3920 3921 /* ALU inst */ 3922 /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 3923 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3924 R500_INST_TEX_SEM_WAIT | 3925 R500_INST_RGB_WMASK_R | 3926 R500_INST_RGB_WMASK_G | 3927 R500_INST_RGB_WMASK_B | 3928 R500_INST_ALPHA_WMASK)); 3929 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3930 R500_RGB_ADDR0_CONST | 3931 R500_RGB_ADDR1(0) | 3932 R500_RGB_ADDR2(0) | 3933 R500_RGB_ADDR2_CONST)); 3934 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3935 R500_ALPHA_ADDR0_CONST | 3936 R500_ALPHA_ADDR1(0) | 3937 R500_ALPHA_ADDR2(0) | 3938 R500_ALPHA_ADDR2_CONST)); 3939 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3940 R500_ALU_RGB_R_SWIZ_A_A | 3941 R500_ALU_RGB_G_SWIZ_A_A | 3942 R500_ALU_RGB_B_SWIZ_A_A | 3943 R500_ALU_RGB_SEL_B_SRC1 | 3944 R500_ALU_RGB_R_SWIZ_B_G | 3945 R500_ALU_RGB_B_SWIZ_B_G | 3946 R500_ALU_RGB_G_SWIZ_B_G)); 3947 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3948 R500_ALPHA_ADDRD(1) | 3949 R500_ALPHA_SWIZ_A_0 | 3950 R500_ALPHA_SWIZ_B_0)); 3951 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3952 R500_ALU_RGBA_ADDRD(1) | 3953 R500_ALU_RGBA_SEL_C_SRC0 | 3954 R500_ALU_RGBA_R_SWIZ_R | 3955 R500_ALU_RGBA_G_SWIZ_G | 3956 R500_ALU_RGBA_B_SWIZ_B | 3957 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3958 R500_ALU_RGBA_A_SWIZ_0)); 3959 3960 /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 3961 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3962 R500_INST_TEX_SEM_WAIT | 3963 R500_INST_RGB_WMASK_R | 3964 R500_INST_RGB_WMASK_G | 3965 R500_INST_RGB_WMASK_B | 3966 R500_INST_ALPHA_WMASK)); 3967 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3968 R500_RGB_ADDR0_CONST | 3969 R500_RGB_ADDR1(0) | 3970 R500_RGB_ADDR2(1))); 3971 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3972 
R500_ALPHA_ADDR0_CONST | 3973 R500_ALPHA_ADDR1(0) | 3974 R500_ALPHA_ADDR2(1))); 3975 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3976 R500_ALU_RGB_R_SWIZ_A_R | 3977 R500_ALU_RGB_G_SWIZ_A_G | 3978 R500_ALU_RGB_B_SWIZ_A_B | 3979 R500_ALU_RGB_SEL_B_SRC1 | 3980 R500_ALU_RGB_R_SWIZ_B_B | 3981 R500_ALU_RGB_B_SWIZ_B_B | 3982 R500_ALU_RGB_G_SWIZ_B_B)); 3983 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3984 R500_ALPHA_ADDRD(1) | 3985 R500_ALPHA_SWIZ_A_0 | 3986 R500_ALPHA_SWIZ_B_0)); 3987 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3988 R500_ALU_RGBA_ADDRD(1) | 3989 R500_ALU_RGBA_SEL_C_SRC2 | 3990 R500_ALU_RGBA_R_SWIZ_R | 3991 R500_ALU_RGBA_G_SWIZ_G | 3992 R500_ALU_RGBA_B_SWIZ_B | 3993 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3994 R500_ALU_RGBA_A_SWIZ_0)); 3995 3996 /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 3997 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3998 R500_INST_TEX_SEM_WAIT | 3999 R500_INST_LAST | 4000 R500_INST_RGB_OMASK_R | 4001 R500_INST_RGB_OMASK_G | 4002 R500_INST_RGB_OMASK_B | 4003 R500_INST_ALPHA_OMASK | 4004 R500_INST_RGB_CLAMP | 4005 R500_INST_ALPHA_CLAMP)); 4006 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 4007 R500_RGB_ADDR0_CONST | 4008 R500_RGB_ADDR1(0) | 4009 R500_RGB_ADDR2(1))); 4010 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 4011 R500_ALPHA_ADDR0_CONST | 4012 R500_ALPHA_ADDR1(0) | 4013 R500_ALPHA_ADDR2(1))); 4014 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 4015 R500_ALU_RGB_R_SWIZ_A_R | 4016 R500_ALU_RGB_G_SWIZ_A_G | 4017 R500_ALU_RGB_B_SWIZ_A_B | 4018 R500_ALU_RGB_SEL_B_SRC1 | 4019 R500_ALU_RGB_R_SWIZ_B_R | 4020 R500_ALU_RGB_B_SWIZ_B_R | 4021 R500_ALU_RGB_G_SWIZ_B_R)); 4022 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 4023 R500_ALPHA_ADDRD(1) | 4024 R500_ALPHA_SWIZ_A_0 | 4025 R500_ALPHA_SWIZ_B_0)); 4026 OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 4027 R500_ALU_RGBA_ADDRD(1) | 4028 
R500_ALU_RGBA_SEL_C_SRC2 | 4029 R500_ALU_RGBA_R_SWIZ_R | 4030 R500_ALU_RGBA_G_SWIZ_G | 4031 R500_ALU_RGBA_B_SWIZ_B | 4032 R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 4033 R500_ALU_RGBA_A_SWIZ_1)); 4034 } 4035 4036 /* Shader constants. */ 4037 OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 4038 4039 /* constant 0: off, yco */ 4040 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 4041 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 4042 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 4043 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 4044 /* constant 1: uco */ 4045 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 4046 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 4047 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 4048 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 4049 /* constant 2: vco */ 4050 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 4051 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 4052 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 4053 OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 4054 4055 FINISH_ACCEL(); 4056 } 4057 4058 BEGIN_ACCEL_RELOC(6, 2); 4059 OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); 4060 OUT_ACCEL_REG(R300_TX_ENABLE, txenable); 4061 4062 EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 4063 EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 4064 4065 /* no need to enable blending */ 4066 OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 4067 4068 OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 4069 FINISH_ACCEL(); 4070 4071 if (pPriv->vsync) { 4072 xf86CrtcPtr crtc; 4073 if (pPriv->desired_crtc) 4074 crtc = pPriv->desired_crtc; 4075 else 4076 crtc = radeon_pick_best_crtc(pScrn, 4077 pPriv->drw_x, 4078 pPriv->drw_x + pPriv->dst_w, 4079 pPriv->drw_y, 4080 pPriv->drw_y + pPriv->dst_h); 4081 if (crtc) 4082 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, 4083 crtc, 4084 pPriv->drw_y - crtc->y, 4085 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 4086 } 4087 4088 return TRUE; 
4089} 4090 4091static void 4092FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 4093{ 4094 RADEONInfoPtr info = RADEONPTR(pScrn); 4095 PixmapPtr pPixmap = pPriv->pPixmap; 4096 int dstxoff, dstyoff; 4097 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 4098 int nBox = REGION_NUM_RECTS(&pPriv->clip); 4099 ACCEL_PREAMBLE(); 4100 4101#ifdef COMPOSITE 4102 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 4103 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 4104#else 4105 dstxoff = 0; 4106 dstyoff = 0; 4107#endif 4108 4109 if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4110 return; 4111 4112 /* 4113 * Rendering of the actual polygon is done in two different 4114 * ways depending on chip generation: 4115 * 4116 * < R300: 4117 * 4118 * These chips can render a rectangle in one pass, so 4119 * handling is pretty straight-forward. 4120 * 4121 * >= R300: 4122 * 4123 * These chips can accept a quad, but will render it as 4124 * two triangles which results in a diagonal tear. Instead 4125 * We render a single, large triangle and use the scissor 4126 * functionality to restrict it to the desired rectangle. 4127 * Due to guardband limits on r3xx/r4xx, we can only use 4128 * the single triangle up to 2880 pixels; above that we 4129 * render as a quad. 
4130 */ 4131 4132 while (nBox--) { 4133 float srcX, srcY, srcw, srch; 4134 int dstX, dstY, dstw, dsth; 4135#ifdef ACCEL_CP 4136 int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 4137 4138 if (draw_size > radeon_cs_space_remaining(pScrn)) { 4139 if (info->cs) 4140 radeon_cs_flush_indirect(pScrn); 4141 else 4142 RADEONCPFlushIndirect(pScrn, 1); 4143 if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv)) 4144 return; 4145 } 4146#endif 4147 4148 dstX = pBox->x1 + dstxoff; 4149 dstY = pBox->y1 + dstyoff; 4150 dstw = pBox->x2 - pBox->x1; 4151 dsth = pBox->y2 - pBox->y1; 4152 4153 srcX = pPriv->src_x; 4154 srcX += ((pBox->x1 - pPriv->drw_x) * 4155 pPriv->src_w) / (float)pPriv->dst_w; 4156 srcY = pPriv->src_y; 4157 srcY += ((pBox->y1 - pPriv->drw_y) * 4158 pPriv->src_h) / (float)pPriv->dst_h; 4159 4160 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 4161 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 4162 4163 BEGIN_ACCEL(2); 4164 OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 4165 ((dstY) << R300_SCISSOR_Y_SHIFT))); 4166 OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 4167 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 4168 FINISH_ACCEL(); 4169 4170#ifdef ACCEL_CP 4171 BEGIN_RING(3 * pPriv->vtx_count + 4); 4172 OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 4173 3 * pPriv->vtx_count)); 4174 OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 4175 RADEON_CP_VC_CNTL_PRIM_WALK_RING | 4176 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 4177#else /* ACCEL_CP */ 4178 BEGIN_ACCEL(2 + pPriv->vtx_count * 3); 4179 OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | 4180 RADEON_VF_PRIM_WALK_DATA | 4181 (3 << RADEON_VF_NUM_VERTICES_SHIFT))); 4182#endif 4183 if (pPriv->bicubic_enabled) { 4184 VTX_OUT_6((float)dstX, (float)dstY, 4185 (float)srcX / pPriv->w, (float)srcY / pPriv->h, 4186 (float)srcX + 0.5, (float)srcY + 0.5); 4187 VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 4188 (float)srcX / pPriv->w, 
((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 4189 (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 4190 VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 4191 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4192 (float)srcY / pPriv->h, 4193 (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 4194 (float)srcY + 0.5); 4195 } else { 4196 /* 4197 * Render a big, scissored triangle. This means 4198 * increasing the triangle size and adjusting 4199 * texture coordinates. 4200 */ 4201 VTX_OUT_4((float)dstX, (float)dstY, 4202 (float)srcX / pPriv->w, (float)srcY / pPriv->h); 4203 VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 4204 (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 4205 VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 4206 ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 4207 (float)srcY / pPriv->h); 4208 } 4209 4210 /* flushing is pipelined, free/finish is not */ 4211 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 4212 4213#ifdef ACCEL_CP 4214 ADVANCE_RING(); 4215#else 4216 FINISH_ACCEL(); 4217#endif /* !ACCEL_CP */ 4218 4219 pBox++; 4220 } 4221 4222 BEGIN_ACCEL(3); 4223 OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); 4224 OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 4225 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 4226 FINISH_ACCEL(); 4227 4228 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 4229} 4230 4231#undef VTX_OUT_4 4232#undef VTX_OUT_6 4233#undef FUNC_NAME 4234