radeon_textured_videofuncs.c revision 0a1d3ae0
/*
 * Copyright 2008 Alex Deucher
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *
 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
25de2362d3Smrg * 26de2362d3Smrg */ 27de2362d3Smrg 28de2362d3Smrg#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ 29de2362d3Smrgdo { \ 3018781e08Smrg OUT_RING(F_TO_DW(_dstX)); \ 3118781e08Smrg OUT_RING(F_TO_DW(_dstY)); \ 3218781e08Smrg OUT_RING(F_TO_DW(_srcX)); \ 3318781e08Smrg OUT_RING(F_TO_DW(_srcY)); \ 3418781e08Smrg OUT_RING(F_TO_DW(_maskX)); \ 3518781e08Smrg OUT_RING(F_TO_DW(_maskY)); \ 36de2362d3Smrg} while (0) 37de2362d3Smrg 38de2362d3Smrg#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ 39de2362d3Smrgdo { \ 4018781e08Smrg OUT_RING(F_TO_DW(_dstX)); \ 4118781e08Smrg OUT_RING(F_TO_DW(_dstY)); \ 4218781e08Smrg OUT_RING(F_TO_DW(_srcX)); \ 4318781e08Smrg OUT_RING(F_TO_DW(_srcY)); \ 44de2362d3Smrg} while (0) 45de2362d3Smrg 46de2362d3Smrg 47de2362d3Smrgstatic Bool 4818781e08SmrgRADEONPrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 49de2362d3Smrg{ 50de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 51de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 52de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 53de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 5418781e08Smrg uint32_t txformat, txsize, txpitch; 55de2362d3Smrg uint32_t dst_pitch, dst_format; 56de2362d3Smrg uint32_t colorpitch; 57de2362d3Smrg int pixel_shift; 5818781e08Smrg int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 5918781e08Smrg int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 6018781e08Smrg int ret; 61de2362d3Smrg 6218781e08Smrg radeon_cs_space_reset_bos(info->cs); 6318781e08Smrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 64de2362d3Smrg 6518781e08Smrg if (pPriv->bicubic_enabled) 6639413783Smrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 6739413783Smrg RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 68de2362d3Smrg 6918781e08Smrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 7039413783Smrg radeon_cs_space_add_persistent_bo(info->cs, 
driver_priv->bo->bo.radeon, 0, 7139413783Smrg RADEON_GEM_DOMAIN_VRAM); 727821949aSmrg 7318781e08Smrg ret = radeon_cs_space_check(info->cs); 7418781e08Smrg if (ret) { 7518781e08Smrg ErrorF("Not enough RAM to hw accel xv operation\n"); 7618781e08Smrg return FALSE; 77de2362d3Smrg } 78de2362d3Smrg 79de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 80de2362d3Smrg 8118781e08Smrg dst_pitch = exaGetPixmapPitch(pPixmap); 8218781e08Smrg RADEON_SWITCH_TO_3D(); 83de2362d3Smrg 84de2362d3Smrg /* Same for R100/R200 */ 85de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 86de2362d3Smrg case 16: 87de2362d3Smrg if (pPixmap->drawable.depth == 15) 88de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB1555; 89de2362d3Smrg else 90de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_RGB565; 91de2362d3Smrg break; 92de2362d3Smrg case 32: 93de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB8888; 94de2362d3Smrg break; 95de2362d3Smrg default: 96de2362d3Smrg return FALSE; 97de2362d3Smrg } 98de2362d3Smrg 99de2362d3Smrg if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 100de2362d3Smrg pPriv->is_planar = TRUE; 101de2362d3Smrg txformat = RADEON_TXFORMAT_Y8; 102de2362d3Smrg } else { 103de2362d3Smrg pPriv->is_planar = FALSE; 104de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 105de2362d3Smrg txformat = RADEON_TXFORMAT_YVYU422; 106de2362d3Smrg else 107de2362d3Smrg txformat = RADEON_TXFORMAT_VYUY422; 108de2362d3Smrg } 109de2362d3Smrg 110de2362d3Smrg txformat |= RADEON_TXFORMAT_NON_POWER2; 111de2362d3Smrg 112de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 113de2362d3Smrg 114de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 115de2362d3Smrg colorpitch |= RADEON_COLOR_TILE_ENABLE; 116de2362d3Smrg 117de2362d3Smrg BEGIN_ACCEL_RELOC(4,2); 118de2362d3Smrg 11918781e08Smrg OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 120de2362d3Smrg EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 121de2362d3Smrg EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 12218781e08Smrg 
OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 123de2362d3Smrg RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 124de2362d3Smrg 12518781e08Smrg ADVANCE_RING(); 126de2362d3Smrg 127de2362d3Smrg if (pPriv->is_planar) { 128de2362d3Smrg /* need 2 texcoord sets (even though they are identical) due 129de2362d3Smrg to denormalization! hw apparently can't premultiply 130de2362d3Smrg same coord set by different texture size */ 131de2362d3Smrg pPriv->vtx_count = 6; 132de2362d3Smrg 133de2362d3Smrg txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 134de2362d3Smrg (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 135de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 136de2362d3Smrg txpitch -= 32; 137de2362d3Smrg 138de2362d3Smrg BEGIN_ACCEL_RELOC(23, 3); 139de2362d3Smrg 14018781e08Smrg OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 141de2362d3Smrg RADEON_SE_VTX_FMT_ST0 | 142de2362d3Smrg RADEON_SE_VTX_FMT_ST1)); 143de2362d3Smrg 14418781e08Smrg OUT_RING_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | 145de2362d3Smrg RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 146de2362d3Smrg RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | 147de2362d3Smrg RADEON_PLANAR_YUV_ENABLE)); 148de2362d3Smrg 149de2362d3Smrg /* Y */ 15018781e08Smrg OUT_RING_REG(RADEON_PP_TXFILTER_0, 151de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 152de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 153de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 154de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST | 155de2362d3Smrg RADEON_YUV_TO_RGB); 15618781e08Smrg OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 15718781e08Smrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 15818781e08Smrg OUT_RING_REG(RADEON_PP_TXCBLEND_0, 159de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 160de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 161de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 162de2362d3Smrg RADEON_BLEND_CTL_ADD | 163de2362d3Smrg RADEON_CLAMP_TX); 16418781e08Smrg OUT_RING_REG(RADEON_PP_TXABLEND_0, 
165de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 166de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 167de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 168de2362d3Smrg RADEON_BLEND_CTL_ADD | 169de2362d3Smrg RADEON_CLAMP_TX); 170de2362d3Smrg 17118781e08Smrg OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 172de2362d3Smrg (pPriv->w - 1) | 173de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 17418781e08Smrg OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 175de2362d3Smrg pPriv->src_pitch - 32); 176de2362d3Smrg 177de2362d3Smrg /* U */ 17818781e08Smrg OUT_RING_REG(RADEON_PP_TXFILTER_1, 179de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 180de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 181de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 182de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST); 18318781e08Smrg OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 18418781e08Smrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 18518781e08Smrg OUT_RING_REG(RADEON_PP_TXCBLEND_1, 186de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 187de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 188de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 189de2362d3Smrg RADEON_BLEND_CTL_ADD | 190de2362d3Smrg RADEON_CLAMP_TX); 19118781e08Smrg OUT_RING_REG(RADEON_PP_TXABLEND_1, 192de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 193de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 194de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 195de2362d3Smrg RADEON_BLEND_CTL_ADD | 196de2362d3Smrg RADEON_CLAMP_TX); 197de2362d3Smrg 19818781e08Smrg OUT_RING_REG(RADEON_PP_TEX_SIZE_1, txsize); 19918781e08Smrg OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch); 200de2362d3Smrg 201de2362d3Smrg /* V */ 20218781e08Smrg OUT_RING_REG(RADEON_PP_TXFILTER_2, 203de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 204de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 205de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 206de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST); 20718781e08Smrg OUT_RING_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); 20818781e08Smrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 
20918781e08Smrg OUT_RING_REG(RADEON_PP_TXCBLEND_2, 210de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 211de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 212de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 213de2362d3Smrg RADEON_BLEND_CTL_ADD | 214de2362d3Smrg RADEON_CLAMP_TX); 21518781e08Smrg OUT_RING_REG(RADEON_PP_TXABLEND_2, 216de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 217de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 218de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 219de2362d3Smrg RADEON_BLEND_CTL_ADD | 220de2362d3Smrg RADEON_CLAMP_TX); 221de2362d3Smrg 22218781e08Smrg OUT_RING_REG(RADEON_PP_TEX_SIZE_2, txsize); 22318781e08Smrg OUT_RING_REG(RADEON_PP_TEX_PITCH_2, txpitch); 22418781e08Smrg ADVANCE_RING(); 225de2362d3Smrg } else { 226de2362d3Smrg pPriv->vtx_count = 4; 227de2362d3Smrg BEGIN_ACCEL_RELOC(9, 1); 228de2362d3Smrg 22918781e08Smrg OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | 230de2362d3Smrg RADEON_SE_VTX_FMT_ST0)); 231de2362d3Smrg 23218781e08Smrg OUT_RING_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 233de2362d3Smrg 23418781e08Smrg OUT_RING_REG(RADEON_PP_TXFILTER_0, 235de2362d3Smrg RADEON_MAG_FILTER_LINEAR | 236de2362d3Smrg RADEON_MIN_FILTER_LINEAR | 237de2362d3Smrg RADEON_CLAMP_S_CLAMP_LAST | 238de2362d3Smrg RADEON_CLAMP_T_CLAMP_LAST | 239de2362d3Smrg RADEON_YUV_TO_RGB); 24018781e08Smrg OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); 24118781e08Smrg OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo); 24218781e08Smrg OUT_RING_REG(RADEON_PP_TXCBLEND_0, 243de2362d3Smrg RADEON_COLOR_ARG_A_ZERO | 244de2362d3Smrg RADEON_COLOR_ARG_B_ZERO | 245de2362d3Smrg RADEON_COLOR_ARG_C_T0_COLOR | 246de2362d3Smrg RADEON_BLEND_CTL_ADD | 247de2362d3Smrg RADEON_CLAMP_TX); 24818781e08Smrg OUT_RING_REG(RADEON_PP_TXABLEND_0, 249de2362d3Smrg RADEON_ALPHA_ARG_A_ZERO | 250de2362d3Smrg RADEON_ALPHA_ARG_B_ZERO | 251de2362d3Smrg RADEON_ALPHA_ARG_C_T0_ALPHA | 252de2362d3Smrg RADEON_BLEND_CTL_ADD | 253de2362d3Smrg RADEON_CLAMP_TX); 254de2362d3Smrg 25518781e08Smrg 
OUT_RING_REG(RADEON_PP_TEX_SIZE_0, 256de2362d3Smrg (pPriv->w - 1) | 257de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 25818781e08Smrg OUT_RING_REG(RADEON_PP_TEX_PITCH_0, 259de2362d3Smrg pPriv->src_pitch - 32); 26018781e08Smrg ADVANCE_RING(); 261de2362d3Smrg } 262de2362d3Smrg 26318781e08Smrg BEGIN_RING(2*2); 26418781e08Smrg OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 26518781e08Smrg OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 266de2362d3Smrg (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 26718781e08Smrg ADVANCE_RING(); 268de2362d3Smrg 269de2362d3Smrg if (pPriv->vsync) { 270de2362d3Smrg xf86CrtcPtr crtc; 271de2362d3Smrg if (pPriv->desired_crtc) 272de2362d3Smrg crtc = pPriv->desired_crtc; 273de2362d3Smrg else 27418781e08Smrg crtc = radeon_pick_best_crtc(pScrn, FALSE, 275de2362d3Smrg pPriv->drw_x, 276de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 277de2362d3Smrg pPriv->drw_y, 278de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 279de2362d3Smrg if (crtc) 28018781e08Smrg RADEONWaitForVLine(pScrn, pPixmap, 28118781e08Smrg crtc, 28218781e08Smrg pPriv->drw_y - crtc->y, 28318781e08Smrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 284de2362d3Smrg } 285de2362d3Smrg 286de2362d3Smrg return TRUE; 287de2362d3Smrg} 288de2362d3Smrg 289de2362d3Smrgstatic void 29018781e08SmrgRADEONDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 291de2362d3Smrg{ 292de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 293de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 294de2362d3Smrg int dstxoff, dstyoff; 295de2362d3Smrg BoxPtr pBox = REGION_RECTS(&pPriv->clip); 296de2362d3Smrg int nBox = REGION_NUM_RECTS(&pPriv->clip); 297de2362d3Smrg 298de2362d3Smrg#ifdef COMPOSITE 299de2362d3Smrg dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 300de2362d3Smrg dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 301de2362d3Smrg#else 302de2362d3Smrg dstxoff = 0; 303de2362d3Smrg dstyoff = 0; 304de2362d3Smrg#endif 305de2362d3Smrg 30618781e08Smrg if (!RADEONPrepareTexturedVideo(pScrn, 
pPriv)) 307de2362d3Smrg return; 308de2362d3Smrg 309de2362d3Smrg /* 310de2362d3Smrg * Rendering of the actual polygon is done in two different 311de2362d3Smrg * ways depending on chip generation: 312de2362d3Smrg * 313de2362d3Smrg * < R300: 314de2362d3Smrg * 315de2362d3Smrg * These chips can render a rectangle in one pass, so 316de2362d3Smrg * handling is pretty straight-forward. 317de2362d3Smrg * 318de2362d3Smrg * >= R300: 319de2362d3Smrg * 320de2362d3Smrg * These chips can accept a quad, but will render it as 321de2362d3Smrg * two triangles which results in a diagonal tear. Instead 322de2362d3Smrg * We render a single, large triangle and use the scissor 323de2362d3Smrg * functionality to restrict it to the desired rectangle. 324de2362d3Smrg * Due to guardband limits on r3xx/r4xx, we can only use 325de2362d3Smrg * the single triangle up to 2560/4021 pixels; above that we 326de2362d3Smrg * render as a quad. 327de2362d3Smrg */ 328de2362d3Smrg while (nBox) { 329de2362d3Smrg int draw_size = 3 * pPriv->vtx_count + 5; 330de2362d3Smrg int loop_boxes; 331de2362d3Smrg 332de2362d3Smrg if (draw_size > radeon_cs_space_remaining(pScrn)) { 33318781e08Smrg radeon_cs_flush_indirect(pScrn); 33418781e08Smrg if (!RADEONPrepareTexturedVideo(pScrn, pPriv)) 335de2362d3Smrg return; 336de2362d3Smrg } 337de2362d3Smrg loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 338de2362d3Smrg nBox -= loop_boxes; 339de2362d3Smrg 340de2362d3Smrg BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5); 341de2362d3Smrg OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 342de2362d3Smrg loop_boxes * 3 * pPriv->vtx_count + 1)); 343de2362d3Smrg if (pPriv->is_planar) 344de2362d3Smrg OUT_RING(RADEON_CP_VC_FRMT_XY | 345de2362d3Smrg RADEON_CP_VC_FRMT_ST0 | 346de2362d3Smrg RADEON_CP_VC_FRMT_ST1); 347de2362d3Smrg else 348de2362d3Smrg OUT_RING(RADEON_CP_VC_FRMT_XY | 349de2362d3Smrg RADEON_CP_VC_FRMT_ST0); 350de2362d3Smrg OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 351de2362d3Smrg 
RADEON_CP_VC_CNTL_PRIM_WALK_RING | 352de2362d3Smrg RADEON_CP_VC_CNTL_MAOS_ENABLE | 353de2362d3Smrg RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 354de2362d3Smrg ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 355de2362d3Smrg 356de2362d3Smrg while (loop_boxes--) { 357de2362d3Smrg float srcX, srcY, srcw, srch; 358de2362d3Smrg int dstX, dstY, dstw, dsth; 359de2362d3Smrg dstX = pBox->x1 + dstxoff; 360de2362d3Smrg dstY = pBox->y1 + dstyoff; 361de2362d3Smrg dstw = pBox->x2 - pBox->x1; 362de2362d3Smrg dsth = pBox->y2 - pBox->y1; 363de2362d3Smrg 364de2362d3Smrg srcX = pPriv->src_x; 365de2362d3Smrg srcX += ((pBox->x1 - pPriv->drw_x) * 366de2362d3Smrg pPriv->src_w) / (float)pPriv->dst_w; 367de2362d3Smrg srcY = pPriv->src_y; 368de2362d3Smrg srcY += ((pBox->y1 - pPriv->drw_y) * 369de2362d3Smrg pPriv->src_h) / (float)pPriv->dst_h; 370de2362d3Smrg 371de2362d3Smrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 372de2362d3Smrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 373de2362d3Smrg 374de2362d3Smrg 375de2362d3Smrg if (pPriv->is_planar) { 376de2362d3Smrg /* 377de2362d3Smrg * Just render a rect (using three coords). 378de2362d3Smrg */ 379de2362d3Smrg VTX_OUT_6((float)dstX, (float)(dstY + dsth), 380de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 381de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 382de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 383de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 384de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 385de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)dstY, 386de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 387de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 388de2362d3Smrg } else { 389de2362d3Smrg /* 390de2362d3Smrg * Just render a rect (using three coords). 
391de2362d3Smrg */ 392de2362d3Smrg VTX_OUT_4((float)dstX, (float)(dstY + dsth), 393de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 394de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 395de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 396de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)dstY, 397de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 398de2362d3Smrg } 399de2362d3Smrg 400de2362d3Smrg pBox++; 401de2362d3Smrg } 402de2362d3Smrg 40318781e08Smrg OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 404de2362d3Smrg ADVANCE_RING(); 405de2362d3Smrg } 406de2362d3Smrg DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 407de2362d3Smrg} 408de2362d3Smrg 409de2362d3Smrgstatic Bool 41018781e08SmrgR200PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 411de2362d3Smrg{ 412de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 413de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 414de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 415de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 416de2362d3Smrg uint32_t txformat; 41718781e08Smrg uint32_t txfilter, txsize, txpitch; 418de2362d3Smrg uint32_t dst_pitch, dst_format; 419de2362d3Smrg uint32_t colorpitch; 420de2362d3Smrg int pixel_shift; 42118781e08Smrg int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1; 42218781e08Smrg int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1; 423de2362d3Smrg /* note: in contrast to r300, use input biasing on uv components */ 424de2362d3Smrg const float Loff = -0.0627; 425de2362d3Smrg float uvcosf, uvsinf; 426de2362d3Smrg float yco, yoff; 427de2362d3Smrg float uco[3], vco[3]; 428de2362d3Smrg float bright, cont, sat; 429de2362d3Smrg int ref = pPriv->transform_index; 430de2362d3Smrg float ucscale = 0.25, vcscale = 0.25; 431de2362d3Smrg Bool needux8 = FALSE, needvx8 = FALSE; 43218781e08Smrg int ret; 433de2362d3Smrg 43418781e08Smrg 
radeon_cs_space_reset_bos(info->cs); 43518781e08Smrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 4367821949aSmrg 43718781e08Smrg if (pPriv->bicubic_enabled) 43839413783Smrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 43939413783Smrg RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 440de2362d3Smrg 44118781e08Smrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 44239413783Smrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 44339413783Smrg RADEON_GEM_DOMAIN_VRAM); 444de2362d3Smrg 44518781e08Smrg ret = radeon_cs_space_check(info->cs); 44618781e08Smrg if (ret) { 44718781e08Smrg ErrorF("Not enough RAM to hw accel xv operation\n"); 44818781e08Smrg return FALSE; 449de2362d3Smrg } 450de2362d3Smrg 451de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 452de2362d3Smrg 45318781e08Smrg dst_pitch = exaGetPixmapPitch(pPixmap); 454de2362d3Smrg 45518781e08Smrg RADEON_SWITCH_TO_3D(); 456de2362d3Smrg 457de2362d3Smrg /* Same for R100/R200 */ 458de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 459de2362d3Smrg case 16: 460de2362d3Smrg if (pPixmap->drawable.depth == 15) 461de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB1555; 462de2362d3Smrg else 463de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_RGB565; 464de2362d3Smrg break; 465de2362d3Smrg case 32: 466de2362d3Smrg dst_format = RADEON_COLOR_FORMAT_ARGB8888; 467de2362d3Smrg break; 468de2362d3Smrg default: 469de2362d3Smrg return FALSE; 470de2362d3Smrg } 471de2362d3Smrg 472de2362d3Smrg if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { 473de2362d3Smrg pPriv->is_planar = TRUE; 474de2362d3Smrg txformat = RADEON_TXFORMAT_I8; 475de2362d3Smrg } else { 476de2362d3Smrg pPriv->is_planar = FALSE; 477de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 478de2362d3Smrg txformat = RADEON_TXFORMAT_YVYU422; 479de2362d3Smrg else 480de2362d3Smrg txformat = RADEON_TXFORMAT_VYUY422; 481de2362d3Smrg } 482de2362d3Smrg 
483de2362d3Smrg txformat |= RADEON_TXFORMAT_NON_POWER2; 484de2362d3Smrg 485de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 486de2362d3Smrg 487de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 488de2362d3Smrg colorpitch |= RADEON_COLOR_TILE_ENABLE; 489de2362d3Smrg 490de2362d3Smrg BEGIN_ACCEL_RELOC(4,2); 491de2362d3Smrg 49218781e08Smrg OUT_RING_REG(RADEON_RB3D_CNTL, dst_format); 493de2362d3Smrg EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); 494de2362d3Smrg EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); 495de2362d3Smrg 49618781e08Smrg OUT_RING_REG(RADEON_RB3D_BLENDCNTL, 497de2362d3Smrg RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 498de2362d3Smrg 49918781e08Smrg ADVANCE_RING(); 500de2362d3Smrg 501de2362d3Smrg txfilter = R200_MAG_FILTER_LINEAR | 502de2362d3Smrg R200_MIN_FILTER_LINEAR | 503de2362d3Smrg R200_CLAMP_S_CLAMP_LAST | 504de2362d3Smrg R200_CLAMP_T_CLAMP_LAST; 505de2362d3Smrg 506de2362d3Smrg /* contrast can cause constant overflow, clamp */ 507de2362d3Smrg cont = RTFContrast(pPriv->contrast); 508de2362d3Smrg if (cont * trans[ref].RefLuma > 2.0) 509de2362d3Smrg cont = 2.0 / trans[ref].RefLuma; 510de2362d3Smrg /* brightness is only from -0.5 to 0.5 should be safe */ 511de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 512de2362d3Smrg /* saturation can also cause overflow, clamp */ 513de2362d3Smrg sat = RTFSaturation(pPriv->saturation); 514de2362d3Smrg if (sat * trans[ref].RefBCb > 4.0) 515de2362d3Smrg sat = 4.0 / trans[ref].RefBCb; 516de2362d3Smrg uvcosf = sat * cos(RTFHue(pPriv->hue)); 517de2362d3Smrg uvsinf = sat * sin(RTFHue(pPriv->hue)); 518de2362d3Smrg 519de2362d3Smrg yco = trans[ref].RefLuma * cont; 520de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 521de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 522de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 523de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 524de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * 
uvcosf; 525de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 526de2362d3Smrg yoff = Loff * yco + bright; 527de2362d3Smrg 528de2362d3Smrg if ((uco[0] > 2.0) || (uco[2] > 2.0)) { 529de2362d3Smrg needux8 = TRUE; 530de2362d3Smrg ucscale = 0.125; 531de2362d3Smrg } 532de2362d3Smrg if ((vco[0] > 2.0) || (vco[2] > 2.0)) { 533de2362d3Smrg needvx8 = TRUE; 534de2362d3Smrg vcscale = 0.125; 535de2362d3Smrg } 536de2362d3Smrg 537de2362d3Smrg if (pPriv->is_planar) { 538de2362d3Smrg /* need 2 texcoord sets (even though they are identical) due 539de2362d3Smrg to denormalization! hw apparently can't premultiply 540de2362d3Smrg same coord set by different texture size */ 541de2362d3Smrg pPriv->vtx_count = 6; 542de2362d3Smrg 543de2362d3Smrg txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | 544de2362d3Smrg (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); 545de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 546de2362d3Smrg txpitch -= 32; 547de2362d3Smrg 548de2362d3Smrg BEGIN_ACCEL_RELOC(36, 3); 549de2362d3Smrg 55018781e08Smrg OUT_RING_REG(RADEON_PP_CNTL, 551de2362d3Smrg RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | 552de2362d3Smrg RADEON_TEX_BLEND_0_ENABLE | 553de2362d3Smrg RADEON_TEX_BLEND_1_ENABLE | 554de2362d3Smrg RADEON_TEX_BLEND_2_ENABLE); 555de2362d3Smrg 55618781e08Smrg OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 55718781e08Smrg OUT_RING_REG(R200_SE_VTX_FMT_1, 558de2362d3Smrg (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | 559de2362d3Smrg (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); 560de2362d3Smrg 56118781e08Smrg OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 56218781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 56318781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 56418781e08Smrg OUT_RING_REG(R200_PP_TXSIZE_0, 565de2362d3Smrg (pPriv->w - 1) | 566de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 56718781e08Smrg OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 56818781e08Smrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 
56918781e08Smrg 57018781e08Smrg OUT_RING_REG(R200_PP_TXFILTER_1, txfilter); 57118781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 57218781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0); 57318781e08Smrg OUT_RING_REG(R200_PP_TXSIZE_1, txsize); 57418781e08Smrg OUT_RING_REG(R200_PP_TXPITCH_1, txpitch); 57518781e08Smrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo); 57618781e08Smrg 57718781e08Smrg OUT_RING_REG(R200_PP_TXFILTER_2, txfilter); 57818781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); 57918781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_X_2, 0); 58018781e08Smrg OUT_RING_REG(R200_PP_TXSIZE_2, txsize); 58118781e08Smrg OUT_RING_REG(R200_PP_TXPITCH_2, txpitch); 58218781e08Smrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, pPriv->planev_offset, src_bo); 583de2362d3Smrg 584de2362d3Smrg /* similar to r300 code. Note the big problem is that hardware constants 585de2362d3Smrg * are 8 bits only, representing 0.0-1.0. We can get that up (using bias 586de2362d3Smrg * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually 587de2362d3Smrg * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but 588de2362d3Smrg * the constants not. To get larger range can use output scale, but for 589de2362d3Smrg * that 2.018 value we need a total scale by 8, which means the constants 590de2362d3Smrg * really have no accuracy whatsoever (5 fractional bits only). 591de2362d3Smrg * The only direct way to get high precision "constants" into the fragment 592de2362d3Smrg * pipe I know of is to use the texcoord interpolator (not color, this one 593de2362d3Smrg * is 8 bit only too), which seems a bit expensive. 
We're lucky though it 594de2362d3Smrg * seems the values we need seem to fit better than worst case (get about 595de2362d3Smrg * 6 fractional bits for this instead of 5, at least when not correcting for 596de2362d3Smrg * hue/saturation/contrast/brightness, which is the same as for vco - yco and 597de2362d3Smrg * yoff get 8 fractional bits). Try to preserve as much accuracy as possible 598de2362d3Smrg * even with non-default saturation/hue/contrast/brightness adjustments, 599de2362d3Smrg * it gets a little crazy and ultimately precision might still be lacking. 600de2362d3Smrg * 601de2362d3Smrg * A higher precision (8 fractional bits) version might just put uco into 602de2362d3Smrg * a texcoord, and calculate a new vcoconst in the shader, like so: 603de2362d3Smrg * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable 604de2362d3Smrg * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} 605de2362d3Smrg * vcocalc = ADD temp, bias/scale(cohelper), vco 606de2362d3Smrg * would in total use 4 tex units, 4 instructions which seems fairly 607de2362d3Smrg * balanced for this architecture (instead of 3 + 3 for the solution here) 608de2362d3Smrg * 609de2362d3Smrg * temp = MAD(yco, yuv.yyyy, yoff) 610de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 611de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 612de2362d3Smrg * 613de2362d3Smrg * note first mad produces actually scalar, hence we transform 614de2362d3Smrg * it into a dp2a to get 8 bit precision of yco instead of 7 - 615de2362d3Smrg * That's assuming hw correctly expands consts to internal precision. 
616de2362d3Smrg * (y * 1 + y * (yco - 1) + yoff) 617de2362d3Smrg * temp = DP2A / 2 (yco, yuv.yyyy, yoff) 618de2362d3Smrg * temp = MAD (uco / 4, yuv.uuuu * 2, temp) 619de2362d3Smrg * result = MAD x2 (vco / 2, yuv.vvvv, temp) 620de2362d3Smrg * 621de2362d3Smrg * vco, uco need bias (and hence scale too) 622de2362d3Smrg * 623de2362d3Smrg */ 624de2362d3Smrg 625de2362d3Smrg /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ 62618781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_0, 627de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 628de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 629de2362d3Smrg R200_TXC_ARG_C_TFACTOR_COLOR | 630de2362d3Smrg (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | 631de2362d3Smrg R200_TXC_OP_DOT2_ADD); 63218781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_0, 633de2362d3Smrg (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 634de2362d3Smrg R200_TXC_SCALE_INV2 | 635de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 63618781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_0, 637de2362d3Smrg R200_TXA_ARG_A_ZERO | 638de2362d3Smrg R200_TXA_ARG_B_ZERO | 639de2362d3Smrg R200_TXA_ARG_C_ZERO | 640de2362d3Smrg R200_TXA_OP_MADD); 64118781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_0, 642de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 643de2362d3Smrg 644de2362d3Smrg /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ 64518781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_1, 646de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 647de2362d3Smrg R200_TXC_BIAS_ARG_A | 648de2362d3Smrg R200_TXC_SCALE_ARG_A | 649de2362d3Smrg R200_TXC_ARG_B_R1_COLOR | 650de2362d3Smrg R200_TXC_BIAS_ARG_B | 651de2362d3Smrg (needux8 ? 
R200_TXC_SCALE_ARG_B : 0) | 652de2362d3Smrg R200_TXC_ARG_C_R0_COLOR | 653de2362d3Smrg R200_TXC_OP_MADD); 65418781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_1, 655de2362d3Smrg (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 656de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); 65718781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_1, 658de2362d3Smrg R200_TXA_ARG_A_ZERO | 659de2362d3Smrg R200_TXA_ARG_B_ZERO | 660de2362d3Smrg R200_TXA_ARG_C_ZERO | 661de2362d3Smrg R200_TXA_OP_MADD); 66218781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_1, 663de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 664de2362d3Smrg 665de2362d3Smrg /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ 66618781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_2, 667de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 668de2362d3Smrg R200_TXC_BIAS_ARG_A | 669de2362d3Smrg R200_TXC_SCALE_ARG_A | 670de2362d3Smrg R200_TXC_ARG_B_R2_COLOR | 671de2362d3Smrg R200_TXC_BIAS_ARG_B | 672de2362d3Smrg (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 673de2362d3Smrg R200_TXC_ARG_C_R0_COLOR | 674de2362d3Smrg R200_TXC_OP_MADD); 67518781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_2, 676de2362d3Smrg (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 677de2362d3Smrg R200_TXC_SCALE_2X | 678de2362d3Smrg R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 67918781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_2, 680de2362d3Smrg R200_TXA_ARG_A_ZERO | 681de2362d3Smrg R200_TXA_ARG_B_ZERO | 682de2362d3Smrg R200_TXA_ARG_C_ZERO | 683de2362d3Smrg R200_TXA_COMP_ARG_C | 684de2362d3Smrg R200_TXA_OP_MADD); 68518781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_2, 686de2362d3Smrg R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 687de2362d3Smrg 688de2362d3Smrg /* shader constants */ 68918781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 690de2362d3Smrg yco > 1.0 ? yco - 1.0: yco, 691de2362d3Smrg yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 692de2362d3Smrg 0.0)); 69318781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 694de2362d3Smrg uco[1] * ucscale + 0.5, /* or [-2, 2] */ 695de2362d3Smrg uco[2] * ucscale + 0.5, 696de2362d3Smrg 0.0)); 69718781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 698de2362d3Smrg vco[1] * vcscale + 0.5, /* or [-4, 4] */ 699de2362d3Smrg vco[2] * vcscale + 0.5, 700de2362d3Smrg 0.0)); 701de2362d3Smrg 70218781e08Smrg ADVANCE_RING(); 703de2362d3Smrg } else { 704de2362d3Smrg pPriv->vtx_count = 4; 705de2362d3Smrg 706de2362d3Smrg BEGIN_ACCEL_RELOC(24, 1); 707de2362d3Smrg 70818781e08Smrg OUT_RING_REG(RADEON_PP_CNTL, 709de2362d3Smrg RADEON_TEX_0_ENABLE | 710de2362d3Smrg RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | 711de2362d3Smrg RADEON_TEX_BLEND_2_ENABLE); 712de2362d3Smrg 71318781e08Smrg OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); 71418781e08Smrg OUT_RING_REG(R200_SE_VTX_FMT_1, 715de2362d3Smrg (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); 716de2362d3Smrg 71718781e08Smrg OUT_RING_REG(R200_PP_TXFILTER_0, txfilter); 71818781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_0, txformat); 71918781e08Smrg OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0); 72018781e08Smrg OUT_RING_REG(R200_PP_TXSIZE_0, 721de2362d3Smrg (pPriv->w - 1) | 722de2362d3Smrg ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); 72318781e08Smrg OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); 72418781e08Smrg OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo); 725de2362d3Smrg 726de2362d3Smrg /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ 72718781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_0, 728de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 729de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 730de2362d3Smrg R200_TXC_ARG_C_TFACTOR_COLOR | 731de2362d3Smrg (yoff < 0 ? 
R200_TXC_NEG_ARG_C : 0) | 732de2362d3Smrg R200_TXC_OP_DOT2_ADD); 73318781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_0, 734de2362d3Smrg (0 << R200_TXC_TFACTOR_SEL_SHIFT) | 735de2362d3Smrg R200_TXC_SCALE_INV2 | 736de2362d3Smrg (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | 737de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 73818781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_0, 739de2362d3Smrg R200_TXA_ARG_A_ZERO | 740de2362d3Smrg R200_TXA_ARG_B_ZERO | 741de2362d3Smrg R200_TXA_ARG_C_ZERO | 742de2362d3Smrg R200_TXA_OP_MADD); 74318781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_0, 744de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 745de2362d3Smrg 746de2362d3Smrg /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ 74718781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_1, 748de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 749de2362d3Smrg R200_TXC_BIAS_ARG_A | 750de2362d3Smrg R200_TXC_SCALE_ARG_A | 751de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 752de2362d3Smrg R200_TXC_BIAS_ARG_B | 753de2362d3Smrg (needux8 ? 
R200_TXC_SCALE_ARG_B : 0) | 754de2362d3Smrg R200_TXC_ARG_C_R1_COLOR | 755de2362d3Smrg R200_TXC_OP_MADD); 75618781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_1, 757de2362d3Smrg (1 << R200_TXC_TFACTOR_SEL_SHIFT) | 758de2362d3Smrg (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | 759de2362d3Smrg R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); 76018781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_1, 761de2362d3Smrg R200_TXA_ARG_A_ZERO | 762de2362d3Smrg R200_TXA_ARG_B_ZERO | 763de2362d3Smrg R200_TXA_ARG_C_ZERO | 764de2362d3Smrg R200_TXA_OP_MADD); 76518781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_1, 766de2362d3Smrg R200_TXA_OUTPUT_REG_NONE); 767de2362d3Smrg 768de2362d3Smrg /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ 76918781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND_2, 770de2362d3Smrg R200_TXC_ARG_A_TFACTOR_COLOR | 771de2362d3Smrg R200_TXC_BIAS_ARG_A | 772de2362d3Smrg R200_TXC_SCALE_ARG_A | 773de2362d3Smrg R200_TXC_ARG_B_R0_COLOR | 774de2362d3Smrg R200_TXC_BIAS_ARG_B | 775de2362d3Smrg (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | 776de2362d3Smrg R200_TXC_ARG_C_R1_COLOR | 777de2362d3Smrg R200_TXC_OP_MADD); 77818781e08Smrg OUT_RING_REG(R200_PP_TXCBLEND2_2, 779de2362d3Smrg (2 << R200_TXC_TFACTOR_SEL_SHIFT) | 780de2362d3Smrg R200_TXC_SCALE_2X | 781de2362d3Smrg (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | 782de2362d3Smrg R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); 78318781e08Smrg OUT_RING_REG(R200_PP_TXABLEND_2, 784de2362d3Smrg R200_TXA_ARG_A_ZERO | 785de2362d3Smrg R200_TXA_ARG_B_ZERO | 786de2362d3Smrg R200_TXA_ARG_C_ZERO | 787de2362d3Smrg R200_TXA_COMP_ARG_C | 788de2362d3Smrg R200_TXA_OP_MADD); 78918781e08Smrg OUT_RING_REG(R200_PP_TXABLEND2_2, 790de2362d3Smrg R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); 791de2362d3Smrg 792de2362d3Smrg /* shader constants */ 79318781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ 794de2362d3Smrg yco > 1.0 ? yco - 1.0: yco, 795de2362d3Smrg yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 796de2362d3Smrg 0.0)); 79718781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ 798de2362d3Smrg uco[1] * ucscale + 0.5, /* or [-2, 2] */ 799de2362d3Smrg uco[2] * ucscale + 0.5, 800de2362d3Smrg 0.0)); 80118781e08Smrg OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ 802de2362d3Smrg vco[1] * vcscale + 0.5, /* or [-4, 4] */ 803de2362d3Smrg vco[2] * vcscale + 0.5, 804de2362d3Smrg 0.0)); 805de2362d3Smrg 80618781e08Smrg ADVANCE_RING(); 807de2362d3Smrg } 808de2362d3Smrg 80918781e08Smrg BEGIN_RING(2*2); 81018781e08Smrg OUT_RING_REG(RADEON_RE_TOP_LEFT, 0); 81118781e08Smrg OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) | 812de2362d3Smrg (scissor_h << RADEON_RE_HEIGHT_SHIFT))); 81318781e08Smrg ADVANCE_RING(); 814de2362d3Smrg 815de2362d3Smrg if (pPriv->vsync) { 816de2362d3Smrg xf86CrtcPtr crtc; 817de2362d3Smrg if (pPriv->desired_crtc) 818de2362d3Smrg crtc = pPriv->desired_crtc; 819de2362d3Smrg else 82018781e08Smrg crtc = radeon_pick_best_crtc(pScrn, FALSE, 821de2362d3Smrg pPriv->drw_x, 822de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 823de2362d3Smrg pPriv->drw_y, 824de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 825de2362d3Smrg if (crtc) 82618781e08Smrg RADEONWaitForVLine(pScrn, pPixmap, 82718781e08Smrg crtc, 82818781e08Smrg pPriv->drw_y - crtc->y, 82918781e08Smrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 830de2362d3Smrg } 831de2362d3Smrg 832de2362d3Smrg return TRUE; 833de2362d3Smrg} 834de2362d3Smrg /* R200DisplayTexturedVideo: draw the video frame into pPriv->pPixmap, one RECT_LIST rectangle (three vertices) per box of the clip region pPriv->clip. Runs R200PrepareTexturedVideo first and returns without drawing if it fails. pScrn: screen whose command stream receives the draw packets. pPriv: port state supplying the clip region, source/destination geometry, vtx_count and is_planar. Returns nothing; on success the clip region is reported as damaged on pPriv->pDraw. */ 835de2362d3Smrgstatic void 83618781e08SmrgR200DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 837de2362d3Smrg{ 838de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 839de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 840de2362d3Smrg int dstxoff, dstyoff; 841de2362d3Smrg BoxPtr pBox = REGION_RECTS(&pPriv->clip); 842de2362d3Smrg int nBox = REGION_NUM_RECTS(&pPriv->clip); 843de2362d3Smrg /* dstxoff/dstyoff are added to every clip box origin below; with COMPOSITE they are drawable.{x,y} minus the pixmap's screen_{x,y} (presumably translating screen coordinates into pixmap coordinates - confirm), otherwise zero. */ 844de2362d3Smrg#ifdef COMPOSITE 
845de2362d3Smrg dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 846de2362d3Smrg dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 847de2362d3Smrg#else 848de2362d3Smrg dstxoff = 0; 849de2362d3Smrg dstyoff = 0; 850de2362d3Smrg#endif 851de2362d3Smrg 85218781e08Smrg if (!R200PrepareTexturedVideo(pScrn, pPriv)) 853de2362d3Smrg return; 854de2362d3Smrg 855de2362d3Smrg /* 856de2362d3Smrg * Rendering of the actual polygon is done in two different 857de2362d3Smrg * ways depending on chip generation: 858de2362d3Smrg * 859de2362d3Smrg * < R300: 860de2362d3Smrg * 861de2362d3Smrg * These chips can render a rectangle in one pass, so 862de2362d3Smrg * handling is pretty straight-forward. 863de2362d3Smrg * 864de2362d3Smrg * >= R300: 865de2362d3Smrg * 866de2362d3Smrg * These chips can accept a quad, but will render it as 867de2362d3Smrg * two triangles which results in a diagonal tear. Instead 868de2362d3Smrg * We render a single, large triangle and use the scissor 869de2362d3Smrg * functionality to restrict it to the desired rectangle. 870de2362d3Smrg * Due to guardband limits on r3xx/r4xx, we can only use 871de2362d3Smrg * the single triangle up to 2560/4021 pixels; above that we 872de2362d3Smrg * render as a quad. 
873de2362d3Smrg */ /* NOTE(review): this function itself only ever emits RECT_LIST primitives, i.e. the one-pass rectangle case described above; the triangle/quad handling is not implemented here. */ 874de2362d3Smrg 875de2362d3Smrg while (nBox) { /* draw_size: dwords budgeted per box = 3 vertices of vtx_count dwords each, plus the 4 dwords of batch overhead (2 for the draw packet header, 2 for the trailing WAIT_UNTIL register write). */ 876de2362d3Smrg int draw_size = 3 * pPriv->vtx_count + 4; 877de2362d3Smrg int loop_boxes; 878de2362d3Smrg /* Not even one box fits in the remaining command-stream space: flush and run the prepare step again before emitting anything (presumably because state emitted before the flush has to be re-sent - confirm). */ 879de2362d3Smrg if (draw_size > radeon_cs_space_remaining(pScrn)) { 88018781e08Smrg radeon_cs_flush_indirect(pScrn); 88118781e08Smrg if (!R200PrepareTexturedVideo(pScrn, pPriv)) 882de2362d3Smrg return; 883de2362d3Smrg } /* Batch as many of the remaining boxes as the free space allows into a single draw packet. */ 884de2362d3Smrg loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox); 885de2362d3Smrg nBox -= loop_boxes; 886de2362d3Smrg /* One immediate-mode draw packet for the whole batch: header and vertex-control dword first, then loop_boxes * 3 vertices written straight into the ring. */ 887de2362d3Smrg BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 4); 888de2362d3Smrg OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 889de2362d3Smrg loop_boxes * 3 * pPriv->vtx_count)); 890de2362d3Smrg OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 891de2362d3Smrg RADEON_CP_VC_CNTL_PRIM_WALK_RING | 892de2362d3Smrg ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); 893de2362d3Smrg 894de2362d3Smrg while (loop_boxes--) { 895de2362d3Smrg float srcX, srcY, srcw, srch; 896de2362d3Smrg int dstX, dstY, dstw, dsth; 897de2362d3Smrg dstX = pBox->x1 + dstxoff; 898de2362d3Smrg dstY = pBox->y1 + dstyoff; 899de2362d3Smrg dstw = pBox->x2 - pBox->x1; 900de2362d3Smrg dsth = pBox->y2 - pBox->y1; 901de2362d3Smrg /* Map the box origin (relative to drw_x/drw_y) and the box size from destination space into source space by scaling with src_w/dst_w and src_h/dst_h. */ 902de2362d3Smrg srcX = pPriv->src_x; 903de2362d3Smrg srcX += ((pBox->x1 - pPriv->drw_x) * 904de2362d3Smrg pPriv->src_w) / (float)pPriv->dst_w; 905de2362d3Smrg srcY = pPriv->src_y; 906de2362d3Smrg srcY += ((pBox->y1 - pPriv->drw_y) * 907de2362d3Smrg pPriv->src_h) / (float)pPriv->dst_h; 908de2362d3Smrg 909de2362d3Smrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 910de2362d3Smrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 911de2362d3Smrg 912de2362d3Smrg if (pPriv->is_planar) { 913de2362d3Smrg /* 914de2362d3Smrg * Just render a rect (using three coords). 
915de2362d3Smrg */ /* Vertices are (x1,y2), (x2,y2), (x2,y1) with texture coordinates divided by pPriv->w / pPriv->h; the planar path passes the same source coordinates twice per vertex (six dwords per vertex). */ 916de2362d3Smrg VTX_OUT_6((float)dstX, (float)(dstY + dsth), 917de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, 918de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 919de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), 920de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, 921de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 922de2362d3Smrg VTX_OUT_6((float)(dstX + dstw), (float)dstY, 923de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, 924de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 925de2362d3Smrg } else { 926de2362d3Smrg /* 927de2362d3Smrg * Just render a rect (using three coords). 928de2362d3Smrg */ /* Same three corners as the planar case, but with a single texture coordinate pair (four dwords per vertex). */ 929de2362d3Smrg VTX_OUT_4((float)dstX, (float)(dstY + dsth), 930de2362d3Smrg (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); 931de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), 932de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); 933de2362d3Smrg VTX_OUT_4((float)(dstX + dstw), (float)dstY, 934de2362d3Smrg (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); 935de2362d3Smrg } 936de2362d3Smrg 937de2362d3Smrg pBox++; 938de2362d3Smrg } 939de2362d3Smrg /* Close each batch with a WAIT_UNTIL write requesting 3D idle/clean; this accounts for the last 2 of the 4 overhead dwords reserved above. */ 94018781e08Smrg OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 941de2362d3Smrg ADVANCE_RING(); 942de2362d3Smrg } 943de2362d3Smrg /* All boxes emitted: report the whole clip region as damaged on the target drawable. */ 944de2362d3Smrg DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 945de2362d3Smrg} 946de2362d3Smrg 947de2362d3Smrgstatic Bool 94818781e08SmrgR300PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 949de2362d3Smrg{ 950de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 951de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 952de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 953de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 95418781e08Smrg uint32_t txfilter, txformat0, txformat1, txpitch; 
955de2362d3Smrg uint32_t dst_pitch, dst_format; 95618781e08Smrg uint32_t txenable, colorpitch; 957de2362d3Smrg uint32_t output_fmt; 958de2362d3Smrg int pixel_shift; 95918781e08Smrg int ret; 960de2362d3Smrg 96118781e08Smrg radeon_cs_space_reset_bos(info->cs); 96218781e08Smrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 963de2362d3Smrg 96418781e08Smrg if (pPriv->bicubic_enabled) 96539413783Smrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 96639413783Smrg RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 967de2362d3Smrg 96818781e08Smrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 96939413783Smrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 97039413783Smrg RADEON_GEM_DOMAIN_VRAM); 9717821949aSmrg 97218781e08Smrg ret = radeon_cs_space_check(info->cs); 97318781e08Smrg if (ret) { 97418781e08Smrg ErrorF("Not enough RAM to hw accel xv operation\n"); 97518781e08Smrg return FALSE; 976de2362d3Smrg } 977de2362d3Smrg 978de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 979de2362d3Smrg 98018781e08Smrg dst_pitch = exaGetPixmapPitch(pPixmap); 98118781e08Smrg RADEON_SWITCH_TO_3D(); 982de2362d3Smrg 983de2362d3Smrg if (pPriv->bicubic_enabled) 984de2362d3Smrg pPriv->vtx_count = 6; 985de2362d3Smrg else 986de2362d3Smrg pPriv->vtx_count = 4; 987de2362d3Smrg 988de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 989de2362d3Smrg case 16: 990de2362d3Smrg if (pPixmap->drawable.depth == 15) 991de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB1555; 992de2362d3Smrg else 993de2362d3Smrg dst_format = R300_COLORFORMAT_RGB565; 994de2362d3Smrg break; 995de2362d3Smrg case 32: 996de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB8888; 997de2362d3Smrg break; 998de2362d3Smrg default: 999de2362d3Smrg return FALSE; 1000de2362d3Smrg } 1001de2362d3Smrg 1002de2362d3Smrg output_fmt = (R300_OUT_FMT_C4_8 | 1003de2362d3Smrg R300_OUT_FMT_C0_SEL_BLUE | 1004de2362d3Smrg 
R300_OUT_FMT_C1_SEL_GREEN | 1005de2362d3Smrg R300_OUT_FMT_C2_SEL_RED | 1006de2362d3Smrg R300_OUT_FMT_C3_SEL_ALPHA); 1007de2362d3Smrg 1008de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 1009de2362d3Smrg colorpitch |= dst_format; 1010de2362d3Smrg 1011de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 1012de2362d3Smrg colorpitch |= R300_COLORTILE; 1013de2362d3Smrg 1014de2362d3Smrg 1015de2362d3Smrg if (((pPriv->bicubic_state == BICUBIC_OFF)) && 1016de2362d3Smrg (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 1017de2362d3Smrg pPriv->is_planar = TRUE; 1018de2362d3Smrg else 1019de2362d3Smrg pPriv->is_planar = FALSE; 1020de2362d3Smrg 1021de2362d3Smrg if (pPriv->is_planar) { 1022de2362d3Smrg txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; 1023de2362d3Smrg txpitch = pPriv->src_pitch; 1024de2362d3Smrg } else { 1025de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 1026de2362d3Smrg txformat1 = R300_TX_FORMAT_YVYU422; 1027de2362d3Smrg else 1028de2362d3Smrg txformat1 = R300_TX_FORMAT_VYUY422; 1029de2362d3Smrg 1030de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) 1031de2362d3Smrg txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 1032de2362d3Smrg 1033de2362d3Smrg /* pitch is in pixels */ 1034de2362d3Smrg txpitch = pPriv->src_pitch / 2; 1035de2362d3Smrg } 1036de2362d3Smrg txpitch -= 1; 1037de2362d3Smrg 1038de2362d3Smrg txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1039de2362d3Smrg (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1040de2362d3Smrg R300_TXPITCH_EN); 1041de2362d3Smrg 1042de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1043de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1044de2362d3Smrg R300_TX_MAG_FILTER_LINEAR | 1045de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 1046de2362d3Smrg (0 << R300_TX_ID_SHIFT)); 1047de2362d3Smrg 1048de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 104918781e08Smrg OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 105018781e08Smrg OUT_RING_REG(R300_TX_FILTER1_0, 0); 105118781e08Smrg 
OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 1052de2362d3Smrg if (pPriv->is_planar) 105318781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); 1054de2362d3Smrg else 105518781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 105618781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 105718781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 105818781e08Smrg ADVANCE_RING(); 1059de2362d3Smrg 1060de2362d3Smrg txenable = R300_TEX_0_ENABLE; 1061de2362d3Smrg 1062de2362d3Smrg if (pPriv->is_planar) { 1063de2362d3Smrg txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 1064de2362d3Smrg (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 1065de2362d3Smrg R300_TXPITCH_EN); 1066de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 1067de2362d3Smrg txpitch -= 1; 1068de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 1069de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 1070de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 1071de2362d3Smrg R300_TX_MAG_FILTER_LINEAR); 1072de2362d3Smrg 1073de2362d3Smrg BEGIN_ACCEL_RELOC(12, 2); 107418781e08Smrg OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 107518781e08Smrg OUT_RING_REG(R300_TX_FILTER1_1, 0); 107618781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 107718781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); 107818781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 107918781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 108018781e08Smrg OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 108118781e08Smrg OUT_RING_REG(R300_TX_FILTER1_2, 0); 108218781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 108318781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); 108418781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 108518781e08Smrg 
OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 108618781e08Smrg ADVANCE_RING(); 1087de2362d3Smrg txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 1088de2362d3Smrg } 1089de2362d3Smrg 1090de2362d3Smrg if (pPriv->bicubic_enabled) { 1091de2362d3Smrg /* Size is 128x1 */ 1092de2362d3Smrg txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 1093de2362d3Smrg (0x0 << R300_TXHEIGHT_SHIFT) | 1094de2362d3Smrg R300_TXPITCH_EN); 1095de2362d3Smrg /* Format is 32-bit floats, 4bpp */ 1096de2362d3Smrg txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 1097de2362d3Smrg /* Pitch is 127 (128-1) */ 1098de2362d3Smrg txpitch = 0x7f; 1099de2362d3Smrg /* Tex filter */ 1100de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 1101de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 1102de2362d3Smrg R300_TX_MIN_FILTER_NEAREST | 1103de2362d3Smrg R300_TX_MAG_FILTER_NEAREST | 1104de2362d3Smrg (1 << R300_TX_ID_SHIFT)); 1105de2362d3Smrg 1106de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 110718781e08Smrg OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 110818781e08Smrg OUT_RING_REG(R300_TX_FILTER1_1, 0); 110918781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 111018781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 111118781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 111218781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 111318781e08Smrg ADVANCE_RING(); 1114de2362d3Smrg 1115de2362d3Smrg /* Enable tex 1 */ 1116de2362d3Smrg txenable |= R300_TEX_1_ENABLE; 1117de2362d3Smrg } 1118de2362d3Smrg 1119de2362d3Smrg /* setup the VAP */ 1120de2362d3Smrg if (info->accel_state->has_tcl) { 1121de2362d3Smrg if (pPriv->bicubic_enabled) 112218781e08Smrg BEGIN_RING(2*7); 1123de2362d3Smrg else 112418781e08Smrg BEGIN_RING(2*6); 1125de2362d3Smrg } else { 1126de2362d3Smrg if (pPriv->bicubic_enabled) 112718781e08Smrg BEGIN_RING(2*5); 1128de2362d3Smrg else 112918781e08Smrg BEGIN_RING(2*4); 1130de2362d3Smrg } 1131de2362d3Smrg 1132de2362d3Smrg /* These registers define the 
number, type, and location of data submitted 1133de2362d3Smrg * to the PVS unit of GA input (when PVS is disabled) 1134de2362d3Smrg * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 1135de2362d3Smrg * enabled. This memory provides the imputs to the vertex shader program 1136de2362d3Smrg * and ordering is not important. When PVS/TCL is disabled, this field maps 11370a1d3ae0Smrg * directly to the GA input memory and the order is significant. In 1138de2362d3Smrg * PVS_BYPASS mode the order is as follows: 1139de2362d3Smrg * Position 1140de2362d3Smrg * Point Size 1141de2362d3Smrg * Color 0-3 1142de2362d3Smrg * Textures 0-7 1143de2362d3Smrg * Fog 1144de2362d3Smrg */ 1145de2362d3Smrg if (pPriv->bicubic_enabled) { 114618781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1147de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1148de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 1149de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 1150de2362d3Smrg R300_SIGNED_0 | 1151de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1152de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 1153de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 1154de2362d3Smrg R300_SIGNED_1)); 115518781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 1156de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 1157de2362d3Smrg (0 << R300_SKIP_DWORDS_2_SHIFT) | 1158de2362d3Smrg (7 << R300_DST_VEC_LOC_2_SHIFT) | 1159de2362d3Smrg R300_LAST_VEC_2 | 1160de2362d3Smrg R300_SIGNED_2)); 1161de2362d3Smrg } else { 116218781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 1163de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 1164de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 1165de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 1166de2362d3Smrg R300_SIGNED_0 | 1167de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 1168de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 1169de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 1170de2362d3Smrg 
R300_LAST_VEC_1 | 1171de2362d3Smrg R300_SIGNED_1)); 1172de2362d3Smrg } 1173de2362d3Smrg 1174de2362d3Smrg /* load the vertex shader 1175de2362d3Smrg * We pre-load vertex programs in RADEONInit3DEngine(): 1176de2362d3Smrg * - exa 1177de2362d3Smrg * - Xv 1178de2362d3Smrg * - Xv bicubic 1179de2362d3Smrg * Here we select the offset of the vertex program we want to use 1180de2362d3Smrg */ 1181de2362d3Smrg if (info->accel_state->has_tcl) { 1182de2362d3Smrg if (pPriv->bicubic_enabled) { 118318781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1184de2362d3Smrg ((11 << R300_PVS_FIRST_INST_SHIFT) | 1185de2362d3Smrg (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1186de2362d3Smrg (13 << R300_PVS_LAST_INST_SHIFT))); 118718781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1188de2362d3Smrg (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1189de2362d3Smrg } else { 119018781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 1191de2362d3Smrg ((9 << R300_PVS_FIRST_INST_SHIFT) | 1192de2362d3Smrg (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 1193de2362d3Smrg (10 << R300_PVS_LAST_INST_SHIFT))); 119418781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 1195de2362d3Smrg (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 1196de2362d3Smrg } 1197de2362d3Smrg } 1198de2362d3Smrg 1199de2362d3Smrg /* Position and one set of 2 texture coordinates */ 120018781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 1201de2362d3Smrg if (pPriv->bicubic_enabled) 120218781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 1203de2362d3Smrg (2 << R300_TEX_1_COMP_CNT_SHIFT))); 1204de2362d3Smrg else 120518781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 1206de2362d3Smrg 120718781e08Smrg OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 120818781e08Smrg ADVANCE_RING(); 1209de2362d3Smrg 1210de2362d3Smrg /* setup pixel shader */ 1211de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) { 1212de2362d3Smrg if (pPriv->bicubic_enabled) { 121318781e08Smrg BEGIN_RING(2*79); 
1214de2362d3Smrg 1215de2362d3Smrg /* 4 components: 2 for tex0 and 2 for tex1 */ 121618781e08Smrg OUT_RING_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1217de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1218de2362d3Smrg 1219de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 122018781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 1221de2362d3Smrg 1222de2362d3Smrg /* Pixel stack frame size. */ 122318781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 5); 1224de2362d3Smrg 1225de2362d3Smrg /* Indirection levels */ 122618781e08Smrg OUT_RING_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | 1227de2362d3Smrg R300_FIRST_TEX)); 1228de2362d3Smrg 1229de2362d3Smrg /* Set nodes. */ 123018781e08Smrg OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1231de2362d3Smrg R300_ALU_CODE_SIZE(14) | 1232de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1233de2362d3Smrg R300_TEX_CODE_SIZE(6))); 1234de2362d3Smrg 1235de2362d3Smrg /* Nodes are allocated highest first, but executed lowest first */ 123618781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_0, 0); 123718781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | 1238de2362d3Smrg R300_ALU_SIZE(0) | 1239de2362d3Smrg R300_TEX_START(0) | 1240de2362d3Smrg R300_TEX_SIZE(0))); 124118781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | 1242de2362d3Smrg R300_ALU_SIZE(9) | 1243de2362d3Smrg R300_TEX_START(1) | 1244de2362d3Smrg R300_TEX_SIZE(0))); 124518781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | 1246de2362d3Smrg R300_ALU_SIZE(2) | 1247de2362d3Smrg R300_TEX_START(2) | 1248de2362d3Smrg R300_TEX_SIZE(3) | 1249de2362d3Smrg R300_RGBA_OUT)); 1250de2362d3Smrg 1251de2362d3Smrg /* ** BICUBIC FP ** */ 1252de2362d3Smrg 1253de2362d3Smrg /* texcoord0 => temp0 1254de2362d3Smrg * texcoord1 => temp1 */ 1255de2362d3Smrg 1256de2362d3Smrg // first node 1257de2362d3Smrg /* TEX temp2, temp1.rrr0, tex1, 1D */ 125818781e08Smrg OUT_RING_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | 
1259de2362d3Smrg R300_TEX_ID(1) | 1260de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1261de2362d3Smrg R300_TEX_DST_ADDR(2))); 1262de2362d3Smrg 1263de2362d3Smrg /* MOV temp1.r, temp1.ggg0 */ 126418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1265de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1266de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1267de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 126818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | 1269de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1270de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 127118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1272de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1273de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1274de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 127518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | 1276de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1277de2362d3Smrg 1278de2362d3Smrg 1279de2362d3Smrg // second node 1280de2362d3Smrg /* TEX temp1, temp1, tex1, 1D */ 128118781e08Smrg OUT_RING_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | 1282de2362d3Smrg R300_TEX_ID(1) | 1283de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1284de2362d3Smrg R300_TEX_DST_ADDR(1))); 1285de2362d3Smrg 1286de2362d3Smrg /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ 128718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1288de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1289de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1290de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 129118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | 1292de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1293de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1294de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 
129518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1296de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1297de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1298de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 129918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | 1300de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1301de2362d3Smrg 1302de2362d3Smrg 1303de2362d3Smrg /* MUL temp2.rg, temp2.rrr0, const0.rgb */ 130418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1305de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1306de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1307de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); 130818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | 1309de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | 1310de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1311de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 131218781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1313de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1314de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1315de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 131618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | 1317de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1318de2362d3Smrg 1319de2362d3Smrg /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ 132018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1321de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1322de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1323de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 132418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | 1325de2362d3Smrg 
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1326de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1327de2362d3Smrg R300_ALU_RGB_ADDRD(4) | 1328de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 132918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1330de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1331de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1332de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 133318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | 1334de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1335de2362d3Smrg 1336de2362d3Smrg /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ 133718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1338de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | 1339de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1340de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 134118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | 1342de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1343de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1344de2362d3Smrg R300_ALU_RGB_ADDRD(5) | 1345de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 134618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1347de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1348de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1349de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 135018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | 1351de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1352de2362d3Smrg 1353de2362d3Smrg /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ 135418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1355de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 
1356de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1357de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 135818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | 1359de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1360de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1361de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1362de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 136318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1364de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1365de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1366de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 136718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | 1368de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1369de2362d3Smrg 1370de2362d3Smrg /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ 137118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1372de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | 1373de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1374de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 137518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | 1376de2362d3Smrg R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | 1377de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1378de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1379de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 138018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1381de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1382de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1383de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 138418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | 1385de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1386de2362d3Smrg 
1387de2362d3Smrg /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ 138818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1389de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1390de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1391de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 139218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1393de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1394de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1395de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 139618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1397de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1398de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1399de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 140018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | 1401de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1402de2362d3Smrg 1403de2362d3Smrg /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ 140418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1405de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1406de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1407de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 140818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1409de2362d3Smrg R300_ALU_RGB_ADDR2(3) | 1410de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1411de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 141218781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1413de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1414de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1415de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 141618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | 1417de2362d3Smrg 
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1418de2362d3Smrg 1419de2362d3Smrg /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ 142018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1421de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1422de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1423de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 142418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1425de2362d3Smrg R300_ALU_RGB_ADDR2(5) | 1426de2362d3Smrg R300_ALU_RGB_ADDRD(3) | 1427de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 142818781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1429de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1430de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1431de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 143218781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | 1433de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1434de2362d3Smrg 1435de2362d3Smrg /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ 143618781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1437de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1438de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1439de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); 144018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | 1441de2362d3Smrg R300_ALU_RGB_ADDR2(4) | 1442de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1443de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); 144418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1445de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1446de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1447de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 144818781e08Smrg 
OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | 1449de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 1450de2362d3Smrg 1451de2362d3Smrg 1452de2362d3Smrg // third node 1453de2362d3Smrg /* TEX temp4, temp1.rg--, tex0, 1D */ 145418781e08Smrg OUT_RING_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | 1455de2362d3Smrg R300_TEX_ID(0) | 1456de2362d3Smrg R300_TEX_SRC_ADDR(1) | 1457de2362d3Smrg R300_TEX_DST_ADDR(4))); 1458de2362d3Smrg 1459de2362d3Smrg /* TEX temp3, temp3.rg--, tex0, 1D */ 146018781e08Smrg OUT_RING_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | 1461de2362d3Smrg R300_TEX_ID(0) | 1462de2362d3Smrg R300_TEX_SRC_ADDR(3) | 1463de2362d3Smrg R300_TEX_DST_ADDR(3))); 1464de2362d3Smrg 1465de2362d3Smrg /* TEX temp5, temp2.rg--, tex0, 1D */ 146618781e08Smrg OUT_RING_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | 1467de2362d3Smrg R300_TEX_ID(0) | 1468de2362d3Smrg R300_TEX_SRC_ADDR(2) | 1469de2362d3Smrg R300_TEX_DST_ADDR(5))); 1470de2362d3Smrg 1471de2362d3Smrg /* TEX temp0, temp0.rg--, tex0, 1D */ 147218781e08Smrg OUT_RING_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | 1473de2362d3Smrg R300_TEX_ID(0) | 1474de2362d3Smrg R300_TEX_SRC_ADDR(0) | 1475de2362d3Smrg R300_TEX_DST_ADDR(0))); 1476de2362d3Smrg 1477de2362d3Smrg /* LRP temp3, temp1.bbbb, temp4, temp3 -> 1478de2362d3Smrg * - PRESUB temps, temp4 - temp3 1479de2362d3Smrg * - MAD temp3, temp1.bbbb, temps, temp3 */ 148018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1481de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1482de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1483de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1484de2362d3Smrg R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 148518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | 1486de2362d3Smrg R300_ALU_RGB_ADDR1(4) | 1487de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1488de2362d3Smrg 
R300_ALU_RGB_ADDRD(3) | 1489de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 149018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1491de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1492de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1493de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 149418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | 1495de2362d3Smrg R300_ALU_ALPHA_ADDR1(4) | 1496de2362d3Smrg R300_ALU_ALPHA_ADDR2(1) | 1497de2362d3Smrg R300_ALU_ALPHA_ADDRD(3) | 1498de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1499de2362d3Smrg 1500de2362d3Smrg /* LRP temp0, temp1.bbbb, temp5, temp0 -> 1501de2362d3Smrg * - PRESUB temps, temp5 - temp0 1502de2362d3Smrg * - MAD temp0, temp1.bbbb, temps, temp0 */ 150318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1504de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1505de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1506de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1507de2362d3Smrg R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | 1508de2362d3Smrg R300_ALU_RGB_INSERT_NOP)); 150918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | 1510de2362d3Smrg R300_ALU_RGB_ADDR1(5) | 1511de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1512de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1513de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 151418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1515de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1516de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1517de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 151818781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | 1519de2362d3Smrg R300_ALU_ALPHA_ADDR1(5) | 1520de2362d3Smrg R300_ALU_ALPHA_ADDR2(1) | 1521de2362d3Smrg 
R300_ALU_ALPHA_ADDRD(0) | 1522de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); 1523de2362d3Smrg 1524de2362d3Smrg /* LRP output, temp2.bbbb, temp3, temp0 -> 1525de2362d3Smrg * - PRESUB temps, temp3 - temp0 1526de2362d3Smrg * - MAD output, temp2.bbbb, temps, temp0 */ 152718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1528de2362d3Smrg R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | 1529de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | 1530de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1531de2362d3Smrg R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); 153218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | 1533de2362d3Smrg R300_ALU_RGB_ADDR1(3) | 1534de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1535de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); 153618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1537de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | 1538de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | 1539de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); 154018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | 1541de2362d3Smrg R300_ALU_ALPHA_ADDR1(3) | 1542de2362d3Smrg R300_ALU_ALPHA_ADDR2(2) | 1543de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); 1544de2362d3Smrg 1545de2362d3Smrg /* Shader constants. 
*/ 154618781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); 154718781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_G(0), 0); 154818781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_B(0), 0); 154918781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_A(0), 0); 1550de2362d3Smrg 155118781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_R(1), 0); 155218781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); 155318781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_B(1), 0); 155418781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_A(1), 0); 1555de2362d3Smrg 155618781e08Smrg ADVANCE_RING(); 1557de2362d3Smrg } else { 155818781e08Smrg BEGIN_RING(2*11); 1559de2362d3Smrg /* 2 components: 2 for tex0 */ 156018781e08Smrg OUT_RING_REG(R300_RS_COUNT, 1561de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1562de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1563de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 156418781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1565de2362d3Smrg 156618781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 1567de2362d3Smrg 1568de2362d3Smrg /* Indirection levels */ 156918781e08Smrg OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1570de2362d3Smrg R300_FIRST_TEX)); 1571de2362d3Smrg 157218781e08Smrg OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1573de2362d3Smrg R300_ALU_CODE_SIZE(1) | 1574de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1575de2362d3Smrg R300_TEX_CODE_SIZE(1))); 1576de2362d3Smrg 157718781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1578de2362d3Smrg R300_ALU_SIZE(0) | 1579de2362d3Smrg R300_TEX_START(0) | 1580de2362d3Smrg R300_TEX_SIZE(0) | 1581de2362d3Smrg R300_RGBA_OUT)); 1582de2362d3Smrg 1583de2362d3Smrg /* tex inst */ 158418781e08Smrg OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1585de2362d3Smrg R300_TEX_DST_ADDR(0) | 1586de2362d3Smrg R300_TEX_ID(0) | 1587de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 1588de2362d3Smrg 1589de2362d3Smrg /* ALU inst */ 
1590de2362d3Smrg /* RGB */ 159118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | 1592de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1593de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 1594de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1595de2362d3Smrg R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | 1596de2362d3Smrg R300_ALU_RGB_MASK_G | 1597de2362d3Smrg R300_ALU_RGB_MASK_B)) | 1598de2362d3Smrg R300_ALU_RGB_TARGET_A)); 159918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1600de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1601de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | 1602de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1603de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1604de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1605de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1606de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1607de2362d3Smrg R300_ALU_RGB_CLAMP)); 1608de2362d3Smrg /* Alpha */ 160918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | 1610de2362d3Smrg R300_ALU_ALPHA_ADDR1(0) | 1611de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 1612de2362d3Smrg R300_ALU_ALPHA_ADDRD(0) | 1613de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1614de2362d3Smrg R300_ALU_ALPHA_TARGET_A | 1615de2362d3Smrg R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); 161618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 1617de2362d3Smrg R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | 1618de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | 1619de2362d3Smrg R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | 1620de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | 1621de2362d3Smrg R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | 1622de2362d3Smrg R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1623de2362d3Smrg R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | 1624de2362d3Smrg R300_ALU_ALPHA_CLAMP)); 162518781e08Smrg ADVANCE_RING(); 
1626de2362d3Smrg } 1627de2362d3Smrg } else { 1628de2362d3Smrg /* 1629de2362d3Smrg * y' = y - .0625 1630de2362d3Smrg * u' = u - .5 1631de2362d3Smrg * v' = v - .5; 1632de2362d3Smrg * 1633de2362d3Smrg * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 1634de2362d3Smrg * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 1635de2362d3Smrg * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 1636de2362d3Smrg * 1637de2362d3Smrg * DP3 might look like the straightforward solution 1638de2362d3Smrg * but we'd need to move the texture yuv values in 1639de2362d3Smrg * the same reg for this to work. Therefore use MADs. 1640de2362d3Smrg * Brightness just adds to the off constant. 1641de2362d3Smrg * Contrast is multiplication of luminance. 1642de2362d3Smrg * Saturation and hue change the u and v coeffs. 1643de2362d3Smrg * Default values (before adjustments - depend on colorspace): 1644de2362d3Smrg * yco = 1.1643 1645de2362d3Smrg * uco = 0, -0.39173, 2.017 1646de2362d3Smrg * vco = 1.5958, -0.8129, 0 1647de2362d3Smrg * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 1648de2362d3Smrg * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 1649de2362d3Smrg * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 1650de2362d3Smrg * 1651de2362d3Smrg * temp = MAD(yco, yuv.yyyy, off) 1652de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 1653de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 1654de2362d3Smrg */ 1655de2362d3Smrg /* TODO: don't recalc consts always */ 1656de2362d3Smrg const float Loff = -0.0627; 1657de2362d3Smrg const float Coff = -0.502; 1658de2362d3Smrg float uvcosf, uvsinf; 1659de2362d3Smrg float yco; 1660de2362d3Smrg float uco[3], vco[3], off[3]; 1661de2362d3Smrg float bright, cont, gamma; 1662de2362d3Smrg int ref = pPriv->transform_index; 1663de2362d3Smrg Bool needgamma = FALSE; 1664de2362d3Smrg 1665de2362d3Smrg cont = RTFContrast(pPriv->contrast); 1666de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 1667de2362d3Smrg gamma = (float)pPriv->gamma / 1000.0; 1668de2362d3Smrg uvcosf = 
RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 1669de2362d3Smrg uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 1670de2362d3Smrg /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 1671de2362d3Smrg 1672de2362d3Smrg yco = trans[ref].RefLuma * cont; 1673de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 1674de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 1675de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 1676de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 1677de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 1678de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 1679de2362d3Smrg off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 1680de2362d3Smrg off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 1681de2362d3Smrg off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 1682de2362d3Smrg 1683de2362d3Smrg if (gamma != 1.0) { 1684de2362d3Smrg needgamma = TRUE; 1685de2362d3Smrg /* note: gamma correction is out = in ^ gamma; 1686de2362d3Smrg gpu can only do LG2/EX2 therefore we transform into 1687de2362d3Smrg in ^ gamma = 2 ^ (log2(in) * gamma). 1688de2362d3Smrg Lots of scalar ops, unfortunately (better solution?) - 1689de2362d3Smrg without gamma that's 3 inst, with gamma it's 10... 1690de2362d3Smrg could use different gamma factors per channel, 1691de2362d3Smrg if that's of any use. */ 1692de2362d3Smrg } 1693de2362d3Smrg 1694de2362d3Smrg if (pPriv->is_planar) { 169518781e08Smrg BEGIN_RING(2 * (needgamma ? 
(28 + 33) : 33)); 1696de2362d3Smrg /* 2 components: same 2 for tex0/1/2 */ 169718781e08Smrg OUT_RING_REG(R300_RS_COUNT, 1698de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1699de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1700de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 170118781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1702de2362d3Smrg 170318781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 1704de2362d3Smrg 1705de2362d3Smrg /* Indirection levels */ 170618781e08Smrg OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1707de2362d3Smrg R300_FIRST_TEX)); 1708de2362d3Smrg 170918781e08Smrg OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1710de2362d3Smrg R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1711de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1712de2362d3Smrg R300_TEX_CODE_SIZE(3))); 1713de2362d3Smrg 171418781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1715de2362d3Smrg R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | 1716de2362d3Smrg R300_TEX_START(0) | 1717de2362d3Smrg R300_TEX_SIZE(2) | 1718de2362d3Smrg R300_RGBA_OUT)); 1719de2362d3Smrg 1720de2362d3Smrg /* tex inst */ 172118781e08Smrg OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1722de2362d3Smrg R300_TEX_DST_ADDR(2) | 1723de2362d3Smrg R300_TEX_ID(0) | 1724de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 172518781e08Smrg OUT_RING_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | 1726de2362d3Smrg R300_TEX_DST_ADDR(1) | 1727de2362d3Smrg R300_TEX_ID(1) | 1728de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 172918781e08Smrg OUT_RING_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | 1730de2362d3Smrg R300_TEX_DST_ADDR(0) | 1731de2362d3Smrg R300_TEX_ID(2) | 1732de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 1733de2362d3Smrg 1734de2362d3Smrg /* ALU inst */ 1735de2362d3Smrg /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 173618781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 
1737de2362d3Smrg R300_ALU_RGB_ADDR1(2) | 1738de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 1739de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1740de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 174118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1742de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1743de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1744de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1745de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1746de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1747de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1748de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1749de2362d3Smrg /* alpha nop, but need to set up alpha source for rgb usage */ 175018781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1751de2362d3Smrg R300_ALU_ALPHA_ADDR1(2) | 1752de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 1753de2362d3Smrg R300_ALU_ALPHA_ADDRD(2) | 1754de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 175518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1756de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1757de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1758de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1759de2362d3Smrg 1760de2362d3Smrg /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 176118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1762de2362d3Smrg R300_ALU_RGB_ADDR1(1) | 1763de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1764de2362d3Smrg R300_ALU_RGB_ADDRD(2) | 1765de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 176618781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1767de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1768de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1769de2362d3Smrg 
R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1770de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1771de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1772de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1773de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1774de2362d3Smrg /* alpha nop */ 177518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | 1776de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 177718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1778de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1779de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1780de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1781de2362d3Smrg 1782de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 178318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 1784de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1785de2362d3Smrg R300_ALU_RGB_ADDR2(2) | 1786de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1787de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 1788de2362d3Smrg (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 178918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1790de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1791de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | 1792de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1793de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1794de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1795de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1796de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 1797de2362d3Smrg R300_ALU_RGB_CLAMP)); 1798de2362d3Smrg /* write alpha 1 */ 179918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 1800de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 1801de2362d3Smrg R300_ALU_ALPHA_TARGET_A)); 180218781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1803de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1804de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1805de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 1806de2362d3Smrg 1807de2362d3Smrg if (needgamma) { 1808de2362d3Smrg /* rgb temp0.r = op_sop, set up src0 reg */ 180918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 1810de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 181118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(3), 1812de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1813de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1814de2362d3Smrg /* alpha lg2 temp0, temp0.r */ 181518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 1816de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 181718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1818de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1819de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1820de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1821de2362d3Smrg 1822de2362d3Smrg /* rgb temp0.g = op_sop, set up src0 reg */ 182318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 1824de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 182518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(4), 1826de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1827de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1828de2362d3Smrg /* alpha lg2 temp0, temp0.g */ 182918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 1830de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 183118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1832de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1833de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1834de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1835de2362d3Smrg 1836de2362d3Smrg /* rgb temp0.b = op_sop, set up src0 reg */ 183718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 1838de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 183918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(5), 1840de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1841de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1842de2362d3Smrg /* alpha lg2 temp0, temp0.b */ 184318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 1844de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 184518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 1846de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1847de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1848de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1849de2362d3Smrg 1850de2362d3Smrg /* MUL const1, temp1, temp0 */ 185118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 1852de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1853de2362d3Smrg 
R300_ALU_RGB_ADDR2(0) | 1854de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 1855de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 185618781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1857de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1858de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 1859de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1860de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 1861de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1862de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1863de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1864de2362d3Smrg /* alpha nop, but set up const1 */ 186518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 1866de2362d3Smrg R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 1867de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 186818781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1869de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1870de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1871de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1872de2362d3Smrg 1873de2362d3Smrg /* rgb out0.r = op_sop, set up src0 reg */ 187418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 1875de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 1876de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 187718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(7), 1878de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1879de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1880de2362d3Smrg /* alpha ex2 temp0, temp0.r */ 188118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 1882de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 188318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1884de2362d3Smrg 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 1885de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1886de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1887de2362d3Smrg 1888de2362d3Smrg /* rgb out0.g = op_sop, set up src0 reg */ 188918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 1890de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 1891de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 189218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(8), 1893de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1894de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1895de2362d3Smrg /* alpha ex2 temp0, temp0.g */ 189618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 1897de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 189818781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1899de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 1900de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1901de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1902de2362d3Smrg 1903de2362d3Smrg /* rgb out0.b = op_sop, set up src0 reg */ 190418781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 1905de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 1906de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 190718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(9), 1908de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 1909de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 1910de2362d3Smrg /* alpha ex2 temp0, temp0.b */ 191118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 1912de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 191318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 1914de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 1915de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1916de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1917de2362d3Smrg } 1918de2362d3Smrg } else { 191918781e08Smrg BEGIN_RING(2 * (needgamma ? (28 + 31) : 31)); 1920de2362d3Smrg /* 2 components */ 192118781e08Smrg OUT_RING_REG(R300_RS_COUNT, 1922de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 1923de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 1924de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 192518781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 1926de2362d3Smrg 192718781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 1928de2362d3Smrg 1929de2362d3Smrg /* Indirection levels */ 193018781e08Smrg OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | 1931de2362d3Smrg R300_FIRST_TEX)); 1932de2362d3Smrg 193318781e08Smrg OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | 1934de2362d3Smrg R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | 1935de2362d3Smrg R300_TEX_CODE_OFFSET(0) | 1936de2362d3Smrg R300_TEX_CODE_SIZE(1))); 1937de2362d3Smrg 193818781e08Smrg OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | 1939de2362d3Smrg R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | 1940de2362d3Smrg R300_TEX_START(0) | 1941de2362d3Smrg R300_TEX_SIZE(0) | 1942de2362d3Smrg R300_RGBA_OUT)); 1943de2362d3Smrg 1944de2362d3Smrg /* tex inst */ 194518781e08Smrg OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | 1946de2362d3Smrg R300_TEX_DST_ADDR(0) | 1947de2362d3Smrg R300_TEX_ID(0) | 1948de2362d3Smrg R300_TEX_INST(R300_TEX_INST_LD))); 1949de2362d3Smrg 1950de2362d3Smrg /* ALU inst */ 1951de2362d3Smrg /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 195218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | 1953de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1954de2362d3Smrg R300_ALU_RGB_ADDR2(0) | 1955de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1956de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 195718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | 1958de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1959de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | 1960de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1961de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 1962de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1963de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1964de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1965de2362d3Smrg /* alpha nop, but need to set up alpha source for rgb usage */ 196618781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | 1967de2362d3Smrg R300_ALU_ALPHA_ADDR1(0) | 1968de2362d3Smrg R300_ALU_ALPHA_ADDR2(0) | 1969de2362d3Smrg R300_ALU_ALPHA_ADDRD(0) | 1970de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 197118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1972de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1973de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1974de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1975de2362d3Smrg 1976de2362d3Smrg /* MAD 
temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 197718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | 1978de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 1979de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 1980de2362d3Smrg R300_ALU_RGB_ADDRD(1) | 1981de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 198218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 1983de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 1984de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | 1985de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 1986de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 1987de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 1988de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 1989de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 1990de2362d3Smrg /* alpha nop */ 199118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | 1992de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 199318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 1994de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 1995de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 1996de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 1997de2362d3Smrg 1998de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 199918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | 2000de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2001de2362d3Smrg R300_ALU_RGB_ADDR2(1) | 2002de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2003de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | 2004de2362d3Smrg (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); 200518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2006de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2007de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | 2008de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2009de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | 2010de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2011de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2012de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | 2013de2362d3Smrg R300_ALU_RGB_CLAMP)); 2014de2362d3Smrg /* write alpha 1 */ 201518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | 2016de2362d3Smrg R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | 2017de2362d3Smrg R300_ALU_ALPHA_TARGET_A)); 201818781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2019de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2020de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2021de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); 2022de2362d3Smrg 2023de2362d3Smrg if (needgamma) { 2024de2362d3Smrg /* rgb temp0.r = op_sop, set up src0 reg */ 202518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | 2026de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); 202718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(3), 2028de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2029de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2030de2362d3Smrg /* alpha lg2 temp0, temp0.r */ 203118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | 2032de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 203318781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2034de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2035de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2036de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2037de2362d3Smrg 2038de2362d3Smrg /* rgb temp0.g = op_sop, set up src0 reg */ 203918781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | 2040de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); 204118781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(4), 2042de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2043de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2044de2362d3Smrg /* alpha lg2 temp0, temp0.g */ 204518781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | 2046de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 204718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2048de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2049de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2050de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2051de2362d3Smrg 2052de2362d3Smrg /* rgb temp0.b = op_sop, set up src0 reg */ 205318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | 2054de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); 205518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(5), 2056de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2057de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2058de2362d3Smrg /* alpha lg2 temp0, temp0.b */ 205918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | 2060de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 206118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | 2062de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2063de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2064de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2065de2362d3Smrg 2066de2362d3Smrg /* MUL const1, temp1, temp0 */ 206718781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | 2068de2362d3Smrg R300_ALU_RGB_ADDR1(0) | 2069de2362d3Smrg 
R300_ALU_RGB_ADDR2(0) | 2070de2362d3Smrg R300_ALU_RGB_ADDRD(0) | 2071de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); 207218781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | 2073de2362d3Smrg R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | 2074de2362d3Smrg R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | 2075de2362d3Smrg R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 2076de2362d3Smrg R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | 2077de2362d3Smrg R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | 2078de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | 2079de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); 2080de2362d3Smrg /* alpha nop, but set up const1 */ 208118781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | 2082de2362d3Smrg R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | 2083de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 208418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | 2085de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | 2086de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2087de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2088de2362d3Smrg 2089de2362d3Smrg /* rgb out0.r = op_sop, set up src0 reg */ 209018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | 2091de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | 2092de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); 209318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(7), 2094de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2095de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2096de2362d3Smrg /* alpha ex2 temp0, temp0.r */ 209718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | 2098de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 209918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2100de2362d3Smrg 
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | 2101de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2102de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2103de2362d3Smrg 2104de2362d3Smrg /* rgb out0.g = op_sop, set up src0 reg */ 210518781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | 2106de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | 2107de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); 210818781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(8), 2109de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2110de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2111de2362d3Smrg /* alpha ex2 temp0, temp0.g */ 211218781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | 2113de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 211418781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2115de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | 2116de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2117de2362d3Smrg R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2118de2362d3Smrg 2119de2362d3Smrg /* rgb out0.b = op_sop, set up src0 reg */ 212018781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | 2121de2362d3Smrg R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | 2122de2362d3Smrg R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); 212318781e08Smrg OUT_RING_REG(R300_US_ALU_RGB_INST(9), 2124de2362d3Smrg R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | 2125de2362d3Smrg R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); 2126de2362d3Smrg /* alpha ex2 temp0, temp0.b */ 212718781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | 2128de2362d3Smrg R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); 212918781e08Smrg OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | 2130de2362d3Smrg R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | 2131de2362d3Smrg R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 2132de2362d3Smrg 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); 2133de2362d3Smrg } 2134de2362d3Smrg } 2135de2362d3Smrg 2136de2362d3Smrg /* Shader constants. */ 2137de2362d3Smrg /* constant 0: off, yco */ 213818781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); 213918781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); 214018781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); 214118781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); 2142de2362d3Smrg /* constant 1: uco */ 214318781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); 214418781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); 214518781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); 214618781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); 2147de2362d3Smrg /* constant 2: vco */ 214818781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); 214918781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); 215018781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); 215118781e08Smrg OUT_RING_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); 2152de2362d3Smrg 215318781e08Smrg ADVANCE_RING(); 2154de2362d3Smrg } 2155de2362d3Smrg 2156de2362d3Smrg BEGIN_ACCEL_RELOC(6, 2); 215718781e08Smrg OUT_RING_REG(R300_TX_INVALTAGS, 0); 215818781e08Smrg OUT_RING_REG(R300_TX_ENABLE, txenable); 2159de2362d3Smrg 2160de2362d3Smrg EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 2161de2362d3Smrg EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 2162de2362d3Smrg 2163de2362d3Smrg /* no need to enable blending */ 216418781e08Smrg OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 2165de2362d3Smrg 216618781e08Smrg OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 216718781e08Smrg ADVANCE_RING(); 2168de2362d3Smrg 2169de2362d3Smrg if (pPriv->vsync) { 2170de2362d3Smrg xf86CrtcPtr crtc; 2171de2362d3Smrg if (pPriv->desired_crtc) 2172de2362d3Smrg crtc = pPriv->desired_crtc; 
2173de2362d3Smrg else 217418781e08Smrg crtc = radeon_pick_best_crtc(pScrn, FALSE, 2175de2362d3Smrg pPriv->drw_x, 2176de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 2177de2362d3Smrg pPriv->drw_y, 2178de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 2179de2362d3Smrg if (crtc) 218018781e08Smrg RADEONWaitForVLine(pScrn, pPixmap, 218118781e08Smrg crtc, 218218781e08Smrg pPriv->drw_y - crtc->y, 218318781e08Smrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 2184de2362d3Smrg } 2185de2362d3Smrg 2186de2362d3Smrg return TRUE; 2187de2362d3Smrg} 2188de2362d3Smrg
/*
 * Draw the current video frame into pPriv->pPixmap, one primitive per
 * rectangle of the clip region pPriv->clip.
 *
 * pScrn - screen the commands are emitted for
 * pPriv - port state; this function reads the clip region, the source and
 *         destination geometry (src_x/src_y/src_w/src_h, dst_w/dst_h,
 *         drw_x/drw_y), the texture dimensions (w, h), vtx_count,
 *         bicubic_enabled and pDraw
 *
 * Nothing is returned.  When R300PrepareTexturedVideo() fails -- at entry
 * or after a mid-loop flush -- the function returns immediately, without
 * the closing flush and without reporting damage.
 */
static void
R300DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr box = REGION_RECTS(&pPriv->clip);
    int boxes_left = REGION_NUM_RECTS(&pPriv->clip);
    int xoff = 0;
    int yoff = 0;
    /*
     * The hardware draws a quad as two triangles, which shows up as a
     * diagonal tear.  Each box is therefore normally drawn as one oversized
     * triangle that the scissor cuts down to the wanted rectangle.
     * Guardband limits on r3xx/r4xx only allow that while width + height
     * stays within 2560 (4021 when IS_R400_3D); bigger boxes are drawn as
     * a quad after all.
     */
    const int tri_limit = (IS_R400_3D) ? 4021 : 2560;

#ifdef COMPOSITE
    xoff = -pPixmap->screen_x + pPixmap->drawable.x;
    yoff = -pPixmap->screen_y + pPixmap->drawable.y;
#endif

    if (!R300PrepareTexturedVideo(pScrn, pPriv))
        return;

    for (; boxes_left--; box++) {
        /* space wanted for one box before emitting anything for it */
        const int ring_need = 4 * pPriv->vtx_count + 4 + 2 + 3;
        int dx, dy, dw, dh;
        float sx, sy, sw, sh;
        Bool use_quad;
        int nverts;

        /* not enough room left: submit what we have and re-emit the state */
        if (ring_need > radeon_cs_space_remaining(pScrn)) {
            radeon_cs_flush_indirect(pScrn);
            if (!R300PrepareTexturedVideo(pScrn, pPriv))
                return;
        }

        /* destination rectangle, in pixmap coordinates */
        dx = box->x1 + xoff;
        dy = box->y1 + yoff;
        dw = box->x2 - box->x1;
        dh = box->y2 - box->y1;

        /* matching source rectangle, scaled by src size / dst size */
        sx = pPriv->src_x;
        sx += ((box->x1 - pPriv->drw_x) *
               pPriv->src_w) / (float)pPriv->dst_w;
        sy = pPriv->src_y;
        sy += ((box->y1 - pPriv->drw_y) *
               pPriv->src_h) / (float)pPriv->dst_h;

        sw = (pPriv->src_w * dw) / (float)pPriv->dst_w;
        sh = (pPriv->src_h * dh) / (float)pPriv->dst_h;

        use_quad = (dw + dh) > tri_limit;
        nverts = use_quad ? 4 : 3;

        /* Restrict rendering to the destination rectangle. */
        BEGIN_RING(2*2);
        /* R300 has an offset */
        OUT_RING_REG(R300_SC_SCISSOR0, (((dx + 1440) << R300_SCISSOR_X_SHIFT) |
                                        ((dy + 1440) << R300_SCISSOR_Y_SHIFT)));
        OUT_RING_REG(R300_SC_SCISSOR1, (((dx + dw + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
                                        ((dy + dh + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
        ADVANCE_RING();

        /* immediate-mode draw packet: nverts vertices of vtx_count dwords */
        BEGIN_RING(nverts * pPriv->vtx_count + 4);
        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
                            nverts * pPriv->vtx_count));
        OUT_RING((use_quad ? RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST
                           : RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST) |
                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
                 (nverts << RADEON_CP_VC_CNTL_NUM_SHIFT));

        if (use_quad) {
            /* corners in the order (x,y), (x,y+h), (x+w,y+h), (x+w,y) */
            if (pPriv->bicubic_enabled) {
                /*
                 * Bicubic vertices carry a third pair (VTX_OUT_6's mask
                 * coordinates): the unnormalised source position + 0.5.
                 */
                VTX_OUT_6((float)dx, (float)dy,
                          (float)sx / pPriv->w, (float)sy / pPriv->h,
                          (float)sx + 0.5, (float)sy + 0.5);
                VTX_OUT_6((float)dx, (float)(dy + dh),
                          (float)sx / pPriv->w, (float)(sy + sh) / pPriv->h,
                          (float)sx + 0.5, (float)(sy + sh) + 0.5);
                VTX_OUT_6((float)(dx + dw), (float)(dy + dh),
                          (float)(sx + sw) / pPriv->w, (float)(sy + sh) / pPriv->h,
                          (float)(sx + sw) + 0.5, (float)(sy + sh) + 0.5);
                VTX_OUT_6((float)(dx + dw), (float)dy,
                          (float)(sx + sw) / pPriv->w, (float)sy / pPriv->h,
                          (float)(sx + sw) + 0.5, (float)sy + 0.5);
            } else {
                VTX_OUT_4((float)dx, (float)dy,
                          (float)sx / pPriv->w, (float)sy / pPriv->h);
                VTX_OUT_4((float)dx, (float)(dy + dh),
                          (float)sx / pPriv->w, (float)(sy + sh) / pPriv->h);
                VTX_OUT_4((float)(dx + dw), (float)(dy + dh),
                          (float)(sx + sw) / pPriv->w, (float)(sy + sh) / pPriv->h);
                VTX_OUT_4((float)(dx + dw), (float)dy,
                          (float)sx / pPriv->w, (float)sy / pPriv->h);
            }
        } else {
            /*
             * One big triangle with its right angle at (x,y); both legs are
             * w + h long so the rectangle fits inside, and the texture
             * coordinates of the two far vertices are stretched to match.
             */
            if (pPriv->bicubic_enabled) {
                VTX_OUT_6((float)dx, (float)dy,
                          (float)sx / pPriv->w, (float)sy / pPriv->h,
                          (float)sx + 0.5, (float)sy + 0.5);
                VTX_OUT_6((float)dx, (float)(dy + dw + dh),
                          (float)sx / pPriv->w,
                          ((float)sy + (float)sh * (((float)dw / (float)dh) + 1.0)) / pPriv->h,
                          (float)sx + 0.5,
                          (float)sy + (float)sh * (((float)dw / (float)dh) + 1.0) + 0.5);
                VTX_OUT_6((float)(dx + dw + dh), (float)dy,
                          ((float)sx + (float)sw * (((float)dh / (float)dw) + 1.0)) / pPriv->w,
                          (float)sy / pPriv->h,
                          (float)sx + (float)sw * (((float)dh / (float)dw) + 1.0) + 0.5,
                          (float)sy + 0.5);
            } else {
                VTX_OUT_4((float)dx, (float)dy,
                          (float)sx / pPriv->w, (float)sy / pPriv->h);
                VTX_OUT_4((float)dx, (float)(dy + dh + dw),
                          (float)sx / pPriv->w,
                          ((float)sy + (float)sh * (((float)dw / (float)dh) + 1.0)) / pPriv->h);
                VTX_OUT_4((float)(dx + dw + dh), (float)dy,
                          ((float)sx + (float)sw * (((float)dh / (float)dw) + 1.0)) / pPriv->w,
                          (float)sy / pPriv->h);
            }
        }

        /* flushing is pipelined, free/finish is not */
        OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);

        ADVANCE_RING();
    }

    /* all boxes emitted: reset the clip rule, flush and wait for 3D idle */
    BEGIN_RING(2*3);
    OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA);
    OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
    OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    ADVANCE_RING();

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

2367de2362d3Smrgstatic Bool 236818781e08SmrgR500PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 2369de2362d3Smrg{ 2370de2362d3Smrg
RADEONInfoPtr info = RADEONPTR(pScrn); 2371de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 2372de2362d3Smrg struct radeon_exa_pixmap_priv *driver_priv; 2373de2362d3Smrg struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; 237418781e08Smrg uint32_t txfilter, txformat0, txformat1, txpitch, us_format = 0; 2375de2362d3Smrg uint32_t dst_pitch, dst_format; 237618781e08Smrg uint32_t txenable, colorpitch; 2377de2362d3Smrg uint32_t output_fmt; 2378de2362d3Smrg int pixel_shift, out_size = 6; 237918781e08Smrg int ret; 2380de2362d3Smrg 238118781e08Smrg radeon_cs_space_reset_bos(info->cs); 238218781e08Smrg radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 23837821949aSmrg 238418781e08Smrg if (pPriv->bicubic_enabled) 238539413783Smrg radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, 238639413783Smrg RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 238718781e08Smrg 238818781e08Smrg driver_priv = exaGetPixmapDriverPrivate(pPixmap); 238939413783Smrg radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0, 239039413783Smrg RADEON_GEM_DOMAIN_VRAM); 239118781e08Smrg 239218781e08Smrg ret = radeon_cs_space_check(info->cs); 239318781e08Smrg if (ret) { 239418781e08Smrg ErrorF("Not enough RAM to hw accel xv operation\n"); 239518781e08Smrg return FALSE; 2396de2362d3Smrg } 2397de2362d3Smrg 2398de2362d3Smrg pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; 2399de2362d3Smrg 240018781e08Smrg dst_pitch = exaGetPixmapPitch(pPixmap); 240118781e08Smrg RADEON_SWITCH_TO_3D(); 2402de2362d3Smrg 2403de2362d3Smrg if (pPriv->bicubic_enabled) 2404de2362d3Smrg pPriv->vtx_count = 6; 2405de2362d3Smrg else 2406de2362d3Smrg pPriv->vtx_count = 4; 2407de2362d3Smrg 2408de2362d3Smrg switch (pPixmap->drawable.bitsPerPixel) { 2409de2362d3Smrg case 16: 2410de2362d3Smrg if (pPixmap->drawable.depth == 15) 2411de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB1555; 2412de2362d3Smrg else 2413de2362d3Smrg dst_format = 
R300_COLORFORMAT_RGB565; 2414de2362d3Smrg break; 2415de2362d3Smrg case 32: 2416de2362d3Smrg dst_format = R300_COLORFORMAT_ARGB8888; 2417de2362d3Smrg break; 2418de2362d3Smrg default: 2419de2362d3Smrg return FALSE; 2420de2362d3Smrg } 2421de2362d3Smrg 2422de2362d3Smrg output_fmt = (R300_OUT_FMT_C4_8 | 2423de2362d3Smrg R300_OUT_FMT_C0_SEL_BLUE | 2424de2362d3Smrg R300_OUT_FMT_C1_SEL_GREEN | 2425de2362d3Smrg R300_OUT_FMT_C2_SEL_RED | 2426de2362d3Smrg R300_OUT_FMT_C3_SEL_ALPHA); 2427de2362d3Smrg 2428de2362d3Smrg colorpitch = dst_pitch >> pixel_shift; 2429de2362d3Smrg colorpitch |= dst_format; 2430de2362d3Smrg 2431de2362d3Smrg if (RADEONTilingEnabled(pScrn, pPixmap)) 2432de2362d3Smrg colorpitch |= R300_COLORTILE; 2433de2362d3Smrg 2434de2362d3Smrg if (((pPriv->bicubic_state == BICUBIC_OFF)) && 2435de2362d3Smrg (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) 2436de2362d3Smrg pPriv->is_planar = TRUE; 2437de2362d3Smrg else 2438de2362d3Smrg pPriv->is_planar = FALSE; 2439de2362d3Smrg 2440de2362d3Smrg if (pPriv->is_planar) { 2441de2362d3Smrg txformat1 = R300_TX_FORMAT_X8; 2442de2362d3Smrg txpitch = pPriv->src_pitch; 2443de2362d3Smrg } else { 2444de2362d3Smrg if (pPriv->id == FOURCC_UYVY) 2445de2362d3Smrg txformat1 = R300_TX_FORMAT_YVYU422; 2446de2362d3Smrg else 2447de2362d3Smrg txformat1 = R300_TX_FORMAT_VYUY422; 2448de2362d3Smrg 2449de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) 2450de2362d3Smrg txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; 2451de2362d3Smrg 2452de2362d3Smrg /* pitch is in pixels */ 2453de2362d3Smrg txpitch = pPriv->src_pitch / 2; 2454de2362d3Smrg } 2455de2362d3Smrg txpitch -= 1; 2456de2362d3Smrg 2457de2362d3Smrg txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2458de2362d3Smrg (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 2459de2362d3Smrg R300_TXPITCH_EN); 2460de2362d3Smrg 2461de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2462de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2463de2362d3Smrg 
R300_TX_MAG_FILTER_LINEAR | 2464de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 2465de2362d3Smrg (0 << R300_TX_ID_SHIFT)); 2466de2362d3Smrg 2467de2362d3Smrg 2468de2362d3Smrg if ((pPriv->w - 1) & 0x800) 2469de2362d3Smrg txpitch |= R500_TXWIDTH_11; 2470de2362d3Smrg 2471de2362d3Smrg if ((pPriv->h - 1) & 0x800) 2472de2362d3Smrg txpitch |= R500_TXHEIGHT_11; 2473de2362d3Smrg 2474de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R520) { 2475de2362d3Smrg unsigned us_width = (pPriv->w - 1) & 0x7ff; 2476de2362d3Smrg unsigned us_height = (pPriv->h - 1) & 0x7ff; 2477de2362d3Smrg unsigned us_depth = 0; 2478de2362d3Smrg 2479de2362d3Smrg if (pPriv->w > 2048) { 2480de2362d3Smrg us_width = (0x7ff + us_width) >> 1; 2481de2362d3Smrg us_depth |= 0x0d; 2482de2362d3Smrg } 2483de2362d3Smrg if (pPriv->h > 2048) { 2484de2362d3Smrg us_height = (0x7ff + us_height) >> 1; 2485de2362d3Smrg us_depth |= 0x0e; 2486de2362d3Smrg } 2487de2362d3Smrg us_format = (us_width << R300_TXWIDTH_SHIFT) | 2488de2362d3Smrg (us_height << R300_TXHEIGHT_SHIFT) | 2489de2362d3Smrg (us_depth << R300_TXDEPTH_SHIFT); 2490de2362d3Smrg out_size++; 2491de2362d3Smrg } 2492de2362d3Smrg 2493de2362d3Smrg BEGIN_ACCEL_RELOC(out_size, 1); 249418781e08Smrg OUT_RING_REG(R300_TX_FILTER0_0, txfilter); 249518781e08Smrg OUT_RING_REG(R300_TX_FILTER1_0, 0); 249618781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_0, txformat0); 249718781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_0, txformat1); 249818781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_0, txpitch); 249918781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo); 2500de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R520) 250118781e08Smrg OUT_RING_REG(R500_US_FORMAT0_0, us_format); 250218781e08Smrg ADVANCE_RING(); 2503de2362d3Smrg 2504de2362d3Smrg txenable = R300_TEX_0_ENABLE; 2505de2362d3Smrg 2506de2362d3Smrg if (pPriv->is_planar) { 2507de2362d3Smrg txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | 2508de2362d3Smrg (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | 
2509de2362d3Smrg R300_TXPITCH_EN); 2510de2362d3Smrg txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64); 2511de2362d3Smrg txpitch -= 1; 2512de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | 2513de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | 2514de2362d3Smrg R300_TX_MIN_FILTER_LINEAR | 2515de2362d3Smrg R300_TX_MAG_FILTER_LINEAR); 2516de2362d3Smrg 2517de2362d3Smrg BEGIN_ACCEL_RELOC(12, 2); 251818781e08Smrg OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); 251918781e08Smrg OUT_RING_REG(R300_TX_FILTER1_1, 0); 252018781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 252118781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); 252218781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 252318781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo); 252418781e08Smrg OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); 252518781e08Smrg OUT_RING_REG(R300_TX_FILTER1_2, 0); 252618781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_2, txformat0); 252718781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); 252818781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_2, txpitch); 252918781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo); 253018781e08Smrg ADVANCE_RING(); 2531de2362d3Smrg txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; 2532de2362d3Smrg } 2533de2362d3Smrg 2534de2362d3Smrg if (pPriv->bicubic_enabled) { 2535de2362d3Smrg /* Size is 128x1 */ 2536de2362d3Smrg txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | 2537de2362d3Smrg (0x0 << R300_TXHEIGHT_SHIFT) | 2538de2362d3Smrg R300_TXPITCH_EN); 2539de2362d3Smrg /* Format is 32-bit floats, 4bpp */ 2540de2362d3Smrg txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); 2541de2362d3Smrg /* Pitch is 127 (128-1) */ 2542de2362d3Smrg txpitch = 0x7f; 2543de2362d3Smrg /* Tex filter */ 2544de2362d3Smrg txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | 2545de2362d3Smrg R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | 2546de2362d3Smrg 
R300_TX_MIN_FILTER_NEAREST | 2547de2362d3Smrg R300_TX_MAG_FILTER_NEAREST | 2548de2362d3Smrg (1 << R300_TX_ID_SHIFT)); 2549de2362d3Smrg 2550de2362d3Smrg BEGIN_ACCEL_RELOC(6, 1); 255118781e08Smrg OUT_RING_REG(R300_TX_FILTER0_1, txfilter); 255218781e08Smrg OUT_RING_REG(R300_TX_FILTER1_1, 0); 255318781e08Smrg OUT_RING_REG(R300_TX_FORMAT0_1, txformat0); 255418781e08Smrg OUT_RING_REG(R300_TX_FORMAT1_1, txformat1); 255518781e08Smrg OUT_RING_REG(R300_TX_FORMAT2_1, txpitch); 255618781e08Smrg OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo); 255718781e08Smrg ADVANCE_RING(); 2558de2362d3Smrg 2559de2362d3Smrg /* Enable tex 1 */ 2560de2362d3Smrg txenable |= R300_TEX_1_ENABLE; 2561de2362d3Smrg } 2562de2362d3Smrg 2563de2362d3Smrg /* setup the VAP */ 2564de2362d3Smrg if (info->accel_state->has_tcl) { 2565de2362d3Smrg if (pPriv->bicubic_enabled) 256618781e08Smrg BEGIN_RING(2*7); 2567de2362d3Smrg else 256818781e08Smrg BEGIN_RING(2*6); 2569de2362d3Smrg } else { 2570de2362d3Smrg if (pPriv->bicubic_enabled) 257118781e08Smrg BEGIN_RING(2*5); 2572de2362d3Smrg else 257318781e08Smrg BEGIN_RING(2*4); 2574de2362d3Smrg } 2575de2362d3Smrg 2576de2362d3Smrg /* These registers define the number, type, and location of data submitted 2577de2362d3Smrg * to the PVS unit of GA input (when PVS is disabled) 2578de2362d3Smrg * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is 2579de2362d3Smrg * enabled. This memory provides the imputs to the vertex shader program 2580de2362d3Smrg * and ordering is not important. When PVS/TCL is disabled, this field maps 25810a1d3ae0Smrg * directly to the GA input memory and the order is significant. 
In 2582de2362d3Smrg * PVS_BYPASS mode the order is as follows: 2583de2362d3Smrg * Position 2584de2362d3Smrg * Point Size 2585de2362d3Smrg * Color 0-3 2586de2362d3Smrg * Textures 0-7 2587de2362d3Smrg * Fog 2588de2362d3Smrg */ 2589de2362d3Smrg if (pPriv->bicubic_enabled) { 259018781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2591de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2592de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 2593de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 2594de2362d3Smrg R300_SIGNED_0 | 2595de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2596de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 2597de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 2598de2362d3Smrg R300_SIGNED_1)); 259918781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1, 2600de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | 2601de2362d3Smrg (0 << R300_SKIP_DWORDS_2_SHIFT) | 2602de2362d3Smrg (7 << R300_DST_VEC_LOC_2_SHIFT) | 2603de2362d3Smrg R300_LAST_VEC_2 | 2604de2362d3Smrg R300_SIGNED_2)); 2605de2362d3Smrg } else { 260618781e08Smrg OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0, 2607de2362d3Smrg ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | 2608de2362d3Smrg (0 << R300_SKIP_DWORDS_0_SHIFT) | 2609de2362d3Smrg (0 << R300_DST_VEC_LOC_0_SHIFT) | 2610de2362d3Smrg R300_SIGNED_0 | 2611de2362d3Smrg (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | 2612de2362d3Smrg (0 << R300_SKIP_DWORDS_1_SHIFT) | 2613de2362d3Smrg (6 << R300_DST_VEC_LOC_1_SHIFT) | 2614de2362d3Smrg R300_LAST_VEC_1 | 2615de2362d3Smrg R300_SIGNED_1)); 2616de2362d3Smrg } 2617de2362d3Smrg 2618de2362d3Smrg /* load the vertex shader 2619de2362d3Smrg * We pre-load vertex programs in RADEONInit3DEngine(): 2620de2362d3Smrg * - exa 2621de2362d3Smrg * - Xv 2622de2362d3Smrg * - Xv bicubic 2623de2362d3Smrg * Here we select the offset of the vertex program we want to use 2624de2362d3Smrg */ 2625de2362d3Smrg if (info->accel_state->has_tcl) { 2626de2362d3Smrg if 
(pPriv->bicubic_enabled) { 262718781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2628de2362d3Smrg ((11 << R300_PVS_FIRST_INST_SHIFT) | 2629de2362d3Smrg (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2630de2362d3Smrg (13 << R300_PVS_LAST_INST_SHIFT))); 263118781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2632de2362d3Smrg (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2633de2362d3Smrg } else { 263418781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0, 2635de2362d3Smrg ((9 << R300_PVS_FIRST_INST_SHIFT) | 2636de2362d3Smrg (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | 2637de2362d3Smrg (10 << R300_PVS_LAST_INST_SHIFT))); 263818781e08Smrg OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1, 2639de2362d3Smrg (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); 2640de2362d3Smrg } 2641de2362d3Smrg } 2642de2362d3Smrg 2643de2362d3Smrg /* Position and one set of 2 texture coordinates */ 264418781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); 2645de2362d3Smrg if (pPriv->bicubic_enabled) 264618781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | 2647de2362d3Smrg (2 << R300_TEX_1_COMP_CNT_SHIFT))); 2648de2362d3Smrg else 264918781e08Smrg OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); 2650de2362d3Smrg 265118781e08Smrg OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt); 265218781e08Smrg ADVANCE_RING(); 2653de2362d3Smrg 2654de2362d3Smrg /* setup pixel shader */ 2655de2362d3Smrg if (pPriv->bicubic_state != BICUBIC_OFF) { 2656de2362d3Smrg if (pPriv->bicubic_enabled) { 265718781e08Smrg BEGIN_RING(2*7); 2658de2362d3Smrg 2659de2362d3Smrg /* 4 components: 2 for tex0 and 2 for tex1 */ 266018781e08Smrg OUT_RING_REG(R300_RS_COUNT, 2661de2362d3Smrg ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | 2662de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 2663de2362d3Smrg 2664de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 266518781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); 2666de2362d3Smrg 2667de2362d3Smrg /* Pixel stack frame size. 
*/ 266818781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 5); 2669de2362d3Smrg 2670de2362d3Smrg /* FP length. */ 267118781e08Smrg OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 2672de2362d3Smrg R500_US_CODE_END_ADDR(13))); 267318781e08Smrg OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 2674de2362d3Smrg R500_US_CODE_RANGE_SIZE(13))); 2675de2362d3Smrg 2676de2362d3Smrg /* Prepare for FP emission. */ 267718781e08Smrg OUT_RING_REG(R500_US_CODE_OFFSET, 0); 267818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 267918781e08Smrg ADVANCE_RING(); 2680de2362d3Smrg 268118781e08Smrg BEGIN_RING(2*89); 2682de2362d3Smrg /* Pixel shader. 2683de2362d3Smrg * I've gone ahead and annotated each instruction, since this 2684de2362d3Smrg * thing is MASSIVE. :3 2685de2362d3Smrg * Note: In order to avoid buggies with temps and multiple 2686de2362d3Smrg * inputs, all temps are offset by 2. temp0 -> register2. */ 2687de2362d3Smrg 2688de2362d3Smrg /* TEX temp2, input1.xxxx, tex1, 1D */ 268918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2690de2362d3Smrg R500_INST_RGB_WMASK_R | 2691de2362d3Smrg R500_INST_RGB_WMASK_G | 2692de2362d3Smrg R500_INST_RGB_WMASK_B)); 269318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2694de2362d3Smrg R500_TEX_INST_LD | 2695de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 269618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2697de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 2698de2362d3Smrg R500_TEX_SRC_T_SWIZ_R | 2699de2362d3Smrg R500_TEX_SRC_R_SWIZ_R | 2700de2362d3Smrg R500_TEX_SRC_Q_SWIZ_R | 2701de2362d3Smrg R500_TEX_DST_ADDR(2) | 2702de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2703de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 2704de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2705de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 270618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 270718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 270818781e08Smrg 
OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2709de2362d3Smrg 2710de2362d3Smrg /* TEX temp5, input1.yyyy, tex1, 1D */ 271118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2712de2362d3Smrg R500_INST_TEX_SEM_WAIT | 2713de2362d3Smrg R500_INST_RGB_WMASK_R | 2714de2362d3Smrg R500_INST_RGB_WMASK_G | 2715de2362d3Smrg R500_INST_RGB_WMASK_B)); 271618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 2717de2362d3Smrg R500_TEX_INST_LD | 2718de2362d3Smrg R500_TEX_SEM_ACQUIRE | 2719de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 272018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | 2721de2362d3Smrg R500_TEX_SRC_S_SWIZ_G | 2722de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 2723de2362d3Smrg R500_TEX_SRC_R_SWIZ_G | 2724de2362d3Smrg R500_TEX_SRC_Q_SWIZ_G | 2725de2362d3Smrg R500_TEX_DST_ADDR(5) | 2726de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2727de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 2728de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2729de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 273018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 273118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 273218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2733de2362d3Smrg 2734de2362d3Smrg /* MUL temp4, const0.x0x0, temp2.yyxx */ 273518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2736de2362d3Smrg R500_INST_TEX_SEM_WAIT | 2737de2362d3Smrg R500_INST_RGB_WMASK_R | 2738de2362d3Smrg R500_INST_RGB_WMASK_G | 2739de2362d3Smrg R500_INST_RGB_WMASK_B | 2740de2362d3Smrg R500_INST_ALPHA_WMASK)); 274118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2742de2362d3Smrg R500_RGB_ADDR0_CONST | 2743de2362d3Smrg R500_RGB_ADDR1(2))); 274418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2745de2362d3Smrg R500_ALPHA_ADDR0_CONST | 2746de2362d3Smrg R500_ALPHA_ADDR1(2))); 274718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2748de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R 
| 2749de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_0 | 2750de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_R | 2751de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 2752de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 2753de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 2754de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R)); 275518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2756de2362d3Smrg R500_ALPHA_OP_MAD | 2757de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 2758de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 2759de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 2760de2362d3Smrg R500_ALPHA_SWIZ_B_R)); 276118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2762de2362d3Smrg R500_ALU_RGBA_OP_MAD | 2763de2362d3Smrg R500_ALU_RGBA_R_SWIZ_0 | 2764de2362d3Smrg R500_ALU_RGBA_G_SWIZ_0 | 2765de2362d3Smrg R500_ALU_RGBA_B_SWIZ_0 | 2766de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 2767de2362d3Smrg 2768de2362d3Smrg /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ 276918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2770de2362d3Smrg R500_INST_RGB_WMASK_R | 2771de2362d3Smrg R500_INST_RGB_WMASK_G | 2772de2362d3Smrg R500_INST_RGB_WMASK_B | 2773de2362d3Smrg R500_INST_ALPHA_WMASK)); 277418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2775de2362d3Smrg R500_RGB_ADDR0_CONST | 2776de2362d3Smrg R500_RGB_ADDR1(5) | 2777de2362d3Smrg R500_RGB_ADDR2(4))); 277818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2779de2362d3Smrg R500_ALPHA_ADDR0_CONST | 2780de2362d3Smrg R500_ALPHA_ADDR1(5) | 2781de2362d3Smrg R500_ALPHA_ADDR2(4))); 278218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2783de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_0 | 2784de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 2785de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_0 | 2786de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 2787de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 2788de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_R | 2789de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R)); 279018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 
2791de2362d3Smrg R500_ALPHA_OP_MAD | 2792de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 2793de2362d3Smrg R500_ALPHA_SWIZ_A_G | 2794de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 2795de2362d3Smrg R500_ALPHA_SWIZ_B_R)); 279618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2797de2362d3Smrg R500_ALU_RGBA_OP_MAD | 2798de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 2799de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 2800de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 2801de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 2802de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 2803de2362d3Smrg 2804de2362d3Smrg /* ADD temp3, temp3, input0.xyxy */ 280518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2806de2362d3Smrg R500_INST_RGB_WMASK_R | 2807de2362d3Smrg R500_INST_RGB_WMASK_G | 2808de2362d3Smrg R500_INST_RGB_WMASK_B | 2809de2362d3Smrg R500_INST_ALPHA_WMASK)); 281018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | 2811de2362d3Smrg R500_RGB_ADDR2(0))); 281218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | 2813de2362d3Smrg R500_ALPHA_ADDR2(0))); 281418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2815de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_1 | 2816de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_1 | 2817de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 2818de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 2819de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 2820de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 282118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 2822de2362d3Smrg R500_ALPHA_OP_MAD | 2823de2362d3Smrg R500_ALPHA_SWIZ_A_1 | 2824de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 2825de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 282618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 2827de2362d3Smrg R500_ALU_RGBA_OP_MAD | 2828de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 2829de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 2830de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 2831de2362d3Smrg R500_ALU_RGBA_B_SWIZ_R | 2832de2362d3Smrg R500_ALU_RGBA_A_SWIZ_G)); 2833de2362d3Smrg 
2834de2362d3Smrg /* TEX temp1, temp3.zwxy, tex0, 2D */ 283518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2836de2362d3Smrg R500_INST_RGB_WMASK_R | 2837de2362d3Smrg R500_INST_RGB_WMASK_G | 2838de2362d3Smrg R500_INST_RGB_WMASK_B | 2839de2362d3Smrg R500_INST_ALPHA_WMASK)); 284018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2841de2362d3Smrg R500_TEX_INST_LD | 2842de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 284318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2844de2362d3Smrg R500_TEX_SRC_S_SWIZ_B | 2845de2362d3Smrg R500_TEX_SRC_T_SWIZ_A | 2846de2362d3Smrg R500_TEX_SRC_R_SWIZ_R | 2847de2362d3Smrg R500_TEX_SRC_Q_SWIZ_G | 2848de2362d3Smrg R500_TEX_DST_ADDR(1) | 2849de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2850de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 2851de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2852de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 285318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 285418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 285518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2856de2362d3Smrg 2857de2362d3Smrg /* TEX temp3, temp3.xyzw, tex0, 2D */ 285818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2859de2362d3Smrg R500_INST_TEX_SEM_WAIT | 2860de2362d3Smrg R500_INST_RGB_WMASK_R | 2861de2362d3Smrg R500_INST_RGB_WMASK_G | 2862de2362d3Smrg R500_INST_RGB_WMASK_B | 2863de2362d3Smrg R500_INST_ALPHA_WMASK)); 286418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2865de2362d3Smrg R500_TEX_INST_LD | 2866de2362d3Smrg R500_TEX_SEM_ACQUIRE | 2867de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 286818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | 2869de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 2870de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 2871de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 2872de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 2873de2362d3Smrg R500_TEX_DST_ADDR(3) | 2874de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2875de2362d3Smrg 
R500_TEX_DST_G_SWIZ_G | 2876de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2877de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 287818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 287918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 288018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2881de2362d3Smrg 2882de2362d3Smrg /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ 288318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2884de2362d3Smrg R500_INST_RGB_WMASK_R | 2885de2362d3Smrg R500_INST_RGB_WMASK_G | 2886de2362d3Smrg R500_INST_RGB_WMASK_B | 2887de2362d3Smrg R500_INST_ALPHA_WMASK)); 288818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 2889de2362d3Smrg R500_RGB_ADDR0_CONST | 2890de2362d3Smrg R500_RGB_ADDR1(5) | 2891de2362d3Smrg R500_RGB_ADDR2(4))); 289218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 2893de2362d3Smrg R500_ALPHA_ADDR0_CONST | 2894de2362d3Smrg R500_ALPHA_ADDR1(5) | 2895de2362d3Smrg R500_ALPHA_ADDR2(4))); 289618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 2897de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_0 | 2898de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 2899de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_0 | 2900de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 2901de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 2902de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 2903de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G)); 290418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | 2905de2362d3Smrg R500_ALPHA_OP_MAD | 2906de2362d3Smrg R500_ALPHA_SEL_A_SRC0 | 2907de2362d3Smrg R500_ALPHA_SWIZ_A_G | 2908de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 2909de2362d3Smrg R500_ALPHA_SWIZ_B_G)); 291018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | 2911de2362d3Smrg R500_ALU_RGBA_OP_MAD | 2912de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 2913de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 2914de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 2915de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 2916de2362d3Smrg 
R500_ALU_RGBA_A_SWIZ_A)); 2917de2362d3Smrg 2918de2362d3Smrg /* ADD temp0, temp4, input0.xyxy */ 291918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 2920de2362d3Smrg R500_INST_RGB_WMASK_R | 2921de2362d3Smrg R500_INST_RGB_WMASK_G | 2922de2362d3Smrg R500_INST_RGB_WMASK_B | 2923de2362d3Smrg R500_INST_ALPHA_WMASK)); 292418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | 2925de2362d3Smrg R500_RGB_ADDR2(0))); 292618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | 2927de2362d3Smrg R500_ALPHA_ADDR2(0))); 292818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | 2929de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_1 | 2930de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_1 | 2931de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 2932de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 2933de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 2934de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 293518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 2936de2362d3Smrg R500_ALPHA_OP_MAD | 2937de2362d3Smrg R500_ALPHA_SWIZ_A_1 | 2938de2362d3Smrg R500_ALPHA_SEL_B_SRC1 | 2939de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 294018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 2941de2362d3Smrg R500_ALU_RGBA_OP_MAD | 2942de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 2943de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 2944de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 2945de2362d3Smrg R500_ALU_RGBA_B_SWIZ_R | 2946de2362d3Smrg R500_ALU_RGBA_A_SWIZ_G)); 2947de2362d3Smrg 2948de2362d3Smrg /* TEX temp4, temp0.zwzw, tex0, 2D */ 294918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2950de2362d3Smrg R500_INST_TEX_SEM_WAIT | 2951de2362d3Smrg R500_INST_RGB_WMASK_R | 2952de2362d3Smrg R500_INST_RGB_WMASK_G | 2953de2362d3Smrg R500_INST_RGB_WMASK_B | 2954de2362d3Smrg R500_INST_ALPHA_WMASK)); 295518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2956de2362d3Smrg R500_TEX_INST_LD | 2957de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 295818781e08Smrg 
OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2959de2362d3Smrg R500_TEX_SRC_S_SWIZ_B | 2960de2362d3Smrg R500_TEX_SRC_T_SWIZ_A | 2961de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 2962de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 2963de2362d3Smrg R500_TEX_DST_ADDR(4) | 2964de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2965de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 2966de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2967de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 296818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 296918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 297018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2971de2362d3Smrg 2972de2362d3Smrg /* TEX temp0, temp0.xyzw, tex0, 2D */ 297318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 2974de2362d3Smrg R500_INST_TEX_SEM_WAIT | 2975de2362d3Smrg R500_INST_RGB_WMASK_R | 2976de2362d3Smrg R500_INST_RGB_WMASK_G | 2977de2362d3Smrg R500_INST_RGB_WMASK_B | 2978de2362d3Smrg R500_INST_ALPHA_WMASK)); 297918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 2980de2362d3Smrg R500_TEX_INST_LD | 2981de2362d3Smrg R500_TEX_SEM_ACQUIRE | 2982de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 298318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 2984de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 2985de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 2986de2362d3Smrg R500_TEX_SRC_R_SWIZ_B | 2987de2362d3Smrg R500_TEX_SRC_Q_SWIZ_A | 2988de2362d3Smrg R500_TEX_DST_ADDR(0) | 2989de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 2990de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 2991de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 2992de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 299318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 299418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 299518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 2996de2362d3Smrg 2997de2362d3Smrg /* LRP temp3, temp2.zzzz, temp1, temp3 -> 2998de2362d3Smrg * - PRESUB temps, temp1 - temp3 2999de2362d3Smrg * - MAD temp2.zzzz, temps, 
temp3 */ 300018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3001de2362d3Smrg R500_INST_RGB_WMASK_R | 3002de2362d3Smrg R500_INST_RGB_WMASK_G | 3003de2362d3Smrg R500_INST_RGB_WMASK_B | 3004de2362d3Smrg R500_INST_ALPHA_WMASK)); 300518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | 3006de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3007de2362d3Smrg R500_RGB_ADDR1(1) | 3008de2362d3Smrg R500_RGB_ADDR2(2))); 300918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | 3010de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3011de2362d3Smrg R500_ALPHA_ADDR1(1) | 3012de2362d3Smrg R500_ALPHA_ADDR2(2))); 301318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3014de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3015de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3016de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3017de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3018de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3019de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3020de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 302118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | 3022de2362d3Smrg R500_ALPHA_OP_MAD | 3023de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3024de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3025de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3026de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 302718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | 3028de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3029de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3030de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3031de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3032de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3033de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3034de2362d3Smrg 3035de2362d3Smrg /* LRP temp0, temp2.zzzz, temp4, temp0 -> 3036de2362d3Smrg * - PRESUB temps, temp4 - temp1 3037de2362d3Smrg * - MAD temp2.zzzz, temps, temp0 */ 303818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3039de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3040de2362d3Smrg R500_INST_RGB_WMASK_R | 
3041de2362d3Smrg R500_INST_RGB_WMASK_G | 3042de2362d3Smrg R500_INST_RGB_WMASK_B | 3043de2362d3Smrg R500_INST_ALPHA_WMASK)); 304418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3045de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3046de2362d3Smrg R500_RGB_ADDR1(4) | 3047de2362d3Smrg R500_RGB_ADDR2(2))); 304818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3049de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3050de2362d3Smrg R500_ALPHA_ADDR1(4) | 3051de2362d3Smrg R500_ALPHA_ADDR2(2))); 305218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3053de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3054de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3055de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3056de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3057de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3058de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3059de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 306018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3061de2362d3Smrg R500_ALPHA_OP_MAD | 3062de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3063de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3064de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3065de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 306618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3067de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3068de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3069de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3070de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3071de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3072de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3073de2362d3Smrg 3074de2362d3Smrg /* LRP output, temp5.zzzz, temp3, temp0 -> 3075de2362d3Smrg * - PRESUB temps, temp3 - temp0 3076de2362d3Smrg * - MAD temp5.zzzz, temps, temp0 */ 307718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3078de2362d3Smrg R500_INST_LAST | 3079de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3080de2362d3Smrg R500_INST_RGB_WMASK_R | 3081de2362d3Smrg R500_INST_RGB_WMASK_G | 3082de2362d3Smrg R500_INST_RGB_WMASK_B | 3083de2362d3Smrg 
R500_INST_ALPHA_WMASK | 3084de2362d3Smrg R500_INST_RGB_OMASK_R | 3085de2362d3Smrg R500_INST_RGB_OMASK_G | 3086de2362d3Smrg R500_INST_RGB_OMASK_B | 3087de2362d3Smrg R500_INST_ALPHA_OMASK)); 308818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3089de2362d3Smrg R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | 3090de2362d3Smrg R500_RGB_ADDR1(3) | 3091de2362d3Smrg R500_RGB_ADDR2(5))); 309218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3093de2362d3Smrg R500_ALPHA_SRCP_OP_A1_MINUS_A0 | 3094de2362d3Smrg R500_ALPHA_ADDR1(3) | 3095de2362d3Smrg R500_ALPHA_ADDR2(5))); 309618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | 3097de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_B | 3098de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_B | 3099de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3100de2362d3Smrg R500_ALU_RGB_SEL_B_SRCP | 3101de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3102de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G | 3103de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B)); 310418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | 3105de2362d3Smrg R500_ALPHA_OP_MAD | 3106de2362d3Smrg R500_ALPHA_SEL_A_SRC2 | 3107de2362d3Smrg R500_ALPHA_SWIZ_A_B | 3108de2362d3Smrg R500_ALPHA_SEL_B_SRCP | 3109de2362d3Smrg R500_ALPHA_SWIZ_B_A)); 311018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | 3111de2362d3Smrg R500_ALU_RGBA_OP_MAD | 3112de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3113de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3114de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3115de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3116de2362d3Smrg R500_ALU_RGBA_A_SWIZ_A)); 3117de2362d3Smrg 3118de2362d3Smrg /* Shader constants. 
*/ 311918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3120de2362d3Smrg 3121de2362d3Smrg /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ 3122de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); 3123de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); 3124de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3125de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); 3126de2362d3Smrg 312718781e08Smrg ADVANCE_RING(); 3128de2362d3Smrg } else { 312918781e08Smrg BEGIN_RING(2*19); 3130de2362d3Smrg /* 2 components: 2 for tex0 */ 313118781e08Smrg OUT_RING_REG(R300_RS_COUNT, 3132de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3133de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3134de2362d3Smrg 3135de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 313618781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3137de2362d3Smrg 3138de2362d3Smrg /* Pixel stack frame size. */ 313918781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */ 3140de2362d3Smrg 3141de2362d3Smrg /* FP length. */ 314218781e08Smrg OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3143de2362d3Smrg R500_US_CODE_END_ADDR(1))); 314418781e08Smrg OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3145de2362d3Smrg R500_US_CODE_RANGE_SIZE(1))); 3146de2362d3Smrg 3147de2362d3Smrg /* Prepare for FP emission. 
*/ 314818781e08Smrg OUT_RING_REG(R500_US_CODE_OFFSET, 0); 314918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3150de2362d3Smrg 3151de2362d3Smrg /* tex inst */ 315218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3153de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3154de2362d3Smrg R500_INST_RGB_WMASK_R | 3155de2362d3Smrg R500_INST_RGB_WMASK_G | 3156de2362d3Smrg R500_INST_RGB_WMASK_B | 3157de2362d3Smrg R500_INST_ALPHA_WMASK | 3158de2362d3Smrg R500_INST_RGB_CLAMP | 3159de2362d3Smrg R500_INST_ALPHA_CLAMP)); 316018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3161de2362d3Smrg R500_TEX_INST_LD | 3162de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3163de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 316418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3165de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3166de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3167de2362d3Smrg R500_TEX_DST_ADDR(0) | 3168de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3169de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3170de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3171de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 317218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3173de2362d3Smrg R500_DX_S_SWIZ_R | 3174de2362d3Smrg R500_DX_T_SWIZ_R | 3175de2362d3Smrg R500_DX_R_SWIZ_R | 3176de2362d3Smrg R500_DX_Q_SWIZ_R | 3177de2362d3Smrg R500_DY_ADDR(0) | 3178de2362d3Smrg R500_DY_S_SWIZ_R | 3179de2362d3Smrg R500_DY_T_SWIZ_R | 3180de2362d3Smrg R500_DY_R_SWIZ_R | 3181de2362d3Smrg R500_DY_Q_SWIZ_R)); 318218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 318318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3184de2362d3Smrg 3185de2362d3Smrg /* ALU inst */ 318618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3187de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3188de2362d3Smrg R500_INST_LAST | 3189de2362d3Smrg R500_INST_RGB_OMASK_R | 3190de2362d3Smrg R500_INST_RGB_OMASK_G | 3191de2362d3Smrg R500_INST_RGB_OMASK_B | 3192de2362d3Smrg 
R500_INST_ALPHA_OMASK | 3193de2362d3Smrg R500_INST_RGB_CLAMP | 3194de2362d3Smrg R500_INST_ALPHA_CLAMP)); 319518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3196de2362d3Smrg R500_RGB_ADDR1(0) | 3197de2362d3Smrg R500_RGB_ADDR1_CONST | 3198de2362d3Smrg R500_RGB_ADDR2(0) | 3199de2362d3Smrg R500_RGB_ADDR2_CONST)); 320018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3201de2362d3Smrg R500_ALPHA_ADDR1(0) | 3202de2362d3Smrg R500_ALPHA_ADDR1_CONST | 3203de2362d3Smrg R500_ALPHA_ADDR2(0) | 3204de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 320518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3206de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3207de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3208de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3209de2362d3Smrg R500_ALU_RGB_SEL_B_SRC0 | 3210de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_1 | 3211de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_1 | 3212de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_1)); 321318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3214de2362d3Smrg R500_ALPHA_SWIZ_A_A | 3215de2362d3Smrg R500_ALPHA_SWIZ_B_1)); 321618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3217de2362d3Smrg R500_ALU_RGBA_R_SWIZ_0 | 3218de2362d3Smrg R500_ALU_RGBA_G_SWIZ_0 | 3219de2362d3Smrg R500_ALU_RGBA_B_SWIZ_0 | 3220de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 322118781e08Smrg ADVANCE_RING(); 3222de2362d3Smrg } 3223de2362d3Smrg } else { 3224de2362d3Smrg /* 3225de2362d3Smrg * y' = y - .0625 3226de2362d3Smrg * u' = u - .5 3227de2362d3Smrg * v' = v - .5; 3228de2362d3Smrg * 3229de2362d3Smrg * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 3230de2362d3Smrg * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 3231de2362d3Smrg * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 3232de2362d3Smrg * 3233de2362d3Smrg * DP3 might look like the straightforward solution 3234de2362d3Smrg * but we'd need to move the texture yuv values in 3235de2362d3Smrg * the same reg for this to work. Therefore use MADs. 
3236de2362d3Smrg * Brightness just adds to the off constant. 3237de2362d3Smrg * Contrast is multiplication of luminance. 3238de2362d3Smrg * Saturation and hue change the u and v coeffs. 3239de2362d3Smrg * Default values (before adjustments - depend on colorspace): 3240de2362d3Smrg * yco = 1.1643 3241de2362d3Smrg * uco = 0, -0.39173, 2.017 3242de2362d3Smrg * vco = 1.5958, -0.8129, 0 3243de2362d3Smrg * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 3244de2362d3Smrg * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 3245de2362d3Smrg * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 3246de2362d3Smrg * 3247de2362d3Smrg * temp = MAD(yco, yuv.yyyy, off) 3248de2362d3Smrg * temp = MAD(uco, yuv.uuuu, temp) 3249de2362d3Smrg * result = MAD(vco, yuv.vvvv, temp) 3250de2362d3Smrg */ 3251de2362d3Smrg /* TODO: don't recalc consts always */ 3252de2362d3Smrg const float Loff = -0.0627; 3253de2362d3Smrg const float Coff = -0.502; 3254de2362d3Smrg float uvcosf, uvsinf; 3255de2362d3Smrg float yco; 3256de2362d3Smrg float uco[3], vco[3], off[3]; 3257de2362d3Smrg float bright, cont, gamma; 3258de2362d3Smrg int ref = pPriv->transform_index; 3259de2362d3Smrg 3260de2362d3Smrg cont = RTFContrast(pPriv->contrast); 3261de2362d3Smrg bright = RTFBrightness(pPriv->brightness); 3262de2362d3Smrg gamma = (float)pPriv->gamma / 1000.0; 3263de2362d3Smrg uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 3264de2362d3Smrg uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 3265de2362d3Smrg /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ 3266de2362d3Smrg 3267de2362d3Smrg yco = trans[ref].RefLuma * cont; 3268de2362d3Smrg uco[0] = -trans[ref].RefRCr * uvsinf; 3269de2362d3Smrg uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 3270de2362d3Smrg uco[2] = trans[ref].RefBCb * uvcosf; 3271de2362d3Smrg vco[0] = trans[ref].RefRCr * uvcosf; 3272de2362d3Smrg vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 3273de2362d3Smrg vco[2] = trans[ref].RefBCb * uvsinf; 3274de2362d3Smrg off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 3275de2362d3Smrg off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 3276de2362d3Smrg off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 3277de2362d3Smrg 3278de2362d3Smrg //XXX gamma 3279de2362d3Smrg 3280de2362d3Smrg if (pPriv->is_planar) { 328118781e08Smrg BEGIN_RING(2*56); 3282de2362d3Smrg /* 2 components: 2 for tex0 */ 328318781e08Smrg OUT_RING_REG(R300_RS_COUNT, 3284de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3285de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3286de2362d3Smrg 3287de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 328818781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3289de2362d3Smrg 3290de2362d3Smrg /* Pixel stack frame size. */ 329118781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */ 3292de2362d3Smrg 3293de2362d3Smrg /* FP length. */ 329418781e08Smrg OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3295de2362d3Smrg R500_US_CODE_END_ADDR(5))); 329618781e08Smrg OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3297de2362d3Smrg R500_US_CODE_RANGE_SIZE(5))); 3298de2362d3Smrg 3299de2362d3Smrg /* Prepare for FP emission. 
*/ 330018781e08Smrg OUT_RING_REG(R500_US_CODE_OFFSET, 0); 330118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3302de2362d3Smrg 3303de2362d3Smrg /* tex inst */ 330418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3305de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3306de2362d3Smrg R500_INST_RGB_WMASK_R | 3307de2362d3Smrg R500_INST_RGB_WMASK_G | 3308de2362d3Smrg R500_INST_RGB_WMASK_B | 3309de2362d3Smrg R500_INST_ALPHA_WMASK | 3310de2362d3Smrg R500_INST_RGB_CLAMP | 3311de2362d3Smrg R500_INST_ALPHA_CLAMP)); 331218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3313de2362d3Smrg R500_TEX_INST_LD | 3314de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 331518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3316de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3317de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3318de2362d3Smrg R500_TEX_DST_ADDR(2) | 3319de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3320de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3321de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3322de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 332318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3324de2362d3Smrg R500_DX_S_SWIZ_R | 3325de2362d3Smrg R500_DX_T_SWIZ_R | 3326de2362d3Smrg R500_DX_R_SWIZ_R | 3327de2362d3Smrg R500_DX_Q_SWIZ_R | 3328de2362d3Smrg R500_DY_ADDR(0) | 3329de2362d3Smrg R500_DY_S_SWIZ_R | 3330de2362d3Smrg R500_DY_T_SWIZ_R | 3331de2362d3Smrg R500_DY_R_SWIZ_R | 3332de2362d3Smrg R500_DY_Q_SWIZ_R)); 333318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 333418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3335de2362d3Smrg 3336de2362d3Smrg /* tex inst */ 333718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3338de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3339de2362d3Smrg R500_INST_RGB_WMASK_R | 3340de2362d3Smrg R500_INST_RGB_WMASK_G | 3341de2362d3Smrg R500_INST_RGB_WMASK_B | 3342de2362d3Smrg R500_INST_ALPHA_WMASK | 3343de2362d3Smrg R500_INST_RGB_CLAMP | 3344de2362d3Smrg 
R500_INST_ALPHA_CLAMP)); 334518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | 3346de2362d3Smrg R500_TEX_INST_LD | 3347de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 334818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3349de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3350de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3351de2362d3Smrg R500_TEX_DST_ADDR(1) | 3352de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3353de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3354de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3355de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 335618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3357de2362d3Smrg R500_DX_S_SWIZ_R | 3358de2362d3Smrg R500_DX_T_SWIZ_R | 3359de2362d3Smrg R500_DX_R_SWIZ_R | 3360de2362d3Smrg R500_DX_Q_SWIZ_R | 3361de2362d3Smrg R500_DY_ADDR(0) | 3362de2362d3Smrg R500_DY_S_SWIZ_R | 3363de2362d3Smrg R500_DY_T_SWIZ_R | 3364de2362d3Smrg R500_DY_R_SWIZ_R | 3365de2362d3Smrg R500_DY_Q_SWIZ_R)); 336618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 336718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3368de2362d3Smrg 3369de2362d3Smrg /* tex inst */ 337018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3371de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3372de2362d3Smrg R500_INST_RGB_WMASK_R | 3373de2362d3Smrg R500_INST_RGB_WMASK_G | 3374de2362d3Smrg R500_INST_RGB_WMASK_B | 3375de2362d3Smrg R500_INST_ALPHA_WMASK | 3376de2362d3Smrg R500_INST_RGB_CLAMP | 3377de2362d3Smrg R500_INST_ALPHA_CLAMP)); 337818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | 3379de2362d3Smrg R500_TEX_INST_LD | 3380de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3381de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 338218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3383de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3384de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3385de2362d3Smrg R500_TEX_DST_ADDR(0) | 3386de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3387de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 3388de2362d3Smrg 
R500_TEX_DST_B_SWIZ_B | 3389de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 339018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3391de2362d3Smrg R500_DX_S_SWIZ_R | 3392de2362d3Smrg R500_DX_T_SWIZ_R | 3393de2362d3Smrg R500_DX_R_SWIZ_R | 3394de2362d3Smrg R500_DX_Q_SWIZ_R | 3395de2362d3Smrg R500_DY_ADDR(0) | 3396de2362d3Smrg R500_DY_S_SWIZ_R | 3397de2362d3Smrg R500_DY_T_SWIZ_R | 3398de2362d3Smrg R500_DY_R_SWIZ_R | 3399de2362d3Smrg R500_DY_Q_SWIZ_R)); 340018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 340118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3402de2362d3Smrg 3403de2362d3Smrg /* ALU inst */ 3404de2362d3Smrg /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ 340518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3406de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3407de2362d3Smrg R500_INST_RGB_WMASK_R | 3408de2362d3Smrg R500_INST_RGB_WMASK_G | 3409de2362d3Smrg R500_INST_RGB_WMASK_B | 3410de2362d3Smrg R500_INST_ALPHA_WMASK)); 341118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3412de2362d3Smrg R500_RGB_ADDR0_CONST | 3413de2362d3Smrg R500_RGB_ADDR1(2) | 3414de2362d3Smrg R500_RGB_ADDR2(0) | 3415de2362d3Smrg R500_RGB_ADDR2_CONST)); 341618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3417de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3418de2362d3Smrg R500_ALPHA_ADDR1(2) | 3419de2362d3Smrg R500_ALPHA_ADDR2(0) | 3420de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 342118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3422de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_A | 3423de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_A | 3424de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_A | 3425de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3426de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3427de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3428de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 342918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3430de2362d3Smrg R500_ALPHA_ADDRD(2) | 3431de2362d3Smrg 
R500_ALPHA_SWIZ_A_0 | 3432de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 343318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3434de2362d3Smrg R500_ALU_RGBA_ADDRD(2) | 3435de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3436de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3437de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3438de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3439de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3440de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3441de2362d3Smrg 3442de2362d3Smrg /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ 344318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3444de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3445de2362d3Smrg R500_INST_RGB_WMASK_R | 3446de2362d3Smrg R500_INST_RGB_WMASK_G | 3447de2362d3Smrg R500_INST_RGB_WMASK_B | 3448de2362d3Smrg R500_INST_ALPHA_WMASK)); 344918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3450de2362d3Smrg R500_RGB_ADDR0_CONST | 3451de2362d3Smrg R500_RGB_ADDR1(1) | 3452de2362d3Smrg R500_RGB_ADDR2(2))); 345318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3454de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3455de2362d3Smrg R500_ALPHA_ADDR1(1) | 3456de2362d3Smrg R500_ALPHA_ADDR2(2))); 345718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3458de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3459de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3460de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3461de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3462de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3463de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3464de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 346518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3466de2362d3Smrg R500_ALPHA_ADDRD(2) | 3467de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3468de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 346918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3470de2362d3Smrg R500_ALU_RGBA_ADDRD(2) | 3471de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3472de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 
3473de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3474de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3475de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3476de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3477de2362d3Smrg 3478de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ 347918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3480de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3481de2362d3Smrg R500_INST_LAST | 3482de2362d3Smrg R500_INST_RGB_OMASK_R | 3483de2362d3Smrg R500_INST_RGB_OMASK_G | 3484de2362d3Smrg R500_INST_RGB_OMASK_B | 3485de2362d3Smrg R500_INST_ALPHA_OMASK | 3486de2362d3Smrg R500_INST_RGB_CLAMP | 3487de2362d3Smrg R500_INST_ALPHA_CLAMP)); 348818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3489de2362d3Smrg R500_RGB_ADDR0_CONST | 3490de2362d3Smrg R500_RGB_ADDR1(0) | 3491de2362d3Smrg R500_RGB_ADDR2(2))); 349218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | 3493de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3494de2362d3Smrg R500_ALPHA_ADDR1(0) | 3495de2362d3Smrg R500_ALPHA_ADDR2(2))); 349618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3497de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3498de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3499de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3500de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3501de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3502de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3503de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 350418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3505de2362d3Smrg R500_ALPHA_ADDRD(0) | 3506de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3507de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 350818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3509de2362d3Smrg R500_ALU_RGBA_ADDRD(0) | 3510de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3511de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3512de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3513de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3514de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3515de2362d3Smrg 
R500_ALU_RGBA_A_SWIZ_1)); 3516de2362d3Smrg 3517de2362d3Smrg } else { 351818781e08Smrg BEGIN_RING(2*44); 3519de2362d3Smrg /* 2 components: 2 for tex0/1/2 */ 352018781e08Smrg OUT_RING_REG(R300_RS_COUNT, 3521de2362d3Smrg ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | 3522de2362d3Smrg R300_RS_COUNT_HIRES_EN)); 3523de2362d3Smrg 3524de2362d3Smrg /* R300_INST_COUNT_RS - highest RS instruction used */ 352518781e08Smrg OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); 3526de2362d3Smrg 3527de2362d3Smrg /* Pixel stack frame size. */ 352818781e08Smrg OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */ 3529de2362d3Smrg 3530de2362d3Smrg /* FP length. */ 353118781e08Smrg OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | 3532de2362d3Smrg R500_US_CODE_END_ADDR(3))); 353318781e08Smrg OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | 3534de2362d3Smrg R500_US_CODE_RANGE_SIZE(3))); 3535de2362d3Smrg 3536de2362d3Smrg /* Prepare for FP emission. */ 353718781e08Smrg OUT_RING_REG(R500_US_CODE_OFFSET, 0); 353818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); 3539de2362d3Smrg 3540de2362d3Smrg /* tex inst */ 354118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | 3542de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3543de2362d3Smrg R500_INST_RGB_WMASK_R | 3544de2362d3Smrg R500_INST_RGB_WMASK_G | 3545de2362d3Smrg R500_INST_RGB_WMASK_B | 3546de2362d3Smrg R500_INST_ALPHA_WMASK | 3547de2362d3Smrg R500_INST_RGB_CLAMP | 3548de2362d3Smrg R500_INST_ALPHA_CLAMP)); 354918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | 3550de2362d3Smrg R500_TEX_INST_LD | 3551de2362d3Smrg R500_TEX_SEM_ACQUIRE | 3552de2362d3Smrg R500_TEX_IGNORE_UNCOVERED)); 355318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | 3554de2362d3Smrg R500_TEX_SRC_S_SWIZ_R | 3555de2362d3Smrg R500_TEX_SRC_T_SWIZ_G | 3556de2362d3Smrg R500_TEX_DST_ADDR(0) | 3557de2362d3Smrg R500_TEX_DST_R_SWIZ_R | 3558de2362d3Smrg R500_TEX_DST_G_SWIZ_G | 
3559de2362d3Smrg R500_TEX_DST_B_SWIZ_B | 3560de2362d3Smrg R500_TEX_DST_A_SWIZ_A)); 356118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | 3562de2362d3Smrg R500_DX_S_SWIZ_R | 3563de2362d3Smrg R500_DX_T_SWIZ_R | 3564de2362d3Smrg R500_DX_R_SWIZ_R | 3565de2362d3Smrg R500_DX_Q_SWIZ_R | 3566de2362d3Smrg R500_DY_ADDR(0) | 3567de2362d3Smrg R500_DY_S_SWIZ_R | 3568de2362d3Smrg R500_DY_T_SWIZ_R | 3569de2362d3Smrg R500_DY_R_SWIZ_R | 3570de2362d3Smrg R500_DY_Q_SWIZ_R)); 357118781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 357218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000); 3573de2362d3Smrg 3574de2362d3Smrg /* ALU inst */ 3575de2362d3Smrg /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ 357618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3577de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3578de2362d3Smrg R500_INST_RGB_WMASK_R | 3579de2362d3Smrg R500_INST_RGB_WMASK_G | 3580de2362d3Smrg R500_INST_RGB_WMASK_B | 3581de2362d3Smrg R500_INST_ALPHA_WMASK)); 358218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | 3583de2362d3Smrg R500_RGB_ADDR0_CONST | 3584de2362d3Smrg R500_RGB_ADDR1(0) | 3585de2362d3Smrg R500_RGB_ADDR2(0) | 3586de2362d3Smrg R500_RGB_ADDR2_CONST)); 358718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | 3588de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3589de2362d3Smrg R500_ALPHA_ADDR1(0) | 3590de2362d3Smrg R500_ALPHA_ADDR2(0) | 3591de2362d3Smrg R500_ALPHA_ADDR2_CONST)); 359218781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3593de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_A | 3594de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_A | 3595de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_A | 3596de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3597de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_G | 3598de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_G | 3599de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_G)); 360018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3601de2362d3Smrg R500_ALPHA_ADDRD(1) | 
3602de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3603de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 360418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3605de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 3606de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC0 | 3607de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3608de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3609de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3610de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3611de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3612de2362d3Smrg 3613de2362d3Smrg /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ 361418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | 3615de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3616de2362d3Smrg R500_INST_RGB_WMASK_R | 3617de2362d3Smrg R500_INST_RGB_WMASK_G | 3618de2362d3Smrg R500_INST_RGB_WMASK_B | 3619de2362d3Smrg R500_INST_ALPHA_WMASK)); 362018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | 3621de2362d3Smrg R500_RGB_ADDR0_CONST | 3622de2362d3Smrg R500_RGB_ADDR1(0) | 3623de2362d3Smrg R500_RGB_ADDR2(1))); 362418781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3625de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3626de2362d3Smrg R500_ALPHA_ADDR1(0) | 3627de2362d3Smrg R500_ALPHA_ADDR2(1))); 362818781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3629de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3630de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3631de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3632de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3633de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_B | 3634de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_B | 3635de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_B)); 363618781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3637de2362d3Smrg R500_ALPHA_ADDRD(1) | 3638de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3639de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 364018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3641de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 3642de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3643de2362d3Smrg 
R500_ALU_RGBA_R_SWIZ_R | 3644de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3645de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3646de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 3647de2362d3Smrg R500_ALU_RGBA_A_SWIZ_0)); 3648de2362d3Smrg 3649de2362d3Smrg /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ 365018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | 3651de2362d3Smrg R500_INST_TEX_SEM_WAIT | 3652de2362d3Smrg R500_INST_LAST | 3653de2362d3Smrg R500_INST_RGB_OMASK_R | 3654de2362d3Smrg R500_INST_RGB_OMASK_G | 3655de2362d3Smrg R500_INST_RGB_OMASK_B | 3656de2362d3Smrg R500_INST_ALPHA_OMASK | 3657de2362d3Smrg R500_INST_RGB_CLAMP | 3658de2362d3Smrg R500_INST_ALPHA_CLAMP)); 365918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | 3660de2362d3Smrg R500_RGB_ADDR0_CONST | 3661de2362d3Smrg R500_RGB_ADDR1(0) | 3662de2362d3Smrg R500_RGB_ADDR2(1))); 366318781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | 3664de2362d3Smrg R500_ALPHA_ADDR0_CONST | 3665de2362d3Smrg R500_ALPHA_ADDR1(0) | 3666de2362d3Smrg R500_ALPHA_ADDR2(1))); 366718781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | 3668de2362d3Smrg R500_ALU_RGB_R_SWIZ_A_R | 3669de2362d3Smrg R500_ALU_RGB_G_SWIZ_A_G | 3670de2362d3Smrg R500_ALU_RGB_B_SWIZ_A_B | 3671de2362d3Smrg R500_ALU_RGB_SEL_B_SRC1 | 3672de2362d3Smrg R500_ALU_RGB_R_SWIZ_B_R | 3673de2362d3Smrg R500_ALU_RGB_B_SWIZ_B_R | 3674de2362d3Smrg R500_ALU_RGB_G_SWIZ_B_R)); 367518781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | 3676de2362d3Smrg R500_ALPHA_ADDRD(1) | 3677de2362d3Smrg R500_ALPHA_SWIZ_A_0 | 3678de2362d3Smrg R500_ALPHA_SWIZ_B_0)); 367918781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | 3680de2362d3Smrg R500_ALU_RGBA_ADDRD(1) | 3681de2362d3Smrg R500_ALU_RGBA_SEL_C_SRC2 | 3682de2362d3Smrg R500_ALU_RGBA_R_SWIZ_R | 3683de2362d3Smrg R500_ALU_RGBA_G_SWIZ_G | 3684de2362d3Smrg R500_ALU_RGBA_B_SWIZ_B | 3685de2362d3Smrg R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | 
3686de2362d3Smrg R500_ALU_RGBA_A_SWIZ_1)); 3687de2362d3Smrg } 3688de2362d3Smrg 3689de2362d3Smrg /* Shader constants. */ 369018781e08Smrg OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); 3691de2362d3Smrg 3692de2362d3Smrg /* constant 0: off, yco */ 3693de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); 3694de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); 3695de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); 3696de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); 3697de2362d3Smrg /* constant 1: uco */ 3698de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); 3699de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); 3700de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); 3701de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); 3702de2362d3Smrg /* constant 2: vco */ 3703de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); 3704de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); 3705de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); 3706de2362d3Smrg OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); 3707de2362d3Smrg 370818781e08Smrg ADVANCE_RING(); 3709de2362d3Smrg } 3710de2362d3Smrg 3711de2362d3Smrg BEGIN_ACCEL_RELOC(6, 2); 371218781e08Smrg OUT_RING_REG(R300_TX_INVALTAGS, 0); 371318781e08Smrg OUT_RING_REG(R300_TX_ENABLE, txenable); 3714de2362d3Smrg 3715de2362d3Smrg EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); 3716de2362d3Smrg EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); 3717de2362d3Smrg 3718de2362d3Smrg /* no need to enable blending */ 371918781e08Smrg OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 3720de2362d3Smrg 372118781e08Smrg OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count); 372218781e08Smrg ADVANCE_RING(); 3723de2362d3Smrg 3724de2362d3Smrg if (pPriv->vsync) { 3725de2362d3Smrg xf86CrtcPtr crtc; 3726de2362d3Smrg if (pPriv->desired_crtc) 3727de2362d3Smrg crtc = 
pPriv->desired_crtc; 3728de2362d3Smrg else 372918781e08Smrg crtc = radeon_pick_best_crtc(pScrn, FALSE, 3730de2362d3Smrg pPriv->drw_x, 3731de2362d3Smrg pPriv->drw_x + pPriv->dst_w, 3732de2362d3Smrg pPriv->drw_y, 3733de2362d3Smrg pPriv->drw_y + pPriv->dst_h); 3734de2362d3Smrg if (crtc) 373518781e08Smrg RADEONWaitForVLine(pScrn, pPixmap, 373618781e08Smrg crtc, 373718781e08Smrg pPriv->drw_y - crtc->y, 373818781e08Smrg (pPriv->drw_y - crtc->y) + pPriv->dst_h); 3739de2362d3Smrg } 3740de2362d3Smrg 3741de2362d3Smrg return TRUE; 3742de2362d3Smrg} 3743de2362d3Smrg 3744de2362d3Smrgstatic void 374518781e08SmrgR500DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 3746de2362d3Smrg{ 3747de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 3748de2362d3Smrg PixmapPtr pPixmap = pPriv->pPixmap; 3749de2362d3Smrg int dstxoff, dstyoff; 3750de2362d3Smrg BoxPtr pBox = REGION_RECTS(&pPriv->clip); 3751de2362d3Smrg int nBox = REGION_NUM_RECTS(&pPriv->clip); 3752de2362d3Smrg 3753de2362d3Smrg#ifdef COMPOSITE 3754de2362d3Smrg dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 3755de2362d3Smrg dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 3756de2362d3Smrg#else 3757de2362d3Smrg dstxoff = 0; 3758de2362d3Smrg dstyoff = 0; 3759de2362d3Smrg#endif 3760de2362d3Smrg 376118781e08Smrg if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3762de2362d3Smrg return; 3763de2362d3Smrg 3764de2362d3Smrg /* 3765de2362d3Smrg * Rendering of the actual polygon is done in two different 3766de2362d3Smrg * ways depending on chip generation: 3767de2362d3Smrg * 3768de2362d3Smrg * < R300: 3769de2362d3Smrg * 3770de2362d3Smrg * These chips can render a rectangle in one pass, so 3771de2362d3Smrg * handling is pretty straight-forward. 3772de2362d3Smrg * 3773de2362d3Smrg * >= R300: 3774de2362d3Smrg * 3775de2362d3Smrg * These chips can accept a quad, but will render it as 3776de2362d3Smrg * two triangles which results in a diagonal tear. 
Instead 3777de2362d3Smrg * We render a single, large triangle and use the scissor 3778de2362d3Smrg * functionality to restrict it to the desired rectangle. 3779de2362d3Smrg * Due to guardband limits on r3xx/r4xx, we can only use 3780de2362d3Smrg * the single triangle up to 2880 pixels; above that we 3781de2362d3Smrg * render as a quad. 3782de2362d3Smrg */ 3783de2362d3Smrg 3784de2362d3Smrg while (nBox--) { 3785de2362d3Smrg float srcX, srcY, srcw, srch; 3786de2362d3Smrg int dstX, dstY, dstw, dsth; 3787de2362d3Smrg int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3; 3788de2362d3Smrg 3789de2362d3Smrg if (draw_size > radeon_cs_space_remaining(pScrn)) { 379018781e08Smrg radeon_cs_flush_indirect(pScrn); 379118781e08Smrg if (!R500PrepareTexturedVideo(pScrn, pPriv)) 3792de2362d3Smrg return; 3793de2362d3Smrg } 3794de2362d3Smrg 3795de2362d3Smrg dstX = pBox->x1 + dstxoff; 3796de2362d3Smrg dstY = pBox->y1 + dstyoff; 3797de2362d3Smrg dstw = pBox->x2 - pBox->x1; 3798de2362d3Smrg dsth = pBox->y2 - pBox->y1; 3799de2362d3Smrg 3800de2362d3Smrg srcX = pPriv->src_x; 3801de2362d3Smrg srcX += ((pBox->x1 - pPriv->drw_x) * 3802de2362d3Smrg pPriv->src_w) / (float)pPriv->dst_w; 3803de2362d3Smrg srcY = pPriv->src_y; 3804de2362d3Smrg srcY += ((pBox->y1 - pPriv->drw_y) * 3805de2362d3Smrg pPriv->src_h) / (float)pPriv->dst_h; 3806de2362d3Smrg 3807de2362d3Smrg srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 3808de2362d3Smrg srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 3809de2362d3Smrg 381018781e08Smrg BEGIN_RING(2*2); 381118781e08Smrg OUT_RING_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | 3812de2362d3Smrg ((dstY) << R300_SCISSOR_Y_SHIFT))); 381318781e08Smrg OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | 3814de2362d3Smrg ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); 381518781e08Smrg ADVANCE_RING(); 3816de2362d3Smrg 3817de2362d3Smrg BEGIN_RING(3 * pPriv->vtx_count + 4); 3818de2362d3Smrg OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 
3819de2362d3Smrg 3 * pPriv->vtx_count)); 3820de2362d3Smrg OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | 3821de2362d3Smrg RADEON_CP_VC_CNTL_PRIM_WALK_RING | 3822de2362d3Smrg (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); 382318781e08Smrg 3824de2362d3Smrg if (pPriv->bicubic_enabled) { 3825de2362d3Smrg VTX_OUT_6((float)dstX, (float)dstY, 3826de2362d3Smrg (float)srcX / pPriv->w, (float)srcY / pPriv->h, 3827de2362d3Smrg (float)srcX + 0.5, (float)srcY + 0.5); 3828de2362d3Smrg VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), 3829de2362d3Smrg (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, 3830de2362d3Smrg (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); 3831de2362d3Smrg VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, 3832de2362d3Smrg ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3833de2362d3Smrg (float)srcY / pPriv->h, 3834de2362d3Smrg (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, 3835de2362d3Smrg (float)srcY + 0.5); 3836de2362d3Smrg } else { 3837de2362d3Smrg /* 3838de2362d3Smrg * Render a big, scissored triangle. This means 3839de2362d3Smrg * increasing the triangle size and adjusting 3840de2362d3Smrg * texture coordinates. 
3841de2362d3Smrg */ 3842de2362d3Smrg VTX_OUT_4((float)dstX, (float)dstY, 3843de2362d3Smrg (float)srcX / pPriv->w, (float)srcY / pPriv->h); 3844de2362d3Smrg VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), 3845de2362d3Smrg (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); 3846de2362d3Smrg VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, 3847de2362d3Smrg ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, 3848de2362d3Smrg (float)srcY / pPriv->h); 3849de2362d3Smrg } 3850de2362d3Smrg 3851de2362d3Smrg /* flushing is pipelined, free/finish is not */ 385218781e08Smrg OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); 3853de2362d3Smrg 3854de2362d3Smrg ADVANCE_RING(); 3855de2362d3Smrg 3856de2362d3Smrg pBox++; 3857de2362d3Smrg } 3858de2362d3Smrg 385918781e08Smrg BEGIN_RING(2*3); 386018781e08Smrg OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA); 386118781e08Smrg OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); 386218781e08Smrg OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); 386318781e08Smrg ADVANCE_RING(); 3864de2362d3Smrg 3865de2362d3Smrg DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 3866de2362d3Smrg} 3867de2362d3Smrg 3868de2362d3Smrg#undef VTX_OUT_4 3869de2362d3Smrg#undef VTX_OUT_6 3870